libc: add db code from netbsd

This commit is contained in:
Ben Gras 2010-07-14 17:46:18 +00:00
parent 8d24932c80
commit 2639ae9b17
57 changed files with 13200 additions and 0 deletions

View file

@ -6,6 +6,7 @@ LIB= c
.include "${.CURDIR}/ansi/Makefile.inc"
.include "${.CURDIR}/asyn/Makefile.inc"
.include "${.CURDIR}/db/Makefile.inc"
.include "${.CURDIR}/ip/Makefile.inc"
.include "${.CURDIR}/math/Makefile.inc"
.include "${.CURDIR}/other/Makefile.inc"

11
lib/libc/db/Makefile.inc Normal file
View file

@ -0,0 +1,11 @@
# $NetBSD: Makefile.inc,v 1.6 1997/10/22 23:14:11 lukem Exp $
# @(#)Makefile.inc 8.2 (Berkeley) 2/21/94
#
CPPFLAGS+=-D__DBINTERFACE_PRIVATE
.include "${.CURDIR}/db/btree/Makefile.inc"
.include "${.CURDIR}/db/db/Makefile.inc"
.include "${.CURDIR}/db/hash/Makefile.inc"
.include "${.CURDIR}/db/man/Makefile.inc"
.include "${.CURDIR}/db/mpool/Makefile.inc"
.include "${.CURDIR}/db/recno/Makefile.inc"

41
lib/libc/db/README Normal file
View file

@ -0,0 +1,41 @@
# $NetBSD: README,v 1.3 1996/05/03 21:17:07 cgd Exp $
# @(#)README 8.27 (Berkeley) 9/1/94
This is version 1.85 of the Berkeley DB code.
For information on compiling and installing this software, see the file
PORT/README.
Newer versions of this software will periodically be made available by
anonymous ftp from ftp.cs.berkeley.edu. An archive in compressed format
is in ucb/4bsd/db.tar.Z, or in gzip format in ucb/4bsd/db.tar.gz. If
you'd like to receive announcements of future releases of this software,
send email to the contact address below.
Email questions may be addressed to Keith Bostic at bostic@cs.berkeley.edu.
============================================
Distribution contents:
Makefile.inc Ignore this, it's the 4.4BSD subsystem Makefile.
PORT The per OS/architecture directories to use to build
libdb.a, if you're not running 4.4BSD. See the file
PORT/README for more information.
README This file.
btree The B+tree routines.
changelog List of changes, per version.
db The dbopen(3) interface routine.
docs Various USENIX papers, and the formatted manual pages.
hash The extended linear hashing routines.
man The unformatted manual pages.
mpool The memory pool routines.
recno The fixed/variable length record routines.
test Test package.
============================================
Debugging:
If you're running a memory checker (e.g. Purify) on DB, make sure that
you recompile it with "-DPURIFY" in the CFLAGS, first. By default,
allocated pages are not initialized by the DB code, and they will show
up as reads of uninitialized memory in the buffer write routines.

View file

@ -0,0 +1,8 @@
# $NetBSD: Makefile.inc,v 1.6 1996/05/03 21:50:36 cgd Exp $
# @(#)Makefile.inc 8.2 (Berkeley) 7/14/94
.PATH: ${.CURDIR}/db/btree
SRCS+= bt_close.c bt_conv.c bt_debug.c bt_delete.c bt_get.c bt_open.c \
bt_overflow.c bt_page.c bt_put.c bt_search.c bt_seq.c bt_split.c \
bt_utils.c

View file

@ -0,0 +1,184 @@
/* $NetBSD: bt_close.c,v 1.14 2008/09/11 12:58:00 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: bt_close.c,v 1.14 2008/09/11 12:58:00 joerg Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <db.h>
#include "btree.h"
static int bt_meta(BTREE *);
/*
 * __bt_close --
 *	Shut a btree down: flush it, close its page pool, release the
 *	memory hanging off it and close the underlying file descriptor.
 *
 * Parameters:
 *	dbp:	pointer to access method
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS
 */
int
__bt_close(DB *dbp)
{
	BTREE *tree = dbp->internal;
	DBT *scratch[3];
	size_t n;
	int descriptor;

	/* Drop any page still pinned from an earlier call. */
	if (tree->bt_pinned != NULL) {
		mpool_put(tree->bt_mp, tree->bt_pinned, 0);
		tree->bt_pinned = NULL;
	}

	/*
	 * Flush the tree, then close the pool.  A failure in either step
	 * returns before anything below has been released.
	 */
	if (__bt_sync(dbp, 0) == RET_ERROR)
		return (RET_ERROR);
	if (mpool_close(tree->bt_mp) == RET_ERROR)
		return (RET_ERROR);

	/* Release the cursor key and the bt_rkey/bt_rdata buffers. */
	scratch[0] = &tree->bt_cursor.key;
	scratch[1] = &tree->bt_rkey;
	scratch[2] = &tree->bt_rdata;
	for (n = 0; n < sizeof(scratch) / sizeof(scratch[0]); n++) {
		if (scratch[n]->data == NULL)
			continue;
		free(scratch[n]->data);
		scratch[n]->size = 0;
		scratch[n]->data = NULL;
	}

	/* Save the descriptor first: it lives in the structure being freed. */
	descriptor = tree->bt_fd;
	free(tree);
	free(dbp);

	if (close(descriptor) != 0)
		return (RET_ERROR);
	return (RET_SUCCESS);
}
/*
 * __bt_sync --
 *	Write the btree's modified state out through the page pool.
 *
 * Parameters:
 *	dbp:	pointer to access method
 *	flags:	must be 0; anything else fails with EINVAL
 *
 * Returns:
 *	RET_SUCCESS, RET_ERROR.
 */
int
__bt_sync(const DB *dbp, u_int flags)
{
	BTREE *tree = dbp->internal;
	int rc;

	/* Drop any page still pinned from an earlier call. */
	if (tree->bt_pinned != NULL) {
		mpool_put(tree->bt_mp, tree->bt_pinned, 0);
		tree->bt_pinned = NULL;
	}

	/* No flags are accepted. */
	if (flags != 0) {
		errno = EINVAL;
		return (RET_ERROR);
	}

	/* In-memory and read-only trees have nothing to write ... */
	if (F_ISSET(tree, B_INMEM | B_RDONLY))
		return (RET_SUCCESS);
	/* ... and neither does a tree that has not been modified. */
	if (!F_ISSET(tree, B_MODIFIED))
		return (RET_SUCCESS);

	/* Refresh the meta page first when it is marked dirty. */
	if (F_ISSET(tree, B_METADIRTY)) {
		if (bt_meta(tree) == RET_ERROR)
			return (RET_ERROR);
	}

	/* Only clear the modified bit once the pool sync has succeeded. */
	rc = mpool_sync(tree->bt_mp);
	if (rc == RET_SUCCESS) {
		F_CLR(tree, B_MODIFIED);
	}
	return (rc);
}
/*
 * bt_meta --
 *	Rebuild the meta-data record from the in-core tree and copy it
 *	onto the meta page (P_META), marking that page dirty.
 *
 * Parameters:
 *	t:	tree
 *
 * Returns:
 *	RET_ERROR if the meta page cannot be fetched, else RET_SUCCESS
 */
static int
bt_meta(BTREE *t)
{
	BTMETA meta;
	void *page;

	page = mpool_get(t->bt_mp, P_META, 0);
	if (page == NULL)
		return (RET_ERROR);

	/* Constant identification fields. */
	meta.magic = BTREEMAGIC;
	meta.version = BTREEVERSION;

	/* Fields mirrored from the tree; only the SAVEMETA flag bits go out. */
	meta.flags = F_ISSET(t, SAVEMETA);
	meta.nrecs = t->bt_nrecs;
	meta.free = t->bt_free;
	meta.psize = t->bt_psize;

	memmove(page, &meta, sizeof(meta));
	mpool_put(t->bt_mp, page, MPOOL_DIRTY);
	return (RET_SUCCESS);
}

216
lib/libc/db/btree/bt_conv.c Normal file
View file

@ -0,0 +1,216 @@
/* $NetBSD: bt_conv.c,v 1.14 2008/09/10 17:52:35 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: bt_conv.c,v 1.14 2008/09/10 17:52:35 joerg Exp $");
#endif
#include <assert.h>
#include <stdio.h>
#include <db.h>
#include "btree.h"
static void mswap(PAGE *);
/*
 * __BT_PGIN, __BT_PGOUT --
 * Convert host-specific number layout to/from the host-independent
 * format stored on disk.
 *
 * Parameters:
 * t: tree (passed as void *, used as a BTREE *)
 * pg: page number
 * pp: page to convert, swapped in place
 *
 * __bt_pgin does nothing unless the tree has B_NEEDSWAP set. The meta
 * page (P_META) is handed to mswap(). For any other page the header
 * fields are swapped first, then each entry of a P_BINTERNAL or P_BLEAF
 * page is swapped; other page types get only the header swap.
 */
void
__bt_pgin(void *t, pgno_t pg, void *pp)
{
PAGE *h;
indx_t i, top;
uint8_t flags;
char *p;
/* Nothing to do when the tree does not need byte swapping. */
if (!F_ISSET(((BTREE *)t), B_NEEDSWAP))
return;
if (pg == P_META) {
mswap(pp);
return;
}
h = pp;
/*
 * Swap the page header before anything else: NEXTINDEX() and the
 * flags test below are evaluated on the already-swapped header.
 */
M_32_SWAP(h->pgno);
M_32_SWAP(h->prevpg);
M_32_SWAP(h->nextpg);
M_32_SWAP(h->flags);
M_16_SWAP(h->lower);
M_16_SWAP(h->upper);
top = NEXTINDEX(h);
if ((h->flags & P_TYPE) == P_BINTERNAL)
for (i = 0; i < top; i++) {
/* Swap the index slot first; GETBINTERNAL() is applied after it. */
M_16_SWAP(h->linp[i]);
p = (char *)(void *)GETBINTERNAL(h, i);
/* Leading 32-bit field, then the pgno_t field that follows it. */
P_32_SWAP(p);
p += sizeof(uint32_t);
P_32_SWAP(p);
p += sizeof(pgno_t);
/* p is now at the flags byte; big keys carry two more 32-bit words. */
if (*(uint8_t *)p & P_BIGKEY) {
p += sizeof(uint8_t);
P_32_SWAP(p);
p += sizeof(pgno_t);
P_32_SWAP(p);
}
}
else if ((h->flags & P_TYPE) == P_BLEAF)
for (i = 0; i < top; i++) {
M_16_SWAP(h->linp[i]);
p = (char *)(void *)GETBLEAF(h, i);
/* Two leading 32-bit fields, then the flags byte. */
P_32_SWAP(p);
p += sizeof(uint32_t);
P_32_SWAP(p);
p += sizeof(uint32_t);
flags = *(uint8_t *)p;
if (flags & (P_BIGKEY | P_BIGDATA)) {
p += sizeof(uint8_t);
if (flags & P_BIGKEY) {
P_32_SWAP(p);
p += sizeof(pgno_t);
P_32_SWAP(p);
}
if (flags & P_BIGDATA) {
/*
 * NOTE(review): this advance is unconditional. After a P_BIGKEY swap it
 * steps over the second key word; when P_BIGKEY is clear it steps
 * sizeof(uint32_t) bytes into the payload regardless of the key length.
 * Confirm against the BLEAF layout that this is the intended offset.
 */
p += sizeof(uint32_t);
P_32_SWAP(p);
p += sizeof(pgno_t);
P_32_SWAP(p);
}
}
}
}
/*
 * __bt_pgout --
 * Counterpart of __bt_pgin: swap a page in place on its way out.
 *
 * Parameters:
 * t: tree (passed as void *, used as a BTREE *)
 * pg: page number
 * pp: page to convert, swapped in place
 *
 * Does nothing unless the tree has B_NEEDSWAP set. The meta page
 * (P_META) is handed to mswap(). The order is the reverse of
 * __bt_pgin: entries are swapped while the header and index slots are
 * still in their original form, and the header is swapped last.
 */
void
__bt_pgout(void *t, pgno_t pg, void *pp)
{
PAGE *h;
indx_t i, top;
uint8_t flags;
char *p;
/* Nothing to do when the tree does not need byte swapping. */
if (!F_ISSET(((BTREE *)t), B_NEEDSWAP))
return;
if (pg == P_META) {
mswap(pp);
return;
}
h = pp;
/* Header is still unswapped here, so NEXTINDEX() and flags are usable. */
top = NEXTINDEX(h);
if ((h->flags & P_TYPE) == P_BINTERNAL)
for (i = 0; i < top; i++) {
/* Locate the entry through the still-unswapped index slot. */
p = (char *)(void *)GETBINTERNAL(h, i);
/* Leading 32-bit field, then the pgno_t field that follows it. */
P_32_SWAP(p);
p += sizeof(uint32_t);
P_32_SWAP(p);
p += sizeof(pgno_t);
/* p is now at the flags byte; big keys carry two more 32-bit words. */
if (*(uint8_t *)p & P_BIGKEY) {
p += sizeof(uint8_t);
P_32_SWAP(p);
p += sizeof(pgno_t);
P_32_SWAP(p);
}
/* The index slot is swapped only after the entry has been handled. */
M_16_SWAP(h->linp[i]);
}
else if ((h->flags & P_TYPE) == P_BLEAF)
for (i = 0; i < top; i++) {
p = (char *)(void *)GETBLEAF(h, i);
/* Two leading 32-bit fields, then the flags byte. */
P_32_SWAP(p);
p += sizeof(uint32_t);
P_32_SWAP(p);
p += sizeof(uint32_t);
flags = *(uint8_t *)p;
if (flags & (P_BIGKEY | P_BIGDATA)) {
p += sizeof(uint8_t);
if (flags & P_BIGKEY) {
P_32_SWAP(p);
p += sizeof(pgno_t);
P_32_SWAP(p);
}
if (flags & P_BIGDATA) {
/*
 * NOTE(review): this advance is unconditional. After a P_BIGKEY swap it
 * steps over the second key word; when P_BIGKEY is clear it steps
 * sizeof(uint32_t) bytes into the payload regardless of the key length.
 * Confirm against the BLEAF layout that this is the intended offset.
 */
p += sizeof(uint32_t);
P_32_SWAP(p);
p += sizeof(pgno_t);
P_32_SWAP(p);
}
}
M_16_SWAP(h->linp[i]);
}
/* Header last: nothing after this point reads it. */
M_32_SWAP(h->pgno);
M_32_SWAP(h->prevpg);
M_32_SWAP(h->nextpg);
M_32_SWAP(h->flags);
M_16_SWAP(h->lower);
M_16_SWAP(h->upper);
}
/*
 * mswap --
 *	Byte-swap, in place, the six consecutive 32-bit words at the
 *	start of the meta page: magic, version, psize, free, nrecs and
 *	flags, in that order.
 *
 * Parameters:
 *	pg:	page to convert
 */
static void
mswap(PAGE *pg)
{
	char *p;
	int word;

	p = (char *)(void *)pg;
	for (word = 0; word < 6; word++) {
		/* Hand the macro a plain identifier, as every other call does. */
		P_32_SWAP(p);
		p += sizeof(uint32_t);
	}
}

View file

@ -0,0 +1,326 @@
/* $NetBSD: bt_debug.c,v 1.15 2008/09/10 17:52:35 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: bt_debug.c,v 1.15 2008/09/10 17:52:35 joerg Exp $");
#endif
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <db.h>
#include "btree.h"
#ifdef DEBUG
/*
 * __bt_dump --
 *	Print a one-line summary of the tree to stderr, then dump every
 *	page from P_ROOT upward until the pool returns no further page.
 *
 * Parameters:
 *	dbp:	pointer to the DB
 */
void
__bt_dump(DB *dbp)
{
	/* Flags reported in the summary line, in print order. */
	static const struct {
		uint32_t bit;
		const char *label;
	} shown[] = {
		{ R_FIXLEN,	"FIXLEN" },
		{ B_INMEM,	"INMEM" },
		{ B_NODUPS,	"NODUPS" },
		{ B_RDONLY,	"RDONLY" },
		{ R_RECNO,	"RECNO" },
		{ B_METADIRTY,	"METADIRTY" },
	};
	BTREE *t;
	PAGE *pg;
	pgno_t pgno;
	const char *sep;
	size_t k;

	t = dbp->internal;
	(void)fprintf(stderr, "%s: pgsz %d",
	    F_ISSET(t, B_INMEM) ? "memory" : "disk", t->bt_psize);
	if (F_ISSET(t, R_RECNO))
		(void)fprintf(stderr, " keys %lu", (unsigned long) t->bt_nrecs);

	/* The flag list, and the newline ending it, appear only if a flag is set. */
	if (t->flags != 0) {
		sep = " flags (";
		for (k = 0; k < sizeof(shown) / sizeof(shown[0]); k++) {
			if (!F_ISSET(t, shown[k].bit))
				continue;
			(void)fprintf(stderr, "%s%s", sep, shown[k].label);
			sep = ", ";
		}
		(void)fprintf(stderr, ")\n");
	}

	pgno = P_ROOT;
	while ((pg = mpool_get(t->bt_mp, pgno, 0)) != NULL) {
		__bt_dpage(pg);
		(void)mpool_put(t->bt_mp, pg, 0);
		++pgno;
	}
}
/*
 * __bt_dmpage --
 *	Print the fields of the meta page to stderr.  The flags line is
 *	followed by the names of the NODUPS/RECNO bits when any flag is
 *	set; no newline is written after it.
 *
 * Parameters:
 *	h:	pointer to the PAGE holding the meta data
 */
void
__bt_dmpage(PAGE *h)
{
	BTMETA *meta;
	const char *sep;

	meta = (BTMETA *)(void *)h;
	(void)fprintf(stderr, "magic %lx\n", (unsigned long) meta->magic);
	(void)fprintf(stderr, "version %lu\n", (unsigned long) meta->version);
	(void)fprintf(stderr, "psize %lu\n", (unsigned long) meta->psize);
	(void)fprintf(stderr, "free %lu\n", (unsigned long) meta->free);
	(void)fprintf(stderr, "nrecs %lu\n", (unsigned long) meta->nrecs);
	(void)fprintf(stderr, "flags %lu", (unsigned long) meta->flags);

	if (!meta->flags)
		return;

	sep = " (";
	if (meta->flags & B_NODUPS) {
		(void)fprintf(stderr, "%s%s", sep, "NODUPS");
		sep = ", ";
	}
	if (meta->flags & R_RECNO) {
		(void)fprintf(stderr, "%s%s", sep, "RECNO");
		sep = ", ";
	}
	(void)fprintf(stderr, ")");
}
/*
 * __bt_dnpage --
 *	Dump a single page, selected by page number.  Nothing is printed
 *	when the pool cannot supply the page.
 *
 * Parameters:
 *	dbp:	pointer to the DB
 *	pgno:	page number to dump
 */
void
__bt_dnpage(DB *dbp, pgno_t pgno)
{
	BTREE *tree = dbp->internal;
	PAGE *page;

	page = mpool_get(tree->bt_mp, pgno, 0);
	if (page == NULL)
		return;

	__bt_dpage(page);
	(void)mpool_put(tree->bt_mp, page, 0);
}
/*
 * __bt_dpage --
 *	Dump one page to stderr: page number, the names of the flag bits
 *	that are set, the sibling links and, unless the page is an
 *	overflow page, one line per entry.
 *
 *	Keys and data are written with an explicit length (%.*s) because
 *	they are byte strings with a recorded size and are not required
 *	to be NUL-terminated.
 *
 * Parameters:
 *	h:	pointer to the PAGE
 */
void
__bt_dpage(PAGE *h)
{
	BINTERNAL *bi;
	BLEAF *bl;
	RINTERNAL *ri;
	RLEAF *rl;
	indx_t cur, top;
	const char *sep;

	(void)fprintf(stderr, " page %d: (", h->pgno);
#undef X
#define	X(flag, name) \
	if (h->flags & flag) { \
		(void)fprintf(stderr, "%s%s", sep, name); \
		sep = ", "; \
	}
	sep = "";
	X(P_BINTERNAL, "BINTERNAL")		/* types */
	X(P_BLEAF, "BLEAF")
	X(P_RINTERNAL, "RINTERNAL")		/* types */
	X(P_RLEAF, "RLEAF")
	X(P_OVERFLOW, "OVERFLOW")
	X(P_PRESERVE, "PRESERVE");
	(void)fprintf(stderr, ")\n");
#undef X

	(void)fprintf(stderr, "\tprev %2d next %2d", h->prevpg, h->nextpg);

	/* Overflow pages have no index array to walk. */
	if (h->flags & P_OVERFLOW)
		return;

	top = NEXTINDEX(h);
	(void)fprintf(stderr, " lower %3d upper %3d nextind %d\n",
	    h->lower, h->upper, top);
	for (cur = 0; cur < top; cur++) {
		(void)fprintf(stderr, "\t[%03d] %4d ", cur, h->linp[cur]);
		switch (h->flags & P_TYPE) {
		case P_BINTERNAL:
			bi = GETBINTERNAL(h, cur);
			(void)fprintf(stderr,
			    "size %03d pgno %03d", bi->ksize, bi->pgno);
			if (bi->flags & P_BIGKEY)
				(void)fprintf(stderr, " (indirect)");
			else if (bi->ksize)
				(void)fprintf(stderr,
				    " {%.*s}", (int)bi->ksize, bi->bytes);
			break;
		case P_RINTERNAL:
			ri = GETRINTERNAL(h, cur);
			(void)fprintf(stderr, "entries %03d pgno %03d",
			    ri->nrecs, ri->pgno);
			break;
		case P_BLEAF:
			bl = GETBLEAF(h, cur);
			if (bl->flags & P_BIGKEY)
				(void)fprintf(stderr,
				    "big key page %lu size %u/",
				    (unsigned long) *(pgno_t *)(void *)bl->bytes,
				    *(uint32_t *)(void *)(bl->bytes + sizeof(pgno_t)));
			else if (bl->ksize)
				/*
				 * Bound the key by its recorded size; a bare
				 * %s would run on into the data (or beyond)
				 * when the key has no terminating NUL.
				 */
				(void)fprintf(stderr, "%.*s/",
				    (int)bl->ksize, bl->bytes);
			if (bl->flags & P_BIGDATA)
				(void)fprintf(stderr,
				    "big data page %lu size %u",
				    (unsigned long) *(pgno_t *)(void *)(bl->bytes + bl->ksize),
				    *(uint32_t *)(void *)(bl->bytes + bl->ksize +
				    sizeof(pgno_t)));
			else if (bl->dsize)
				(void)fprintf(stderr, "%.*s",
				    (int)bl->dsize, bl->bytes + bl->ksize);
			break;
		case P_RLEAF:
			rl = GETRLEAF(h, cur);
			if (rl->flags & P_BIGDATA)
				(void)fprintf(stderr,
				    "big data page %lu size %u",
				    (unsigned long) *(pgno_t *)(void *)rl->bytes,
				    *(uint32_t *)(void *)(rl->bytes + sizeof(pgno_t)));
			else if (rl->dsize)
				(void)fprintf(stderr,
				    "%.*s", (int)rl->dsize, rl->bytes);
			break;
		}
		(void)fprintf(stderr, "\n");
	}
}
#endif
#ifdef STATISTICS
/*
 * __bt_stat --
 *	Gather and print tree statistics to stderr: depth, key count,
 *	page counts by type, the global cache/split counters and the
 *	fill percentage of leaf and internal pages.
 *
 *	The depth is found by following the first entry of each internal
 *	page down from the root; if a page on that path cannot be
 *	fetched the walk stops and the depth reached so far is reported.
 *
 * Parameters:
 *	dbp:	pointer to the DB
 */
void
__bt_stat(DB *dbp)
{
	extern unsigned long bt_cache_hit, bt_cache_miss, bt_pfxsaved, bt_rootsplit;
	extern unsigned long bt_sortsplit, bt_split;
	BTREE *t;
	PAGE *h;
	pgno_t i, pcont, pinternal, pleaf;
	unsigned long ifree, lfree, nkeys;
	unsigned long ibytes, lbytes;
	int levels;

	t = dbp->internal;
	pcont = pinternal = pleaf = 0;
	nkeys = ifree = lfree = 0;

	/* Classify every page the pool can return, starting at the root. */
	for (i = P_ROOT; (h = mpool_get(t->bt_mp, i, 0)) != NULL; ++i) {
		switch (h->flags & P_TYPE) {
		case P_BINTERNAL:
		case P_RINTERNAL:
			++pinternal;
			ifree += h->upper - h->lower;
			break;
		case P_BLEAF:
		case P_RLEAF:
			++pleaf;
			lfree += h->upper - h->lower;
			nkeys += NEXTINDEX(h);
			break;
		case P_OVERFLOW:
			++pcont;
			break;
		}
		(void)mpool_put(t->bt_mp, h, 0);
	}

	/* Count the levels of the tree. */
	for (i = P_ROOT, levels = 0 ;; ++levels) {
		/* Do not dereference a page the pool failed to supply. */
		if ((h = mpool_get(t->bt_mp, i, 0)) == NULL)
			break;
		if (h->flags & (P_BLEAF|P_RLEAF)) {
			if (levels == 0)
				levels = 1;
			(void)mpool_put(t->bt_mp, h, 0);
			break;
		}
		i = F_ISSET(t, R_RECNO) ?
		    GETRINTERNAL(h, 0)->pgno :
		    GETBINTERNAL(h, 0)->pgno;
		(void)mpool_put(t->bt_mp, h, 0);
	}

	(void)fprintf(stderr, "%d level%s with %lu keys",
	    levels, levels == 1 ? "" : "s", nkeys);
	if (F_ISSET(t, R_RECNO))
		(void)fprintf(stderr, " (%ld header count)", (long)t->bt_nrecs);
	(void)fprintf(stderr,
	    "\n%lu pages (leaf %ld, internal %ld, overflow %ld)\n",
	    (unsigned long)pinternal + pleaf + pcont, (long)pleaf,
	    (long)pinternal, (long)pcont);
	(void)fprintf(stderr, "%lu cache hits, %lu cache misses\n",
	    bt_cache_hit, bt_cache_miss);
	(void)fprintf(stderr, "%lu splits (%lu root splits, %lu sort splits)\n",
	    bt_split, bt_rootsplit, bt_sortsplit);

	/*
	 * Usable bytes per page type.  Kept in unsigned long rather than
	 * folded back into the pgno_t page counters, so the totals are
	 * not limited by the page-number type.
	 */
	lbytes = (unsigned long)pleaf * (t->bt_psize - BTDATAOFF);
	if (lbytes)
		(void)fprintf(stderr,
		    "%.0f%% leaf fill (%lu bytes used, %lu bytes free)\n",
		    ((double)(lbytes - lfree) / lbytes) * 100,
		    lbytes - lfree, lfree);
	ibytes = (unsigned long)pinternal * (t->bt_psize - BTDATAOFF);
	if (ibytes)
		(void)fprintf(stderr,
		    "%.0f%% internal fill (%lu bytes used, %lu bytes free)\n",
		    ((double)(ibytes - ifree) / ibytes) * 100,
		    ibytes - ifree, ifree);
	if (bt_pfxsaved)
		(void)fprintf(stderr, "prefix checking removed %lu bytes.\n",
		    bt_pfxsaved);
}
#endif

View file

@ -0,0 +1,646 @@
/* $NetBSD: bt_delete.c,v 1.17 2009/01/29 02:02:36 lukem Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: bt_delete.c,v 1.17 2009/01/29 02:02:36 lukem Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/types.h>
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <db.h>
#include "btree.h"
static int __bt_bdelete(BTREE *, const DBT *);
static int __bt_curdel(BTREE *, const DBT *, PAGE *, u_int);
static int __bt_pdelete(BTREE *, PAGE *);
static int __bt_relink(BTREE *, PAGE *);
static int __bt_stkacq(BTREE *, PAGE **, CURSOR *);
/*
 * __bt_delete --
 *	Delete by key (flags == 0) or delete the record under the cursor
 *	(flags == R_CURSOR).
 *
 * Returns RET_SPECIAL if the key is not found, or if the cursor is not
 * positioned on a record.  A read-only tree fails with EPERM.  Any
 * other flags value, or R_CURSOR on an uninitialized cursor, fails
 * with EINVAL.
 */
int
__bt_delete(const DB *dbp, const DBT *key, u_int flags)
{
	BTREE *t = dbp->internal;
	CURSOR *c = &t->bt_cursor;
	PAGE *h;
	int status;

	/* Drop any page still pinned from an earlier call. */
	if (t->bt_pinned != NULL) {
		mpool_put(t->bt_mp, t->bt_pinned, 0);
		t->bt_pinned = NULL;
	}

	/* Refuse to change a read-only tree. */
	if (F_ISSET(t, B_RDONLY)) {
		errno = EPERM;
		return (RET_ERROR);
	}

	if (flags == 0) {
		status = __bt_bdelete(t, key);
	} else if (flags == R_CURSOR && F_ISSET(c, CURS_INIT)) {
		/*
		 * The cursor must sit on a record: not between records
		 * and not already deleted.
		 */
		if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
			return (RET_SPECIAL);

		h = mpool_get(t->bt_mp, c->pg.pgno, 0);
		if (h == NULL)
			return (RET_ERROR);

		/*
		 * Removing the only entry empties the page, which then
		 * has to be deleted; that needs a parent stack.
		 */
		if (NEXTINDEX(h) == 1 && __bt_stkacq(t, &h, c))
			return (RET_ERROR);

		status = __bt_dleaf(t, NULL, h, (u_int)c->pg.index);
		if (status != RET_SUCCESS) {
			/* Nothing was removed: release the page clean. */
			mpool_put(t->bt_mp, h, 0);
		} else if (NEXTINDEX(h) != 0) {
			mpool_put(t->bt_mp, h, (u_int)MPOOL_DIRTY);
		} else if (__bt_pdelete(t, h)) {
			return (RET_ERROR);
		}
	} else {
		errno = EINVAL;
		return (RET_ERROR);
	}

	if (status == RET_SUCCESS) {
		F_SET(t, B_MODIFIED);
	}
	return (status);
}
/*
 * __bt_stkacq --
 * Acquire a stack so we can delete a cursor entry.
 *
 * The page passed in is released, the cursor's key is searched for to
 * build a parent stack, and the leaf chain is then walked (right first,
 * then left from a fresh search) until the cursor's page is reached,
 * adjusting the stack on every move. On return *hp is a fresh pin on
 * the cursor's page.
 *
 * Parameters:
 * t: tree
 * hp: pointer to current, pinned PAGE pointer
 * c: pointer to the cursor
 *
 * Returns:
 * 0 on success, 1 on failure
 */
static int
__bt_stkacq(BTREE *t, PAGE **hp, CURSOR *c)
{
BINTERNAL *bi;
EPG *e;
EPGNO *parent;
PAGE *h;
indx_t idx = 0; /* Pacify gcc */
pgno_t pgno;
/*
 * NOTE(review): these hold h->nextpg / h->prevpg, i.e. page numbers,
 * but are declared recno_t — presumably the same width as pgno_t;
 * confirm.
 */
recno_t nextpg, prevpg;
int exact, level;
/*
 * Find the first occurrence of the key in the tree. Toss the
 * currently locked page so we don't hit an already-locked page.
 */
h = *hp;
mpool_put(t->bt_mp, h, 0);
if ((e = __bt_search(t, &c->key, &exact)) == NULL)
return (1);
h = e->page;
/* See if we got it in one shot. */
if (h->pgno == c->pg.pgno)
goto ret;
/*
 * Move right, looking for the page. At each move we have to move
 * up the stack until we don't have to move to the next page. If
 * we have to change pages at an internal level, we have to fix the
 * stack back up.
 */
while (h->pgno != c->pg.pgno) {
/* Ran off the right end of the chain without finding the page. */
if ((nextpg = h->nextpg) == P_INVALID)
break;
mpool_put(t->bt_mp, h, 0);
/*
 * Move up the stack. level counts the entries popped before one
 * with a following index was found.
 * NOTE(review): if BT_POP() returns NULL before the break is taken,
 * h refers to a page already released and is used below — presumably
 * unreachable because a right sibling implies such an ancestor;
 * confirm.
 */
for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
/* Get the parent page. */
if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
return (1);
/* Move to the next index. */
if (parent->index != NEXTINDEX(h) - 1) {
idx = parent->index + 1;
BT_PUSH(t, h->pgno, idx);
break;
}
mpool_put(t->bt_mp, h, 0);
}
/* Restore the stack: descend level times along the leftmost entry. */
while (level--) {
/* Push the next level down onto the stack. */
bi = GETBINTERNAL(h, idx);
pgno = bi->pgno;
BT_PUSH(t, pgno, 0);
/* Lose the currently pinned page. */
mpool_put(t->bt_mp, h, 0);
/* Get the next level down. */
if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
return (1);
idx = 0;
}
/* Swap the pin over to the right sibling and test again. */
mpool_put(t->bt_mp, h, 0);
if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL)
return (1);
}
if (h->pgno == c->pg.pgno)
goto ret;
/* Reacquire the original stack. */
mpool_put(t->bt_mp, h, 0);
if ((e = __bt_search(t, &c->key, &exact)) == NULL)
return (1);
h = e->page;
/*
 * Move left, looking for the page. At each move we have to move
 * up the stack until we don't have to change pages to move to the
 * next page. If we have to change pages at an internal level, we
 * have to fix the stack back up.
 */
while (h->pgno != c->pg.pgno) {
/* Ran off the left end of the chain without finding the page. */
if ((prevpg = h->prevpg) == P_INVALID)
break;
mpool_put(t->bt_mp, h, 0);
/* Move up the stack. */
for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
/* Get the parent page. */
if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
return (1);
/* Move to the previous index. */
if (parent->index != 0) {
idx = parent->index - 1;
BT_PUSH(t, h->pgno, idx);
break;
}
mpool_put(t->bt_mp, h, 0);
}
/* Restore the stack: descend level times along the rightmost entry. */
while (level--) {
/* Find the next level down from the current index. */
bi = GETBINTERNAL(h, idx);
pgno = bi->pgno;
/* Lose the currently pinned page. */
mpool_put(t->bt_mp, h, 0);
/* Get the next level down. */
if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
return (1);
/* Push the child together with its last index. */
idx = NEXTINDEX(h) - 1;
BT_PUSH(t, pgno, idx);
}
/* Swap the pin over to the left sibling and test again. */
mpool_put(t->bt_mp, h, 0);
if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL)
return (1);
}
/*
 * Release whatever page is pinned and hand the caller a fresh pin on
 * the cursor's page. The comparison yields 1 when that fetch fails.
 */
ret: mpool_put(t->bt_mp, h, 0);
return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL);
}
/*
 * __bt_bdelete --
 * Delete all key/data pairs matching the specified key.
 *
 * The key is searched for repeatedly: each pass deletes the matches on
 * one leaf page, and another pass is made whenever the matches reached
 * an edge of that page or the page became empty.
 *
 * Parameters:
 * t: tree
 * key: key to delete
 *
 * Returns:
 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
 */
static int
__bt_bdelete(BTREE *t, const DBT *key)
{
EPG *e;
PAGE *h;
int deleted, exact, redo;
/* Set once anything has been removed; later misses then count as success. */
deleted = 0;
/* Find any matching record; __bt_search pins the page. */
loop: if ((e = __bt_search(t, key, &exact)) == NULL)
return (deleted ? RET_SUCCESS : RET_ERROR);
if (!exact) {
mpool_put(t->bt_mp, e->page, 0);
return (deleted ? RET_SUCCESS : RET_SPECIAL);
}
/*
 * Delete forward, then delete backward, from the found key. If
 * there are duplicates and we reach either side of the page, do
 * the key search again, so that we get them all.
 */
redo = 0;
h = e->page;
do {
/* Deleting at e->index leaves e->index naming the following entry. */
if (__bt_dleaf(t, key, h, (u_int)e->index)) {
mpool_put(t->bt_mp, h, 0);
return (RET_ERROR);
}
/* Without duplicates one deletion is all there can be: finish here. */
if (F_ISSET(t, B_NODUPS)) {
if (NEXTINDEX(h) == 0) {
if (__bt_pdelete(t, h))
return (RET_ERROR);
} else
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
return (RET_SUCCESS);
}
deleted = 1;
} while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
/* Check for right-hand edge of the page. */
if (e->index == NEXTINDEX(h))
redo = 1;
/* Delete from the key to the beginning of the page. */
while (e->index-- > 0) {
/* Stop at the first entry that no longer matches the key. */
if (__bt_cmp(t, key, e) != 0)
break;
if (__bt_dleaf(t, key, h, (u_int)e->index) == RET_ERROR) {
mpool_put(t->bt_mp, h, 0);
return (RET_ERROR);
}
/* Matches reached the left-hand edge of the page. */
if (e->index == 0)
redo = 1;
}
/* Check for an empty page; __bt_pdelete releases it, then search again. */
if (NEXTINDEX(h) == 0) {
if (__bt_pdelete(t, h))
return (RET_ERROR);
goto loop;
}
/* Put the page. */
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
if (redo)
goto loop;
return (RET_SUCCESS);
}
/*
 * __bt_pdelete --
 *	Delete a single page from the tree.
 *
 * Parameters:
 *	t:	tree
 *	h:	leaf page
 *
 * Returns:
 *	RET_SUCCESS, RET_ERROR.  Failures while unlinking or freeing the
 *	leaf are reported as RET_ERROR, not as the raw truth value of
 *	the helper calls.
 *
 * Side-effects:
 *	mpool_put's the page
 */
static int
__bt_pdelete(BTREE *t, PAGE *h)
{
	BINTERNAL *bi;
	PAGE *pg;
	EPGNO *parent;
	indx_t cnt, idx, *ip, offset;
	uint32_t nksize;
	char *from;

	/*
	 * Walk the parent page stack -- a LIFO stack of the pages that were
	 * traversed when we searched for the page where the delete occurred.
	 * Each stack entry is a page number and a page index offset.  The
	 * offset is for the page traversed on the search.  We've just deleted
	 * a page, so we have to delete the key from the parent page.
	 *
	 * If the delete from the parent page makes it empty, this process may
	 * continue all the way up the tree.  We stop if we reach the root page
	 * (which is never deleted, it's just not worth the effort) or if the
	 * delete does not empty the page.
	 */
	while ((parent = BT_POP(t)) != NULL) {
		/* Get the parent page. */
		if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
			return (RET_ERROR);

		idx = parent->index;
		bi = GETBINTERNAL(pg, idx);

		/* Free any overflow pages. */
		if (bi->flags & P_BIGKEY &&
		    __ovfl_delete(t, bi->bytes) == RET_ERROR) {
			mpool_put(t->bt_mp, pg, 0);
			return (RET_ERROR);
		}

		/*
		 * Free the parent if it has only the one key and it's not the
		 * root page.  If it's the rootpage, turn it back into an empty
		 * leaf page.
		 */
		if (NEXTINDEX(pg) == 1) {
			if (pg->pgno == P_ROOT) {
				pg->lower = BTDATAOFF;
				pg->upper = t->bt_psize;
				pg->flags = P_BLEAF;
			} else {
				if (__bt_relink(t, pg) || __bt_free(t, pg))
					return (RET_ERROR);
				/* This parent is gone too: fix up its parent. */
				continue;
			}
		} else {
			/* Pack remaining key items at the end of the page. */
			nksize = NBINTERNAL(bi->ksize);
			from = (char *)(void *)pg + pg->upper;
			memmove(from + nksize, from,
			    (size_t)((char *)(void *)bi - from));
			pg->upper += nksize;

			/*
			 * Adjust indices' offsets, shift the indices down.
			 * Entries stored below the removed one were moved up
			 * by nksize, so their offsets grow by the same amount.
			 */
			offset = pg->linp[idx];
			for (cnt = idx, ip = &pg->linp[0]; cnt--; ++ip)
				if (ip[0] < offset)
					ip[0] += nksize;
			for (cnt = NEXTINDEX(pg) - idx; --cnt; ++ip)
				ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1];
			pg->lower -= sizeof(indx_t);
		}

		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
		break;
	}

	/* Free the leaf page, as long as it wasn't the root. */
	if (h->pgno == P_ROOT) {
		mpool_put(t->bt_mp, h, MPOOL_DIRTY);
		return (RET_SUCCESS);
	}

	/* Map helper failure onto the documented RET_ERROR. */
	if (__bt_relink(t, h) || __bt_free(t, h))
		return (RET_ERROR);
	return (RET_SUCCESS);
}
/*
 * __bt_dleaf --
 * Delete a single record from a leaf page.
 *
 * The page is modified in place: the entry's bytes are squeezed out of
 * the data area, its slot is removed from the index array, and the
 * tree's cursor is adjusted if it refers to this page. The page is not
 * released here.
 *
 * Parameters:
 * t: tree
 * key: referenced key
 * h: page
 * idx: index on page to delete
 *
 * Returns:
 * RET_SUCCESS, RET_ERROR.
 */
int
__bt_dleaf(BTREE *t, const DBT *key, PAGE *h, u_int idx)
{
BLEAF *bl;
indx_t cnt, *ip, offset;
uint32_t nbytes;
void *to;
char *from;
/* If this record is referenced by the cursor, delete the cursor. */
if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
!F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == idx &&
__bt_curdel(t, key, h, idx))
return (RET_ERROR);
/* If the entry uses overflow pages, make them available for reuse. */
to = bl = GETBLEAF(h, idx);
if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
return (RET_ERROR);
/* The data reference is read from bl->bytes + bl->ksize. */
if (bl->flags & P_BIGDATA &&
__ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
return (RET_ERROR);
/*
 * Pack the remaining key/data items at the end of the page: everything
 * between h->upper and the doomed entry slides up by nbytes, covering
 * the entry.
 */
nbytes = NBLEAF(bl);
from = (char *)(void *)h + h->upper;
memmove(from + nbytes, from, (size_t)((char *)(void *)to - from));
h->upper += nbytes;
/*
 * Adjust the indices' offsets, shift the indices down. Slots before
 * idx stay in place; slots after it move down by one. In both runs an
 * offset smaller than the deleted entry's names bytes that were just
 * moved, so it grows by nbytes.
 */
offset = h->linp[idx];
for (cnt = idx, ip = &h->linp[0]; cnt--; ++ip)
if (ip[0] < offset)
ip[0] += nbytes;
for (cnt = NEXTINDEX(h) - idx; --cnt; ++ip)
ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
/* One index slot fewer. */
h->lower -= sizeof(indx_t);
/* If the cursor is on this page, adjust it as necessary. */
if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
!F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > idx)
--t->bt_cursor.pg.index;
return (RET_SUCCESS);
}
/*
 * __bt_curdel --
 *	Delete the cursor: the record it references is about to go away.
 *
 *	When duplicates are allowed and an adjacent record (on this page or
 *	on a neighbouring page) has the same key, the cursor is moved onto
 *	that record and flagged CURS_BEFORE or CURS_AFTER.  Otherwise a copy
 *	of the key is kept in the cursor and CURS_ACQUIRE is set.
 *
 * Parameters:
 *	t:	tree
 *	key:	referenced key (or NULL)
 *	h:	page
 *	idx:	index on page to delete
 *
 * Returns:
 *	RET_SUCCESS, RET_ERROR.
 */
static int
__bt_curdel(BTREE *t, const DBT *key, PAGE *h, u_int idx)
{
	CURSOR *c;
	EPG e;
	PAGE *pg;
	int curcopy, status;

	/*
	 * If there are duplicates, move forward or backward to one.
	 * Otherwise, copy the key into the cursor area.
	 */
	c = &t->bt_cursor;
	F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE);

	curcopy = 0;
	if (!F_ISSET(t, B_NODUPS)) {
		/*
		 * We're going to have to do comparisons.  If we weren't
		 * provided a copy of the key, i.e. the user is deleting
		 * the current cursor position, get one.
		 */
		if (key == NULL) {
			e.page = h;
			e.index = idx;
			if ((status = __bt_ret(t, &e,
			    &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS)
				return (status);
			curcopy = 1;
			key = &c->key;
		}

		/* Check previous key, if not at the beginning of the page. */
		if (idx > 0) {
			e.page = h;
			e.index = idx - 1;
			if (__bt_cmp(t, key, &e) == 0) {
				F_SET(c, CURS_BEFORE);
				goto samepage;
			}
		}

		/* Check next key, if not at the end of the page. */
		if (idx < (unsigned)(NEXTINDEX(h) - 1)) {
			e.page = h;
			e.index = idx + 1;
			if (__bt_cmp(t, key, &e) == 0) {
				F_SET(c, CURS_AFTER);
				goto samepage;
			}
		}

		/* Check previous key if at the beginning of the page. */
		if (idx == 0 && h->prevpg != P_INVALID) {
			if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
				return (RET_ERROR);
			e.page = pg;
			e.index = NEXTINDEX(pg) - 1;
			if (__bt_cmp(t, key, &e) == 0) {
				F_SET(c, CURS_BEFORE);
				goto otherpage;
			}
			mpool_put(t->bt_mp, pg, 0);
		}

		/* Check next key if at the end of the page. */
		if (idx == (unsigned)(NEXTINDEX(h) - 1) &&
		    h->nextpg != P_INVALID) {
			if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
				return (RET_ERROR);
			e.page = pg;
			e.index = 0;
			if (__bt_cmp(t, key, &e) == 0) {
				F_SET(c, CURS_AFTER);
				goto otherpage;
			}
			mpool_put(t->bt_mp, pg, 0);
		}
	}

	/* No duplicate to move to: remember the key instead. */
	e.page = h;
	e.index = idx;
	if (curcopy || (status =
	    __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) {
		F_SET(c, CURS_ACQUIRE);
		return (RET_SUCCESS);
	}
	return (status);

otherpage:
	/*
	 * The duplicate lives on a neighbouring page.  Record its position
	 * while the page is still pinned, then release the page; the page
	 * must not be dereferenced after mpool_put().
	 */
	c->pg.pgno = pg->pgno;
	c->pg.index = e.index;
	mpool_put(t->bt_mp, pg, 0);
	return (RET_SUCCESS);

samepage:
	/* The duplicate is on the caller's page (e.page == h). */
	c->pg.pgno = e.page->pgno;
	c->pg.index = e.index;
	return (RET_SUCCESS);
}
/*
 * __bt_relink --
 *	Unhook a page from the sibling chain by pointing its neighbours
 *	at each other.
 *
 * Parameters:
 *	t:	tree
 *	h:	page to be deleted
 *
 * Returns:
 *	0 on success, RET_ERROR if a neighbour could not be fetched.
 */
static int
__bt_relink(BTREE *t, PAGE *h)
{
	PAGE *neighbor;

	/* The successor's back pointer now skips h. */
	if (h->nextpg != P_INVALID) {
		neighbor = mpool_get(t->bt_mp, h->nextpg, 0);
		if (neighbor == NULL)
			return (RET_ERROR);
		neighbor->prevpg = h->prevpg;
		mpool_put(t->bt_mp, neighbor, MPOOL_DIRTY);
	}

	/* The predecessor's forward pointer now skips h. */
	if (h->prevpg != P_INVALID) {
		neighbor = mpool_get(t->bt_mp, h->prevpg, 0);
		if (neighbor == NULL)
			return (RET_ERROR);
		neighbor->nextpg = h->nextpg;
		mpool_put(t->bt_mp, neighbor, MPOOL_DIRTY);
	}

	return (0);
}

108
lib/libc/db/btree/bt_get.c Normal file
View file

@ -0,0 +1,108 @@
/* $NetBSD: bt_get.c,v 1.13 2008/09/11 12:58:00 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: bt_get.c,v 1.13 2008/09/11 12:58:00 joerg Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/types.h>
#include <assert.h>
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <db.h>
#include "btree.h"
/*
 * __BT_GET -- Look up a key and return its data.
 *
 * Parameters:
 *	dbp:	pointer to access method
 *	key:	key to find
 *	data:	data to return
 *	flags:	must be zero; any other value fails with EINVAL
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is not found.
 */
int
__bt_get(const DB *dbp, const DBT *key, DBT *data, u_int flags)
{
	BTREE *t;
	EPG *hit;
	int exact, status;

	t = dbp->internal;

	/* Release whatever page the previous call left pinned. */
	if (t->bt_pinned != NULL) {
		mpool_put(t->bt_mp, t->bt_pinned, 0);
		t->bt_pinned = NULL;
	}

	/* No flags are defined for get. */
	if (flags != 0) {
		errno = EINVAL;
		return (RET_ERROR);
	}

	hit = __bt_search(t, key, &exact);
	if (hit == NULL)
		return (RET_ERROR);

	/* The search landed on an insertion point, not on the key. */
	if (!exact) {
		mpool_put(t->bt_mp, hit->page, 0);
		return (RET_SPECIAL);
	}

	status = __bt_ret(t, hit, NULL, NULL, data, &t->bt_rdata, 0);

	/*
	 * Without B_DB_LOCK the page stays pinned until the next call;
	 * with it the key/data were copied, so the page can go now.
	 */
	if (!F_ISSET(t, B_DB_LOCK))
		t->bt_pinned = hit->page;
	else
		mpool_put(t->bt_mp, hit->page, 0);

	return (status);
}

484
lib/libc/db/btree/bt_open.c Normal file
View file

@ -0,0 +1,484 @@
/* $NetBSD: bt_open.c,v 1.24 2008/09/11 12:58:00 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: bt_open.c,v 1.24 2008/09/11 12:58:00 joerg Exp $");
#endif
/*
* Implementation of btree access method for 4.4BSD.
*
* The design here was originally based on that of the btree access method
* used in the Postgres database system at UC Berkeley. This implementation
* is wholly independent of the Postgres code.
*/
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/stat.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#ifndef __minix
#include <paths.h>
#else
#define _PATH_TMP "/tmp/"
#endif
#include <db.h>
#include "btree.h"
#ifdef DEBUG
#undef MINPSIZE
#define MINPSIZE 128
#endif
#ifndef LITTLE_ENDIAN
# define LITTLE_ENDIAN 1234
#endif
#ifndef BIG_ENDIAN
# define BIG_ENDIAN 4321
#endif
#ifndef BYTE_ORDER
#define BYTE_ORDER LITTLE_ENDIAN
#endif
static int byteorder(void);
static int nroot(BTREE *);
static int tmp(void);
/*
 * __BT_OPEN -- Open a btree.
 *
 * Creates and fills a DB struct, and calls the routine that actually
 * opens the btree.
 *
 * Parameters:
 *	fname:	  filename (NULL for in-memory trees)
 *	flags:	  open flag bits
 *	mode:	  open permission bits
 *	openinfo: BTREEINFO pointer, or NULL to use the defaults
 *	dflags:	  DB_LOCK, DB_SHMEM and DB_TXN flag bits
 *
 * Returns:
 *	NULL on failure, pointer to DB on success.  On failure every
 *	resource acquired so far (buffer pool, DB, file descriptor, BTREE)
 *	is released and errno is left describing the original failure.
 */
DB *
__bt_open(const char *fname, int flags, mode_t mode, const BTREEINFO *openinfo,
    int dflags)
{
	struct stat sb;
	BTMETA m;
	BTREE *t;
	BTREEINFO b;
	DB *dbp;
	pgno_t ncache;
	ssize_t nr;
	size_t temp;
	int machine_lorder, saved_errno;

	t = NULL;

	/*
	 * Intention is to make sure all of the user's selections are okay
	 * here and then use them without checking.  Can't be complete, since
	 * we don't know the right page size, lorder or flags until the backing
	 * file is opened.  Also, the file's page size can cause the cachesize
	 * to change.
	 */
	machine_lorder = byteorder();
	if (openinfo) {
		b = *openinfo;

		/* Flags: R_DUP. */
		if (b.flags & ~(R_DUP))
			goto einval;

		/*
		 * Page size must be indx_t aligned and >= MINPSIZE.  Default
		 * page size is set farther on, based on the underlying file
		 * transfer size.
		 */
		if (b.psize &&
		    (b.psize < MINPSIZE || b.psize > MAX_PAGE_OFFSET + 1 ||
		    b.psize & (sizeof(indx_t) - 1)))
			goto einval;

		/* Minimum number of keys per page; absolute minimum is 2. */
		if (b.minkeypage) {
			if (b.minkeypage < 2)
				goto einval;
		} else
			b.minkeypage = DEFMINKEYPAGE;

		/* If no comparison, use default comparison and prefix. */
		if (b.compare == NULL) {
			b.compare = __bt_defcmp;
			if (b.prefix == NULL)
				b.prefix = __bt_defpfx;
		}

		if (b.lorder == 0)
			b.lorder = machine_lorder;
	} else {
		b.compare = __bt_defcmp;
		b.cachesize = 0;
		b.flags = 0;
		b.lorder = machine_lorder;
		b.minkeypage = DEFMINKEYPAGE;
		b.prefix = __bt_defpfx;
		b.psize = 0;
	}

	/* Check for the ubiquitous PDP-11. */
	if (b.lorder != BIG_ENDIAN && b.lorder != LITTLE_ENDIAN)
		goto einval;

	/*
	 * Allocate and initialize DB and BTREE structures.  Both start out
	 * zeroed, so bt_mp and bt_dbp are NULL until they are really set;
	 * the error path relies on that.
	 */
	if ((t = calloc(1, sizeof(*t))) == NULL)
		goto err;
	t->bt_fd = -1;			/* Don't close unopened fd on error. */
	t->bt_lorder = b.lorder;
	t->bt_order = NOT;
	t->bt_cmp = b.compare;
	t->bt_pfx = b.prefix;
	t->bt_rfd = -1;

	if ((t->bt_dbp = dbp = calloc(1, sizeof(*dbp))) == NULL)
		goto err;
	if (t->bt_lorder != machine_lorder)
		F_SET(t, B_NEEDSWAP);

	dbp->type = DB_BTREE;
	dbp->internal = t;
	dbp->close = __bt_close;
	dbp->del = __bt_delete;
	dbp->fd = __bt_fd;
	dbp->get = __bt_get;
	dbp->put = __bt_put;
	dbp->seq = __bt_seq;
	dbp->sync = __bt_sync;

	/*
	 * If no file name was supplied, this is an in-memory btree and we
	 * open a backing temporary file.  Otherwise, it's a disk-based tree.
	 */
	if (fname) {
		switch (flags & O_ACCMODE) {
		case O_RDONLY:
			F_SET(t, B_RDONLY);
			break;
		case O_RDWR:
			break;
		case O_WRONLY:
		default:
			goto einval;
		}

		if ((t->bt_fd = open(fname, flags, mode)) == -1)
			goto err;
	} else {
		if ((flags & O_ACCMODE) != O_RDWR)
			goto einval;
		if ((t->bt_fd = tmp()) == -1)
			goto err;
		F_SET(t, B_INMEM);
	}

	/* One close-on-exec call covers both the named and temporary file. */
	if (fcntl(t->bt_fd, F_SETFD, FD_CLOEXEC) == -1)
		goto err;

	if (fstat(t->bt_fd, &sb))
		goto err;
	if (sb.st_size) {
		if ((nr = read(t->bt_fd, &m, sizeof(BTMETA))) < 0)
			goto err;
		if (nr != sizeof(BTMETA))
			goto eftype;

		/*
		 * Read in the meta-data.  This can change the notion of what
		 * the lorder, page size and flags are, and, when the page size
		 * changes, the cachesize value can change too.  If the user
		 * specified the wrong byte order for an existing database, we
		 * don't bother to return an error, we just clear the NEEDSWAP
		 * bit.
		 */
		if (m.magic == BTREEMAGIC)
			F_CLR(t, B_NEEDSWAP);
		else {
			F_SET(t, B_NEEDSWAP);
			M_32_SWAP(m.magic);
			M_32_SWAP(m.version);
			M_32_SWAP(m.psize);
			M_32_SWAP(m.free);
			M_32_SWAP(m.nrecs);
			M_32_SWAP(m.flags);
		}
		if (m.magic != BTREEMAGIC || m.version != BTREEVERSION)
			goto eftype;
		if (m.psize < MINPSIZE || m.psize > MAX_PAGE_OFFSET + 1 ||
		    m.psize & (sizeof(indx_t) - 1))
			goto eftype;
		if (m.flags & ~SAVEMETA)
			goto eftype;
		b.psize = m.psize;
		F_SET(t, m.flags);
		t->bt_free = m.free;
		t->bt_nrecs = m.nrecs;
	} else {
		/*
		 * Set the page size to the best value for I/O to this file.
		 * Don't overflow the page offset type.
		 */
		if (b.psize == 0) {
#ifndef __minix
			b.psize = sb.st_blksize;
#else
			b.psize = 4096;
#endif
			if (b.psize < MINPSIZE)
				b.psize = MINPSIZE;
			if (b.psize > MAX_PAGE_OFFSET + 1)
				b.psize = MAX_PAGE_OFFSET + 1;
		}

		/* Set flag if duplicates permitted. */
		if (!(b.flags & R_DUP))
			F_SET(t, B_NODUPS);

		t->bt_free = P_INVALID;
		t->bt_nrecs = 0;
		F_SET(t, B_METADIRTY);
	}

	t->bt_psize = b.psize;

	/* Set the cache size; must be a multiple of the page size. */
	if (b.cachesize && b.cachesize & (b.psize - 1))
		b.cachesize += (~b.cachesize & (b.psize - 1)) + 1;
	if (b.cachesize < b.psize * MINCACHE)
		b.cachesize = b.psize * MINCACHE;

	/* Calculate number of pages to cache. */
	ncache = (b.cachesize + t->bt_psize - 1) / t->bt_psize;

	/*
	 * The btree data structure requires that at least two keys can fit on
	 * a page, but other than that there's no fixed requirement.  The user
	 * specified a minimum number per page, and we translated that into the
	 * number of bytes a key/data pair can use before being placed on an
	 * overflow page.  This calculation includes the page header, the size
	 * of the index referencing the leaf item and the size of the leaf item
	 * structure.  Also, don't let the user specify a minkeypage such that
	 * a key/data pair won't fit even if both key and data are on overflow
	 * pages.
	 */
	temp = (t->bt_psize - BTDATAOFF) / b.minkeypage -
	    (sizeof(indx_t) + NBLEAFDBT(0, 0));
	_DBFIT(temp, indx_t);
	t->bt_ovflsize = (indx_t)temp;
	if (t->bt_ovflsize < NBLEAFDBT(NOVFLSIZE, NOVFLSIZE) + sizeof(indx_t))
		t->bt_ovflsize =
		    NBLEAFDBT(NOVFLSIZE, NOVFLSIZE) + sizeof(indx_t);

	/* Initialize the buffer pool. */
	if ((t->bt_mp =
	    mpool_open(NULL, t->bt_fd, t->bt_psize, ncache)) == NULL)
		goto err;
	if (!F_ISSET(t, B_INMEM))
		mpool_filter(t->bt_mp, __bt_pgin, __bt_pgout, t);

	/* Create a root page if new tree. */
	if (nroot(t) == RET_ERROR)
		goto err;

	/* Global flags. */
	if (dflags & DB_LOCK)
		F_SET(t, B_DB_LOCK);
	if (dflags & DB_SHMEM)
		F_SET(t, B_DB_SHMEM);
	if (dflags & DB_TXN)
		F_SET(t, B_DB_TXN);

	return (dbp);

einval:	errno = EINVAL;
	goto err;

eftype:	errno = EFTYPE;
	goto err;

err:	if (t) {
		/* Cleanup calls must not hide the errno of the real failure. */
		saved_errno = errno;
		/* The pool exists only if mpool_open() succeeded (nroot failed). */
		if (t->bt_mp != NULL)
			(void)mpool_close(t->bt_mp);
		free(t->bt_dbp);
		if (t->bt_fd != -1)
			(void)close(t->bt_fd);
		free(t);
		errno = saved_errno;
	}
	return (NULL);
}
/*
 * NROOT -- Create the root of a new tree.
 *
 * If page 0 can already be fetched the tree exists and nothing is done.
 * Otherwise a zeroed meta-data page and an empty leaf root page are
 * allocated and written back.
 *
 * Parameters:
 *	t:	tree
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS
 */
static int
nroot(BTREE *t)
{
	PAGE *meta, *root;
	pgno_t npg;
	int saved_errno;

	if ((meta = mpool_get(t->bt_mp, 0, 0)) != NULL) {
		mpool_put(t->bt_mp, meta, 0);
		return (RET_SUCCESS);
	}
	if (errno != EINVAL)		/* It's OK to not exist. */
		return (RET_ERROR);
	errno = 0;

	if ((meta = mpool_new(t->bt_mp, &npg)) == NULL)
		return (RET_ERROR);

	if ((root = mpool_new(t->bt_mp, &npg)) == NULL) {
		/* Don't leave the meta page pinned; keep mpool_new's errno. */
		saved_errno = errno;
		mpool_put(t->bt_mp, meta, 0);
		errno = saved_errno;
		return (RET_ERROR);
	}

	/* The second page allocated must land on the root page number. */
	if (npg != P_ROOT) {
		mpool_put(t->bt_mp, root, 0);
		mpool_put(t->bt_mp, meta, 0);
		return (RET_ERROR);
	}

	/* An empty leaf: no siblings, index area and data area both empty. */
	root->pgno = npg;
	root->prevpg = root->nextpg = P_INVALID;
	root->lower = BTDATAOFF;
	root->upper = t->bt_psize;
	root->flags = P_BLEAF;
	memset(meta, 0, t->bt_psize);
	mpool_put(t->bt_mp, meta, MPOOL_DIRTY);
	mpool_put(t->bt_mp, root, MPOOL_DIRTY);
	return (RET_SUCCESS);
}
/*
 * TMP -- Create the anonymous backing file for an in-memory tree.
 *
 * The file is created with mkstemp() under $TMPDIR (ignored when
 * issetugid() reports true, on systems that have it) or _PATH_TMP,
 * unlinked immediately and marked close-on-exec.  All signals are blocked
 * between creation and unlink.
 *
 * Returns:
 *	An open file descriptor, or -1 with errno set.  A template path
 *	that does not fit in PATH_MAX fails with ENAMETOOLONG.
 */
static int
tmp(void)
{
	sigset_t set, oset;
	int fd, len;
	const char *envtmp;
	char path[PATH_MAX];

#ifndef __minix
	if (issetugid())
		envtmp = NULL;
	else
		envtmp = getenv("TMPDIR");
#else
	envtmp = getenv("TMPDIR");
#endif

	/* snprintf returns int: check for error before comparing sizes. */
	len = snprintf(path,
	    sizeof(path), "%s/bt.XXXXXX", envtmp ? envtmp : _PATH_TMP);
	if (len < 0)
		return (-1);
	if ((size_t)len >= sizeof(path)) {
		/* Truncated template: report it instead of a stale errno. */
		errno = ENAMETOOLONG;
		return (-1);
	}

	(void)sigfillset(&set);
	(void)sigprocmask(SIG_BLOCK, &set, &oset);
	if ((fd = mkstemp(path)) != -1) {
		(void)unlink(path);
		(void)fcntl(fd, F_SETFD, FD_CLOEXEC);
	}
	(void)sigprocmask(SIG_SETMASK, &oset, NULL);
	return (fd);
}
/*
 * BYTEORDER -- Determine the host byte order at run time.
 *
 * Stores 0x01020304 in a 32-bit integer and inspects the byte at the
 * lowest address.
 *
 * Returns:
 *	BIG_ENDIAN, LITTLE_ENDIAN, or 0 if the first byte is neither 1 nor 4.
 */
static int
byteorder(void)
{
	uint32_t probe = 0x01020304;
	const uint8_t *first = (const uint8_t *)(const void *)&probe;

	if (*first == 1)
		return (BIG_ENDIAN);
	if (*first == 4)
		return (LITTLE_ENDIAN);
	return (0);
}
/*
 * __BT_FD -- Return the file descriptor backing the tree.
 *
 * Parameters:
 *	dbp:	pointer to access method
 *
 * Returns:
 *	The descriptor, or -1 with errno set to ENOENT for an in-memory tree.
 */
int
__bt_fd(const DB *dbp)
{
	BTREE *t = dbp->internal;

	/* Release whatever page the previous call left pinned. */
	if (t->bt_pinned != NULL) {
		mpool_put(t->bt_mp, t->bt_pinned, 0);
		t->bt_pinned = NULL;
	}

	if (!F_ISSET(t, B_INMEM))
		return (t->bt_fd);

	/* In-memory database can't have a file descriptor. */
	errno = ENOENT;
	return (-1);
}

View file

@ -0,0 +1,239 @@
/* $NetBSD: bt_overflow.c,v 1.16 2008/09/11 12:58:00 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: bt_overflow.c,v 1.16 2008/09/11 12:58:00 joerg Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/param.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <db.h>
#include "btree.h"
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
/*
* Big key/data code.
*
* Big key and data entries are stored on linked lists of pages. The initial
* reference is a byte string, stored with the key or data, that holds the page
* number and size. The actual record is stored in a chain of pages linked by the
* nextpg field of the PAGE header.
*
* The first page of the chain has a special property. If the record is used
* by an internal page, it cannot be deleted and the P_PRESERVE bit will be set
* in the header.
*
* XXX
* A single DBT is written to each chain, so a lot of space on the last page
* is wasted. This is a fairly major bug for some data sets.
*/
/*
 * __OVFL_GET -- Get an overflow key/data item.
 *
 * Parameters:
 *	t:	tree
 *	p:	pointer to { pgno_t, uint32_t }
 *	ssz:	set to the item's size in bytes
 *	buf:	storage address; grown with realloc() when too small
 *	bufsz:	storage size; updated whenever buf is grown
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS.  If the buffer cannot be grown, *buf and
 *	*bufsz are left untouched so the caller's buffer remains valid.
 */
int
__ovfl_get(BTREE *t, void *p, size_t *ssz, void **buf, size_t *bufsz)
{
	PAGE *h;
	pgno_t pg;
	uint32_t sz, nb, plen;
	size_t temp;
	void *nbuf;

	/* The reference may be unaligned, so copy its two fields out. */
	memmove(&pg, p, sizeof(pgno_t));
	memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(uint32_t));
	*ssz = sz;

#ifdef DEBUG
	if (pg == P_INVALID || sz == 0)
		abort();
#endif
	/*
	 * Make the buffer bigger as necessary.  realloc(NULL, n) behaves
	 * like malloc(n), and going through a temporary keeps the old
	 * buffer (and its recorded size) intact if the allocation fails.
	 */
	if (*bufsz < sz) {
		if ((nbuf = realloc(*buf, sz)) == NULL)
			return (RET_ERROR);
		*buf = nbuf;
		*bufsz = sz;
	}

	/*
	 * Step through the linked list of pages, copying the data on each one
	 * into the buffer.  Never copy more than the data's length.
	 */
	temp = t->bt_psize - BTDATAOFF;
	_DBFIT(temp, uint32_t);
	plen = (uint32_t)temp;
	for (p = *buf;; p = (char *)p + nb) {
		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
			return (RET_ERROR);

		nb = MIN(sz, plen);
		memmove(p, (char *)(void *)h + BTDATAOFF, nb);

		/* Fetch the link while the page is still pinned. */
		pg = h->nextpg;
		mpool_put(t->bt_mp, h, 0);

		if ((sz -= nb) == 0)
			break;
	}
	return (RET_SUCCESS);
}
/*
 * __OVFL_PUT -- Store an overflow key/data item.
 *
 * The item is spread over a chain of freshly allocated P_OVERFLOW pages
 * linked through nextpg.
 *
 * Parameters:
 *	t:	tree
 *	dbt:	DBT to store
 *	pg:	set to the page number of the first page of the chain
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS
 */
int
__ovfl_put(BTREE *t, const DBT *dbt, pgno_t *pg)
{
	PAGE *cur, *prev;
	const char *src;
	pgno_t newpg;
	uint32_t remaining, chunk, capacity;
	size_t temp;

	/* Payload bytes available on each overflow page. */
	temp = t->bt_psize - BTDATAOFF;
	_DBFIT(temp, uint32_t);
	capacity = (uint32_t)temp;

	temp = dbt->size;
	_DBFIT(temp, uint32_t);
	remaining = temp;

	src = dbt->data;
	prev = NULL;
	for (;;) {
		cur = __bt_new(t, &newpg);
		if (cur == NULL)
			return (RET_ERROR);

		cur->pgno = newpg;
		cur->prevpg = P_INVALID;
		cur->nextpg = P_INVALID;
		cur->flags = P_OVERFLOW;
		cur->upper = 0;
		cur->lower = 0;

		chunk = MIN(remaining, capacity);
		(void)memmove((char *)(void *)cur + BTDATAOFF, src,
		    (size_t)chunk);

		/* First page: report it.  Later pages: link and flush prev. */
		if (prev == NULL)
			*pg = cur->pgno;
		else {
			prev->nextpg = cur->pgno;
			mpool_put(t->bt_mp, prev, MPOOL_DIRTY);
		}

		remaining -= chunk;
		if (remaining == 0) {
			mpool_put(t->bt_mp, cur, MPOOL_DIRTY);
			break;
		}

		src += capacity;
		prev = cur;
	}
	return (RET_SUCCESS);
}
/*
 * __OVFL_DELETE -- Delete an overflow chain.
 *
 * Chains whose first page carries P_PRESERVE are left alone.
 *
 * Parameters:
 *	t:	tree
 *	p:	pointer to { pgno_t, uint32_t }
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS
 */
int
__ovfl_delete(BTREE *t, void *p)
{
	PAGE *h;
	pgno_t pg;
	uint32_t sz, plen;
	size_t temp;

	/* The reference may be unaligned, so copy its two fields out. */
	(void)memmove(&pg, p, sizeof(pgno_t));
	(void)memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(uint32_t));

#ifdef DEBUG
	if (pg == P_INVALID || sz == 0)
		abort();
#endif
	if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
		return (RET_ERROR);

	/* Don't delete chains used by internal pages. */
	if (h->flags & P_PRESERVE) {
		mpool_put(t->bt_mp, h, 0);
		return (RET_SUCCESS);
	}

	/* Step through the chain, calling the free routine for each page. */
	temp = t->bt_psize - BTDATAOFF;
	_DBFIT(temp, uint32_t);
	plen = (uint32_t)temp;
	for (;; sz -= plen) {
		/* __bt_free() releases the page, so grab the link first. */
		pg = h->nextpg;
		if (__bt_free(t, h) != RET_SUCCESS)
			return (RET_ERROR);
		if (sz <= plen)
			break;
		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
			return (RET_ERROR);
	}
	return (RET_SUCCESS);
}

103
lib/libc/db/btree/bt_page.c Normal file
View file

@ -0,0 +1,103 @@
/* $NetBSD: bt_page.c,v 1.13 2008/09/11 12:58:00 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: bt_page.c,v 1.13 2008/09/11 12:58:00 joerg Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/types.h>
#include <assert.h>
#include <stdio.h>
#include <db.h>
#include "btree.h"
/*
 * __bt_free --
 *	Push a page onto the head of the tree's free list.
 *
 * Parameters:
 *	t:	tree
 *	h:	page to free
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS
 *
 * Side-effect:
 *	mpool_put's the page.
 */
int
__bt_free(BTREE *t, PAGE *h)
{
	pgno_t old_head;

	/* The page becomes the new list head, linked to the old head. */
	old_head = t->bt_free;
	h->nextpg = old_head;
	h->prevpg = P_INVALID;
	t->bt_free = h->pgno;

	/* The free-list head is kept in the tree's meta-data. */
	F_SET(t, B_METADIRTY);

	/* Hand the page back dirty so the new links get written. */
	return (mpool_put(t->bt_mp, h, MPOOL_DIRTY));
}
/*
 * __bt_new --
 *	Hand out a page, reusing the head of the free list when possible.
 *
 * Parameters:
 *	t:	tree
 *	npg:	storage for page number.
 *
 * Returns:
 *	Pointer to a page, NULL on error.
 */
PAGE *
__bt_new(BTREE *t, pgno_t *npg)
{
	PAGE *reused;

	/* Nothing on the free list: allocate a brand new page. */
	if (t->bt_free == P_INVALID)
		return (mpool_new(t->bt_mp, npg));

	/* The free-list head could not be fetched: fall back as well. */
	reused = mpool_get(t->bt_mp, t->bt_free, 0);
	if (reused == NULL)
		return (mpool_new(t->bt_mp, npg));

	/* Pop the head; the list now starts at its successor. */
	*npg = t->bt_free;
	t->bt_free = reused->nextpg;
	F_SET(t, B_METADIRTY);
	return (reused);
}

323
lib/libc/db/btree/bt_put.c Normal file
View file

@ -0,0 +1,323 @@
/* $NetBSD: bt_put.c,v 1.19 2009/02/12 06:40:14 lukem Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: bt_put.c,v 1.19 2009/02/12 06:40:14 lukem Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/types.h>
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <db.h>
#include "btree.h"
static EPG *bt_fast(BTREE *, const DBT *, const DBT *, int *);
/*
 * __BT_PUT -- Add a btree item to the tree.
 *
 * Parameters:
 *	dbp:	pointer to access method
 *	key:	key
 *	data:	data
 *	flags:	0, R_NOOVERWRITE or R_CURSOR; anything else fails with EINVAL
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is already in the
 *	tree and R_NOOVERWRITE specified.
 */
int
__bt_put(const DB *dbp, DBT *key, const DBT *data, u_int flags)
{
	BTREE *t;
	DBT tkey, tdata;
	EPG *e = NULL; /* pacify gcc */
	PAGE *h;
	indx_t idx, nxtindex;
	pgno_t pg;
	uint32_t nbytes, temp;
	int dflags, exact, status;
	char *dest, db[NOVFLSIZE], kb[NOVFLSIZE];

	t = dbp->internal;

	/* Toss any page pinned across calls. */
	if (t->bt_pinned != NULL) {
		mpool_put(t->bt_mp, t->bt_pinned, 0);
		t->bt_pinned = NULL;
	}

	/* Check for change to a read-only tree. */
	if (F_ISSET(t, B_RDONLY)) {
		errno = EPERM;
		return (RET_ERROR);
	}

	switch (flags) {
	case 0:
	case R_NOOVERWRITE:
		break;
	case R_CURSOR:
		/*
		 * If flags is R_CURSOR, put the cursor.  Must already
		 * have started a scan and not have already deleted it.
		 */
		if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
		    !F_ISSET(&t->bt_cursor,
			CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
			break;
		/* FALLTHROUGH */
	default:
		errno = EINVAL;
		return (RET_ERROR);
	}

	/*
	 * If the key/data pair won't fit on a page, store it on overflow
	 * pages.  Only put the key on the overflow page if the pair are
	 * still too big after moving the data to an overflow page.
	 *
	 * XXX
	 * If the insert fails later on, the overflow pages aren't recovered.
	 */
	dflags = 0;
	if (key->size + data->size > t->bt_ovflsize) {
		if (key->size > t->bt_ovflsize) {
storekey:		if (__ovfl_put(t, key, &pg) == RET_ERROR)
				return (RET_ERROR);
			tkey.data = kb;
			tkey.size = NOVFLSIZE;
			memmove(kb, &pg, sizeof(pgno_t));
			/*
			 * Narrow the size to 32 bits before copying it, as
			 * the data path below does; copying the leading
			 * bytes of a wider size field is byte-order
			 * dependent.
			 */
			_DBFIT(key->size, uint32_t);
			temp = (uint32_t)key->size;
			(void)memmove(kb + sizeof(pgno_t),
			    &temp, sizeof(uint32_t));
			dflags |= P_BIGKEY;
			key = &tkey;
		}
		if (key->size + data->size > t->bt_ovflsize) {
			if (__ovfl_put(t, data, &pg) == RET_ERROR)
				return (RET_ERROR);
			tdata.data = db;
			tdata.size = NOVFLSIZE;
			memmove(db, &pg, sizeof(pgno_t));
			_DBFIT(data->size, uint32_t);
			temp = (uint32_t)data->size;
			(void)memmove(db + sizeof(pgno_t),
			    &temp, sizeof(uint32_t));
			dflags |= P_BIGDATA;
			data = &tdata;
		}
		if (key->size + data->size > t->bt_ovflsize)
			goto storekey;
	}

	/* Replace the cursor. */
	if (flags == R_CURSOR) {
		if ((h = mpool_get(t->bt_mp, t->bt_cursor.pg.pgno, 0)) == NULL)
			return (RET_ERROR);
		idx = t->bt_cursor.pg.index;
		goto delete;
	}

	/*
	 * Find the key to delete, or, the location at which to insert.
	 * Bt_fast and __bt_search both pin the returned page.
	 */
	if (t->bt_order == NOT || (e = bt_fast(t, key, data, &exact)) == NULL)
		if ((e = __bt_search(t, key, &exact)) == NULL)
			return (RET_ERROR);
	h = e->page;
	idx = e->index;

	/*
	 * Add the key/data pair to the tree.  If an identical key is already
	 * in the tree, and R_NOOVERWRITE is set, an error is returned.  If
	 * R_NOOVERWRITE is not set, the key is either added (if duplicates are
	 * permitted) or an error is returned.
	 */
	switch (flags) {
	case R_NOOVERWRITE:
		if (!exact)
			break;
		mpool_put(t->bt_mp, h, 0);
		return (RET_SPECIAL);
	default:
		if (!exact || !F_ISSET(t, B_NODUPS))
			break;
		/*
		 * !!!
		 * Note, the delete may empty the page, so we need to put a
		 * new entry into the page immediately.
		 */
delete:		if (__bt_dleaf(t, key, h, (u_int)idx) == RET_ERROR) {
			mpool_put(t->bt_mp, h, 0);
			return (RET_ERROR);
		}
		break;
	}

	/*
	 * If not enough room, or the user has put a ceiling on the number of
	 * keys permitted in the page, split the page.  The split code will
	 * insert the key and data and unpin the current page.  If inserting
	 * into the offset array, shift the pointers up.
	 */
	nbytes = NBLEAFDBT(key->size, data->size);
	if ((uint32_t)h->upper - (uint32_t)h->lower < nbytes + sizeof(indx_t)) {
		if ((status = __bt_split(t, h, key,
		    data, dflags, nbytes, (u_int)idx)) != RET_SUCCESS)
			return (status);
		goto success;
	}

	if (idx < (nxtindex = NEXTINDEX(h)))
		memmove(h->linp + idx + 1, h->linp + idx,
		    (nxtindex - idx) * sizeof(indx_t));
	h->lower += sizeof(indx_t);

	/* Carve the record out of the top of the free space. */
	h->linp[idx] = h->upper -= nbytes;
	dest = (char *)(void *)h + h->upper;
	WR_BLEAF(dest, key, data, dflags);

	/* If the cursor is on this page, adjust it as necessary. */
	if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
	    !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
	    t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index >= idx)
		++t->bt_cursor.pg.index;

	/*
	 * Remember an insert at the very end (or very start) of the tree
	 * so that bt_fast() can try the same spot on the next put.
	 */
	if (t->bt_order == NOT) {
		if (h->nextpg == P_INVALID) {
			if (idx == NEXTINDEX(h) - 1) {
				t->bt_order = FORWARD;
				t->bt_last.index = idx;
				t->bt_last.pgno = h->pgno;
			}
		} else if (h->prevpg == P_INVALID) {
			if (idx == 0) {
				t->bt_order = BACK;
				t->bt_last.index = 0;
				t->bt_last.pgno = h->pgno;
			}
		}
	}

	mpool_put(t->bt_mp, h, MPOOL_DIRTY);

success:
	/*
	 * NOTE(review): the flag validation at the top accepts only 0,
	 * R_NOOVERWRITE and R_CURSOR, so this branch looks unreachable
	 * unless R_SETCURSOR aliases one of those values -- confirm.
	 */
	if (flags == R_SETCURSOR)
		__bt_setcur(t, e->page->pgno, (u_int)e->index);

	F_SET(t, B_MODIFIED);
	return (RET_SUCCESS);
}
/*
 * Optional instrumentation, compiled in only when STATISTICS is defined.
 * bt_fast() bumps bt_cache_hit when its sorted-insert shortcut can be used
 * and bt_cache_miss when it has to give up.
 */
#ifdef STATISTICS
unsigned long bt_cache_hit, bt_cache_miss;
#endif
/*
 * BT_FAST -- Shortcut for inserts that arrive in sorted order.
 *
 * Re-fetches the page remembered in t->bt_last and checks whether the new
 * key can be placed there without a full tree search.
 *
 * Parameters:
 *	t:	tree
 *	key:	key to insert
 *	data:	data to insert (only its size is examined)
 *	exactp:	set to non-zero if key compares equal to the remembered
 *		entry, zero otherwise; written only on a hit
 *
 * Returns:
 *	Pointer to t->bt_cur (page left pinned) on a hit, or NULL on a miss.
 *	Every NULL return also resets t->bt_order to NOT.
 */
static EPG *
bt_fast(BTREE *t, const DBT *key, const DBT *data, int *exactp)
{
	PAGE *page;
	uint32_t need;
	int rc;

	page = mpool_get(t->bt_mp, t->bt_last.pgno, 0);
	if (page == NULL) {
		t->bt_order = NOT;
		return (NULL);
	}
	t->bt_cur.page = page;
	t->bt_cur.index = t->bt_last.index;

	/*
	 * A pair that does not fit forces a split, and a split needs the
	 * parent stack that only a real search builds.
	 */
	need = NBLEAFDBT(key->size, data->size);
	if ((uint32_t)page->upper - (uint32_t)page->lower <
	    need + sizeof(indx_t))
		goto miss;

	if (t->bt_order == FORWARD) {
		/* Must still be the last entry of the right-most leaf. */
		if (page->nextpg != P_INVALID ||
		    t->bt_cur.index != NEXTINDEX(page) - 1)
			goto miss;
		rc = __bt_cmp(t, key, &t->bt_cur);
		if (rc < 0)
			goto miss;
		/* A strictly greater key goes one slot past the old last. */
		if (rc != 0)
			++t->bt_cur.index;
		t->bt_last.index = t->bt_cur.index;
	} else {
		/* Must still be the first entry of the left-most leaf. */
		if (page->prevpg != P_INVALID || t->bt_cur.index != 0)
			goto miss;
		rc = __bt_cmp(t, key, &t->bt_cur);
		if (rc > 0)
			goto miss;
		t->bt_last.index = 0;
	}

	*exactp = (rc == 0);
#ifdef STATISTICS
	++bt_cache_hit;
#endif
	return (&t->bt_cur);

miss:
#ifdef STATISTICS
	++bt_cache_miss;
#endif
	t->bt_order = NOT;
	mpool_put(t->bt_mp, page, 0);
	return (NULL);
}

View file

@ -0,0 +1,209 @@
/* $NetBSD: bt_search.c,v 1.17 2008/09/11 12:58:00 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: bt_search.c,v 1.17 2008/09/11 12:58:00 joerg Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/types.h>
#include <assert.h>
#include <stdio.h>
#include <db.h>
#include "btree.h"
/* File-local helpers that probe the adjacent leaf page for an exact match. */
static int __bt_snext(BTREE *, PAGE *, const DBT *, int *);
static int __bt_sprev(BTREE *, PAGE *, const DBT *, int *);
/*
 * __bt_search --
 *	Descend from the root looking for a key, recording every internal
 *	page visited on the tree's parent stack.
 *
 * Parameters:
 *	t:	tree to search
 *	key:	key to find
 *	exactp:	set to 1 on an exact match, 0 otherwise
 *
 * Returns:
 *	A pointer to t->bt_cur, which holds either the matching record or
 *	the slot the key would occupy if inserted; the page it names is
 *	left pinned.  NULL if a page could not be fetched.
 */
EPG *
__bt_search(BTREE *t, const DBT *key, int *exactp)
{
	PAGE *page;
	indx_t lo, mid, span;
	pgno_t pgno;
	int rc, found;

	BT_CLR(t);
	pgno = P_ROOT;
	for (;;) {
		page = mpool_get(t->bt_mp, pgno, 0);
		if (page == NULL)
			return (NULL);

		/* Binary search of the current page. */
		t->bt_cur.page = page;
		found = 0;
		lo = 0;
		span = NEXTINDEX(page);
		while (span != 0) {
			mid = lo + ((uint32_t)span >> 1);
			t->bt_cur.index = mid;
			rc = __bt_cmp(t, key, &t->bt_cur);
			if (rc == 0) {
				if (page->flags & P_BLEAF) {
					*exactp = 1;
					return (&t->bt_cur);
				}
				/* Exact hit on an internal page: descend. */
				found = 1;
				break;
			}
			if (rc > 0) {
				lo = mid + 1;
				--span;
			}
			span >>= 1;
		}

		if (!found) {
			/*
			 * On a leaf we are nearly finished.  When duplicates
			 * are allowed, matching keys may have been deleted
			 * from this page while others survive on a sibling,
			 * so if the slot is at either edge of the page look
			 * at the neighbouring page as well.
			 */
			if (page->flags & P_BLEAF) {
				if (!F_ISSET(t, B_NODUPS)) {
					if (lo == 0 &&
					    page->prevpg != P_INVALID &&
					    __bt_sprev(t, page, key, exactp))
						return (&t->bt_cur);
					if (lo == NEXTINDEX(page) &&
					    page->nextpg != P_INVALID &&
					    __bt_snext(t, page, key, exactp))
						return (&t->bt_cur);
				}
				*exactp = 0;
				t->bt_cur.index = lo;
				return (&t->bt_cur);
			}

			/*
			 * Internal page without a match: lo is the smallest
			 * index whose key is greater than the search key (it
			 * may be 0 or one past the end).  Step back one slot
			 * unless already at 0 to get the child that should
			 * contain the key; a later split inserts to the
			 * right of the slot saved here.
			 */
			mid = lo ? lo - 1 : lo;
		}

		BT_PUSH(t, page->pgno, mid);
		pgno = GETBINTERNAL(page, mid)->pgno;
		mpool_put(t->bt_mp, page, 0);
	}
}
/*
 * __bt_snext --
 *	Look for an exact match at the start of the following leaf page.
 *
 * Parameters:
 *	t:	tree
 *	h:	current page (pinned by the caller)
 *	key:	key
 *	exactp:	set to 1 when a match is found, otherwise left alone
 *
 * Returns:
 *	1 if the first entry of the next page equals key; in that case h is
 *	unpinned and t->bt_cur refers to the (pinned) next page.  0 if there
 *	is no match or the next page cannot be fetched; h then stays pinned.
 */
static int
__bt_snext(BTREE *t, PAGE *h, const DBT *key, int *exactp)
{
	EPG cand;

	/*
	 * The first key on the next page is either an exact match or no
	 * better than the position we already have.
	 */
	cand.page = mpool_get(t->bt_mp, h->nextpg, 0);
	if (cand.page == NULL)
		return (0);
	cand.index = 0;

	if (__bt_cmp(t, key, &cand) != 0) {
		mpool_put(t->bt_mp, cand.page, 0);
		return (0);
	}

	/* Switch over to the neighbouring page. */
	mpool_put(t->bt_mp, h, 0);
	t->bt_cur = cand;
	*exactp = 1;
	return (1);
}
/*
 * __bt_sprev --
 *	Look for an exact match at the end of the preceding leaf page.
 *
 * Parameters:
 *	t:	tree
 *	h:	current page (pinned by the caller)
 *	key:	key
 *	exactp:	set to 1 when a match is found, otherwise left alone
 *
 * Returns:
 *	1 if the last entry of the previous page equals key; in that case h
 *	is unpinned and t->bt_cur refers to the (pinned) previous page.  0 if
 *	there is no match or the previous page cannot be fetched; h then
 *	stays pinned.
 */
static int
__bt_sprev(BTREE *t, PAGE *h, const DBT *key, int *exactp)
{
	EPG cand;

	/*
	 * The last key on the previous page is either an exact match or no
	 * better than the position we already have.
	 */
	cand.page = mpool_get(t->bt_mp, h->prevpg, 0);
	if (cand.page == NULL)
		return (0);
	cand.index = NEXTINDEX(cand.page) - 1;

	if (__bt_cmp(t, key, &cand) != 0) {
		mpool_put(t->bt_mp, cand.page, 0);
		return (0);
	}

	/* Switch over to the neighbouring page. */
	mpool_put(t->bt_mp, h, 0);
	t->bt_cur = cand;
	*exactp = 1;
	return (1);
}

446
lib/libc/db/btree/bt_seq.c Normal file
View file

@ -0,0 +1,446 @@
/* $NetBSD: bt_seq.c,v 1.17 2008/09/11 12:58:00 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: bt_seq.c,v 1.17 2008/09/11 12:58:00 joerg Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/types.h>
#include <assert.h>
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <db.h>
#include "btree.h"
/* File-local helpers used by __bt_seq() to position and advance the cursor. */
static int __bt_first(BTREE *, const DBT *, EPG *, int *);
static int __bt_seqadv(BTREE *, EPG *, int);
static int __bt_seqset(BTREE *, EPG *, DBT *, int);
/*
* Sequential scan support.
*
* The tree can be scanned sequentially, starting from either end of the
* tree or from any specific key. A scan request before any scanning is
* done is initialized as starting from the least node.
*/
/*
 * __bt_seq --
 *	Btree sequential scan interface.
 *
 * Parameters:
 *	dbp:	pointer to access method
 *	key:	key for positioning and return value
 *	data:	data return value
 *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
 *
 * Returns:
 *	RET_ERROR (errno is EINVAL for an unknown flag), RET_SUCCESS, or
 *	RET_SPECIAL if there's no next key.
 */
int
__bt_seq(const DB *dbp, DBT *key, DBT *data, u_int flags)
{
	BTREE *t;
	EPG e;
	int status;

	t = dbp->internal;

	/* Release whatever page was left pinned by the previous call. */
	if (t->bt_pinned != NULL) {
		mpool_put(t->bt_mp, t->bt_pinned, 0);
		t->bt_pinned = NULL;
	}

	/*
	 * A relative move on an initialized cursor advances it; a relative
	 * move on an uninitialized cursor, or any absolute request, sets the
	 * scan position instead.  Both helpers return with the page they
	 * selected pinned when they succeed.
	 */
	if (flags == R_NEXT || flags == R_PREV) {
		if (F_ISSET(&t->bt_cursor, CURS_INIT))
			status = __bt_seqadv(t, &e, (int)flags);
		else
			status = __bt_seqset(t, &e, key, (int)flags);
	} else if (flags == R_FIRST || flags == R_LAST || flags == R_CURSOR) {
		status = __bt_seqset(t, &e, key, (int)flags);
	} else {
		errno = EINVAL;
		return (RET_ERROR);
	}

	if (status != RET_SUCCESS)
		return (status);

	__bt_setcur(t, e.page->pgno, (u_int)e.index);
	status = __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0);

	/*
	 * With concurrent access the key/data were copied, so the page can
	 * go back right away; otherwise keep it pinned until the next call.
	 */
	if (F_ISSET(t, B_DB_LOCK))
		mpool_put(t->bt_mp, e.page, 0);
	else
		t->bt_pinned = e.page;
	return (status);
}
/*
 * __bt_seqset --
 *	Set the sequential scan to a specific key, or to either end of
 *	the tree.
 *
 * Parameters:
 *	t:	tree
 *	ep:	storage for the returned page/index
 *	key:	key for initial scan position (R_CURSOR only)
 *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV
 *
 * Side effects:
 *	Pins the page the cursor references.
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key
 *	(including an empty tree).  R_CURSOR with an empty key fails with
 *	errno set to EINVAL.
 */
static int
__bt_seqset(BTREE *t, EPG *ep, DBT *key, int flags)
{
	PAGE *h;
	pgno_t pg;
	int exact, leftmost;

	/*
	 * Keyed scan: find the first instance of the key, or the smallest
	 * key greater than or equal to it.
	 */
	if (flags == R_CURSOR) {
		if (key->data == NULL || key->size == 0) {
			errno = EINVAL;
			return (RET_ERROR);
		}
		return (__bt_first(t, key, ep, &exact));
	}

	/* Choose which edge of the tree to walk down. */
	if (flags == R_FIRST || flags == R_NEXT)
		leftmost = 1;
	else if (flags == R_LAST || flags == R_PREV)
		leftmost = 0;
	else
		return (RET_SUCCESS);

	pg = P_ROOT;
	for (;;) {
		h = mpool_get(t->bt_mp, pg, 0);
		if (h == NULL)
			return (RET_ERROR);

		/* A page with no entries means the tree is empty. */
		if (NEXTINDEX(h) == 0) {
			mpool_put(t->bt_mp, h, 0);
			return (RET_SPECIAL);
		}

		if (h->flags & (P_BLEAF | P_RLEAF))
			break;

		/* Follow the first or last child pointer. */
		pg = GETBINTERNAL(h, leftmost ? 0 : NEXTINDEX(h) - 1)->pgno;
		mpool_put(t->bt_mp, h, 0);
	}

	/* The leaf stays pinned; report its first or last entry. */
	ep->page = h;
	ep->index = leftmost ? 0 : NEXTINDEX(h) - 1;
	return (RET_SUCCESS);
}
/*
 * __bt_seqadv --
 *	Advance the sequential scan.
 *
 * Parameters:
 *	t:	tree
 *	ep:	storage for the returned page/index
 *	flags:	R_NEXT, R_PREV
 *
 * Side effects:
 *	Pins the page the new key/data record is on.
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
 */
static int
__bt_seqadv(BTREE *t, EPG *ep, int flags)
{
	CURSOR *c;
	PAGE *h;
	indx_t idx = 0;		/* pacify gcc */
	pgno_t pg;
	int exact;

	/*
	 * The cursor is known to be initialized by the time we get here,
	 * but nothing else about its state is.
	 */
	c = &t->bt_cursor;

	/*
	 * The record under the cursor was deleted and had no duplicates, so
	 * its key was saved.  Locate where that key belongs in the current
	 * tree.  Whether or not the result is an exact match does not
	 * matter: an exact match means the record was re-added after the
	 * delete, and otherwise any record found at that spot is the one to
	 * return.
	 */
	if (F_ISSET(c, CURS_ACQUIRE))
		return (__bt_first(t, &c->key, ep, &exact));

	/* Get the page referenced by the cursor. */
	h = mpool_get(t->bt_mp, c->pg.pgno, 0);
	if (h == NULL)
		return (RET_ERROR);

	/*
	 * The record under the cursor was deleted from among duplicates and
	 * the cursor already sits on a neighbour, in the direction of this
	 * move, that has not been returned yet.  Clear the marker and hand
	 * back the current position without moving.
	 */
	if ((flags == R_NEXT && F_ISSET(c, CURS_AFTER)) ||
	    (flags == R_PREV && F_ISSET(c, CURS_BEFORE))) {
		F_CLR(c, CURS_AFTER | CURS_BEFORE);
		ep->page = h;
		ep->index = c->pg.index;
		return (RET_SUCCESS);
	}

	/*
	 * Find the neighbouring record.  The cursor itself is not moved
	 * here; only *ep is filled in.
	 */
	if (flags == R_NEXT) {
		idx = c->pg.index;
		++idx;
		if (idx == NEXTINDEX(h)) {
			/* Ran off the end: continue on the next leaf. */
			pg = h->nextpg;
			mpool_put(t->bt_mp, h, 0);
			if (pg == P_INVALID)
				return (RET_SPECIAL);
			h = mpool_get(t->bt_mp, pg, 0);
			if (h == NULL)
				return (RET_ERROR);
			idx = 0;
		}
	} else if (flags == R_PREV) {
		idx = c->pg.index;
		if (idx != 0) {
			--idx;
		} else {
			/* At the start: continue on the previous leaf. */
			pg = h->prevpg;
			mpool_put(t->bt_mp, h, 0);
			if (pg == P_INVALID)
				return (RET_SPECIAL);
			h = mpool_get(t->bt_mp, pg, 0);
			if (h == NULL)
				return (RET_ERROR);
			idx = NEXTINDEX(h) - 1;
		}
	}

	ep->page = h;
	ep->index = idx;
	return (RET_SUCCESS);
}
/*
 * __bt_first --
 *	Find the first entry greater than or equal to a key.
 *
 * Parameters:
 *	t:	the tree
 *	key:	the key
 *	erval:	return EPG; filled in only on RET_SUCCESS, with its page
 *		left pinned
 *	exactp:	pointer to exact match flag (set by __bt_search)
 *
 * Returns:
 *	RET_SUCCESS with *erval naming the first entry in the tree greater
 *	than or equal to key, RET_SPECIAL if no such key exists, or
 *	RET_ERROR if a page could not be fetched.
 */
static int
__bt_first(BTREE *t, const DBT *key, EPG *erval, int *exactp)
{
	PAGE *h;
	EPG *ep, save;
	pgno_t pg;

	/*
	 * Find any matching record; __bt_search pins the page.
	 *
	 * If it's an exact match and duplicates are possible, walk backwards
	 * in the tree until we find the first one.  Otherwise, make sure it's
	 * a valid key (__bt_search may return an index just past the end of a
	 * page) and return it.
	 *
	 * A NULL return means a page fetch failed.  That must be reported as
	 * an error: returning success here would let the caller use *erval,
	 * which has not been filled in.
	 */
	if ((ep = __bt_search(t, key, exactp)) == NULL)
		return (RET_ERROR);
	if (*exactp) {
		if (F_ISSET(t, B_NODUPS)) {
			*erval = *ep;
			return (RET_SUCCESS);
		}

		/*
		 * Walk backwards, as long as the entry matches and there are
		 * keys left in the tree.  Save a copy of each match in case
		 * we go too far.
		 */
		save = *ep;
		h = ep->page;
		do {
			if (save.page->pgno != ep->page->pgno) {
				mpool_put(t->bt_mp, save.page, 0);
				save = *ep;
			} else
				save.index = ep->index;

			/*
			 * Don't unpin the page the last (or original) match
			 * was on, but make sure it's unpinned if an error
			 * occurs.
			 */
			if (ep->index == 0) {
				if (h->prevpg == P_INVALID)
					break;
				/* Read the link before h can be released. */
				pg = h->prevpg;
				if (h->pgno != save.page->pgno)
					mpool_put(t->bt_mp, h, 0);
				if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) {
					/* Only the saved match is pinned. */
					mpool_put(t->bt_mp, save.page, 0);
					return (RET_ERROR);
				}
				ep->page = h;
				ep->index = NEXTINDEX(h);
			}
			--ep->index;
		} while (__bt_cmp(t, key, ep) == 0);

		/*
		 * Reach here with the last page that was looked at pinned,
		 * which may or may not be the same as the last (or original)
		 * match page.  If it's not useful, release it.
		 */
		if (h->pgno != save.page->pgno)
			mpool_put(t->bt_mp, h, 0);

		*erval = save;
		return (RET_SUCCESS);
	}

	/* If at the end of a page, find the next entry. */
	if (ep->index == NEXTINDEX(ep->page)) {
		h = ep->page;
		pg = h->nextpg;
		mpool_put(t->bt_mp, h, 0);
		if (pg == P_INVALID)
			return (RET_SPECIAL);
		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
			return (RET_ERROR);
		ep->index = 0;
		ep->page = h;
	}
	*erval = *ep;
	return (RET_SUCCESS);
}
/*
 * __bt_setcur --
 *	Point the tree's cursor at a specific entry.
 *
 * Any key saved in the cursor is freed, the CURS_ACQUIRE, CURS_AFTER and
 * CURS_BEFORE markers are cleared, and the cursor is marked initialized.
 *
 * Parameters:
 *	t:	the tree
 *	pgno:	page number
 *	idx:	page index
 */
void
__bt_setcur(BTREE *t, pgno_t pgno, u_int idx)
{
	/* Discard the saved copy of an already deleted key, if any. */
	if (t->bt_cursor.key.data != NULL) {
		free(t->bt_cursor.key.data);
		t->bt_cursor.key.data = NULL;
		t->bt_cursor.key.size = 0;
	}

	/* Record the new position. */
	t->bt_cursor.pg.pgno = pgno;
	t->bt_cursor.pg.index = idx;

	/* The cursor now names a live entry: drop stale markers. */
	F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE);
	F_SET(&t->bt_cursor, CURS_INIT);
}

View file

@ -0,0 +1,831 @@
/* $NetBSD: bt_split.c,v 1.19 2009/04/22 18:44:06 christos Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: bt_split.c,v 1.19 2009/04/22 18:44:06 christos Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/types.h>
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <db.h>
#include "btree.h"
/* File-local helpers used by __bt_split(). */
static int bt_broot(BTREE *, PAGE *, PAGE *, PAGE *);
static PAGE *bt_page(BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t);
static int bt_preserve(BTREE *, pgno_t);
static PAGE *bt_psplit(BTREE *, PAGE *, PAGE *, PAGE *, indx_t *, size_t);
static PAGE *bt_root(BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t);
static int bt_rroot(BTREE *, PAGE *, PAGE *, PAGE *);
static recno_t rec_total(PAGE *);
/*
 * Optional instrumentation, compiled in only when STATISTICS is defined:
 * bt_split counts page splits (bt_page and bt_root), bt_rootsplit counts
 * root splits (bt_root), bt_sortsplit counts the append-only shortcut taken
 * in bt_page, and bt_pfxsaved accumulates the bytes saved by prefix
 * compression in __bt_split.
 */
#ifdef STATISTICS
unsigned long bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved;
#endif
/*
 * __BT_SPLIT -- Split a page, insert the new item, and propagate the split
 * up the tree through the saved parent stack as far as necessary.
 *
 * Parameters:
 *	t:	tree
 *	sp:	page to split
 *	key:	key to insert
 *	data:	data to insert
 *	flags:	BIGKEY/BIGDATA flags
 *	ilen:	insert length
 *	argskip: index to leave open
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS.  Failures after the first split has been
 *	done also call __dbpanic() on the database.
 */
int
__bt_split(BTREE *t, PAGE *sp, const DBT *key, const DBT *data, int flags,
size_t ilen, uint32_t argskip)
{
BINTERNAL *bi = NULL; /* pacify gcc */
BLEAF *bl = NULL, *tbl; /* pacify gcc */
DBT a, b;
EPGNO *parent;
PAGE *h, *l, *r, *lchild, *rchild;
indx_t nxtindex;
uint16_t skip;
uint32_t n, nbytes, nksize = 0; /* pacify gcc */
int parentsplit;
char *dest;
/*
 * Split the page into two pages, l and r. The split routines return
 * a pointer to the page into which the key should be inserted and with
 * skip set to the offset which should be used. Additionally, l and r
 * are pinned.
 */
skip = argskip;
h = sp->pgno == P_ROOT ?
bt_root(t, sp, &l, &r, &skip, ilen) :
bt_page(t, sp, &l, &r, &skip, ilen);
if (h == NULL)
return (RET_ERROR);
/*
 * Insert the new key/data pair into the leaf page. (Key inserts
 * always cause a leaf page to split first.)
 */
_DBFIT(ilen, indx_t);
h->upper -= (indx_t)ilen;
h->linp[skip] = h->upper;
dest = (char *)(void *)h + h->upper;
if (F_ISSET(t, R_RECNO))
WR_RLEAF(dest, data, flags);
else
WR_BLEAF(dest, key, data, flags);
/* If the root page was split, make it look right. */
if (sp->pgno == P_ROOT &&
(F_ISSET(t, R_RECNO) ?
bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR)
goto err2;
/*
 * Now we walk the parent page stack -- a LIFO stack of the pages that
 * were traversed when we searched for the page that split. Each stack
 * entry is a page number and a page index offset. The offset is for
 * the page traversed on the search. We've just split a page, so we
 * have to insert a new key into the parent page.
 *
 * If the insert into the parent page causes it to split, may have to
 * continue splitting all the way up the tree. We stop if the root
 * splits or the page inserted into didn't have to split to hold the
 * new key. Some algorithms replace the key for the old page as well
 * as the new page. We don't, as there's no reason to believe that the
 * first key on the old page is any better than the key we have, and,
 * in the case of a key being placed at index 0 causing the split, the
 * key is unavailable.
 *
 * There are a maximum of 5 pages pinned at any time. We keep the left
 * and right pages pinned while working on the parent. The 5 are the
 * two children, left parent and right parent (when the parent splits)
 * and the root page or the overflow key page when calling bt_preserve.
 * This code must make sure that all pins are released other than the
 * root page or overflow page which is unlocked elsewhere.
 */
while ((parent = BT_POP(t)) != NULL) {
/* The pages produced by the previous round are this round's children. */
lchild = l;
rchild = r;
/* Get the parent page. */
if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
goto err2;
/*
 * The new key goes ONE AFTER the index, because the split
 * was to the right.
 */
skip = parent->index + 1;
/*
 * Calculate the space needed on the parent page.
 *
 * Prefix trees: space hack when inserting into BINTERNAL
 * pages. Retain only what's needed to distinguish between
 * the new entry and the LAST entry on the page to its left.
 * If the keys compare equal, retain the entire key. Note,
 * we don't touch overflow keys, and the entire key must be
 * retained for the next-to-left most key on the leftmost
 * page of each level, or the search will fail. Applicable
 * ONLY to internal pages that have leaf pages as children.
 * Further reduction of the key between pairs of internal
 * pages loses too much information.
 */
switch (rchild->flags & P_TYPE) {
case P_BINTERNAL:
bi = GETBINTERNAL(rchild, 0);
nbytes = NBINTERNAL(bi->ksize);
break;
case P_BLEAF:
bl = GETBLEAF(rchild, 0);
nbytes = NBINTERNAL(bl->ksize);
if (t->bt_pfx && !(bl->flags & P_BIGKEY) &&
(h->prevpg != P_INVALID || skip > 1)) {
size_t temp;
tbl = GETBLEAF(lchild, NEXTINDEX(lchild) - 1);
a.size = tbl->ksize;
a.data = tbl->bytes;
b.size = bl->ksize;
b.data = bl->bytes;
temp = t->bt_pfx(&a, &b);
_DBFIT(temp, uint32_t);
nksize = (uint32_t)temp;
n = NBINTERNAL(nksize);
/* Use the shortened key only if it actually saves space. */
if (n < nbytes) {
#ifdef STATISTICS
bt_pfxsaved += nbytes - n;
#endif
nbytes = n;
} else
nksize = 0;
} else
nksize = 0;
break;
case P_RINTERNAL:
case P_RLEAF:
nbytes = NRINTERNAL;
break;
default:
abort();
}
/* Split the parent page if necessary or shift the indices. */
if ((uint32_t)h->upper - (uint32_t)h->lower < nbytes + sizeof(indx_t)) {
sp = h;
h = h->pgno == P_ROOT ?
bt_root(t, h, &l, &r, &skip, nbytes) :
bt_page(t, h, &l, &r, &skip, nbytes);
if (h == NULL)
goto err1;
parentsplit = 1;
} else {
if (skip < (nxtindex = NEXTINDEX(h)))
memmove(h->linp + skip + 1, h->linp + skip,
(nxtindex - skip) * sizeof(indx_t));
h->lower += sizeof(indx_t);
parentsplit = 0;
}
/* Insert the key into the parent page. */
switch (rchild->flags & P_TYPE) {
case P_BINTERNAL:
h->linp[skip] = h->upper -= nbytes;
dest = (char *)(void *)h + h->linp[skip];
memmove(dest, bi, nbytes);
((BINTERNAL *)(void *)dest)->pgno = rchild->pgno;
break;
case P_BLEAF:
h->linp[skip] = h->upper -= nbytes;
dest = (char *)(void *)h + h->linp[skip];
WR_BINTERNAL(dest, nksize ? nksize : bl->ksize,
rchild->pgno, bl->flags & P_BIGKEY);
memmove(dest, bl->bytes, nksize ? nksize : bl->ksize);
/* An overflow key is now referenced from this page too. */
if (bl->flags & P_BIGKEY &&
bt_preserve(t, *(pgno_t *)(void *)bl->bytes) ==
RET_ERROR)
goto err1;
break;
case P_RINTERNAL:
/*
 * Update the left page count. If split
 * added at index 0, fix the correct page.
 */
if (skip > 0)
dest = (char *)(void *)h + h->linp[skip - 1];
else
dest = (char *)(void *)l + l->linp[NEXTINDEX(l) - 1];
((RINTERNAL *)(void *)dest)->nrecs = rec_total(lchild);
((RINTERNAL *)(void *)dest)->pgno = lchild->pgno;
/* Update the right page count. */
h->linp[skip] = h->upper -= nbytes;
dest = (char *)(void *)h + h->linp[skip];
((RINTERNAL *)(void *)dest)->nrecs = rec_total(rchild);
((RINTERNAL *)(void *)dest)->pgno = rchild->pgno;
break;
case P_RLEAF:
/*
 * Update the left page count. If split
 * added at index 0, fix the correct page.
 */
if (skip > 0)
dest = (char *)(void *)h + h->linp[skip - 1];
else
dest = (char *)(void *)l + l->linp[NEXTINDEX(l) - 1];
((RINTERNAL *)(void *)dest)->nrecs = NEXTINDEX(lchild);
((RINTERNAL *)(void *)dest)->pgno = lchild->pgno;
/* Update the right page count. */
h->linp[skip] = h->upper -= nbytes;
dest = (char *)(void *)h + h->linp[skip];
((RINTERNAL *)(void *)dest)->nrecs = NEXTINDEX(rchild);
((RINTERNAL *)(void *)dest)->pgno = rchild->pgno;
break;
default:
abort();
}
/*
 * The parent absorbed the key without splitting: release it and stop
 * climbing.  l and r still name the children, which are released
 * after the loop.
 */
if (!parentsplit) {
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
break;
}
/* If the root page was split, make it look right. */
if (sp->pgno == P_ROOT &&
(F_ISSET(t, R_RECNO) ?
bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR)
goto err1;
/* Done with the children; l and r (the split parent) carry on. */
mpool_put(t->bt_mp, lchild, MPOOL_DIRTY);
mpool_put(t->bt_mp, rchild, MPOOL_DIRTY);
}
/* Unpin the held pages. */
mpool_put(t->bt_mp, l, MPOOL_DIRTY);
mpool_put(t->bt_mp, r, MPOOL_DIRTY);
/*
 * Clear any pages left on the stack.
 * NOTE(review): no statement follows this comment here; __bt_search
 * calls BT_CLR() on entry, which presumably resets the stack -- confirm.
 */
return (RET_SUCCESS);
/*
 * If something fails in the above loop we were already walking back
 * up the tree and the tree is now inconsistent. Nothing much we can
 * do about it but release any memory we're holding.
 *
 * NOTE(review): err1 can be reached while l/r still alias
 * lchild/rchild (a failed parent split, or a bt_preserve failure when
 * the parent did not split), in which case the same two pages are put
 * twice below -- confirm mpool_put tolerates that.
 */
err1: mpool_put(t->bt_mp, lchild, MPOOL_DIRTY);
mpool_put(t->bt_mp, rchild, MPOOL_DIRTY);
err2: mpool_put(t->bt_mp, l, 0);
mpool_put(t->bt_mp, r, 0);
__dbpanic(t->bt_dbp);
return (RET_ERROR);
}
/*
 * BT_PAGE -- Split a non-root page of a btree.
 *
 * Parameters:
 *	t:	tree
 *	h:	page to split
 *	lp:	pointer to left page pointer
 *	rp:	pointer to right page pointer
 *	skip:	pointer to index to leave open
 *	ilen:	insert length
 *
 * Returns:
 *	Pointer to page in which to insert or NULL on error.  On success
 *	*lp is h itself and *rp is the newly allocated right page.  On error
 *	*lp and *rp are not written and the new right page is unpinned.
 */
static PAGE *
bt_page(BTREE *t, PAGE *h, PAGE **lp, PAGE **rp, indx_t *skip, size_t ilen)
{
	PAGE *l, *r, *tp;
	pgno_t npg;

#ifdef STATISTICS
	++bt_split;
#endif
	/* Put the new right page for the split into place. */
	if ((r = __bt_new(t, &npg)) == NULL)
		return (NULL);
	r->pgno = npg;
	r->lower = BTDATAOFF;
	r->upper = t->bt_psize;
	r->nextpg = h->nextpg;
	r->prevpg = h->pgno;
	r->flags = h->flags & P_TYPE;

	/*
	 * If we're splitting the last page on a level because we're appending
	 * a key to it (skip is NEXTINDEX()), it's likely that the data is
	 * sorted.  Adding an empty page on the side of the level is less work
	 * and can push the fill factor much higher than normal.  If we're
	 * wrong it's no big deal, we'll just do the split the right way next
	 * time.  It may look like it's equally easy to do a similar hack for
	 * reverse sorted data, that is, split the tree left, but it's not.
	 * Don't even try.
	 */
	if (h->nextpg == P_INVALID && *skip == NEXTINDEX(h)) {
#ifdef STATISTICS
		++bt_sortsplit;
#endif
		h->nextpg = r->pgno;
		/* Reserve the single index slot the caller will fill. */
		r->lower = BTDATAOFF + sizeof(indx_t);
		*skip = 0;
		*lp = h;
		*rp = r;
		return (r);
	}

	/* Put the new left page for the split into place. */
	if ((l = calloc(1, t->bt_psize)) == NULL) {
		mpool_put(t->bt_mp, r, 0);
		return (NULL);
	}
#ifdef PURIFY
	memset(l, 0xff, t->bt_psize);
#endif
	l->pgno = h->pgno;
	l->nextpg = r->pgno;
	l->prevpg = h->prevpg;
	l->lower = BTDATAOFF;
	l->upper = t->bt_psize;
	l->flags = h->flags & P_TYPE;

	/* Fix up the previous pointer of the page after the split page. */
	if (h->nextpg != P_INVALID) {
		if ((tp = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) {
			free(l);
			/*
			 * Release the pin on the new right page, as the
			 * allocation failure path above does, so it is not
			 * left pinned in the pool.
			 */
			/* XXX mpool_free(t->bt_mp, r->pgno); */
			mpool_put(t->bt_mp, r, 0);
			return (NULL);
		}
		tp->prevpg = r->pgno;
		mpool_put(t->bt_mp, tp, MPOOL_DIRTY);
	}

	/*
	 * Split right.  The key/data pairs aren't sorted in the btree page so
	 * it's simpler to copy the data from the split page onto two new pages
	 * instead of copying half the data to the right page and compacting
	 * the left page in place.  Since the left page can't change, we have
	 * to swap the original and the allocated left page after the split.
	 */
	tp = bt_psplit(t, h, l, r, skip, ilen);

	/* Move the new left page onto the old left page. */
	memmove(h, l, t->bt_psize);
	if (tp == l)
		tp = h;
	free(l);

	*lp = h;
	*rp = r;
	return (tp);
}
/*
 * BT_ROOT -- Split the root page of a btree.
 *
 * The root's contents are distributed over two newly allocated pages; the
 * root page itself is left for the caller to rebuild (bt_rroot/bt_broot).
 *
 * Parameters:
 *	t:	tree
 *	h:	root page
 *	lp:	pointer to left page pointer
 *	rp:	pointer to right page pointer
 *	skip:	pointer to index to leave open
 *	ilen:	insert length
 *
 * Returns:
 *	Pointer to page in which to insert or NULL on error.  On error *lp
 *	and *rp are not written and no new page is left pinned.
 */
static PAGE *
bt_root(BTREE *t, PAGE *h, PAGE **lp, PAGE **rp, indx_t *skip, size_t ilen)
{
	PAGE *l, *r, *tp;
	pgno_t lnpg, rnpg;

#ifdef STATISTICS
	++bt_split;
	++bt_rootsplit;
#endif
	/* Put the new left and right pages for the split into place. */
	if ((l = __bt_new(t, &lnpg)) == NULL)
		return (NULL);
	if ((r = __bt_new(t, &rnpg)) == NULL) {
		/* Don't leave the left page pinned when the right one fails. */
		mpool_put(t->bt_mp, l, 0);
		return (NULL);
	}
	l->pgno = lnpg;
	r->pgno = rnpg;
	l->nextpg = r->pgno;
	r->prevpg = l->pgno;
	l->prevpg = r->nextpg = P_INVALID;
	l->lower = r->lower = BTDATAOFF;
	l->upper = r->upper = t->bt_psize;
	l->flags = r->flags = h->flags & P_TYPE;

	/* Split the root page. */
	tp = bt_psplit(t, h, l, r, skip, ilen);

	*lp = l;
	*rp = r;
	return (tp);
}
/*
 * BT_RROOT -- Fix up the recno root page after it has been split.
 *
 * Rewrites the root as a recno internal page holding exactly two entries,
 * one for each half of the split, then unpins it dirty.
 *
 * Parameters:
 *	t:	tree
 *	h:	root page
 *	l:	left page
 *	r:	right page
 *
 * Returns:
 *	RET_SUCCESS (this routine has no failure path).
 */
static int
bt_rroot(BTREE *t, PAGE *h, PAGE *l, PAGE *r)
{
	char *dest;
	uint32_t sz;
	size_t temp;

	/* Offset of the first entry, placed at the very end of the page. */
	temp = t->bt_psize - NRINTERNAL;
	_DBFIT(temp, uint32_t);
	sz = (uint32_t)temp;
	_DBFIT(sz, indx_t);

	/* Entry 0: the left page and the number of records beneath it. */
	h->upper = (indx_t)sz;
	h->linp[0] = h->upper;
	dest = (char *)(void *)h + h->upper;
	if (l->flags & P_RLEAF) {
		WR_RINTERNAL(dest, NEXTINDEX(l), l->pgno);
	} else {
		WR_RINTERNAL(dest, rec_total(l), l->pgno);
	}

	/* Entry 1: the right page, stored just below entry 0. */
	h->upper -= NRINTERNAL;
	h->linp[1] = h->upper;
	dest = (char *)(void *)h + h->upper;
	if (r->flags & P_RLEAF) {
		WR_RINTERNAL(dest, NEXTINDEX(r), r->pgno);
	} else {
		WR_RINTERNAL(dest, rec_total(r), r->pgno);
	}

	/* Two index slots are in use. */
	h->lower = BTDATAOFF + 2 * sizeof(indx_t);

	/* Unpin the root page, set to recno internal page. */
	h->flags = (h->flags & ~P_TYPE) | P_RINTERNAL;
	mpool_put(t->bt_mp, h, MPOOL_DIRTY);

	return (RET_SUCCESS);
}
/*
* BT_BROOT -- Fix up the btree root page after it has been split.
*
* Parameters:
* t: tree
* h: root page
* l: left page
* r: right page
*
* Returns:
* RET_ERROR, RET_SUCCESS
*/
static int
bt_broot(BTREE *t, PAGE *h, PAGE *l, PAGE *r)
{
	BINTERNAL *bi = NULL;	/* pacify gcc */
	BLEAF *bl;
	pgno_t ovfl;
	uint32_t nbytes;
	char *dest;

	/*
	 * If the root page was a leaf page, change it into an internal page.
	 * We copy the key we split on (but not the key's data, in the case of
	 * a leaf page) to the new root page.
	 *
	 * The btree comparison code guarantees that the left-most key on any
	 * level of the tree is never used, so it doesn't need to be filled in.
	 */
	nbytes = NBINTERNAL(0);
	h->linp[0] = h->upper = t->bt_psize - nbytes;
	dest = (char *)(void *)h + h->upper;
	WR_BINTERNAL(dest, 0, l->pgno, 0);

	switch (h->flags & P_TYPE) {
	case P_BLEAF:
		bl = GETBLEAF(r, 0);
		nbytes = NBINTERNAL(bl->ksize);
		h->linp[1] = h->upper -= nbytes;
		dest = (char *)(void *)h + h->upper;
		/*
		 * Carry P_BIGKEY over to the internal entry.  For a big key
		 * the bytes copied below are an overflow reference, not the
		 * key itself; __bt_cmp only follows that reference when the
		 * entry is flagged P_BIGKEY.
		 */
		WR_BINTERNAL(dest, bl->ksize, r->pgno, bl->flags & P_BIGKEY);
		memmove(dest, bl->bytes, bl->ksize);

		/*
		 * If the key is on an overflow page, mark the overflow chain
		 * so it isn't deleted when the leaf copy of the key is deleted.
		 * bl->bytes follows a one-byte flags field and so is not
		 * pgno_t-aligned; copy the page number out instead of
		 * dereferencing a cast pointer.
		 */
		if (bl->flags & P_BIGKEY) {
			memmove(&ovfl, bl->bytes, sizeof(ovfl));
			if (bt_preserve(t, ovfl) == RET_ERROR)
				return (RET_ERROR);
		}
		break;
	case P_BINTERNAL:
		/* Copy the whole entry (flags included), then repoint it at r. */
		bi = GETBINTERNAL(r, 0);
		nbytes = NBINTERNAL(bi->ksize);
		h->linp[1] = h->upper -= nbytes;
		dest = (char *)(void *)h + h->upper;
		memmove(dest, bi, nbytes);
		((BINTERNAL *)(void *)dest)->pgno = r->pgno;
		break;
	default:
		abort();
	}

	/* There are two keys on the page. */
	h->lower = BTDATAOFF + 2 * sizeof(indx_t);

	/* Unpin the root page, set to btree internal page. */
	h->flags &= ~P_TYPE;
	h->flags |= P_BINTERNAL;
	mpool_put(t->bt_mp, h, MPOOL_DIRTY);

	return (RET_SUCCESS);
}
/*
* BT_PSPLIT -- Do the real work of splitting the page.
*
* Parameters:
* t: tree
* h: page to be split
* l: page to put lower half of data
* r: page to put upper half of data
* pskip: pointer to index to leave open
* ilen: insert length
*
* Returns:
* Pointer to page in which to insert.
*/
static PAGE *
bt_psplit(BTREE *t, PAGE *h, PAGE *l, PAGE *r, indx_t *pskip, size_t ilen)
{
BINTERNAL *bi;
BLEAF *bl;
CURSOR *c;
RLEAF *rl;
PAGE *rval;
void *src = NULL; /* pacify gcc */
/*
 * nxt:  index of the next entry of h still to be copied.
 * off:  destination slot on the page currently being filled.
 * top:  number of entries on h.
 * used: bytes (entry plus its index slot) charged to the left page so far.
 * full: bytes available on a page after the header; half is full / 2.
 */
indx_t full, half, nxt, off, skip, top, used;
uint32_t nbytes;
size_t temp;
int bigkeycnt, isbigkey;
/*
* Split the data to the left and right pages. Leave the skip index
* open. Additionally, make some effort not to split on an overflow
* key. This makes internal page processing faster and can save
* space as overflow keys used by internal pages are never deleted.
*/
bigkeycnt = 0;
skip = *pskip;
temp = t->bt_psize - BTDATAOFF;
_DBFIT(temp, indx_t);
full = (indx_t)temp;
half = full / 2;
used = 0;
/* First pass: fill the left page. */
for (nxt = off = 0, top = NEXTINDEX(h); nxt < top; ++off) {
if (skip == off) {
/*
 * This slot is reserved for the caller's pending insert: charge its
 * size (ilen) to the left page but copy nothing and do not advance nxt.
 */
_DBFIT(ilen, uint32_t);
nbytes = (uint32_t)ilen;
isbigkey = 0; /* XXX: not really known. */
} else
/* Otherwise size the next source entry according to the page type. */
switch (h->flags & P_TYPE) {
case P_BINTERNAL:
src = bi = GETBINTERNAL(h, nxt);
nbytes = NBINTERNAL(bi->ksize);
isbigkey = bi->flags & P_BIGKEY;
break;
case P_BLEAF:
src = bl = GETBLEAF(h, nxt);
nbytes = NBLEAF(bl);
isbigkey = bl->flags & P_BIGKEY;
break;
case P_RINTERNAL:
src = GETRINTERNAL(h, nxt);
nbytes = NRINTERNAL;
isbigkey = 0;
break;
case P_RLEAF:
src = rl = GETRLEAF(h, nxt);
nbytes = NRLEAF(rl);
isbigkey = 0;
break;
default:
abort();
}
/*
* If the key/data pairs are substantial fractions of the max
* possible size for the page, it's possible to get situations
* where we decide to try and copy too much onto the left page.
* Make sure that doesn't happen.
*
* The second condition (nxt == top - 1) stops before the last source
* entry, so at least one entry is left for the right page.
*
* NOTE(review): off is an unsigned indx_t, so --off from 0 wraps; the
* (off + 1) arithmetic after the loop appears to rely on that wrapping
* back to 0 -- confirm before changing any of these types.
*/
if ((skip <= off && used + nbytes + sizeof(indx_t) >= full) ||
nxt == top - 1) {
--off;
break;
}
/* Copy the key/data pair, if not the skipped index. */
if (skip != off) {
++nxt;
l->linp[off] = l->upper -= nbytes;
memmove((char *)(void *)l + l->upper, src, nbytes);
}
temp = nbytes + sizeof(indx_t);
_DBFIT(temp, indx_t);
used += (indx_t)temp;
/*
 * Once half the page is used, stop -- unless the entry just placed is
 * a big key, in which case keep going, for at most three big keys.
 * Leaving via this break skips the loop's ++off, so off still names
 * the last slot filled.
 */
if (used >= half) {
if (!isbigkey || bigkeycnt == 3)
break;
else
++bigkeycnt;
}
}
/*
* Off is the last offset that's valid for the left page.
* Nxt is the first offset to be placed on the right page.
*/
temp = (off + 1) * sizeof(indx_t);
_DBFIT(temp, indx_t);
l->lower += (indx_t)temp;
/*
* If splitting the page that the cursor was on, the cursor has to be
* adjusted to point to the same record as before the split. If the
* cursor is at or past the skipped slot, the cursor is incremented by
* one. If the cursor is on the right page, it is decremented by the
* number of records split to the left page.
*/
c = &t->bt_cursor;
if (F_ISSET(c, CURS_INIT) && c->pg.pgno == h->pgno) {
if (c->pg.index >= skip)
++c->pg.index;
if (c->pg.index < nxt) /* Left page. */
c->pg.pgno = l->pgno;
else { /* Right page. */
c->pg.pgno = r->pgno;
c->pg.index -= nxt;
}
}
/*
* If the skipped index was on the left page, just return that page.
* Otherwise, adjust the skip index to reflect the new position on
* the right page.
*
* Setting skip to MAX_PAGE_OFFSET keeps the second pass below from
* opening a slot on the right page as well.
*/
if (skip <= off) {
skip = MAX_PAGE_OFFSET;
rval = l;
} else {
rval = r;
*pskip -= nxt;
}
/* Second pass: copy every remaining entry of h to the right page. */
for (off = 0; nxt < top; ++off) {
if (skip == nxt) {
/* Leave one slot open for the pending insert, once only. */
++off;
skip = MAX_PAGE_OFFSET;
}
switch (h->flags & P_TYPE) {
case P_BINTERNAL:
src = bi = GETBINTERNAL(h, nxt);
nbytes = NBINTERNAL(bi->ksize);
break;
case P_BLEAF:
src = bl = GETBLEAF(h, nxt);
nbytes = NBLEAF(bl);
break;
case P_RINTERNAL:
src = GETRINTERNAL(h, nxt);
nbytes = NRINTERNAL;
break;
case P_RLEAF:
src = rl = GETRLEAF(h, nxt);
nbytes = NRLEAF(rl);
break;
default:
abort();
}
++nxt;
r->linp[off] = r->upper -= nbytes;
memmove((char *)(void *)r + r->upper, src, nbytes);
}
/* Account for the index slots consumed on the right page. */
temp = off * sizeof(indx_t);
_DBFIT(temp, indx_t);
r->lower += (indx_t)temp;
/* If the key is being appended to the page, adjust the index. */
if (skip == top)
r->lower += sizeof(indx_t);
return (rval);
}
/*
* BT_PRESERVE -- Mark a chain of pages as used by an internal node.
*
* Chains of indirect blocks pointed to by leaf nodes get reclaimed when the
* record that references them gets deleted. Chains pointed to by internal
* pages never get deleted. This routine marks a chain as pointed to by an
* internal page.
*
* Parameters:
* t: tree
* pg: page number of first page in the chain.
*
* Returns:
* RET_SUCCESS, RET_ERROR.
*/
static int
bt_preserve(BTREE *t, pgno_t pg)
{
	PAGE *chain;

	/* Pin the first page of the chain; fail if it cannot be fetched. */
	chain = mpool_get(t->bt_mp, pg, 0);
	if (chain == NULL)
		return (RET_ERROR);

	/* Flag it as preserved and write it back dirty. */
	chain->flags |= P_PRESERVE;
	mpool_put(t->bt_mp, chain, MPOOL_DIRTY);

	return (RET_SUCCESS);
}
/*
* REC_TOTAL -- Return the number of recno entries below a page.
*
* Parameters:
* h: page
*
* Returns:
* The number of recno entries below a page.
*
* XXX
* These values could be set by the bt_psplit routine. The problem is that the
* entry has to be popped off of the stack etc. or the values have to be passed
* all the way back to bt_split/bt_rroot and it's not very clean.
*/
static recno_t
rec_total(PAGE *h)
{
	recno_t sum;
	indx_t i, count;

	/* Add up the nrecs field of every RINTERNAL entry on the page. */
	sum = 0;
	i = 0;
	count = NEXTINDEX(h);
	while (i < count) {
		sum += GETRINTERNAL(h, i)->nrecs;
		++i;
	}
	return (sum);
}

View file

@ -0,0 +1,258 @@
/* $NetBSD: bt_utils.c,v 1.13 2008/09/10 17:52:35 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: bt_utils.c,v 1.13 2008/09/10 17:52:35 joerg Exp $");
#endif
#include <sys/param.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <db.h>
#include "btree.h"
/*
 * Two-argument maximum/minimum.  <sys/param.h> is included above and may
 * already provide these names; only define them when it did not, so the
 * definitions cannot clash.  Each macro evaluates its arguments twice, so
 * do not pass expressions with side effects.
 */
#ifndef MAX
#define	MAX(a, b)	((a) > (b) ? (a) : (b))
#endif
#ifndef MIN
#define	MIN(a, b)	((a) < (b) ? (a) : (b))
#endif
/*
* __bt_ret --
* Build return key/data pair.
*
* Parameters:
* t: tree
* e: key/data pair to be returned
* key: user's key structure (NULL if not to be filled in)
* rkey: memory area to hold key
* data: user's data structure (NULL if not to be filled in)
* rdata: memory area to hold data
* copy: always copy the key/data item
*
* Returns:
* RET_SUCCESS, RET_ERROR.
*/
int
__bt_ret(BTREE *t, EPG *e, DBT *key, DBT *rkey, DBT *data, DBT *rdata, int copy)
{
	BLEAF *leaf;
	void *grown;
	int private_copy;

	leaf = GETBLEAF(e->page, e->index);

	/*
	 * Big keys/data must always be copied to make them contiguous.
	 * Anything else is returned in place (pointing into the page)
	 * unless the caller asked for a copy or the tree was opened with
	 * B_DB_LOCK.
	 */
	private_copy = copy || F_ISSET(t, B_DB_LOCK);

	if (key != NULL) {
		if (leaf->flags & P_BIGKEY) {
			if (__ovfl_get(t, leaf->bytes,
			    &key->size, &rkey->data, &rkey->size))
				return (RET_ERROR);
			key->data = rkey->data;
		} else if (private_copy) {
			/* Grow the return buffer only when it is too small. */
			if (leaf->ksize > rkey->size) {
				if (rkey->data == NULL)
					grown = (void *)malloc(leaf->ksize);
				else
					grown = (void *)realloc(rkey->data,
					    leaf->ksize);
				if (grown == NULL)
					return (RET_ERROR);
				rkey->data = grown;
				rkey->size = leaf->ksize;
			}
			memmove(rkey->data, leaf->bytes, leaf->ksize);
			key->size = leaf->ksize;
			key->data = rkey->data;
		} else {
			key->size = leaf->ksize;
			key->data = leaf->bytes;
		}
	}

	if (data == NULL)
		return (RET_SUCCESS);

	/* The data item follows the key bytes within the leaf entry. */
	if (leaf->flags & P_BIGDATA) {
		if (__ovfl_get(t, leaf->bytes + leaf->ksize,
		    &data->size, &rdata->data, &rdata->size))
			return (RET_ERROR);
		data->data = rdata->data;
		return (RET_SUCCESS);
	}

	if (!private_copy) {
		data->size = leaf->dsize;
		data->data = leaf->bytes + leaf->ksize;
		return (RET_SUCCESS);
	}

	/* Use +1 in case the first record retrieved is 0 length. */
	if (leaf->dsize + 1 > rdata->size) {
		if (rdata->data == NULL)
			grown = (void *)malloc(leaf->dsize + 1);
		else
			grown = (void *)realloc(rdata->data, leaf->dsize + 1);
		if (grown == NULL)
			return (RET_ERROR);
		rdata->data = grown;
		rdata->size = leaf->dsize + 1;
	}
	memmove(rdata->data, leaf->bytes + leaf->ksize, leaf->dsize);
	data->size = leaf->dsize;
	data->data = rdata->data;

	return (RET_SUCCESS);
}
/*
* __BT_CMP -- Compare a key to a given record.
*
* Parameters:
* t: tree
* k1: DBT pointer of first arg to comparison
* e: pointer to EPG for comparison
*
* Returns:
* < 0 if k1 is < record
* = 0 if k1 is = record
* > 0 if k1 is > record
*/
int
__bt_cmp(BTREE *t, const DBT *k1, EPG *e)
{
	BINTERNAL *node;
	BLEAF *leaf;
	DBT rec;
	PAGE *pg;
	char *stored;
	uint32_t stored_len;
	int on_ovfl;

	pg = e->page;

	/*
	 * Entry 0 of a left-most, non-leaf page compares below every user
	 * key.  That way the left-most internal key never has to be updated
	 * when a key smaller than anything seen so far is inserted.
	 */
	if (e->index == 0 && pg->prevpg == P_INVALID && !(pg->flags & P_BLEAF))
		return (1);

	/* Locate the stored key and note whether it is an overflow key. */
	if (pg->flags & P_BLEAF) {
		leaf = GETBLEAF(pg, e->index);
		on_ovfl = leaf->flags & P_BIGKEY;
		stored = leaf->bytes;
		stored_len = leaf->ksize;
	} else {
		node = GETBINTERNAL(pg, e->index);
		on_ovfl = node->flags & P_BIGKEY;
		stored = node->bytes;
		stored_len = node->ksize;
	}

	if (on_ovfl) {
		/*
		 * Fetch the real key through the overflow reference, using
		 * the tree's bt_rdata as the buffer.
		 * NOTE(review): the RET_ERROR returned here is handed back as
		 * if it were a comparison result -- confirm how callers
		 * tell the two apart.
		 */
		if (__ovfl_get(t, stored,
		    &rec.size, &t->bt_rdata.data, &t->bt_rdata.size))
			return (RET_ERROR);
		rec.data = t->bt_rdata.data;
	} else {
		rec.data = stored;
		rec.size = stored_len;
	}

	return ((*t->bt_cmp)(k1, &rec));
}
/*
* __BT_DEFCMP -- Default comparison routine.
*
* Parameters:
* a: DBT #1
* b: DBT #2
*
* Returns:
* < 0 if a is < b
* = 0 if a is = b
* > 0 if a is > b
*/
int
__bt_defcmp(const DBT *a, const DBT *b)
{
	const uint8_t *p1, *p2;
	size_t len;

	/* Compare byte by byte over the length the two keys share. */
	len = a->size < b->size ? a->size : b->size;
	for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2)
		if (*p1 != *p2)
			return ((int)*p1 - (int)*p2);

	/*
	 * One key is a prefix of the other (or they are equal): the shorter
	 * key sorts first.  The sizes are compared directly rather than
	 * subtracted as ints, because a size_t difference need not fit in
	 * an int and the truncated result could carry the wrong sign.
	 */
	if (a->size == b->size)
		return (0);
	return (a->size < b->size ? -1 : 1);
}
/*
* __BT_DEFPFX -- Default prefix routine.
*
* Parameters:
* a: DBT #1
* b: DBT #2
*
* Returns:
* Number of bytes needed to distinguish b from a.
*/
size_t
__bt_defpfx(const DBT *a, const DBT *b)
{
	const uint8_t *lhs, *rhs;
	size_t shared, i;

	lhs = a->data;
	rhs = b->data;
	shared = MIN(a->size, b->size);

	/* The prefix ends with the first byte at which the keys differ. */
	for (i = 0; i < shared; i++)
		if (lhs[i] != rhs[i])
			return (i + 1);

	/* a->size must be <= b->size, or they wouldn't be in this order. */
	if (a->size < b->size)
		return (a->size + 1);
	return (a->size);
}

433
lib/libc/db/btree/btree.h Normal file
View file

@ -0,0 +1,433 @@
/* $NetBSD: btree.h,v 1.16 2008/08/26 21:18:38 joerg Exp $ */
/*-
* Copyright (c) 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)btree.h 8.11 (Berkeley) 8/17/94
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
/* Macros to set/clear/test flags. */
/*
 * p is a pointer to any structure with a `flags' member; f is a bit mask.
 * Every expansion is fully parenthesized so the macros can be used inside
 * a larger expression; F_SET and F_CLR yield the updated flags value.
 */
#define	F_SET(p, f)	((p)->flags |= (f))
#define	F_CLR(p, f)	((p)->flags &= ~(f))
#define	F_ISSET(p, f)	((p)->flags & (f))
#include <mpool.h>
/*
 * Lower bounds for tree sizing parameters.
 * NOTE(review): MINPSIZE is presumably in bytes -- confirm where the page
 * size is validated.
 */
#define DEFMINKEYPAGE (2) /* Minimum keys per page */
#define MINCACHE (5) /* Minimum cached pages */
#define MINPSIZE (512) /* Minimum page size */
/*
* Page 0 of a btree file contains a copy of the meta-data. This page is also
* used as an out-of-band page, i.e. page pointers that point to nowhere point
* to page 0. Page 1 is the root of the btree.
*/
/*
 * P_INVALID and P_META share the value 0 on purpose: page 0 holds the
 * metadata and is never a tree page, so 0 doubles as "points nowhere".
 */
#define P_INVALID 0 /* Invalid tree page number. */
#define P_META 0 /* Tree metadata page number. */
#define P_ROOT 1 /* Tree root page number. */
/*
* There are five page layouts in the btree: btree internal pages (BINTERNAL),
* btree leaf pages (BLEAF), recno internal pages (RINTERNAL), recno leaf pages
* (RLEAF) and overflow pages. All five page types have a page header (PAGE).
* This implementation requires that values within structures NOT be padded.
* (ANSI C permits random padding.) If your compiler pads randomly you'll have
* to do some work to get this package to run.
*/
/*
 * Header shared by all page types, followed by the index array linp[].
 * linp[] is declared with one element but is indexed past it (for example
 * linp[1] in the root fix-up code); the number of slots in use is derived
 * from `lower' (see NEXTINDEX).  BTDATAOFF is computed from the sizes of
 * the fields declared here, so the field list and that macro must stay in
 * step.
 */
typedef struct _page {
pgno_t pgno; /* this page's page number */
pgno_t prevpg; /* left sibling */
pgno_t nextpg; /* right sibling */
/* Page type bits (selected with the P_TYPE mask) plus P_PRESERVE, kept in flags. */
#define P_BINTERNAL 0x01 /* btree internal page */
#define P_BLEAF 0x02 /* leaf page */
#define P_OVERFLOW 0x04 /* overflow page */
#define P_RINTERNAL 0x08 /* recno internal page */
#define P_RLEAF 0x10 /* leaf page */
#define P_TYPE 0x1f /* type mask */
#define P_PRESERVE 0x20 /* never delete this chain of pages */
uint32_t flags;
indx_t lower; /* lower bound of free space on page */
indx_t upper; /* upper bound of free space on page */
indx_t linp[1]; /* indx_t-aligned VAR. LENGTH DATA */
} PAGE;
/* First and next index. */
/*
 * BTDATAOFF is the size of the PAGE header in front of linp[], spelled out
 * as the sum of the header field sizes.
 */
#define	BTDATAOFF							\
	(sizeof(pgno_t) + sizeof(pgno_t) + sizeof(pgno_t) +		\
	    sizeof(uint32_t) + sizeof(indx_t) + sizeof(indx_t))

/*
 * Number of index slots in use on page p, which is also the index of the
 * next free slot.  _NEXTINDEX yields a size_t; NEXTINDEX narrows it to
 * indx_t, with a range check when _DIAGNOSTIC is defined.
 */
#define	_NEXTINDEX(p)	(((p)->lower - BTDATAOFF) / sizeof(indx_t))
#ifdef _DIAGNOSTIC
static __inline indx_t
NEXTINDEX(const PAGE *p) {
	size_t x = _NEXTINDEX(p);
	_DBFIT(x, indx_t);
	return (indx_t)x;
}
#else
/* Parenthesized so the cast cannot bind to a neighbouring operator. */
#define	NEXTINDEX(p)	((indx_t)_NEXTINDEX(p))
#endif
/*
* For pages other than overflow pages, there is an array of offsets into the
* rest of the page immediately following the page header. Each offset is to
* an item which is unique to the type of page. The lower offset is just
* past the last filled-in index. The upper offset is the first item on the
* page. Offsets are from the beginning of the page.
*
* If an item is too big to store on a single page, a flag is set and the item
* is a { page, size } pair such that the page is the first page of an overflow
* chain with size bytes of item. Overflow pages are simply bytes without any
* external structure.
*
* The page number and size fields in the items are pgno_t-aligned so they can
* be manipulated without copying. (This presumes that 32 bit items can be
* manipulated on this system.)
*/
/*
 * BTLALIGN rounds n up to a multiple of sizeof(pgno_t); the mask form
 * requires that size to be a power of two.
 * NOVFLSIZE is the size of a { page number, length } overflow reference.
 */
#define BTLALIGN(n) (((n) + sizeof(pgno_t) - 1) & ~(sizeof(pgno_t) - 1))
#define NOVFLSIZE (sizeof(pgno_t) + sizeof(uint32_t))
/*
* For the btree internal pages, the item is a key. BINTERNALs are {key, pgno}
* pairs, such that the key compares less than or equal to all of the records
* on that page. For a tree without duplicate keys, an internal page with two
* consecutive keys, a and b, will have all records greater than or equal to a
* and less than b stored on the page associated with a. Duplicate keys are
* somewhat special and can cause duplicate internal and leaf page records and
* some minor modifications of the above rule.
*/
typedef struct _binternal {
	uint32_t ksize;		/* key size */
	pgno_t	pgno;		/* page number stored on */
#define	P_BIGDATA	0x01		/* overflow data */
#define	P_BIGKEY	0x02		/* overflow key */
	uint8_t	flags;
	char	bytes[1];	/* data */
} BINTERNAL;

/* Get the page's BINTERNAL structure at index indx. */
#define	GETBINTERNAL(pg, indx)						\
	((BINTERNAL *)(void *)((char *)(void *)(pg) + (pg)->linp[indx]))

/*
 * Get the number of bytes in the entry: the three fixed fields plus a key
 * of len bytes, rounded up by BTLALIGN.
 */
#define	_NBINTERNAL(len)						\
	BTLALIGN(sizeof(uint32_t) + sizeof(pgno_t) + sizeof(uint8_t) + (len))
#ifdef _DIAGNOSTIC
static __inline uint32_t
NBINTERNAL(uint32_t len) {
	size_t x = _NBINTERNAL(len);
	_DBFIT(x, uint32_t);
	return (uint32_t)x;
}
#else
#define	NBINTERNAL(len)	((uint32_t)_NBINTERNAL(len))
#endif

/*
 * Copy a BINTERNAL entry header to the page.
 *
 * p must be a modifiable char pointer; it is advanced past the three fixed
 * fields, so on return it points at the place for the key bytes, which
 * the caller copies separately.  Every argument is parenthesized so that
 * expression arguments (a conditional for size, a masked value for flags)
 * expand as the caller intended.
 */
#define	WR_BINTERNAL(p, size, pgno, flags) do {				\
	_DBFIT(size, uint32_t);						\
	*(uint32_t *)(void *)(p) = (uint32_t)(size);			\
	(p) += sizeof(uint32_t);					\
	*(pgno_t *)(void *)(p) = (pgno);				\
	(p) += sizeof(pgno_t);						\
	*(uint8_t *)(void *)(p) = (flags);				\
	(p) += sizeof(uint8_t);						\
} while (/*CONSTCOND*/0)
/*
* For the recno internal pages, the item is a page number with the number of
* keys found on that page and below.
*/
typedef struct _rinternal {
	recno_t	nrecs;		/* number of records */
	pgno_t	pgno;		/* page number stored below */
} RINTERNAL;

/* Get the page's RINTERNAL structure at index indx. */
#define	GETRINTERNAL(pg, indx)						\
	((RINTERNAL *)(void *)((char *)(void *)(pg) + (pg)->linp[indx]))

/* Get the number of bytes in the entry. */
#define	NRINTERNAL							\
	BTLALIGN(sizeof(recno_t) + sizeof(pgno_t))

/*
 * Copy a RINTERNAL entry to the page.
 *
 * p must be a modifiable char pointer; it is left pointing at the pgno
 * field (it is advanced past nrecs only).  The arguments are parenthesized
 * because callers pass expressions -- the root fix-up code passes a
 * conditional expression for nrecs.
 */
#define	WR_RINTERNAL(p, nrecs, pgno) do {				\
	*(recno_t *)(void *)(p) = (nrecs);				\
	(p) += sizeof(recno_t);						\
	*(pgno_t *)(void *)(p) = (pgno);				\
} while (/*CONSTCOND*/0)
/* For the btree leaf pages, the item is a key and data pair. */
typedef struct _bleaf {
	uint32_t ksize;		/* size of key */
	uint32_t dsize;		/* size of data */
	uint8_t	flags;		/* P_BIGDATA, P_BIGKEY */
	char	bytes[1];	/* data */
} BLEAF;

/* Get the page's BLEAF structure at index indx. */
#define	GETBLEAF(pg, indx)						\
	((BLEAF *)(void *)((char *)(void *)(pg) + (pg)->linp[indx]))

/* Get the number of bytes in the user's key/data pair. */
#define	_NBLEAFDBT(ksize, dsize)					\
	BTLALIGN(sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint8_t) +	\
	    (ksize) + (dsize))
#ifdef _DIAGNOSTIC
static __inline uint32_t
NBLEAFDBT(size_t k, size_t d) {
	size_t x = _NBLEAFDBT(k, d);
	_DBFIT(x, uint32_t);
	return (uint32_t)x;
}
#else
#define	NBLEAFDBT(p, q)	((uint32_t)_NBLEAFDBT(p, q))
#endif

/* Get the number of bytes in the entry. */
#define	NBLEAF(p)	NBLEAFDBT((p)->ksize, (p)->dsize)

/*
 * Copy a BLEAF entry to the page.
 *
 * p must be a modifiable char pointer; kdbt and ddbt point to the key and
 * data items (each with `data' and `size' members).  On return p points at
 * the first data byte: it is advanced past the header and the key, but not
 * past the data.
 *
 * The parameters are deliberately not named `key'/`data': a macro
 * parameter called `data' would also replace the member name in
 * `->data', so the macro would only expand correctly when the caller's
 * argument was spelled exactly `data'.
 */
#define	WR_BLEAF(p, kdbt, ddbt, flags) do {				\
	_DBFIT((kdbt)->size, uint32_t);					\
	*(uint32_t *)(void *)(p) = (uint32_t)(kdbt)->size;		\
	(p) += sizeof(uint32_t);					\
	_DBFIT((ddbt)->size, uint32_t);					\
	*(uint32_t *)(void *)(p) = (uint32_t)(ddbt)->size;		\
	(p) += sizeof(uint32_t);					\
	*(uint8_t *)(void *)(p) = (flags);				\
	(p) += sizeof(uint8_t);						\
	(void)memmove((p), (kdbt)->data, (kdbt)->size);			\
	(p) += (kdbt)->size;						\
	(void)memmove((p), (ddbt)->data, (ddbt)->size);			\
} while (/*CONSTCOND*/0)
/* For the recno leaf pages, the item is a data entry. */
typedef struct _rleaf {
	uint32_t dsize;		/* size of data */
	uint8_t	flags;		/* P_BIGDATA */
	char	bytes[1];
} RLEAF;

/* Get the page's RLEAF structure at index indx. */
#define	GETRLEAF(pg, indx)						\
	((RLEAF *)(void *)((char *)(void *)(pg) + (pg)->linp[indx]))

/* Get the number of bytes from the user's data. */
#define	_NRLEAFDBT(dsize)						\
	BTLALIGN(sizeof(uint32_t) + sizeof(uint8_t) + (dsize))
#ifdef _DIAGNOSTIC
static __inline uint32_t
NRLEAFDBT(size_t d) {
	size_t x = _NRLEAFDBT(d);
	_DBFIT(x, uint32_t);
	return (uint32_t)x;
}
#else
#define	NRLEAFDBT(d)	((uint32_t)_NRLEAFDBT(d))
#endif

/* Get the number of bytes in the entry. */
#define	NRLEAF(p)	NRLEAFDBT((p)->dsize)

/*
 * Copy a RLEAF entry to the page.
 *
 * p must be a modifiable char pointer; dbt points to the data item (with
 * `data' and `size' members).  On return p points at the first data byte.
 *
 * The parameter is deliberately not named `data': a macro parameter with
 * that name would also replace the member name in `->data', so the macro
 * would only expand correctly when the caller's argument was spelled
 * exactly `data'.
 */
#define	WR_RLEAF(p, dbt, flags) do {					\
	_DBFIT((dbt)->size, uint32_t);					\
	*(uint32_t *)(void *)(p) = (uint32_t)(dbt)->size;		\
	(p) += sizeof(uint32_t);					\
	*(uint8_t *)(void *)(p) = (flags);				\
	(p) += sizeof(uint8_t);						\
	(void)memmove((p), (dbt)->data, (dbt)->size);			\
} while (/*CONSTCOND*/0)
/*
* A record in the tree is either a pointer to a page and an index in the page
* or a page number and an index. These structures are used as a cursor, stack
* entry and search returns as well as to pass records to other routines.
*
* One comment about searches. Internal page searches must find the largest
* record less than key in the tree so that descents work. Leaf page searches
* must find the smallest record greater than key so that the returned index
* is the record's correct position for insertion.
*/
/*
 * EPGNO names an entry by page number, so it stays meaningful without
 * holding a page in memory; it is the element type of the parent stack and
 * of the cursor's saved position.
 */
typedef struct _epgno {
pgno_t pgno; /* the page number */
indx_t index; /* the index on the page */
} EPGNO;
/*
 * EPG names an entry through a pointer to the page itself, which is
 * expected to be pinned while the EPG is in use.
 */
typedef struct _epg {
PAGE *page; /* the (pinned) page */
indx_t index; /* the index on the page */
} EPG;
/*
* About cursors. The cursor (and the page that contained the key/data pair
* that it referenced) can be deleted, which makes things a bit tricky. If
* there are no duplicates of the cursor key in the tree (i.e. B_NODUPS is set
* or there simply aren't any duplicates of the key) we copy the key that it
* referenced when it's deleted, and reacquire a new cursor key if the cursor
* is used again. If there are duplicate keys, we move to the next/previous
* key, and set a flag so that we know what happened. NOTE: if duplicate (to
* the cursor) keys are added to the tree during this process, it is undefined
* if they will be returned or not in a cursor scan.
*
* The flags determine the possible states of the cursor:
*
* CURS_INIT The cursor references *something*.
* CURS_ACQUIRE The cursor was deleted, and a key has been saved so that
* we can reacquire the right position in the tree.
* CURS_AFTER, CURS_BEFORE
* The cursor was deleted, and now references a key/data pair
* that has not yet been returned, either before or after the
* deleted key/data pair.
* XXX
* This structure is broken out so that we can eventually offer multiple
* cursors as part of the DB interface.
*/
/*
 * Cursor state.  The B: / R: / RB: tags in the member comments appear to
 * mark whether a member is used by the btree code, the recno code, or
 * both -- confirm against the users.  The CURS_* values are bits in
 * `flags'.
 */
typedef struct _cursor {
EPGNO pg; /* B: Saved tree reference. */
DBT key; /* B: Saved key, or key.data == NULL. */
recno_t rcursor; /* R: recno cursor (1-based) */
#define CURS_ACQUIRE 0x01 /* B: Cursor needs to be reacquired. */
#define CURS_AFTER 0x02 /* B: Unreturned cursor after key. */
#define CURS_BEFORE 0x04 /* B: Unreturned cursor before key. */
#define CURS_INIT 0x08 /* RB: Cursor initialized. */
uint8_t flags;
} CURSOR;
/*
* The metadata of the tree. The nrecs field is used only by the RECNO code.
* This is because the btree doesn't really need it and it requires that every
* put or delete call modify the metadata.
*/
/*
 * Layout of the metadata kept on page P_META.  SAVEMETA selects the
 * in-memory BTREE flag bits that are saved in the `flags' member here.
 */
typedef struct _btmeta {
uint32_t magic; /* magic number */
uint32_t version; /* version */
uint32_t psize; /* page size */
uint32_t free; /* page number of first free page */
uint32_t nrecs; /* R: number of records */
#define SAVEMETA (B_NODUPS | R_RECNO)
uint32_t flags; /* bt_flags & SAVEMETA */
} BTMETA;
/* The in-memory btree/recno data structure. */
/*
 * Per-tree state shared by the btree and recno access methods.  Member
 * comments tagged B: or R: belong to the btree or the recno code
 * respectively.
 */
typedef struct _btree {
MPOOL *bt_mp; /* memory pool cookie */
DB *bt_dbp; /* pointer to enclosing DB */
EPG bt_cur; /* current (pinned) page */
PAGE *bt_pinned; /* page pinned across calls */
CURSOR bt_cursor; /* cursor */
/*
 * Parent-page stack helpers operating on bt_stack/bt_sp below.
 * BT_PUSH stores (page number, index) and advances bt_sp; BT_POP yields a
 * pointer to the popped entry, or NULL when the stack is empty; BT_CLR
 * empties the stack.
 * NOTE(review): BT_PUSH does not check bt_sp against the end of bt_stack,
 * does not parenthesize its arguments, and expands to a bare brace block
 * rather than do { } while (0), so "BT_PUSH(...);" directly before an
 * else will not parse.
 */
#define BT_PUSH(t, p, i) { \
t->bt_sp->pgno = p; \
t->bt_sp->index = i; \
++t->bt_sp; \
}
#define BT_POP(t) (t->bt_sp == t->bt_stack ? NULL : --t->bt_sp)
#define BT_CLR(t) (t->bt_sp = t->bt_stack)
EPGNO bt_stack[50]; /* stack of parent pages */
EPGNO *bt_sp; /* current stack pointer */
DBT bt_rkey; /* returned key */
DBT bt_rdata; /* returned data */
int bt_fd; /* tree file descriptor */
pgno_t bt_free; /* next free page */
uint32_t bt_psize; /* page size */
indx_t bt_ovflsize; /* cut-off for key/data overflow */
int bt_lorder; /* byte order */
/* sorted order */
enum { NOT, BACK, FORWARD } bt_order;
EPGNO bt_last; /* last insert */
/* B: key comparison function */
int (*bt_cmp)(const DBT *, const DBT *);
/* B: prefix comparison function */
size_t (*bt_pfx)(const DBT *, const DBT *);
/* R: recno input function */
int (*bt_irec)(struct _btree *, recno_t);
FILE *bt_rfp; /* R: record FILE pointer */
int bt_rfd; /* R: record file descriptor */
caddr_t bt_cmap; /* R: current point in mapped space */
caddr_t bt_smap; /* R: start of mapped space */
caddr_t bt_emap; /* R: end of mapped space */
size_t bt_msize; /* R: size of mapped region. */
recno_t bt_nrecs; /* R: number of records */
size_t bt_reclen; /* R: fixed record length */
uint8_t bt_bval; /* R: delimiting byte/pad character */
/*
* NB:
* B_NODUPS and R_RECNO are stored on disk, and may not be changed.
*
* The values below are bits in `flags'.  R_RECNO (0x00080) and R_CLOSEFP
* (0x00040) are listed out of numeric order; no two values overlap.
*/
#define B_INMEM 0x00001 /* in-memory tree */
#define B_METADIRTY 0x00002 /* need to write metadata */
#define B_MODIFIED 0x00004 /* tree modified */
#define B_NEEDSWAP 0x00008 /* if byte order requires swapping */
#define B_RDONLY 0x00010 /* read-only tree */
#define B_NODUPS 0x00020 /* no duplicate keys permitted */
#define R_RECNO 0x00080 /* record oriented tree */
#define R_CLOSEFP 0x00040 /* opened a file pointer */
#define R_EOF 0x00100 /* end of input file reached. */
#define R_FIXLEN 0x00200 /* fixed length records */
#define R_MEMMAPPED 0x00400 /* memory mapped file. */
#define R_INMEM 0x00800 /* in-memory file */
#define R_MODIFIED 0x01000 /* modified file */
#define R_RDONLY 0x02000 /* read-only file */
#define B_DB_LOCK 0x04000 /* DB_LOCK specified. */
#define B_DB_SHMEM 0x08000 /* DB_SHMEM specified. */
#define B_DB_TXN 0x10000 /* DB_TXN specified. */
uint32_t flags;
} BTREE;
#include "extern.h"

View file

@ -0,0 +1,72 @@
/* $NetBSD: extern.h,v 1.12 2008/09/26 11:41:06 tsutsui Exp $ */
/*-
* Copyright (c) 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)extern.h 8.10 (Berkeley) 7/20/94
*/
#ifndef _BTREE_EXTERN_H_
#define _BTREE_EXTERN_H_
/*
 * Prototypes for the btree routines shared between the btree source files.
 * NOTE(review): the double-underscore names suggest these are private to
 * the library rather than public API -- confirm before using them
 * elsewhere.
 */
int __bt_close(DB *);
int __bt_cmp(BTREE *, const DBT *, EPG *);
int __bt_crsrdel(BTREE *, EPGNO *);
int __bt_defcmp(const DBT *, const DBT *);
size_t __bt_defpfx(const DBT *, const DBT *);
int __bt_delete(const DB *, const DBT *, unsigned int);
int __bt_dleaf(BTREE *, const DBT *, PAGE *, unsigned int);
int __bt_fd(const DB *);
int __bt_free(BTREE *, PAGE *);
int __bt_get(const DB *, const DBT *, DBT *, unsigned int);
PAGE *__bt_new(BTREE *, pgno_t *);
void __bt_pgin(void *, pgno_t, void *);
void __bt_pgout(void *, pgno_t, void *);
int __bt_push(BTREE *, pgno_t, int);
int __bt_put(const DB *dbp, DBT *, const DBT *, unsigned int);
int __bt_ret(BTREE *, EPG *, DBT *, DBT *, DBT *, DBT *, int);
EPG *__bt_search(BTREE *, const DBT *, int *);
int __bt_seq(const DB *, DBT *, DBT *, unsigned int);
void __bt_setcur(BTREE *, pgno_t, unsigned int);
int __bt_split(BTREE *, PAGE *,
const DBT *, const DBT *, int, size_t, uint32_t);
int __bt_sync(const DB *, unsigned int);
/* Overflow-page routines. */
int __ovfl_delete(BTREE *, void *);
int __ovfl_get(BTREE *, void *, size_t *, void **, size_t *);
int __ovfl_put(BTREE *, const DBT *, pgno_t *);
#ifdef DEBUG
void __bt_dmpage(PAGE *);
void __bt_dnpage(DB *, pgno_t);
void __bt_dpage(PAGE *);
void __bt_dump(DB *);
#endif
#ifdef STATISTICS
void __bt_stat(DB *);
#endif
#endif /* _BTREE_EXTERN_H_ */

105
lib/libc/db/changelog Normal file
View file

@ -0,0 +1,105 @@
# $NetBSD: changelog,v 1.2 1996/05/03 21:20:56 cgd Exp $
1.84 -> 1.85
recno: #ifdef out use of mmap, it's not portable enough.
1.83 -> 1.84 Thu Aug 18 15:46:07 EDT 1994
recno: Rework fixed-length records so that closing and reopening
the file now works. Pad short records on input. Never do
signed comparison in recno input reading functions.
1.82 -> 1.83 Tue Jul 26 15:33:44 EDT 1994
btree: Rework cursor deletion code yet again; bugs with
deleting empty pages that only contained the cursor
record.
1.81 -> 1.82 Sat Jul 16 11:01:50 EDT 1994
btree: Fix bugs introduced by new cursor/deletion code.
Replace return kbuf/dbuf with real DBT's.
1.80 -> 1.81
btree: Fix bugs introduced by new cursor/deletion code.
all: Add #defines for Purify.
1.79 -> 1.80 Wed Jul 13 22:41:54 EDT 1994
btree: Change deletion to coalesce empty pages. This is a major
change, cursors and duplicate pages all had to be reworked.
Return to a fixed stack.
recno: Affected by cursor changes. New cursor structures should
permit multiple cursors in the future.
1.78 -> 1.79 Mon Jun 20 17:36:47 EDT 1994
all: Minor cleanups of 1.78 for porting reasons; only
major change was inlining check of NULL pointer
so that __fix_realloc goes away.
1.77 -> 1.78 Thu Jun 16 19:06:43 EDT 1994
all: Move "standard" size typedef's into db.h.
1.76 -> 1.77 Thu Jun 16 16:48:38 EDT 1994
hash: Delete __init_ routine, has special meaning to OSF 2.0.
1.74 -> 1.76
all: Finish up the port to the Alpha.
1.73 -> 1.74
recno: Don't put the record if rec_search fails, in rec_rdelete.
Create fixed-length intermediate records past "end" of DB
correctly.
Realloc bug when reading in fixed records.
all: First cut at port to Alpha (64-bit architecture) using
4.4BSD basic integral types typedef's.
Cast allocation pointers to shut up old compilers.
Rework PORT directory into OS/machine directories.
1.72 -> 1.73
btree: If enough duplicate records were inserted and then deleted
that internal pages had references to empty pages of the
duplicate keys, the search function ended up on the wrong
page.
1.7 -> 1.72 12 Oct 1993
hash: Support NET/2 hash formats.
1.7 -> 1.71 16 Sep 1993
btree/recno:
Fix bug in internal search routines that caused
return of invalid pointers.
1.6 -> 1.7 07 Sep 1993
hash: Fixed big key overflow bugs.
test: Portability hacks, rewrite test script, Makefile.
btree/recno:
Stop copying non-overflow key/data pairs.
PORT: Break PORT directory up into per architecture/OS
subdirectories.
1.5 -> 1.6 06 Jun 1993
hash: In PAIRFITS, the first comparison should look at (P)[2].
The hash_realloc function was walking off the end of memory.
The overflow page number was wrong when bumping splitpoint.
1.4 -> 1.5 23 May 1993
hash: Set hash default fill factor dynamically.
recno: Fixed bug in sorted page splits.
Add page size parameter support.
Allow recno to specify the name of the underlying btree;
used for vi recovery.
btree/recno:
Support 64K pages.
btree/hash/recno:
Provide access to an underlying file descriptor.
Change sync routines to take a flag argument, recno
uses this to sync out the underlying btree.
1.3 -> 1.4 10 May 1993
recno: Delete the R_CURSORLOG flag from the recno interface.
Zero-length record fix for non-mmap reads.
Try and make SIZE_T_MAX test in open portable.
1.2 -> 1.3 01 May 1993
btree: Ignore user byte-order setting when reading already
existing database. Fixes to byte-order conversions.
1.1 -> 1.2 15 Apr 1993
No bug fixes, only compatibility hacks.

View file

@ -0,0 +1,6 @@
# $NetBSD: Makefile.inc,v 1.4 1995/02/27 13:21:22 cgd Exp $
# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93
.PATH: ${.CURDIR}/db/db
SRCS+= db.c

114
lib/libc/db/db/db.c Normal file
View file

@ -0,0 +1,114 @@
/* $NetBSD: db.c,v 1.16 2008/09/11 12:58:00 joerg Exp $ */
/*-
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: db.c,v 1.16 2008/09/11 12:58:00 joerg Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <db.h>
static int __dberr(void);
#ifdef __weak_alias
__weak_alias(dbopen,_dbopen)
#endif
/*
 * dbopen --
 *	Open a database using the access method selected by `type'.
 *
 * `flags' may combine open(2) flags from USE_OPEN_FLAGS with the
 * db-specific DB_FLAGS; the two groups are separated and passed on to the
 * access method's own open routine.  Any other flag bit, or a `type'
 * other than DB_BTREE, DB_HASH or DB_RECNO, fails with EINVAL.
 *
 * Returns whatever the access method's open routine returns, or NULL
 * with errno set to EINVAL for a rejected argument.
 */
DB *
dbopen(const char *fname, int flags, mode_t mode, DBTYPE type,
    const void *openinfo)
{
#ifndef O_EXLOCK
#define	O_EXLOCK	0
#endif
#ifndef O_SHLOCK
#define	O_SHLOCK	0
#endif
#define	DB_FLAGS	(DB_LOCK | DB_SHMEM | DB_TXN)
#define	USE_OPEN_FLAGS							\
	(O_CREAT | O_EXCL | O_EXLOCK | O_NONBLOCK | O_RDONLY |		\
	 O_RDWR | O_SHLOCK | O_TRUNC)
	int open_bits, db_bits;

	/* Refuse any bit that is neither an open(2) flag nor a DB flag. */
	if ((flags & ~(USE_OPEN_FLAGS | DB_FLAGS)) != 0) {
		errno = EINVAL;
		return (NULL);
	}

	open_bits = flags & USE_OPEN_FLAGS;
	db_bits = (int)(flags & DB_FLAGS);

	switch (type) {
	case DB_BTREE:
		return (__bt_open(fname, open_bits, mode, openinfo, db_bits));
	case DB_HASH:
		return (__hash_open(fname, open_bits, mode, openinfo, db_bits));
	case DB_RECNO:
		return (__rec_open(fname, open_bits, mode, openinfo, db_bits));
	default:
		break;
	}

	/* Unknown access method. */
	errno = EINVAL;
	return (NULL);
}
/*
 * __dberr --
 *	Method stub that does nothing and reports RET_ERROR; __dbpanic()
 *	installs it in place of a handle's access methods.
 */
static int
__dberr(void)
{
	return RET_ERROR;
}
/*
 * __dbpanic --
 *	Disable a database handle: afterwards every method except close
 *	reports RET_ERROR.
 *
 * Parameters:
 *	dbp:	pointer to the DB structure.
 */
void
__dbpanic(DB *dbp)
{
	/*
	 * Point each method other than close at __dberr(); close is left
	 * alone so the handle can still be closed.
	 */
	dbp->sync = (int (*)(const struct __db *, u_int))__dberr;
	dbp->seq = (int (*)(const struct __db *, DBT *, DBT *, u_int))__dberr;
	dbp->put = (int (*)(const struct __db *, DBT *, const DBT *, u_int))__dberr;
	dbp->get = (int (*)(const struct __db *, const DBT*, DBT *, u_int))__dberr;
	dbp->fd = (int (*)(const struct __db *))__dberr;
	dbp->del = (int (*)(const struct __db *, const DBT*, u_int))__dberr;
}

31
lib/libc/db/db2netbsd Executable file
View file

@ -0,0 +1,31 @@
#!/bin/sh
# $NetBSD: db2netbsd,v 1.2 1999/02/16 18:01:37 kleink Exp $
#
# Transform a Berkeley DB distribution into something which can be
# 'cvs import'ed into the NetBSD source repository.
#
# Run it in the untarred Berkeley DB distribution directory (e.g. the
# "db.1.85" directory created by tar xvf); it sets up the destination
# tree in place.  The version and release tag are derived from the name
# of the current directory.

# "$PWD" is quoted so that a parent path containing whitespace does not
# split into several basename arguments.
version=`basename "$PWD" | sed -e 's/db\.//'`
releasetag=`basename "$PWD" | sed -e 's/\./-/g'`
CLEANFILES="PORT docs hash/search.h test/btree.tests test/hash.tests"

# clean up pieces that we never import
# ($CLEANFILES is deliberately unquoted: it is a whitespace-separated list.)
/bin/rm -rf $CLEANFILES
# Remove symlinks and tags files; -exec passes names to rm intact, which a
# find | xargs pipeline does not for names containing blanks or quotes.
find . \( -type l -o -name tags \) -exec /bin/rm -f {} +

# The include files are already in place

# Put the regression tests in the right place
mkdir -p regress/lib/libc
mv test regress/lib/libc/db

# Put the libc pieces in the right place.
mkdir -p lib/libc/db
mv Makefile.inc README btree changelog db hash man mpool recno lib/libc/db

echo "import with:"
echo "cvs import -m \"Import of Berkeley DB version $version\" \
src CSRG $releasetag"

View file

@ -0,0 +1,7 @@
# $NetBSD: Makefile.inc,v 1.9 2005/09/13 01:44:09 christos Exp $
# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93
.PATH: ${.CURDIR}/db/hash
SRCS+= hash.c hash_bigkey.c hash_buf.c hash_func.c hash_log2.c \
hash_page.c ndbmdatum.c ndbm.c

69
lib/libc/db/hash/README Normal file
View file

@ -0,0 +1,69 @@
# $NetBSD: README,v 1.5 1999/02/16 17:59:18 kleink Exp $
# @(#)README 8.1 (Berkeley) 6/4/93
This package implements a superset of the hsearch and dbm/ndbm libraries.
Test Programs:
All test programs which need key/data pairs expect them entered
with key and data on separate lines
tcreat3.c
Takes
bucketsize (bsize),
fill factor (ffactor), and
initial number of elements (nelem).
Creates a hash table named hashtest containing the
keys/data pairs entered from standard in.
thash4.c
Takes
bucketsize (bsize),
fill factor (ffactor),
initial number of elements (nelem),
bytes of cache (ncached), and
file from which to read data (fname).
Creates a table from the key/data pairs on standard in and
then does a read of each key/data in fname
tdel.c
Takes
bucketsize (bsize),
fill factor (ffactor), and
file from which to read data (fname).
Reads each key/data pair from fname and deletes the
key from the hash table hashtest
tseq.c
Reads the key/data pairs in the file hashtest and writes them
to standard out.
tread2.c
Takes
bytes of cache (ncached).
Reads key/data pairs from standard in and looks them up
in the file hashtest.
tverify.c
Reads key/data pairs from standard in, looks them up
in the file hashtest, and verifies that the data is
correct.
NOTES:
The man page ../man/db.3 explains the interface to the hashing system.
The file hash.ps is a postscript copy of a paper explaining
the history, implementation, and performance of the hash package.
"bugs" or idiosyncrasies
If you have a lot of overflows, it is possible to run out of overflow
pages. Currently, this will cause a message to be printed on stderr.
Eventually, this will be indicated by a return error code.
If you are using the ndbm interface and exit without flushing or closing the
file, you may lose updates since the package buffers all writes. Also,
the db interface only creates a single database file. To avoid overwriting
the user's original file, the suffix ".db" is appended to the file name
passed to dbm_open. Additionally, if your code "knows" about the historic
.dir and .pag files, it will break.
There is a fundamental difference between this package and the old hsearch.
Hsearch requires the user to maintain the keys and data in the application's
allocated memory while hash takes care of all storage management. The down
side is that the byte strings passed in the ENTRY structure must be null
terminated (both the keys and the data).

63
lib/libc/db/hash/extern.h Normal file
View file

@ -0,0 +1,63 @@
/* $NetBSD: extern.h,v 1.9 2008/08/26 21:18:38 joerg Exp $ */
/*-
* Copyright (c) 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)extern.h 8.4 (Berkeley) 6/16/94
*/
/*
 * Prototypes for the double-underscore hash routines.  HTAB, BUFHEAD,
 * DBT and SPLIT_RETURN must be declared before this header is included;
 * it includes nothing itself and has no include guard.
 */
BUFHEAD *__add_ovflpage(HTAB *, BUFHEAD *);
int __addel(HTAB *, BUFHEAD *, const DBT *, const DBT *);
int __big_delete(HTAB *, BUFHEAD *);
int __big_insert(HTAB *, BUFHEAD *, const DBT *, const DBT *);
int __big_keydata(HTAB *, BUFHEAD *, DBT *, DBT *, int);
int __big_return(HTAB *, BUFHEAD *, int, DBT *, int);
int __big_split(HTAB *, BUFHEAD *, BUFHEAD *, BUFHEAD *,
int, uint32_t, SPLIT_RETURN *);
int __buf_free(HTAB *, int, int);
void __buf_init(HTAB *, u_int);
uint32_t __call_hash(HTAB *, char *, int);
int __delpair(HTAB *, BUFHEAD *, int);
int __expand_table(HTAB *);
int __find_bigpair(HTAB *, BUFHEAD *, int, char *, int);
uint16_t __find_last_page(HTAB *, BUFHEAD **);
void __free_ovflpage(HTAB *, BUFHEAD *);
BUFHEAD *__get_buf(HTAB *, uint32_t, BUFHEAD *, int);
int __get_page(HTAB *, char *, uint32_t, int, int, int);
int __ibitmap(HTAB *, int, int, int);
uint32_t __log2(uint32_t);
int __put_page(HTAB *, char *, uint32_t, int, int);
void __reclaim_buf(HTAB *, BUFHEAD *);
int __split_page(HTAB *, uint32_t, uint32_t);
/* Default hash routine. */
extern uint32_t (*__default_hash)(const void *, size_t);
/* Event counters, declared only when HASH_STATISTICS is defined. */
#ifdef HASH_STATISTICS
extern int hash_accesses, hash_collisions, hash_expansions, hash_overflows;
#endif

999
lib/libc/db/hash/hash.c Normal file
View file

@ -0,0 +1,999 @@
/* $NetBSD: hash.c,v 1.31 2009/02/12 06:35:54 lukem Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: hash.c,v 1.31 2009/02/12 06:35:54 lukem Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/param.h>
#include <sys/stat.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <db.h>
#include "hash.h"
#include "page.h"
#include "extern.h"
#ifndef LITTLE_ENDIAN
# define LITTLE_ENDIAN 1234
#endif
#ifndef BIG_ENDIAN
# define BIG_ENDIAN 4321
#endif
#ifndef BYTE_ORDER
#define BYTE_ORDER LITTLE_ENDIAN
#endif
#ifndef _DIAGASSERT
#define _DIAGASSERT
#endif
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
static int alloc_segs(HTAB *, int);
static int flush_meta(HTAB *);
static int hash_access(HTAB *, ACTION, DBT *, DBT *);
static int hash_close(DB *);
static int hash_delete(const DB *, const DBT *, unsigned int);
static int hash_fd(const DB *);
static int hash_get(const DB *, const DBT *, DBT *, unsigned int);
static int hash_put(const DB *, DBT *, const DBT *, unsigned int);
static void *hash_realloc(SEGMENT **, size_t, size_t);
static int hash_seq(const DB *, DBT *, DBT *, unsigned int);
static int hash_sync(const DB *, unsigned int);
static int hdestroy(HTAB *);
static HTAB *init_hash(HTAB *, const char *, const HASHINFO *);
static int init_htab(HTAB *, size_t);
#if BYTE_ORDER == LITTLE_ENDIAN
static void swap_header(HTAB *);
static void swap_header_copy(HASHHDR *, HASHHDR *);
#endif
/* Fast arithmetic, relying on powers of 2. */
#define MOD(x, y) ((x) & ((y) - 1))
#define RETURN_ERROR(ERR, LOC) { save_errno = ERR; goto LOC; }
/* Return values */
#define SUCCESS (0)
#define ERROR (-1)
#define ABNORMAL (1)
#ifdef HASH_STATISTICS
int hash_accesses, hash_collisions, hash_expansions, hash_overflows;
#endif
/************************** INTERFACE ROUTINES ***************************/
/* OPEN/CLOSE */
/* ARGSUSED */
/*
 * __hash_open --
 * Open a hash table and return a DB handle for it.
 *
 * A new table is initialized when `file' is NULL, O_TRUNC is given, the
 * file does not exist, or the opened file is empty; otherwise the header
 * is read from the file and validated.
 *
 * Parameters:
 * file: path of the backing file, or NULL (no descriptor is opened).
 * flags: open(2) flags; O_WRONLY access is rejected with EINVAL.
 * mode: creation mode handed to open(2).
 * info: optional HASHINFO overrides; may be NULL.
 * dflags: not referenced in this function.
 *
 * Returns:
 * A malloc'ed DB whose methods are the hash_* routines of this file, or
 * NULL on failure.
 */
DB *
__hash_open(const char *file, int flags, mode_t mode, const HASHINFO *info,
int dflags)
{
HTAB *hashp;
struct stat statbuf;
DB *dbp;
int bpages, new_table, nsegs, save_errno;
ssize_t hdrsize;
if ((flags & O_ACCMODE) == O_WRONLY) {
errno = EINVAL;
return (NULL);
}
if (!(hashp = calloc(1, sizeof(HTAB))))
return (NULL);
/* -1 marks "no descriptor"; hash_fd() and hdestroy() test for it. */
hashp->fp = -1;
/*
* Even if user wants write only, we need to be able to read
* the actual file, so we need to open it read/write. But, the
* field in the hashp structure needs to be accurate so that
* we can check accesses.
*/
hashp->flags = flags;
/* Decide whether a fresh table has to be built. */
new_table = 0;
if (!file || (flags & O_TRUNC) ||
(stat(file, &statbuf) && (errno == ENOENT))) {
if (errno == ENOENT)
errno = 0; /* Just in case someone looks at errno */
new_table = 1;
}
if (file) {
if ((hashp->fp = open(file, flags, mode)) == -1)
RETURN_ERROR(errno, error0);
if (fcntl(hashp->fp, F_SETFD, FD_CLOEXEC) == -1)
RETURN_ERROR(errno, error1);
if (fstat(hashp->fp, &statbuf) == -1)
RETURN_ERROR(errno, error1);
/* An existing but empty file is treated as a new table too. */
new_table |= statbuf.st_size == 0;
}
if (new_table) {
/*
 * NOTE(review): init_hash() returns NULL on failure, so hashp is NULL
 * when error1 is reached from here and the descriptor opened above is
 * not closed by this function -- confirm whether the callee has already
 * released the table on each of its failure paths.
 */
if (!(hashp = init_hash(hashp, file, info)))
RETURN_ERROR(errno, error1);
} else {
/* Table already exists */
if (info && info->hash)
hashp->hash = info->hash;
else
hashp->hash = __default_hash;
/* The header is read (and byte-swapped) before hdrsize is checked. */
hdrsize = read(hashp->fp, &hashp->hdr, sizeof(HASHHDR));
#if BYTE_ORDER == LITTLE_ENDIAN
swap_header(hashp);
#endif
if (hdrsize == -1)
RETURN_ERROR(errno, error1);
if (hdrsize != sizeof(HASHHDR))
RETURN_ERROR(EFTYPE, error1);
/* Verify file type, versions and hash function */
if (hashp->MAGIC != HASHMAGIC)
RETURN_ERROR(EFTYPE, error1);
#define OLDHASHVERSION 1
if (hashp->VERSION != HASHVERSION &&
hashp->VERSION != OLDHASHVERSION)
RETURN_ERROR(EFTYPE, error1);
/* The hash of CHARKEY stored in the header must match this hash function. */
if (hashp->hash(CHARKEY, sizeof(CHARKEY)) !=
(uint32_t)hashp->H_CHARKEY)
RETURN_ERROR(EFTYPE, error1);
/*
* Figure out how many segments we need. Max_Bucket is the
* maximum bucket number, so the number of buckets is
* max_bucket + 1.
*/
nsegs = (hashp->MAX_BUCKET + 1 + hashp->SGSIZE - 1) /
hashp->SGSIZE;
hashp->nsegs = 0;
if (alloc_segs(hashp, nsegs))
/*
* If alloc_segs fails, table will have been destroyed
* and errno will have been set.
*/
return (NULL);
/* Read in bitmaps */
/*
 * NOTE(review): only the bitmap page count is computed and the mapp
 * pointers cleared here; no bitmap page is read in this function.
 * bpages comes from header fields and is not range-checked before the
 * memset -- confirm it cannot exceed the size of mapp[].
 */
bpages = (hashp->SPARES[hashp->OVFL_POINT] +
(unsigned int)(hashp->BSIZE << BYTE_SHIFT) - 1) >>
(hashp->BSHIFT + BYTE_SHIFT);
hashp->nmaps = bpages;
(void)memset(&hashp->mapp[0], 0, bpages * sizeof(uint32_t *));
}
/* Initialize Buffer Manager */
if (info && info->cachesize)
__buf_init(hashp, info->cachesize);
else
__buf_init(hashp, DEF_BUFSIZE);
hashp->new_file = new_table;
/* save_file gates the write-back done in hash_sync() and flush_meta(). */
hashp->save_file = file && (hashp->flags & O_RDWR);
/* cbucket < 0 makes hash_seq() start from the first bucket. */
hashp->cbucket = -1;
if (!(dbp = malloc(sizeof(DB)))) {
/* hdestroy() may change errno; keep malloc's value for the caller. */
save_errno = errno;
hdestroy(hashp);
errno = save_errno;
return (NULL);
}
dbp->internal = hashp;
dbp->close = hash_close;
dbp->del = hash_delete;
dbp->fd = hash_fd;
dbp->get = hash_get;
dbp->put = hash_put;
dbp->seq = hash_seq;
dbp->sync = hash_sync;
dbp->type = DB_HASH;
#ifdef DEBUG
(void)fprintf(stderr,
"%s\n%s%p\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n",
"init_htab:",
"TABLE POINTER ", hashp,
"BUCKET SIZE ", hashp->BSIZE,
"BUCKET SHIFT ", hashp->BSHIFT,
"DIRECTORY SIZE ", hashp->DSIZE,
"SEGMENT SIZE ", hashp->SGSIZE,
"SEGMENT SHIFT ", hashp->SSHIFT,
"FILL FACTOR ", hashp->FFACTOR,
"MAX BUCKET ", hashp->MAX_BUCKET,
"OVFL POINT ", hashp->OVFL_POINT,
"LAST FREED ", hashp->LAST_FREED,
"HIGH MASK ", hashp->HIGH_MASK,
"LOW MASK ", hashp->LOW_MASK,
"NSEGS ", hashp->nsegs,
"NKEYS ", hashp->NKEYS);
#endif
#ifdef HASH_STATISTICS
hash_overflows = hash_accesses = hash_collisions = hash_expansions = 0;
#endif
return (dbp);
/* error1: close the descriptor (if hashp is still set), then fall into error0. */
error1:
if (hashp != NULL)
(void)close(hashp->fp);
/* error0: release the table and report the errno saved by RETURN_ERROR. */
error0:
free(hashp);
errno = save_errno;
return (NULL);
}
/*
 * hash_close --
 *	Tear down the table behind `dbp' via hdestroy() and free the handle.
 *
 * Returns ERROR for a NULL handle, otherwise hdestroy()'s result.
 */
static int
hash_close(DB *dbp)
{
	HTAB *table;
	int status;

	if (dbp == NULL)
		return (ERROR);

	table = dbp->internal;
	status = hdestroy(table);
	free(dbp);
	return (status);
}
static int
hash_fd(const DB *dbp)
{
HTAB *hashp;
if (!dbp)
return (ERROR);
hashp = dbp->internal;
if (hashp->fp == -1) {
errno = ENOENT;
return (-1);
}
return (hashp->fp);
}
/************************** LOCAL CREATION ROUTINES **********************/
/*
 * init_hash --
 *	Set up the header of a brand-new table: install the compiled-in
 *	defaults, adapt the bucket size to the backing file, apply any
 *	caller overrides from `info', then build the table with init_htab().
 *
 * Parameters:
 *	hashp:	zeroed table being initialized.
 *	file:	backing file name, or NULL.
 *	info:	optional overrides (bsize, ffactor, hash, nelem, lorder);
 *		a zero/NULL member keeps the default.
 *
 * Returns:
 *	hashp on success, NULL on failure.  errno is EINVAL for a bucket
 *	size above MAX_BSIZE or a byte order that is neither BIG_ENDIAN nor
 *	LITTLE_ENDIAN; otherwise it is whatever stat() or init_htab() set.
 */
static HTAB *
init_hash(HTAB *hashp, const char *file, const HASHINFO *info)
{
	struct stat statbuf;
	int nelem;

	nelem = 1;
	hashp->NKEYS = 0;
	hashp->LORDER = BYTE_ORDER;
	hashp->BSIZE = DEF_BUCKET_SIZE;
	hashp->BSHIFT = DEF_BUCKET_SHIFT;
	hashp->SGSIZE = DEF_SEGSIZE;
	hashp->SSHIFT = DEF_SEGSIZE_SHIFT;
	hashp->DSIZE = DEF_DIRSIZE;
	hashp->FFACTOR = DEF_FFACTOR;
	hashp->hash = __default_hash;
	memset(hashp->SPARES, 0, sizeof(hashp->SPARES));
	memset(hashp->BITMAPS, 0, sizeof (hashp->BITMAPS));

	/* Fix bucket size to be optimal for file system */
	if (file != NULL) {
		if (stat(file, &statbuf))
			return (NULL);
#ifndef __minix
		hashp->BSIZE = MIN(statbuf.st_blksize, MAX_BSIZE);
#else
		hashp->BSIZE = MIN(4096, MAX_BSIZE);
#endif
		hashp->BSHIFT = __log2((uint32_t)hashp->BSIZE);
	}

	if (info) {
		if (info->bsize) {
			/*
			 * Reject an oversized request before rounding: a
			 * value above MAX_BSIZE can only round up to
			 * something still above it, and for very large
			 * values the shift below would be out of range
			 * for an int.
			 */
			if (info->bsize > MAX_BSIZE) {
				errno = EINVAL;
				return (NULL);
			}
			/* Round pagesize up to power of 2 */
			hashp->BSHIFT = __log2(info->bsize);
			hashp->BSIZE = 1 << hashp->BSHIFT;
			if (hashp->BSIZE > MAX_BSIZE) {
				errno = EINVAL;
				return (NULL);
			}
		}
		if (info->ffactor)
			hashp->FFACTOR = info->ffactor;
		if (info->hash)
			hashp->hash = info->hash;
		if (info->nelem)
			nelem = info->nelem;
		if (info->lorder) {
			if (info->lorder != BIG_ENDIAN &&
			    info->lorder != LITTLE_ENDIAN) {
				errno = EINVAL;
				return (NULL);
			}
			hashp->LORDER = info->lorder;
		}
	}

	/* init_htab should destroy the table and set errno if it fails */
	if (init_htab(hashp, (size_t)nelem))
		return (NULL);
	else
		return (hashp);
}
/*
 * init_htab --
 *	Size a new table for `nelem' elements and allocate its segments.
 *
 * The final step is alloc_segs(), which may run out of memory; it destroys
 * the table and sets errno itself, so its result is passed straight back.
 *
 * Returns 0 when no error occurred.
 */
static int
init_htab(HTAB *hashp, size_t nelem)
{
	uint32_t seg_count;
	int bucket_count, shift;

	/*
	 * Elements divided by the fill factor (rounded up) gives the wanted
	 * bucket count; the table gets the next power of two, at least 2.
	 */
	nelem = (nelem - 1) / hashp->FFACTOR + 1;
	_DBFIT(nelem, uint32_t);
	shift = __log2(MAX((uint32_t)nelem, 2));
	bucket_count = 1 << shift;

	hashp->SPARES[shift] = shift + 1;
	hashp->SPARES[shift + 1] = shift + 1;
	hashp->OVFL_POINT = shift;
	hashp->LAST_FREED = 2;

	/* First bitmap page is at: splitpoint shift, page offset 1 */
	if (__ibitmap(hashp, (int)OADDR_OF(shift, 1), shift + 1, 0))
		return (-1);

	hashp->MAX_BUCKET = hashp->LOW_MASK = bucket_count - 1;
	hashp->HIGH_MASK = (bucket_count << 1) - 1;

	/* LINTED constant in conditional context */
	hashp->HDRPAGES = ((MAX(sizeof(HASHHDR), MINHDRSIZE) - 1) >>
	    hashp->BSHIFT) + 1;

	/* Segments needed for the buckets, rounded up to a power of two. */
	seg_count = (bucket_count - 1) / hashp->SGSIZE + 1;
	seg_count = 1 << __log2(seg_count);
	if (seg_count > (uint32_t)hashp->DSIZE)
		hashp->DSIZE = seg_count;

	return (alloc_segs(hashp, (int)seg_count));
}
/********************** DESTROY/CLOSE ROUTINES ************************/
/*
 * hdestroy --
 *	Write out whatever needs saving, then release everything the table
 *	owns: buffers, segment directory, bitmaps, the file descriptor and
 *	the HTAB itself.
 *
 * Returns SUCCESS, or ERROR with errno set to the first error seen while
 * flushing.
 */
static int
hdestroy(HTAB *hashp)
{
	int i, save_errno;

	save_errno = 0;

#ifdef HASH_STATISTICS
	(void)fprintf(stderr, "hdestroy: accesses %d collisions %d\n",
	    hash_accesses, hash_collisions);
	(void)fprintf(stderr, "hdestroy: expansions %d\n",
	    hash_expansions);
	(void)fprintf(stderr, "hdestroy: overflows %d\n",
	    hash_overflows);
	(void)fprintf(stderr, "keys %d maxp %d segmentcount %d\n",
	    hashp->NKEYS, hashp->MAX_BUCKET, hashp->nsegs);

	for (i = 0; i < NCACHED; i++)
		(void)fprintf(stderr,
		    "spares[%d] = %d\n", i, hashp->SPARES[i]);
#endif

	/*
	 * Have the buffer manager free its buffers, writing them to disk
	 * first when save_file is set.
	 */
	if (__buf_free(hashp, 1, hashp->save_file))
		save_errno = errno;

	if (hashp->dir != NULL) {
		/* Initial segments, then each segment added later. */
		free(hashp->dir[0]);
		while (hashp->exsegs--)
			free(hashp->dir[--hashp->nsegs]);
		free(hashp->dir);
	}

	/* Keep only the first error. */
	if (flush_meta(hashp) && !save_errno)
		save_errno = errno;

	/* Free bitmaps; free(NULL) is a no-op, so unset slots need no test. */
	for (i = 0; i < hashp->nmaps; i++)
		free(hashp->mapp[i]);

	if (hashp->fp != -1)
		(void)close(hashp->fp);

	free(hashp);

	if (save_errno == 0)
		return (SUCCESS);
	errno = save_errno;
	return (ERROR);
}
/*
* Write modified pages to disk
*
* Returns:
* 0 == OK
* -1 ERROR
*/
static int
hash_sync(const DB *dbp, unsigned int flags)
{
HTAB *hashp;
if (flags != 0) {
errno = EINVAL;
return (ERROR);
}
if (!dbp)
return (ERROR);
hashp = dbp->internal;
if (!hashp->save_file)
return (0);
if (__buf_free(hashp, 0, 1) || flush_meta(hashp))
return (ERROR);
hashp->new_file = 0;
return (0);
}
/*
 * flush_meta --
 *	Write the table header at offset 0 of the file, followed by every
 *	bitmap page that is currently loaded.  Does nothing when save_file
 *	is clear.
 *
 * Returns:
 *	 0 == OK
 *	-1 on failure, with errno set (EFTYPE for a short header write)
 */
static int
flush_meta(HTAB *hashp)
{
	HASHHDR *out;
#if BYTE_ORDER == LITTLE_ENDIAN
	HASHHDR swapped;
#endif
	ssize_t nwritten;
	int slot;

	if (!hashp->save_file)
		return (0);

	/* Stamp the identifying fields before the header goes out. */
	hashp->MAGIC = HASHMAGIC;
	hashp->VERSION = HASHVERSION;
	hashp->H_CHARKEY = hashp->hash(CHARKEY, sizeof(CHARKEY));

#if BYTE_ORDER == LITTLE_ENDIAN
	/* Write a byte-swapped copy; the in-memory header is left alone. */
	out = &swapped;
	swap_header_copy(&hashp->hdr, out);
#else
	out = &hashp->hdr;
#endif

	nwritten = pwrite(hashp->fp, out, sizeof(HASHHDR), (off_t)0);
	if (nwritten == -1)
		return (-1);
	if (nwritten != sizeof(HASHHDR)) {
		errno = EFTYPE;
		hashp->err = errno;
		return (-1);
	}

	for (slot = 0; slot < NCACHED; slot++) {
		if (!hashp->mapp[slot])
			continue;
		if (__put_page(hashp, (char *)(void *)hashp->mapp[slot],
		    (u_int)hashp->BITMAPS[slot], 0, 1))
			return (-1);
	}
	return (0);
}
/*******************************SEARCH ROUTINES *****************************/
/*
 * Return values shared by the access routines hash_get(), hash_put() and
 * hash_delete():
 *	 0 on SUCCESS
 *	 1 to indicate an external ERROR (i.e. key not found, etc)
 *	-1 to indicate an internal ERROR (i.e. out of memory, etc)
 *
 * hash_get --
 *	Look up `key' and return its data in `data'.  No flags are
 *	accepted; a non-zero flag fails with EINVAL.
 */
static int
hash_get(const DB *dbp, const DBT *key, DBT *data, unsigned int flag)
{
	HTAB *hashp = dbp->internal;

	if (flag == 0)
		return (hash_access(hashp, HASH_GET, __UNCONST(key), data));

	hashp->err = errno = EINVAL;
	return (ERROR);
}
/*
 * hash_put --
 *	Store the key/data pair.  With R_NOOVERWRITE the pair is stored as
 *	HASH_PUTNEW, otherwise as HASH_PUT.
 *
 * Fails with EINVAL for any other non-zero flag and with EPERM when the
 * table was opened O_RDONLY.
 */
static int
hash_put(const DB *dbp, DBT *key, const DBT *data, unsigned int flag)
{
	HTAB *hashp = dbp->internal;
	ACTION op;

	if (flag != 0 && flag != R_NOOVERWRITE) {
		hashp->err = errno = EINVAL;
		return (ERROR);
	}
	if ((hashp->flags & O_ACCMODE) == O_RDONLY) {
		hashp->err = errno = EPERM;
		return (ERROR);
	}

	op = (flag == R_NOOVERWRITE) ? HASH_PUTNEW : HASH_PUT;
	/* LINTED const castaway */
	return (hash_access(hashp, op, __UNCONST(key), __UNCONST(data)));
}
/*
 * hash_delete --
 *	Remove `key' from the table.
 *
 * Accepts flag 0 or R_CURSOR (EINVAL otherwise); fails with EPERM when
 * the table was opened O_RDONLY.
 */
static int
hash_delete(const DB *dbp, const DBT *key, unsigned int flag)
{
	HTAB *hashp = dbp->internal;
	int read_only;

	if (flag != 0 && flag != R_CURSOR) {
		hashp->err = errno = EINVAL;
		return (ERROR);
	}

	read_only = (hashp->flags & O_ACCMODE) == O_RDONLY;
	if (read_only) {
		hashp->err = errno = EPERM;
		return (ERROR);
	}

	return (hash_access(hashp, HASH_DELETE, __UNCONST(key), NULL));
}
/*
 * hash_access --
 *	Search the bucket chain for `key' and carry out `action' on it.
 *
 * Assumes the wrapper routine (hash_get/hash_put/hash_delete) has set
 * hashp and validated its flags.
 *
 * The first page of the bucket is pinned (BUF_PIN) while the chain is
 * walked and is unpinned again before every return taken after that
 * point, including the failure returns of __big_return() and __delpair().
 *
 * Parameters:
 *	hashp:	table to operate on.
 *	action:	HASH_GET, HASH_PUT, HASH_PUTNEW or HASH_DELETE; any other
 *		value aborts if the key is found.
 *	key:	key to look for.
 *	val:	HASH_GET: filled in with the data (for an on-page pair it
 *		points into the buffer page); HASH_PUT/HASH_PUTNEW: data
 *		to store; not used for HASH_DELETE.
 *
 * Returns:
 *	SUCCESS	 the action was carried out.
 *	ABNORMAL the key was not found (get/delete), or was found and the
 *		 action is HASH_PUTNEW.
 *	ERROR	 a lower-level routine failed.
 */
static int
hash_access(HTAB *hashp, ACTION action, DBT *key, DBT *val)
{
	BUFHEAD *rbufp;
	BUFHEAD *bufp, *save_bufp;
	uint16_t *bp;
	int n, ndx, off;
	size_t size;
	char *kp;
	uint16_t pageno;

#ifdef HASH_STATISTICS
	hash_accesses++;
#endif

	off = hashp->BSIZE;
	size = key->size;
	kp = (char *)key->data;
	rbufp = __get_buf(hashp, __call_hash(hashp, kp, (int)size), NULL, 0);
	if (!rbufp)
		return (ERROR);
	save_bufp = rbufp;

	/* Pin the bucket chain */
	rbufp->flags |= BUF_PIN;
	for (bp = (uint16_t *)(void *)rbufp->page, n = *bp++, ndx = 1; ndx < n;)
		if (bp[1] >= REAL_KEY) {
			/* Real key/data pair */
			if (size == (size_t)(off - *bp) &&
			    memcmp(kp, rbufp->page + *bp, size) == 0)
				goto found;
			off = bp[1];
#ifdef HASH_STATISTICS
			hash_collisions++;
#endif
			bp += 2;
			ndx += 2;
		} else if (bp[1] == OVFLPAGE) {
			/* Continue the search on the overflow page. */
			rbufp = __get_buf(hashp, (uint32_t)*bp, rbufp, 0);
			if (!rbufp) {
				save_bufp->flags &= ~BUF_PIN;
				return (ERROR);
			}
			/* FOR LOOP INIT */
			bp = (uint16_t *)(void *)rbufp->page;
			n = *bp++;
			ndx = 1;
			off = hashp->BSIZE;
		} else if (bp[1] < REAL_KEY) {
			if ((ndx =
			    __find_bigpair(hashp, rbufp, ndx, kp, (int)size)) > 0)
				goto found;
			if (ndx == -2) {
				bufp = rbufp;
				if (!(pageno =
				    __find_last_page(hashp, &bufp))) {
					ndx = 0;
					rbufp = bufp;
					break;	/* FOR */
				}
				rbufp = __get_buf(hashp, (uint32_t)pageno,
				    bufp, 0);
				if (!rbufp) {
					save_bufp->flags &= ~BUF_PIN;
					return (ERROR);
				}
				/* FOR LOOP INIT */
				bp = (uint16_t *)(void *)rbufp->page;
				n = *bp++;
				ndx = 1;
				off = hashp->BSIZE;
			} else {
				save_bufp->flags &= ~BUF_PIN;
				return (ERROR);
			}
		}

	/* Not found */
	switch (action) {
	case HASH_PUT:
	case HASH_PUTNEW:
		if (__addel(hashp, rbufp, key, val)) {
			save_bufp->flags &= ~BUF_PIN;
			return (ERROR);
		} else {
			save_bufp->flags &= ~BUF_PIN;
			return (SUCCESS);
		}
	case HASH_GET:
	case HASH_DELETE:
	default:
		save_bufp->flags &= ~BUF_PIN;
		return (ABNORMAL);
	}

found:
	switch (action) {
	case HASH_PUTNEW:
		save_bufp->flags &= ~BUF_PIN;
		return (ABNORMAL);
	case HASH_GET:
		bp = (uint16_t *)(void *)rbufp->page;
		if (bp[ndx + 1] < REAL_KEY) {
			if (__big_return(hashp, rbufp, ndx, val, 0)) {
				/* Unpin on failure, like every other error path. */
				save_bufp->flags &= ~BUF_PIN;
				return (ERROR);
			}
		} else {
			val->data = (uint8_t *)rbufp->page + (int)bp[ndx + 1];
			val->size = bp[ndx] - bp[ndx + 1];
		}
		break;
	case HASH_PUT:
		/* Replace: drop the old pair, then add the new one. */
		if ((__delpair(hashp, rbufp, ndx)) ||
		    (__addel(hashp, rbufp, key, val))) {
			save_bufp->flags &= ~BUF_PIN;
			return (ERROR);
		}
		break;
	case HASH_DELETE:
		if (__delpair(hashp, rbufp, ndx)) {
			/* Unpin on failure, like every other error path. */
			save_bufp->flags &= ~BUF_PIN;
			return (ERROR);
		}
		break;
	default:
		abort();
	}
	save_bufp->flags &= ~BUF_PIN;
	return (SUCCESS);
}
/*
 * hash_seq --
 * Return the next key/data pair of a sequential scan over the table.
 *
 * The scan position lives in the table itself (cbucket, cndx, cpage), so
 * there is one position per table.
 *
 * Parameters:
 * dbp: database handle.
 * key: receives the key of the pair.
 * data: receives the data of the pair.
 * flag: 0, R_FIRST or R_NEXT; anything else fails with EINVAL.
 * R_FIRST, or a table whose cbucket is negative, starts at bucket 0.
 *
 * Returns:
 * SUCCESS with key/data filled in; for an on-page pair both point into
 * the buffer page.
 * ABNORMAL once the scan has moved past MAX_BUCKET (cbucket is then
 * reset to -1).
 * ERROR if a buffer could not be obtained, __big_keydata() failed, or
 * the flag was invalid.
 */
static int
hash_seq(const DB *dbp, DBT *key, DBT *data, unsigned int flag)
{
uint32_t bucket;
BUFHEAD *bufp = NULL; /* XXX: gcc */
HTAB *hashp;
uint16_t *bp, ndx;
hashp = dbp->internal;
if (flag && flag != R_FIRST && flag != R_NEXT) {
hashp->err = errno = EINVAL;
return (ERROR);
}
#ifdef HASH_STATISTICS
hash_accesses++;
#endif
/* (Re)start the scan at the first index of bucket 0. */
if ((hashp->cbucket < 0) || (flag == R_FIRST)) {
hashp->cbucket = 0;
hashp->cndx = 1;
hashp->cpage = NULL;
}
/* Loop until bp refers to a page whose first word (bp[0]) is non-zero. */
for (bp = NULL; !bp || !bp[0]; ) {
if (!(bufp = hashp->cpage)) {
/* No current page: advance to the next bucket with bp[0] != 0. */
for (bucket = hashp->cbucket;
bucket <= (uint32_t)hashp->MAX_BUCKET;
bucket++, hashp->cndx = 1) {
bufp = __get_buf(hashp, bucket, NULL, 0);
if (!bufp)
return (ERROR);
hashp->cpage = bufp;
bp = (uint16_t *)(void *)bufp->page;
if (bp[0])
break;
}
hashp->cbucket = bucket;
if (hashp->cbucket > hashp->MAX_BUCKET) {
/* Ran off the end of the table: scan finished. */
hashp->cbucket = -1;
return (ABNORMAL);
}
} else
bp = (uint16_t *)(void *)hashp->cpage->page;
_DIAGASSERT(bp != NULL);
_DIAGASSERT(bufp != NULL);
/* Follow overflow-page links, restarting at index 1 on each new page. */
while (bp[hashp->cndx + 1] == OVFLPAGE) {
bufp = hashp->cpage =
__get_buf(hashp, (uint32_t)bp[hashp->cndx], bufp,
0);
if (!bufp)
return (ERROR);
bp = (uint16_t *)(void *)(bufp->page);
hashp->cndx = 1;
}
if (!bp[0]) {
hashp->cpage = NULL;
++hashp->cbucket;
}
}
ndx = hashp->cndx;
if (bp[ndx + 1] < REAL_KEY) {
/* Not an on-page pair: __big_keydata() produces key and data. */
if (__big_keydata(hashp, bufp, key, data, 1))
return (ERROR);
} else {
if (hashp->cpage == NULL)
return (ERROR);
/* The key ends where the previous entry starts, or at the page end for the first pair. */
key->data = (uint8_t *)hashp->cpage->page + bp[ndx];
key->size = (ndx > 1 ? bp[ndx - 1] : hashp->BSIZE) - bp[ndx];
data->data = (uint8_t *)hashp->cpage->page + bp[ndx + 1];
data->size = bp[ndx] - bp[ndx + 1];
/* Step past this pair; move to the next bucket once the page is used up. */
ndx += 2;
if (ndx > bp[0]) {
hashp->cpage = NULL;
hashp->cbucket++;
hashp->cndx = 1;
} else
hashp->cndx = ndx;
}
return (SUCCESS);
}
/********************************* UTILITIES ************************/
/*
 * __expand_table --
 *	Grow the table by one bucket: bump MAX_BUCKET, add a segment (and
 *	double the segment directory) if the new bucket needs one, update
 *	the spares/mask bookkeeping, then split the old bucket's page into
 *	the new bucket.
 *
 * Returns:
 *	 0 ==> OK
 *	-1 ==> Error
 */
int
__expand_table(HTAB *hashp)
{
	uint32_t old_bucket, new_bucket;
	int new_segnum, spare_ndx;
	size_t dirsize;

#ifdef HASH_STATISTICS
	hash_expansions++;
#endif
	new_bucket = ++hashp->MAX_BUCKET;
	old_bucket = (hashp->MAX_BUCKET & hashp->LOW_MASK);

	new_segnum = new_bucket >> hashp->SSHIFT;

	/* Check if we need a new segment */
	if (new_segnum >= hashp->nsegs) {
		/* Check if we need to expand directory */
		if (new_segnum >= hashp->DSIZE) {
			/* Reallocate directory */
			dirsize = hashp->DSIZE * sizeof(SEGMENT *);
			if (!hash_realloc(&hashp->dir, dirsize, dirsize << 1))
				return (-1);
			/*
			 * DSIZE counts directory entries, not bytes (see
			 * the dirsize computation above), so it doubles
			 * along with the allocation.  Storing the byte
			 * count here would overstate the capacity and
			 * suppress later reallocations.
			 */
			hashp->DSIZE *= 2;
		}
		if ((hashp->dir[new_segnum] =
		    calloc((size_t)hashp->SGSIZE, sizeof(SEGMENT))) == NULL)
			return (-1);
		hashp->exsegs++;
		hashp->nsegs++;
	}
	/*
	 * If the split point is increasing (MAX_BUCKET's log base 2
	 * increases), we need to copy the current contents of the spare
	 * split bucket to the next bucket.
	 */
	spare_ndx = __log2((uint32_t)(hashp->MAX_BUCKET + 1));
	if (spare_ndx > hashp->OVFL_POINT) {
		hashp->SPARES[spare_ndx] = hashp->SPARES[hashp->OVFL_POINT];
		hashp->OVFL_POINT = spare_ndx;
	}

	if (new_bucket > (uint32_t)hashp->HIGH_MASK) {
		/* Starting a new doubling */
		hashp->LOW_MASK = hashp->HIGH_MASK;
		hashp->HIGH_MASK = new_bucket | hashp->LOW_MASK;
	}
	/* Relocate records to the new bucket */
	return (__split_page(hashp, old_bucket, new_bucket));
}
/*
 * hash_realloc --
 *	Replace the block *p_ptr (oldsize bytes) with a fresh block of
 *	newsize bytes that starts with the old contents and is zero-filled
 *	beyond them.  The old block is released and *p_ptr updated only when
 *	the new allocation succeeds; on failure *p_ptr is left untouched.
 *
 * Returns the new block, or NULL if it could not be allocated.
 */
static void *
hash_realloc(SEGMENT **p_ptr, size_t oldsize, size_t newsize)
{
	void *grown;

	grown = malloc(newsize);
	if (grown == NULL)
		return (NULL);

	memmove(grown, *p_ptr, oldsize);
	memset((char *)grown + oldsize, 0, newsize - oldsize);
	free(*p_ptr);
	*p_ptr = grown;
	return (grown);
}
/*
 * __call_hash --
 *	Run the table's hash function over the len bytes at k and reduce the
 *	result to a bucket number: mask with HIGH_MASK, and if that lands
 *	beyond MAX_BUCKET, mask again with LOW_MASK.
 */
uint32_t
__call_hash(HTAB *hashp, char *k, int len)
{
	int hashval, slot;

	hashval = hashp->hash(k, (size_t)len);
	slot = hashval & hashp->HIGH_MASK;
	if (slot <= hashp->MAX_BUCKET)
		return (slot);
	return (slot & hashp->LOW_MASK);
}
/*
 * alloc_segs --
 *	Allocate the directory (DSIZE zeroed slots) and, for nsegs > 0, one
 *	zeroed block holding nsegs segments; directory slot i is pointed at
 *	the i-th segment inside that block.
 *
 * If either allocation fails the table is torn down with hdestroy() and
 * errno is restored to the value the failed allocation left.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
alloc_segs(HTAB *hashp, int nsegs)
{
	SEGMENT pool;
	int seg, saved_errno;

	hashp->dir = calloc((size_t)hashp->DSIZE, sizeof(SEGMENT *));
	if (hashp->dir == NULL)
		goto fail;

	hashp->nsegs = nsegs;
	if (nsegs == 0)
		return 0;

	/* One contiguous allocation backs every initial segment. */
	pool = calloc((size_t)(nsegs << hashp->SSHIFT), sizeof(SEGMENT));
	if (pool == NULL)
		goto fail;

	for (seg = 0; seg < nsegs; seg++)
		hashp->dir[seg] = pool + (seg << hashp->SSHIFT);
	return (0);

fail:
	/* hdestroy() may disturb errno; report the allocation's value. */
	saved_errno = errno;
	(void)hdestroy(hashp);
	errno = saved_errno;
	return (-1);
}
#if BYTE_ORDER == LITTLE_ENDIAN
/*
* Hashp->hdr needs to be byteswapped.
*/
/*
 * swap_header_copy --
 *	Fill *destp from *srcp field by field through the P_32_COPY and
 *	P_16_COPY macros; *srcp itself is only passed as the macros' source
 *	operand.
 */
static void
swap_header_copy(HASHHDR *srcp, HASHHDR *destp)
{
	size_t slot;

	/* Scalar header fields. */
	P_32_COPY(srcp->magic, destp->magic);
	P_32_COPY(srcp->version, destp->version);
	P_32_COPY(srcp->lorder, destp->lorder);
	P_32_COPY(srcp->bsize, destp->bsize);
	P_32_COPY(srcp->bshift, destp->bshift);
	P_32_COPY(srcp->dsize, destp->dsize);
	P_32_COPY(srcp->ssize, destp->ssize);
	P_32_COPY(srcp->sshift, destp->sshift);
	P_32_COPY(srcp->ovfl_point, destp->ovfl_point);
	P_32_COPY(srcp->last_freed, destp->last_freed);
	P_32_COPY(srcp->max_bucket, destp->max_bucket);
	P_32_COPY(srcp->high_mask, destp->high_mask);
	P_32_COPY(srcp->low_mask, destp->low_mask);
	P_32_COPY(srcp->ffactor, destp->ffactor);
	P_32_COPY(srcp->nkeys, destp->nkeys);
	P_32_COPY(srcp->hdrpages, destp->hdrpages);
	P_32_COPY(srcp->h_charkey, destp->h_charkey);

	/* The two NCACHED-element tables: 32-bit spares, 16-bit bitmaps. */
	for (slot = 0; slot != NCACHED; ++slot) {
		P_32_COPY(srcp->spares[slot], destp->spares[slot]);
	}
	for (slot = 0; slot != NCACHED; ++slot) {
		P_16_COPY(srcp->bitmaps[slot], destp->bitmaps[slot]);
	}
}
/*
 * swap_header --
 *	Apply M_32_SWAP / M_16_SWAP in place to every field of the header
 *	embedded in hashp.
 */
static void
swap_header(HTAB *hashp)
{
	HASHHDR *hdrp = &hashp->hdr;
	size_t slot;

	/* Scalar header fields. */
	M_32_SWAP(hdrp->magic);
	M_32_SWAP(hdrp->version);
	M_32_SWAP(hdrp->lorder);
	M_32_SWAP(hdrp->bsize);
	M_32_SWAP(hdrp->bshift);
	M_32_SWAP(hdrp->dsize);
	M_32_SWAP(hdrp->ssize);
	M_32_SWAP(hdrp->sshift);
	M_32_SWAP(hdrp->ovfl_point);
	M_32_SWAP(hdrp->last_freed);
	M_32_SWAP(hdrp->max_bucket);
	M_32_SWAP(hdrp->high_mask);
	M_32_SWAP(hdrp->low_mask);
	M_32_SWAP(hdrp->ffactor);
	M_32_SWAP(hdrp->nkeys);
	M_32_SWAP(hdrp->hdrpages);
	M_32_SWAP(hdrp->h_charkey);

	/* The two NCACHED-element tables: 32-bit spares, 16-bit bitmaps. */
	for (slot = 0; slot != NCACHED; ++slot) {
		M_32_SWAP(hdrp->spares[slot]);
	}
	for (slot = 0; slot != NCACHED; ++slot) {
		M_16_SWAP(hdrp->bitmaps[slot]);
	}
}
#endif

295
lib/libc/db/hash/hash.h Normal file
View file

@ -0,0 +1,295 @@
/* $NetBSD: hash.h,v 1.15 2008/08/26 21:18:38 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)hash.h 8.3 (Berkeley) 5/31/94
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
/*
 * Operations.
 * NOTE(review): presumably the selector handed to the shared access routine
 * in hash.c; no use of ACTION is visible in this header -- confirm there.
 */
typedef enum {
HASH_GET, HASH_PUT, HASH_PUTNEW, HASH_DELETE, HASH_FIRST, HASH_NEXT
} ACTION;
/* Buffer Management structures */
typedef struct _bufhead BUFHEAD;
/*
 * Header describing one cached page.  Headers sit on a doubly linked LRU
 * list through prev/next; ovfl links a page's header to the header of the
 * overflow page that follows it.
 */
struct _bufhead {
BUFHEAD *prev; /* LRU links */
BUFHEAD *next; /* LRU links */
BUFHEAD *ovfl; /* Overflow page buffer header */
uint32_t addr; /* Address of this page */
char *page; /* Actual page data */
char flags; /* Bitwise OR of the BUF_* values below */
#define BUF_MOD 0x0001
#define BUF_DISK 0x0002
#define BUF_BUCKET 0x0004
#define BUF_PIN 0x0008
};
/* Non-zero when the flags value X has BUF_BUCKET set. */
#define IS_BUCKET(X) ((X) & BUF_BUCKET)
/*
 * A segment is an array of buffer header pointers; the table directory
 * (HTAB.dir) is an array of segments.
 */
typedef BUFHEAD **SEGMENT;
/* Hash Table Information */
/*
 * Table header.  Every member has a fixed width (32 bits, except the 16-bit
 * bitmaps entries), which is what the field-by-field byte swapping in
 * hash.c relies on.  Members are normally reached through the upper-case
 * shorthand macros at the end of this file (hashp->BSIZE and so on).
 */
typedef struct hashhdr { /* Disk resident portion */
int32_t magic; /* Magic NO for hash tables */
int32_t version; /* Version ID */
uint32_t lorder; /* Byte Order */
int32_t bsize; /* Bucket/Page Size */
int32_t bshift; /* Bucket shift */
int32_t dsize; /* Directory Size */
int32_t ssize; /* Segment Size */
int32_t sshift; /* Segment shift */
int32_t ovfl_point; /* Where overflow pages are being
* allocated */
int32_t last_freed; /* Last overflow page freed */
int32_t max_bucket; /* ID of Maximum bucket in use */
int32_t high_mask; /* Mask to modulo into entire table */
int32_t low_mask; /* Mask to modulo into lower half of
* table */
int32_t ffactor; /* Fill factor */
int32_t nkeys; /* Number of keys in hash table */
int32_t hdrpages; /* Size of table header */
int32_t h_charkey; /* value of hash(CHARKEY) */
#define NCACHED 32 /* number of bit maps and spare
* points */
int32_t spares[NCACHED];/* spare pages for overflow */
uint16_t bitmaps[NCACHED]; /* address of overflow page
* bitmaps */
} HASHHDR;
/*
 * In-core state of one open hash table: the header plus buffer, directory
 * and sequential-scan bookkeeping.  cpage/cbucket/cndx together form the
 * scan cursor; the scan code sets cbucket to -1 once it has run past the
 * last bucket.
 */
typedef struct htab { /* Memory resident data structure */
HASHHDR hdr; /* Header */
int nsegs; /* Number of allocated segments */
int exsegs; /* Number of extra allocated
* segments */
uint32_t (*hash)(const void *, size_t); /* Hash function */
int flags; /* Flag values */
int fp; /* File pointer */
char *tmp_buf; /* Temporary Buffer for BIG data */
char *tmp_key; /* Temporary Buffer for BIG keys */
BUFHEAD *cpage; /* Current page */
int cbucket; /* Current bucket */
int cndx; /* Index of next item on cpage */
int err; /* Error Number -- for DBM
* compatibility */
int new_file; /* Indicates if fd is backing store
* or no */
int save_file; /* Indicates whether we need to flush
* file at
* exit */
uint32_t *mapp[NCACHED]; /* Pointers to page maps */
int nmaps; /* Initial number of bitmaps */
int nbufs; /* Number of buffers left to
* allocate */
BUFHEAD bufhead; /* Header of buffer lru list */
SEGMENT *dir; /* Hash Bucket directory */
} HTAB;
/*
* Constants
*/
#define MAX_BSIZE 65536 /* 2^16 */
#define MIN_BUFFERS 6
#define MINHDRSIZE 512
#define DEF_BUFSIZE 65536 /* 64 K */
#define DEF_BUCKET_SIZE 4096
#define DEF_BUCKET_SHIFT 12 /* log2(BUCKET) */
#define DEF_SEGSIZE 256
#define DEF_SEGSIZE_SHIFT 8 /* log2(SEGSIZE) */
#define DEF_DIRSIZE 256
#define DEF_FFACTOR 65536
#define MIN_FFACTOR 4
#define SPLTMAX 8
#define CHARKEY "%$sniglet^&"
#define NUMKEY 1038583
#define BYTE_SHIFT 3
#define INT_TO_BYTE 2
#define INT_BYTE_SHIFT 5
#define ALL_SET ((uint32_t)0xFFFFFFFF)
#define ALL_CLEAR 0
/*
 * Tag bits kept in the two low-order bits of a pointer-sized value (the
 * buffer code applies PTROF and ISDISK to segment entries):
 *	PTROF  -- the value with both tag bits cleared, as a BUFHEAD pointer
 *	ISMOD  -- tests bit 0;  DOMOD  -- sets bit 0 in place
 *	ISDISK -- tests bit 1;  DODISK -- sets bit 1 in place
 * DOMOD and DODISK assign the result back as a char *.
 */
#define PTROF(X) ((BUFHEAD *)(void *)((u_long)(X)&~0x3))
#define ISMOD(X) ((uint32_t)(u_long)(X)&0x1)
#define DOMOD(X) ((X) = (char *)(void *)((u_long)(X)|0x1))
#define ISDISK(X) ((uint32_t)(u_long)(X)&0x2)
#define DODISK(X) ((X) = (char *)(void *)((u_long)(X)|0x2))
#define BITS_PER_MAP 32
/*
 * Given the address of the beginning of a big map (an array of 32-bit
 * words), clear/set/test the nth bit.  The mask is built from an unsigned
 * 32-bit one: with a plain int, bit 31 would be produced by shifting into
 * the sign bit, which is undefined behaviour.
 */
#define CLRBIT(A, N) ((A)[(N)/BITS_PER_MAP] &= ~((uint32_t)1<<((N)%BITS_PER_MAP)))
#define SETBIT(A, N) ((A)[(N)/BITS_PER_MAP] |= ((uint32_t)1<<((N)%BITS_PER_MAP)))
#define ISSET(A, N) ((A)[(N)/BITS_PER_MAP] & ((uint32_t)1<<((N)%BITS_PER_MAP)))
/* Overflow management */
/*
 * Overflow page numbers are allocated per split point. At each doubling of
 * the table, we can allocate extra pages. So, an overflow page number has
 * the top 5 bits indicate which split point and the lower 11 bits indicate
 * which page at that split point is indicated (pages within split points are
 * numbered starting with 1).
 *
 * BUCKET_TO_PAGE and OADDR_TO_PAGE expect a variable named hashp (the HTAB
 * being operated on) to be in scope at the point of use.  Both expand to a
 * single parenthesized expression, so they can be used inside larger
 * expressions; OADDR_TO_PAGE carries no trailing semicolon.
 */
#define SPLITSHIFT 11
#define SPLITMASK 0x7FF
#define SPLITNUM(N) (((uint32_t)(N)) >> SPLITSHIFT)
#define OPAGENUM(N) ((N) & SPLITMASK)
#define OADDR_OF(S,O) ((uint32_t)((uint32_t)(S) << SPLITSHIFT) + (O))
#define BUCKET_TO_PAGE(B) \
	((B) + hashp->HDRPAGES + \
	((B) ? hashp->SPARES[__log2((uint32_t)((B)+1))-1] : 0))
#define OADDR_TO_PAGE(B) \
	(BUCKET_TO_PAGE ( (1 << SPLITNUM((B))) -1 ) + OPAGENUM((B)))
/*
* page.h contains a detailed description of the page format.
*
* Normally, keys and data are accessed from offset tables in the top of
* each page which point to the beginning of the key and data. There are
* four flag values which may be stored in these offset tables which indicate
* the following:
*
*
* OVFLPAGE Rather than a key data pair, this pair contains
* the address of an overflow page. The format of
* the pair is:
* OVERFLOW_PAGE_NUMBER OVFLPAGE
*
* PARTIAL_KEY This must be the first key/data pair on a page
* and implies that page contains only a partial key.
* That is, the key is too big to fit on a single page
* so it starts on this page and continues on the next.
* The format of the page is:
* KEY_OFF PARTIAL_KEY OVFL_PAGENO OVFLPAGE
*
* KEY_OFF -- offset of the beginning of the key
* PARTIAL_KEY -- 1
* OVFL_PAGENO - page number of the next overflow page
* OVFLPAGE -- 0
*
* FULL_KEY This must be the first key/data pair on the page. It
* is used in two cases.
*
* Case 1:
* There is a complete key on the page but no data
* (because it wouldn't fit). The next page contains
* the data.
*
* Page format is:
* KEY_OFF FULL_KEY OVFL_PAGENO OVFLPAGE
*
* KEY_OFF -- offset of the beginning of the key
* FULL_KEY -- 2
* OVFL_PAGENO - page number of the next overflow page
* OVFLPAGE -- 0
*
* Case 2:
* This page contains no key, but part of a large
* data field, which is continued on the next page.
*
* Page format is:
* DATA_OFF FULL_KEY OVFL_PAGENO OVFLPAGE
*
* DATA_OFF -- offset of the beginning of the data on
* this page
* FULL_KEY -- 2
* OVFL_PAGENO - page number of the next overflow page
* OVFLPAGE -- 0
*
* FULL_KEY_DATA
* This must be the first key/data pair on the page.
* There are two cases:
*
* Case 1:
* This page contains a key and the beginning of the
* data field, but the data field is continued on the
* next page.
*
* Page format is:
* KEY_OFF FULL_KEY_DATA OVFL_PAGENO DATA_OFF
*
* KEY_OFF -- offset of the beginning of the key
* FULL_KEY_DATA -- 3
* OVFL_PAGENO - page number of the next overflow page
* DATA_OFF -- offset of the beginning of the data
*
* Case 2:
* This page contains the last page of a big data pair.
* There is no key, only the tail end of the data
* on this page.
*
* Page format is:
* DATA_OFF FULL_KEY_DATA <OVFL_PAGENO> <OVFLPAGE>
*
* DATA_OFF -- offset of the beginning of the data on
* this page
* FULL_KEY_DATA -- 3
* OVFL_PAGENO - page number of the next overflow page
* OVFLPAGE -- 0
*
* OVFL_PAGENO and OVFLPAGE are optional (they are
* not present if there is no next page).
*/
/*
 * Tag values stored in the second slot of an offset-table pair.  Values
 * below REAL_KEY are the special tags; callers test "< REAL_KEY" to detect
 * a pair that is not an ordinary key/data pair.
 */
#define OVFLPAGE 0
#define PARTIAL_KEY 1
#define FULL_KEY 2
#define FULL_KEY_DATA 3
#define REAL_KEY 4
/* Short hands for accessing structure */
/*
 * Each expands to a member of the embedded header, so hashp->BSIZE reads
 * hashp->hdr.bsize.
 */
#define BSIZE hdr.bsize
#define BSHIFT hdr.bshift
#define DSIZE hdr.dsize
#define SGSIZE hdr.ssize
#define SSHIFT hdr.sshift
#define LORDER hdr.lorder
#define OVFL_POINT hdr.ovfl_point
#define LAST_FREED hdr.last_freed
#define MAX_BUCKET hdr.max_bucket
#define FFACTOR hdr.ffactor
#define HIGH_MASK hdr.high_mask
#define LOW_MASK hdr.low_mask
#define NKEYS hdr.nkeys
#define HDRPAGES hdr.hdrpages
#define SPARES hdr.spares
#define BITMAPS hdr.bitmaps
#define VERSION hdr.version
#define MAGIC hdr.magic
/*
 * NOTE(review): HASHHDR as declared in this header has no next_free member,
 * so NEXT_FREE cannot be used as it stands.
 */
#define NEXT_FREE hdr.next_free
#define H_CHARKEY hdr.h_charkey

View file

@ -0,0 +1,683 @@
/* $NetBSD: hash_bigkey.c,v 1.23 2009/02/12 06:33:13 lukem Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: hash_bigkey.c,v 1.23 2009/02/12 06:33:13 lukem Exp $");
#endif
/*
* PACKAGE: hash
* DESCRIPTION:
* Big key/data handling for the hashing package.
*
* ROUTINES:
* External
* __big_keydata
* __big_split
* __big_insert
* __big_return
* __big_delete
* __find_last_page
* Internal
* collect_key
* collect_data
*/
#include <sys/param.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <db.h>
#include "hash.h"
#include "page.h"
#include "extern.h"
#ifndef _DIAGASSERT
#define _DIAGASSERT
#endif
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
static int collect_key(HTAB *, BUFHEAD *, int, DBT *, int);
static int collect_data(HTAB *, BUFHEAD *, int, int);
/*
* Big_insert
*
* You need to do an insert and the key/data pair is too big
*
* Returns:
* 0 ==> OK
*-1 ==> ERROR
*/
int
__big_insert(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val)
{
uint16_t *p, n;
size_t key_size, val_size;
uint16_t space, move_bytes, off;
char *cp, *key_data, *val_data;
size_t temp;
/* p and cp are two views (offset table / raw bytes) of the same page. */
cp = bufp->page; /* Character pointer of p. */
p = (uint16_t *)(void *)cp;
key_data = (char *)key->data;
_DBFIT(key->size, int);
key_size = key->size;
val_data = (char *)val->data;
_DBFIT(val->size, int);
val_size = val->size;
/* First move the Key */
/* Room for key bytes on this page once BIGOVERHEAD is set aside. */
temp = FREESPACE(p) - BIGOVERHEAD;
_DBFIT(temp, uint16_t);
space = (uint16_t)temp;
/*
 * Each pass copies as much of the key as fits onto the current page,
 * records it as an (offset, PARTIAL_KEY) pair and chains a new overflow
 * page behind it.
 */
while (key_size) {
move_bytes = MIN(space, key_size);
off = OFFSET(p) - move_bytes;
memmove(cp + off, key_data, (size_t)move_bytes);
key_size -= move_bytes;
key_data += move_bytes;
n = p[0];
p[++n] = off;
p[0] = ++n;
temp = off - PAGE_META(n);
_DBFIT(temp, uint16_t);
FREESPACE(p) = (uint16_t)temp;
OFFSET(p) = off;
p[n] = PARTIAL_KEY;
/* bufp now names the new page; p and cp still name the one just filled. */
bufp = __add_ovflpage(hashp, bufp);
if (!bufp)
return (-1);
/*
 * NOTE(review): p[0] is re-read because __add_ovflpage presumably
 * appended the overflow link to this page -- confirm in hash_page.c.
 */
n = p[0];
if (!key_size) {
/* The key ended on this page: try to start the data here too. */
space = FREESPACE(p);
if (space) {
move_bytes = MIN(space, val_size);
/*
* If the data would fit exactly in the
* remaining space, we must overflow it to the
* next page; otherwise the invariant that the
* data must end on a page with FREESPACE
* non-zero would fail.
*/
if (space == val_size && val_size == val->size)
goto toolarge;
off = OFFSET(p) - move_bytes;
memmove(cp + off, val_data, (size_t)move_bytes);
val_data += move_bytes;
val_size -= move_bytes;
p[n] = off;
p[n - 2] = FULL_KEY_DATA;
FREESPACE(p) = FREESPACE(p) - move_bytes;
OFFSET(p) = off;
} else {
/* Also reached by the goto above: no data goes on this page. */
toolarge:
p[n - 2] = FULL_KEY;
}
}
/* Switch both views to the freshly added overflow page. */
p = (uint16_t *)(void *)bufp->page;
cp = bufp->page;
bufp->flags |= BUF_MOD;
temp = FREESPACE(p) - BIGOVERHEAD;
_DBFIT(temp, uint16_t);
space = (uint16_t)temp;
}
/* Now move the data */
temp = FREESPACE(p) - BIGOVERHEAD;
_DBFIT(temp, uint16_t);
space = (uint16_t)temp;
while (val_size) {
move_bytes = MIN(space, val_size);
/*
* Here's the hack to make sure that if the data ends on the
* same page as the key ends, FREESPACE is at least one.
*/
if (space == val_size && val_size == val->size)
move_bytes--;
off = OFFSET(p) - move_bytes;
memmove(cp + off, val_data, (size_t)move_bytes);
val_size -= move_bytes;
val_data += move_bytes;
n = p[0];
p[++n] = off;
p[0] = ++n;
temp = off - PAGE_META(n);
_DBFIT(temp, uint16_t);
FREESPACE(p) = (uint16_t)temp;
OFFSET(p) = off;
if (val_size) {
/* More data to come: tag the page FULL_KEY and chain another. */
p[n] = FULL_KEY;
bufp = __add_ovflpage(hashp, bufp);
if (!bufp)
return (-1);
cp = bufp->page;
p = (uint16_t *)(void *)cp;
} else
/* Last page of the data. */
p[n] = FULL_KEY_DATA;
bufp->flags |= BUF_MOD;
temp = FREESPACE(p) - BIGOVERHEAD;
_DBFIT(temp, uint16_t);
space = (uint16_t)temp;
}
return (0);
}
/*
* Called when bufp's page contains a partial key (index should be 1)
*
* All pages in the big key/data pair except bufp are freed. We cannot
* free bufp because the page pointing to it is lost and we can't get rid
* of its pointer.
*
* Returns:
* 0 => OK
*-1 => ERROR
*/
int
__big_delete(HTAB *hashp, BUFHEAD *bufp)
{
BUFHEAD *last_bfp, *rbufp;
uint16_t *bp, pageno;
int key_done, n;
size_t temp;
/*
 * rbufp walks the chain; last_bfp trails one page behind it so that a page
 * is only freed after its successor has been fetched.
 */
rbufp = bufp;
last_bfp = NULL;
bp = (uint16_t *)(void *)bufp->page;
pageno = 0;
key_done = 0;
/* Walk until the key has been passed and a FULL_KEY_DATA page is reached. */
while (!key_done || (bp[2] != FULL_KEY_DATA)) {
if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA)
key_done = 1;
/*
* If there is freespace left on a FULL_KEY_DATA page, then
* the data is short and fits entirely on this page, and this
* is the last page.
*/
if (bp[2] == FULL_KEY_DATA && FREESPACE(bp))
break;
/* The next-to-last offset-table slot holds the next page's number. */
pageno = bp[bp[0] - 1];
rbufp->flags |= BUF_MOD;
rbufp = __get_buf(hashp, (uint32_t)pageno, rbufp, 0);
if (last_bfp)
__free_ovflpage(hashp, last_bfp);
last_bfp = rbufp;
if (!rbufp)
return (-1); /* Error. */
bp = (uint16_t *)(void *)rbufp->page;
}
/*
* If we get here then rbufp points to the last page of the big
* key/data pair. Bufp points to the first one -- it should now be
* empty pointing to the next page after this pair. Can't free it
* because we don't have the page pointing to it.
*/
/* This is information from the last page of the pair. */
n = bp[0];
pageno = bp[n - 1];
/* Now, bp is the first page of the pair. */
bp = (uint16_t *)(void *)bufp->page;
if (n > 2) {
/* There is an overflow page. */
bp[1] = pageno;
bp[2] = OVFLPAGE;
bufp->ovfl = rbufp->ovfl;
} else
/* This is the last page. */
bufp->ovfl = NULL;
/* Rewrite the first page's entry count and reset its space accounting. */
n -= 2;
bp[0] = n;
temp = hashp->BSIZE - PAGE_META(n);
_DBFIT(temp, uint16_t);
FREESPACE(bp) = (uint16_t)temp;
OFFSET(bp) = hashp->BSIZE;
bufp->flags |= BUF_MOD;
if (rbufp)
__free_ovflpage(hashp, rbufp);
if (last_bfp && last_bfp != rbufp)
__free_ovflpage(hashp, last_bfp);
hashp->NKEYS--;
return (0);
}
/*
 * __find_bigpair --
 *	Compare the size-byte key at `key' with the big key that starts at
 *	index ndx of bufp's page, following the stored key from page to page.
 *
 * Returns:
 *	>0 ==> keys match; the value is the index on the page where the
 *	       stored key ends (ndx itself if no page had to be followed,
 *	       otherwise 1)
 *	-2 ==> key not found (this big pair holds a different key)
 *	-3 ==> error: a page of the stored key could not be fetched
 */
int
__find_bigpair(HTAB *hashp, BUFHEAD *bufp, int ndx, char *key, int size)
{
	uint16_t *bp;
	char *p;
	int ksize;
	uint16_t bytes;
	char *kkey;

	bp = (uint16_t *)(void *)bufp->page;
	p = bufp->page;
	ksize = size;		/* bytes of the caller's key not yet compared */
	kkey = key;		/* next byte of the caller's key to compare */

	/*
	 * Walk the PARTIAL_KEY pages.  The bound is the REMAINING key
	 * length: comparing a whole page fragment when fewer bytes of the
	 * caller's key are left would read past the end of that key.
	 */
	for (bytes = hashp->BSIZE - bp[ndx];
	    bytes <= ksize && bp[ndx + 1] == PARTIAL_KEY;
	    bytes = hashp->BSIZE - bp[ndx]) {
		if (memcmp(p + bp[ndx], kkey, (size_t)bytes))
			return (-2);
		kkey += bytes;
		ksize -= bytes;
		bufp = __get_buf(hashp, (uint32_t)bp[ndx + 2], bufp, 0);
		if (!bufp)
			return (-3);
		p = bufp->page;
		bp = (uint16_t *)(void *)p;
		ndx = 1;
	}

	/* The final fragment must account for exactly what is left. */
	if (bytes != ksize || memcmp(p + bp[ndx], kkey, (size_t)bytes)) {
#ifdef HASH_STATISTICS
		++hash_collisions;
#endif
		return (-2);
	} else
		return (ndx);
}
/*
 * __find_last_page --
 *	Starting from the buffer header in *bpp (first overflow page of a big
 *	pair), walk the chain to the last page of that pair and store its
 *	buffer header back into *bpp.
 *
 * Returns the page number of the overflow page that follows the pair, or 0
 * when there is none (the big pair is the last key in the bucket).  0 is
 * also returned, with *bpp left unchanged, when a page cannot be fetched;
 * the caller cannot tell the two cases apart.
 */
uint16_t
__find_last_page(HTAB *hashp, BUFHEAD **bpp)
{
	BUFHEAD *cur;
	uint16_t *tbl, next;
	int nent;

	cur = *bpp;
	tbl = (uint16_t *)(void *)cur->page;
	nent = tbl[0];

	/*
	 * A page is the last one when it is tagged FULL_KEY_DATA and, in
	 * addition, it has only two entries, or its final entry is an
	 * explicit OVFLPAGE marker, or it still has free space.
	 */
	while (!(tbl[2] == FULL_KEY_DATA &&
	    ((nent == 2) || (tbl[nent] == OVFLPAGE) || (FREESPACE(tbl))))) {
		next = tbl[nent - 1];
		cur = __get_buf(hashp, (uint32_t)next, cur, 0);
		if (cur == NULL)
			return (0);	/* Need to indicate an error! */
		tbl = (uint16_t *)(void *)cur->page;
		nent = tbl[0];
	}

	*bpp = cur;
	return (tbl[0] > 2 ? tbl[3] : 0);
}
/*
* Return the data for the key/data pair that begins on this page at this
* index (index should always be 1).
*/
/*
 * On success val->data/val->size describe the data: either a pointer into
 * the page (data entirely on one page) or hashp->tmp_buf (data gathered
 * from several pages by collect_data).  With set_current non-zero the scan
 * cursor in hashp is moved past the pair.
 *
 * Returns 0 on success, -1 on error.
 */
int
__big_return(HTAB *hashp, BUFHEAD *bufp, int ndx, DBT *val, int set_current)
{
BUFHEAD *save_p;
uint16_t *bp, len, off, save_addr;
char *tp;
bp = (uint16_t *)(void *)bufp->page;
/* Skip the pages that hold only key fragments. */
while (bp[ndx + 1] == PARTIAL_KEY) {
bufp = __get_buf(hashp, (uint32_t)bp[bp[0] - 1], bufp, 0);
if (!bufp)
return (-1);
bp = (uint16_t *)(void *)bufp->page;
ndx = 1;
}
if (bp[ndx + 1] == FULL_KEY) {
/* Key ends here with no data on the page: data starts on the next page. */
bufp = __get_buf(hashp, (uint32_t)bp[bp[0] - 1], bufp, 0);
if (!bufp)
return (-1);
bp = (uint16_t *)(void *)bufp->page;
save_p = bufp;
save_addr = save_p->addr;
off = bp[1];
len = 0;
} else
if (!FREESPACE(bp)) {
/*
* This is a hack. We can't distinguish between
* FULL_KEY_DATA that contains complete data or
* incomplete data, so we require that if the data
* is complete, there is at least 1 byte of free
* space left.
*/
/* Remember the data fragment on this page (off, len), then move on. */
off = bp[bp[0]];
len = bp[1] - off;
save_p = bufp;
save_addr = bufp->addr;
bufp = __get_buf(hashp, (uint32_t)bp[bp[0] - 1], bufp,
0);
if (!bufp)
return (-1);
bp = (uint16_t *)(void *)bufp->page;
} else {
/* The data is all on one page. */
tp = (char *)(void *)bp;
off = bp[bp[0]];
val->data = (uint8_t *)tp + off;
val->size = bp[1] - off;
if (set_current) {
if (bp[0] == 2) { /* No more buckets in
* chain */
hashp->cpage = NULL;
hashp->cbucket++;
hashp->cndx = 1;
} else {
hashp->cpage = __get_buf(hashp,
(uint32_t)bp[bp[0] - 1], bufp, 0);
if (!hashp->cpage)
return (-1);
hashp->cndx = 1;
/* Next page in the chain is empty: continue with the next bucket. */
if (!((uint16_t *)(void *)
hashp->cpage->page)[0]) {
hashp->cbucket++;
hashp->cpage = NULL;
}
}
}
return (0);
}
/* Gather the remaining pages into tmp_buf, leaving len bytes free in front. */
val->size = collect_data(hashp, bufp, (int)len, set_current);
if (val->size == (size_t)-1)
return (-1);
/* The saved buffer header now describes another page: its contents are gone. */
if (save_p->addr != save_addr) {
/* We are pretty short on buffers. */
errno = EINVAL; /* OUT OF BUFFERS */
return (-1);
}
/* Put the fragment saved above at the front of tmp_buf. */
memmove(hashp->tmp_buf, (save_p->page) + off, (size_t)len);
val->data = (uint8_t *)hashp->tmp_buf;
return (0);
}
/*
 * collect_data --
 *	Gather the data of a big pair into hashp->tmp_buf.  The recursion
 *	descends page by page adding up sizes (len is the number of bytes
 *	belonging to earlier pages); the page tagged FULL_KEY_DATA replaces
 *	tmp_buf with a zeroed buffer of the final size, and every level
 *	copies its own fragment to offset len on the way back up.
 *
 * With set non-zero the scan cursor in hashp is moved past the pair when
 * the last page is reached.
 *
 * Returns the total length, or -1 on error (errno is set to EINVAL when
 * this level's buffer was reused for another page during the descent).
 */
static int
collect_data(HTAB *hashp, BUFHEAD *bufp, int len, int set)
{
	BUFHEAD *nextp;
	uint16_t *ent;
	uint16_t my_addr;
	int chunk, total;

	ent = (uint16_t *)(void *)bufp->page;
	chunk = hashp->BSIZE - ent[1];
	my_addr = bufp->addr;

	if (ent[2] != FULL_KEY_DATA) {
		/* Not the last page: descend first, copy afterwards. */
		nextp = __get_buf(hashp, (uint32_t)ent[ent[0] - 1], bufp, 0);
		if (nextp == NULL)
			return (-1);
		total = collect_data(hashp, nextp, len + chunk, set);
		if (total < 1)
			return (-1);
	} else {
		/* End of Data: the full size is known, get the buffer. */
		total = len + chunk;
		if (hashp->tmp_buf)
			free(hashp->tmp_buf);
		hashp->tmp_buf = calloc(1, (size_t)total);
		if (hashp->tmp_buf == NULL)
			return (-1);
		if (set) {
			hashp->cndx = 1;
			if (ent[0] == 2) {
				/* No more buckets in chain */
				hashp->cpage = NULL;
				hashp->cbucket++;
			} else {
				hashp->cpage = __get_buf(hashp,
				    (uint32_t)ent[ent[0] - 1], bufp, 0);
				if (hashp->cpage == NULL)
					return (-1);
				if (((uint16_t *)(void *)
				    hashp->cpage->page)[0] == 0) {
					hashp->cbucket++;
					hashp->cpage = NULL;
				}
			}
		}
	}

	if (bufp->addr != my_addr) {
		errno = EINVAL;	/* Out of buffers. */
		return (-1);
	}
	memmove(&hashp->tmp_buf[len], (bufp->page) + ent[1], (size_t)chunk);
	return (total);
}
/*
 * __big_keydata --
 *	Return both halves of the big pair that starts on bufp's page: the
 *	key is assembled in hashp->tmp_key and described by *key, the data is
 *	filled into *val along the way.  `set' is passed through unchanged to
 *	the collection routines.
 *
 * Returns 0 on success, -1 on error (key->size is then (size_t)-1).
 */
int
__big_keydata(HTAB *hashp, BUFHEAD *bufp, DBT *key, DBT *val, int set)
{
	int keylen;

	keylen = collect_key(hashp, bufp, 0, val, set);
	key->size = keylen;
	if (key->size == (size_t)-1)
		return (-1);

	key->data = (uint8_t *)hashp->tmp_key;
	return (0);
}
/*
 * collect_key --
 *	Gather the key of a big pair into hashp->tmp_key.  The recursion
 *	descends page by page adding up sizes (len is the number of key bytes
 *	on earlier pages).  The page on which the key ends (FULL_KEY or
 *	FULL_KEY_DATA) replaces tmp_key with a zeroed buffer of the final
 *	size and fetches the data into *val through __big_return(); every
 *	level copies its own fragment to offset len on the way back up.
 *
 * Returns the total key length, or -1 on error (errno is set to EINVAL
 * when this level's buffer was reused for another page during the descent).
 */
static int
collect_key(HTAB *hashp, BUFHEAD *bufp, int len, DBT *val, int set)
{
	BUFHEAD *nextp;
	uint16_t *ent, my_addr;
	int chunk, total;

	ent = (uint16_t *)(void *)bufp->page;
	chunk = hashp->BSIZE - ent[1];
	my_addr = bufp->addr;
	total = len + chunk;

	if (ent[2] != FULL_KEY && ent[2] != FULL_KEY_DATA) {
		/* Key continues on the next page: descend first. */
		nextp = __get_buf(hashp, (uint32_t)ent[ent[0] - 1], bufp, 0);
		if (nextp == NULL)
			return (-1);
		total = collect_key(hashp, nextp, total, val, set);
		if (total < 1)
			return (-1);
	} else {
		/* End of Key. */
		if (hashp->tmp_key != NULL)
			free(hashp->tmp_key);
		hashp->tmp_key = calloc(1, (size_t)total);
		if (hashp->tmp_key == NULL)
			return (-1);
		if (__big_return(hashp, bufp, 1, val, set))
			return (-1);
	}

	if (bufp->addr != my_addr) {
		errno = EINVAL;		/* MIS -- OUT OF BUFFERS */
		return (-1);
	}
	memmove(&hashp->tmp_key[len], (bufp->page) + ent[1], (size_t)chunk);
	return (total);
}
/*
* Returns:
* 0 => OK
* -1 => error
*/
/*
 * Called while a bucket is being split and the next item is a big pair:
 * decides whether the pair stays with the old bucket or moves to the new
 * one, links the pair's first page behind the chosen page, and reports the
 * resulting old/new/next pages through *ret.
 */
int
__big_split(
HTAB *hashp,
BUFHEAD *op, /* Pointer to where to put keys that go in old bucket */
BUFHEAD *np, /* Pointer to new bucket page */
/* Pointer to first page containing the big key/data */
BUFHEAD *big_keyp,
int addr, /* Address of big_keyp */
uint32_t obucket,/* Old Bucket */
SPLIT_RETURN *ret
)
{
BUFHEAD *tmpp;
uint16_t *tp;
BUFHEAD *bp;
DBT key, val;
uint32_t change;
uint16_t free_space, n, off;
size_t temp;
/* Keep the first page: big_keyp is advanced to the last page below. */
bp = big_keyp;
/* Now figure out where the big key/data goes */
if (__big_keydata(hashp, big_keyp, &key, &val, 0))
return (-1);
/* change != 0: the key no longer hashes to the old bucket. */
change = (__call_hash(hashp, key.data, (int)key.size) != obucket);
/* Moves big_keyp to the pair's last page and yields the page after it. */
if ((ret->next_addr = __find_last_page(hashp, &big_keyp)) != 0) {
if (!(ret->nextp =
__get_buf(hashp, (uint32_t)ret->next_addr, big_keyp, 0)))
return (-1);
} else
ret->nextp = NULL;
/* Now make one of np/op point to the big key/data pair */
_DIAGASSERT(np->ovfl == NULL);
if (change)
tmpp = np;
else
tmpp = op;
tmpp->flags |= BUF_MOD;
#ifdef DEBUG1
(void)fprintf(stderr,
"BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr,
(tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0));
#endif
tmpp->ovfl = bp; /* one of op/np point to big_keyp */
tp = (uint16_t *)(void *)tmpp->page;
_DIAGASSERT(FREESPACE(tp) >= OVFLSIZE);
/* Append an (addr, OVFLPAGE) link to the chosen page. */
n = tp[0];
off = OFFSET(tp);
free_space = FREESPACE(tp);
tp[++n] = (uint16_t)addr;
tp[++n] = OVFLPAGE;
tp[0] = n;
OFFSET(tp) = off;
temp = free_space - OVFLSIZE;
_DBFIT(temp, uint16_t);
FREESPACE(tp) = (uint16_t)temp;
/*
* Finally, set the new and old return values. BIG_KEYP contains a
* pointer to the last page of the big key_data pair. Make sure that
* big_keyp has no following page (2 elements) or create an empty
* following page.
*/
ret->newp = np;
ret->oldp = op;
tp = (uint16_t *)(void *)big_keyp->page;
big_keyp->flags |= BUF_MOD;
if (tp[0] > 2) {
/*
* There may be either one or two offsets on this page. If
* there is one, then the overflow page is linked on normally
* and tp[4] is OVFLPAGE. If there are two, tp[4] contains
* the second offset and needs to get stuffed in after the
* next overflow page is added.
*/
n = tp[4];
free_space = FREESPACE(tp);
off = OFFSET(tp);
/* Drop the old link before a new overflow page is attached. */
tp[0] -= 2;
temp = free_space + OVFLSIZE;
_DBFIT(temp, uint16_t);
FREESPACE(tp) = (uint16_t)temp;
OFFSET(tp) = off;
tmpp = __add_ovflpage(hashp, big_keyp);
if (!tmpp)
return (-1);
tp[4] = n;
} else
tmpp = big_keyp;
/* The bucket that received the pair continues on tmpp. */
if (change)
ret->newp = tmpp;
else
ret->oldp = tmpp;
return (0);
}

346
lib/libc/db/hash/hash_buf.c Normal file
View file

@ -0,0 +1,346 @@
/* $NetBSD: hash_buf.c,v 1.18 2009/04/23 22:09:23 christos Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: hash_buf.c,v 1.18 2009/04/23 22:09:23 christos Exp $");
#endif
/*
* PACKAGE: hash
*
* DESCRIPTION:
* Contains buffer management
*
* ROUTINES:
* External
* __buf_init
* __get_buf
* __buf_free
* __reclaim_buf
* Internal
* newbuf
*/
#include <sys/param.h>
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <db.h>
#include "hash.h"
#include "page.h"
#include "extern.h"
static BUFHEAD *newbuf(HTAB *, uint32_t, BUFHEAD *);
/*
 * List helpers for the buffer pool.  Buffer headers are kept on a doubly
 * linked list whose sentinel is hashp->bufhead.  BUF_REMOVE and BUF_INSERT
 * expand to brace blocks, and MRU/LRU/MRU_INSERT/LRU_INSERT name `hashp'
 * directly, so all of them must be used as complete statements in a scope
 * where a variable called `hashp' is visible.  Arguments are evaluated
 * more than once.
 */
/* Unlink B from its place in the lru */
#define BUF_REMOVE(B) { \
(B)->prev->next = (B)->next; \
(B)->next->prev = (B)->prev; \
}
/* Insert B after P */
#define BUF_INSERT(B, P) { \
(B)->next = (P)->next; \
(B)->prev = (P); \
(P)->next = (B); \
(B)->next->prev = (B); \
}
/* First and last element of the list, as seen from the sentinel. */
#define MRU hashp->bufhead.next
#define LRU hashp->bufhead.prev
/* Insert B right after the sentinel, i.e. make it the new MRU entry. */
#define MRU_INSERT(B) BUF_INSERT((B), &hashp->bufhead)
/* Insert B after the current last entry, i.e. make it the new LRU entry. */
#define LRU_INSERT(B) BUF_INSERT((B), LRU)
/* Fall back to plain assert() when the platform has no _DIAGASSERT. */
#ifndef _DIAGASSERT
#define _DIAGASSERT assert
#endif
/* Classic min/max; each argument may be evaluated twice. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
/*
 * Return the buffer holding page "addr", reading or creating it if needed.
 *
 * hashp    - the hash table.
 * addr     - a bucket index when prev_bp is NULL; otherwise the address of
 *            the overflow page that follows prev_bp.
 * prev_bp  - NULL for a bucket lookup, else the page preceding the wanted
 *            overflow page.
 * newpage  - only meaningful with prev_bp: non-zero means the overflow page
 *            is brand new and must not be read from disk.
 *
 * A buffer that is already resident is moved to the MRU position.  A
 * buffer obtained from newbuf() is filled through __get_page() and, for
 * bucket pages, recorded in the segment directory.  Returns NULL when
 * newbuf() or __get_page() fails.
 *
 * CAVEAT: the header reached through prev_bp->ovfl may be stale, so its
 * address is always compared with the address being sought before use.
 */
BUFHEAD *
__get_buf(
	HTAB *hashp,
	uint32_t addr,
	BUFHEAD *prev_bp,
	int newpage	/* If prev_bp set, indicates a new overflow page. */
)
{
	BUFHEAD *found = NULL;
	SEGMENT seg = NULL;
	uint32_t disk_bit = 0;
	int slot = 0;
	int from_disk = 0;

	if (prev_bp != NULL) {
		/* Overflow lookup: trust the link only if the address matches. */
		if (prev_bp->ovfl != NULL && prev_bp->ovfl->addr == addr)
			found = prev_bp->ovfl;
		if (!newpage)
			from_disk = BUF_DISK;
	} else {
		/* Bucket lookup: grab the buffer out of the directory. */
		slot = addr & (hashp->SGSIZE - 1);
		/* valid segment ensured by __call_hash() */
		seg = hashp->dir[addr >> hashp->SSHIFT];
		_DIAGASSERT(seg != NULL);
		found = PTROF(seg[slot]);
		disk_bit = ISDISK(seg[slot]);
		from_disk = disk_bit || !hashp->new_file;
	}

	/* Already resident: just refresh its position in the list. */
	if (found != NULL) {
		BUF_REMOVE(found);
		MRU_INSERT(found);
		return (found);
	}

	found = newbuf(hashp, addr, prev_bp);
	if (found == NULL)
		return (NULL);
	if (__get_page(hashp, found->page, addr, !prev_bp, from_disk, 0))
		return (NULL);

	/* Remember bucket buffers in the directory, keeping the disk bit. */
	if (prev_bp == NULL)
		seg[slot] = (BUFHEAD *)(void *)((u_long)found | disk_bit);
	return (found);
}
/*
 * We need a buffer for this page. Either allocate one, or evict a resident
 * one (if we have as many buffers as we're allowed) and put this one in.
 *
 * hashp   - table whose buffer pool is used.
 * addr    - address stored in the returned buffer header.
 * prev_bp - NULL when the buffer is for a bucket page; otherwise the buffer
 *           whose ovfl link is pointed at the returned buffer.
 *
 * The returned header has been inserted at the MRU position.  Its page
 * contents are not loaded here.
 *
 * If newbuf finds an error (returning NULL), it also sets errno.  The NULL
 * paths visible here are a failed calloc() and a failed __put_page().
 *
 * NOTE(review): on the first __put_page() failure path the victim buffer
 * has already been unlinked with BUF_REMOVE and is not put back on the
 * list -- confirm that callers treat this as fatal.
 */
static BUFHEAD *
newbuf(HTAB *hashp, uint32_t addr, BUFHEAD *prev_bp)
{
BUFHEAD *bp; /* The buffer we're going to use */
BUFHEAD *xbp; /* Temp pointer */
BUFHEAD *next_xbp;
SEGMENT segp;
int segment_ndx;
uint16_t oaddr, *shortp;
oaddr = 0;
bp = LRU;
/*
 * If LRU buffer is pinned, the buffer pool is too small. We need to
 * allocate more buffers.
 */
/* hashp->nbufs counts the buffers that may still be allocated. */
if (hashp->nbufs || (bp->flags & BUF_PIN)) {
/* Allocate a new one */
if ((bp = calloc(1, sizeof(BUFHEAD))) == NULL)
return (NULL);
if ((bp->page = calloc(1, (size_t)hashp->BSIZE)) == NULL) {
free(bp);
return (NULL);
}
if (hashp->nbufs)
hashp->nbufs--;
} else {
/* Kick someone out */
BUF_REMOVE(bp);
/*
 * If this is an overflow page with addr 0, it's already been
 * flushed back in an overflow chain and initialized.
 */
if ((bp->addr != 0) || (bp->flags & BUF_BUCKET)) {
/*
 * Set oaddr before __put_page so that you get it
 * before bytes are swapped.
 */
shortp = (uint16_t *)(void *)bp->page;
/* oaddr is the second-to-last index entry on the page. */
if (shortp[0])
oaddr = shortp[shortp[0] - 1];
if ((bp->flags & BUF_MOD) && __put_page(hashp, bp->page,
bp->addr, (int)IS_BUCKET(bp->flags), 0))
return (NULL);
/*
 * Update the pointer to this page (i.e. invalidate it).
 *
 * If this is a new file (i.e. we created it at open
 * time), make sure that we mark pages which have been
 * written to disk so we retrieve them from disk later,
 * rather than allocating new pages.
 */
if (IS_BUCKET(bp->flags)) {
segment_ndx = bp->addr & (hashp->SGSIZE - 1);
segp = hashp->dir[bp->addr >> hashp->SSHIFT];
_DIAGASSERT(segp != NULL);
if (hashp->new_file &&
((bp->flags & BUF_MOD) ||
ISDISK(segp[segment_ndx])))
segp[segment_ndx] = (BUFHEAD *)BUF_DISK;
else
segp[segment_ndx] = NULL;
}
/*
 * Since overflow pages can only be accessed by means of
 * their bucket, free overflow pages associated with
 * this bucket.
 */
for (xbp = bp; xbp->ovfl;) {
next_xbp = xbp->ovfl;
xbp->ovfl = 0;
xbp = next_xbp;
/* Check that ovfl pointer is up to date. */
if (IS_BUCKET(xbp->flags) ||
(oaddr != xbp->addr))
break;
shortp = (uint16_t *)(void *)xbp->page;
if (shortp[0])
/* set before __put_page */
oaddr = shortp[shortp[0] - 1];
if ((xbp->flags & BUF_MOD) && __put_page(hashp,
xbp->page, xbp->addr, 0, 0))
return (NULL);
/* Mark the overflow buffer unused and park it at the LRU end. */
xbp->addr = 0;
xbp->flags = 0;
BUF_REMOVE(xbp);
LRU_INSERT(xbp);
}
}
}
/* Now assign this buffer */
bp->addr = addr;
#ifdef DEBUG1
(void)fprintf(stderr, "NEWBUF1: %d->ovfl was %d is now %d\n",
bp->addr, (bp->ovfl ? bp->ovfl->addr : 0), 0);
#endif
bp->ovfl = NULL;
if (prev_bp) {
/*
 * If prev_bp is set, this is an overflow page, hook it in to
 * the buffer overflow links.
 */
#ifdef DEBUG1
(void)fprintf(stderr, "NEWBUF2: %d->ovfl was %d is now %d\n",
prev_bp->addr, (prev_bp->ovfl ? prev_bp->ovfl->addr : 0),
(bp ? bp->addr : 0));
#endif
prev_bp->ovfl = bp;
bp->flags = 0;
} else
bp->flags = BUF_BUCKET;
MRU_INSERT(bp);
return (bp);
}
/*
 * Set up the buffer pool of a freshly created table.
 *
 * nbytes is the requested cache size in bytes.  It is rounded up to whole
 * pages and raised to MIN_BUFFERS if smaller; the result becomes the number
 * of buffers that may still be allocated (hashp->nbufs).  The list sentinel
 * is linked to itself, giving an empty list.
 */
void
__buf_init(HTAB *hashp, u_int nbytes)
{
	BUFHEAD *head = &(hashp->bufhead);
	int quota;

	quota = (unsigned int)(nbytes + hashp->BSIZE - 1) >> hashp->BSHIFT;
	if (!(quota > MIN_BUFFERS))
		quota = MIN_BUFFERS;

	/*
	 * The remaining sentinel fields (ovfl, flags, page, addr) need no
	 * initialisation here: the space is calloc'd, so they are already
	 * null.
	 */
	head->prev = head;
	head->next = head;
	hashp->nbufs = quota;
}
/*
 * Walk every buffer starting at the LRU end of the list.
 *
 * do_free - non-zero: wipe and free each page and its header.
 * to_disk - non-zero: write pages flagged BUF_MOD back with __put_page()
 *           before (possibly) freeing them.
 *
 * Returns 0 on success (including when the buffer manager was never
 * initialised) and -1 as soon as a __put_page() call fails.
 */
int
__buf_free(HTAB *hashp, int do_free, int to_disk)
{
	BUFHEAD *const sentinel = &hashp->bufhead;
	BUFHEAD *cur;

	/* Need to make sure that buffer manager has been initialized */
	if (LRU == NULL)
		return (0);

	cur = LRU;
	while (cur != sentinel) {
		/* Only valid, modified buffers are worth writing out. */
		if ((cur->addr || IS_BUCKET(cur->flags)) &&
		    to_disk && (cur->flags & BUF_MOD) &&
		    __put_page(hashp, cur->page,
		    cur->addr, IS_BUCKET(cur->flags), 0))
			return (-1);

		if (!do_free) {
			cur = cur->prev;
			continue;
		}

		/* Scrub the page before handing the memory back. */
		if (cur->page != NULL) {
			(void)memset(cur->page, 0, (size_t)hashp->BSIZE);
			free(cur->page);
		}
		BUF_REMOVE(cur);
		free(cur);
		/* The list shrank; restart from its (new) LRU end. */
		cur = LRU;
	}
	return (0);
}
/*
 * Turn bp into an unused buffer: clear its address, flags and overflow
 * link, and move it to the LRU end of the list so it is the next victim.
 */
void
__reclaim_buf(HTAB *hashp, BUFHEAD *bp)
{
	BUF_REMOVE(bp);

	bp->flags = 0;
	bp->addr = 0;
	bp->ovfl = NULL;

	LRU_INSERT(bp);
}

View file

@ -0,0 +1,210 @@
/* $NetBSD: hash_func.c,v 1.13 2008/09/10 17:52:35 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: hash_func.c,v 1.13 2008/09/10 17:52:35 joerg Exp $");
#endif
#include <sys/types.h>
#include <db.h>
#include "hash.h"
#include "page.h"
#include "extern.h"
/*
 * hash1, hash2 and hash3 are alternative hash functions that are compiled
 * out (their definitions below sit inside the same "#if 0").
 */
#if 0
static uint32_t hash1(const void *, size_t) __attribute__((__unused__));
static uint32_t hash2(const void *, size_t) __attribute__((__unused__));
static uint32_t hash3(const void *, size_t) __attribute__((__unused__));
#endif
/* The only hash function that is actually built. */
static uint32_t hash4(const void *, size_t) __attribute__((__unused__));
/* Global default hash function */
uint32_t (*__default_hash)(const void *, size_t) = hash4;
#if 0
/*
* HASH FUNCTIONS
*
* Assume that we've already split the bucket to which this key hashes,
* calculate that bucket, and check that in fact we did already split it.
*
* This came from ejb's hsearch.
*/
#define PRIME1 37
#define PRIME2 1048583
/*
 * Multiply-and-xor hash over the len bytes at keyarg.  Each byte is biased
 * by ' ' before being folded in; the final value is reduced modulo PRIME2.
 */
static uint32_t
hash1(const void *keyarg, size_t len)
{
	const uint8_t *p = keyarg;
	const uint8_t *const end = p + len;
	uint32_t acc = 0;

	/* Convert string to integer */
	while (p != end) {
		acc = (acc * PRIME1) ^ (uint32_t)(*p - ' ');
		p++;
	}
	return (acc % PRIME2);
}
/*
* Phong's linear congruential hash
*/
#define dcharhash(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c))
/*
 * Linear congruential hash over the len bytes at keyarg.
 *
 * Every byte, including NUL bytes, is folded in with dcharhash().  The
 * former "if (!c && key > e) break;" test has been dropped: key is only
 * advanced while key != e, so it can never exceed e and the branch was
 * unreachable.
 *
 * Returns 0 for an empty key.
 */
static uint32_t
hash2(const void *keyarg, size_t len)
{
	const uint8_t *key = keyarg;
	const uint8_t *const e = key + len;
	uint32_t h = 0;

	while (key != e) {
		const uint8_t c = *key++;

		dcharhash(h, c);
	}
	return (h);
}
/*
 * OZ's original sdbm hash: h = byte + 65599 * h for every byte of the key,
 * taken in order.
 *
 * The loop is unrolled by hand.  The (len % 8) leftover bytes are consumed
 * first, one at a time; after that the key is processed in groups of eight
 * steps, which saves seven compare-and-branch instructions per group.
 */
static uint32_t
hash3(const void *keyarg, size_t len)
{
	const uint8_t *p = keyarg;
	size_t lead = len & (8 - 1);	/* leftover bytes, handled first */
	size_t groups = len >> 3;	/* full 8-byte groups */
	uint32_t acc = 0;

#define HASHC	acc = *p++ + 65599 * acc

	while (lead > 0) {
		HASHC;
		lead--;
	}
	for (; groups > 0; groups--) {
		HASHC;
		HASHC;
		HASHC;
		HASHC;
		HASHC;
		HASHC;
		HASHC;
		HASHC;
	}
	return (acc);
}
#endif
/*
 * Hash function from Chris Torek.
 *
 * Folds each byte of the key, in order, into the running value using the
 * HASH4 step (currently HASH4b: h * 33 + byte; HASH4a would be h * 31 +
 * byte).  The (len % 8) leftover bytes are consumed first, then the rest
 * of the key in hand-unrolled groups of eight steps.  Returns 0 for an
 * empty key.
 */
static uint32_t
hash4(const void *keyarg, size_t len)
{
	const uint8_t *p = keyarg;
	size_t lead = len & (8 - 1);	/* leftover bytes, handled first */
	size_t groups = len >> 3;	/* full 8-byte groups */
	uint32_t acc = 0;

#define HASH4a	acc = (acc << 5) - acc + *p++
#define HASH4b	acc = (acc << 5) + acc + *p++
#define HASH4	HASH4b

	while (lead > 0) {
		HASH4;
		lead--;
	}
	for (; groups > 0; groups--) {
		HASH4;
		HASH4;
		HASH4;
		HASH4;
		HASH4;
		HASH4;
		HASH4;
		HASH4;
	}
	return (acc);
}

View file

@ -0,0 +1,64 @@
/* $NetBSD: hash_log2.c,v 1.13 2008/09/11 12:33:55 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: hash_log2.c,v 1.13 2008/09/11 12:33:55 joerg Exp $");
#endif
#include <sys/types.h>
#include <db.h>
#include "hash.h"
#include "page.h"
#include "extern.h"
/*
 * Return the smallest i such that 2^i >= num, i.e. the ceiling of the
 * base-2 logarithm.  By convention both __log2(0) and __log2(1) are 0.
 */
uint32_t
__log2(uint32_t num)
{
	uint32_t bits = 0;
	uint32_t rest;

	if (num == 0)
		return 0;

	/* The answer is the number of significant bits in (num - 1). */
	for (rest = num - 1; rest != 0; rest >>= 1)
		bits++;
	return (bits);
}

View file

@ -0,0 +1,989 @@
/* $NetBSD: hash_page.c,v 1.23 2008/09/11 12:58:00 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: hash_page.c,v 1.23 2008/09/11 12:58:00 joerg Exp $");
#endif
/*
* PACKAGE: hashing
*
* DESCRIPTION:
* Page manipulation for hashing package.
*
* ROUTINES:
*
* External
* __get_page
* __add_ovflpage
* Internal
* overflow_page
* open_temp
*/
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#ifndef __minix
#include <paths.h>
#else
#define _PATH_TMP "/tmp/"
#endif
#include <assert.h>
#include <db.h>
#include "hash.h"
#include "page.h"
#include "extern.h"
#ifndef LITTLE_ENDIAN
# define LITTLE_ENDIAN 1234
#endif
#ifndef BIG_ENDIAN
# define BIG_ENDIAN 4321
#endif
#ifndef BYTE_ORDER
#define BYTE_ORDER LITTLE_ENDIAN
#endif
#ifndef _DIAGASSERT
#define _DIAGASSERT assert
#endif
static uint32_t *fetch_bitmap(HTAB *, int);
static uint32_t first_free(uint32_t);
static int open_temp(HTAB *);
static uint16_t overflow_page(HTAB *);
static void putpair(char *, const DBT *, const DBT *);
static void squeeze_key(uint16_t *, const DBT *, const DBT *);
static int ugly_split(HTAB *, uint32_t, BUFHEAD *, BUFHEAD *, int, int);
/*
 * Initialise P as an empty page.  Viewed as an array of uint16_t, slot 0
 * (the entry count) becomes 0, slot 1 becomes the page size minus the three
 * header words, and slot 2 becomes the page size.
 *
 * The macro expands to a brace block and relies on two names from the
 * enclosing scope: `hashp' (for BSIZE) and a writable `temp' used as
 * scratch.
 */
#define PAGE_INIT(P) { \
((uint16_t *)(void *)(P))[0] = 0; \
temp = 3 * sizeof(uint16_t); \
_DIAGASSERT(hashp->BSIZE >= temp); \
((uint16_t *)(void *)(P))[1] = (uint16_t)(hashp->BSIZE - temp); \
((uint16_t *)(void *)(P))[2] = hashp->BSIZE; \
}
/*
 * Append the key/val pair to page p.
 *
 * The caller must already have verified that the pair fits (PAIRFITS
 * returned true); no space check is made here beyond the diagnostic
 * assertions.  The key bytes are copied just below the current data
 * offset, the value bytes just below the key, two index entries are added
 * for them, and the entry count, free-space word and data-offset word are
 * rewritten behind the new entries.
 */
static void
putpair(char *p, const DBT *key, const DBT *val)
{
	uint16_t *hdr = (uint16_t *)(void *)p;
	uint16_t count = hdr[0];
	uint16_t key_off, val_off;
	size_t scratch;

	/* Enter the key first. */
	scratch = OFFSET(hdr);
	_DIAGASSERT(scratch >= key->size);
	key_off = (uint16_t)(scratch - key->size);
	memmove(p + key_off, key->data, key->size);
	hdr[count + 1] = key_off;

	/* Now the data. */
	_DIAGASSERT(key_off >= val->size);
	val_off = (uint16_t)(key_off - (uint16_t)val->size);
	memmove(p + val_off, val->data, val->size);
	hdr[count + 2] = val_off;

	/* Adjust page info. */
	count += 2;
	hdr[0] = count;
	scratch = (count + 3) * sizeof(uint16_t);
	_DIAGASSERT(val_off >= scratch);
	hdr[count + 1] = (uint16_t)(val_off - scratch);
	hdr[count + 2] = val_off;
}
/*
 * Remove the key/data pair whose key index entry is bp[ndx] from the page
 * held in bufp.
 *
 * If the data entry bp[ndx + 1] is below REAL_KEY the pair is not a regular
 * one and the work is handed to __big_delete().  Otherwise the pair's bytes
 * are squeezed out of the data area, the index entries that follow are
 * moved down by two slots (with their offsets adjusted), the entry count,
 * free-space word and data-offset word are rewritten, hashp->NKEYS is
 * decremented and the buffer is flagged BUF_MOD.
 *
 * Returns:
 * 0 OK
 * -1 error
 * (for the non-regular case, whatever __big_delete() returns)
 */
int
__delpair(HTAB *hashp, BUFHEAD *bufp, int ndx)
{
uint16_t *bp, newoff;
int n;
uint16_t pairlen;
size_t temp;
bp = (uint16_t *)(void *)bufp->page;
n = bp[0];
if (bp[ndx + 1] < REAL_KEY)
return (__big_delete(hashp, bufp));
/* newoff: end of this pair, i.e. the previous data offset or page end. */
if (ndx != 1)
newoff = bp[ndx - 1];
else
newoff = hashp->BSIZE;
/* Number of bytes the key and its data occupy together. */
pairlen = newoff - bp[ndx + 1];
if (ndx != (n - 1)) {
/* Hard Case -- need to shuffle keys */
int i;
char *src = bufp->page + (int)OFFSET(bp);
char *dst = src + (int)pairlen;
/* Slide everything stored below the deleted pair up by pairlen. */
memmove(dst, src, (size_t)(bp[ndx + 1] - OFFSET(bp)));
/* Now adjust the pointers */
for (i = ndx + 2; i <= n; i += 2) {
if (bp[i + 1] == OVFLPAGE) {
/* Overflow entries hold an address, not an offset: copy as is. */
bp[i - 2] = bp[i];
bp[i - 1] = bp[i + 1];
} else {
bp[i - 2] = bp[i] + pairlen;
bp[i - 1] = bp[i + 1] + pairlen;
}
}
}
/* Finally adjust the page data */
/* The trailer words move down two slots; order matters here. */
bp[n] = OFFSET(bp) + pairlen;
temp = bp[n + 1] + pairlen + 2 * sizeof(uint16_t);
_DIAGASSERT(temp <= 0xffff);
bp[n - 1] = (uint16_t)temp;
bp[0] = n - 2;
hashp->NKEYS--;
bufp->flags |= BUF_MOD;
return (0);
}
/*
 * Redistribute the pairs stored on bucket obucket between obucket and
 * nbucket.
 *
 * Both bucket pages are fetched with __get_buf(), flagged BUF_MOD and
 * pinned (BUF_PIN) for the duration of the split.  Each regular pair is
 * rehashed with __call_hash(): pairs that still map to obucket are
 * compacted towards the end of the old page, the others are appended to the
 * new page with putpair().  As soon as an entry below REAL_KEY is met the
 * rest of the work is delegated to ugly_split().
 *
 * Returns:
 * 0 ==> OK
 * -1 ==> Error
 * (when ugly_split() is used, its return value is passed through)
 */
int
__split_page(HTAB *hashp, uint32_t obucket, uint32_t nbucket)
{
BUFHEAD *new_bufp, *old_bufp;
uint16_t *ino;
char *np;
DBT key, val;
int n, ndx, retval;
uint16_t copyto, diff, off, moved;
char *op;
size_t temp;
/* copyto: start of the data already kept on the old page. */
copyto = (uint16_t)hashp->BSIZE;
/* off: end of the key currently being examined. */
off = (uint16_t)hashp->BSIZE;
old_bufp = __get_buf(hashp, obucket, NULL, 0);
if (old_bufp == NULL)
return (-1);
new_bufp = __get_buf(hashp, nbucket, NULL, 0);
if (new_bufp == NULL)
return (-1);
old_bufp->flags |= (BUF_MOD | BUF_PIN);
new_bufp->flags |= (BUF_MOD | BUF_PIN);
ino = (uint16_t *)(void *)(op = old_bufp->page);
np = new_bufp->page;
moved = 0;
/* n walks the original index entries; ndx is the next slot to keep. */
for (n = 1, ndx = 1; n < ino[0]; n += 2) {
if (ino[n + 1] < REAL_KEY) {
retval = ugly_split(hashp, obucket, old_bufp, new_bufp,
(int)copyto, (int)moved);
old_bufp->flags &= ~BUF_PIN;
new_bufp->flags &= ~BUF_PIN;
return (retval);
}
key.data = (uint8_t *)op + ino[n];
key.size = off - ino[n];
if (__call_hash(hashp, key.data, (int)key.size) == obucket) {
/* Don't switch page */
/* diff != 0 means earlier pairs left a gap to close. */
diff = copyto - off;
if (diff) {
copyto = ino[n + 1] + diff;
memmove(op + copyto, op + ino[n + 1],
(size_t)(off - ino[n + 1]));
ino[ndx] = copyto + ino[n] - ino[n + 1];
ino[ndx + 1] = copyto;
} else
copyto = ino[n + 1];
ndx += 2;
} else {
/* Switch page */
val.data = (uint8_t *)op + ino[n + 1];
val.size = ino[n] - ino[n + 1];
putpair(np, &key, &val);
moved += 2;
}
off = ino[n + 1];
}
/* Now clean up the page */
ino[0] -= moved;
temp = sizeof(uint16_t) * (ino[0] + 3);
_DIAGASSERT(copyto >= temp);
FREESPACE(ino) = (uint16_t)(copyto - temp);
OFFSET(ino) = copyto;
#ifdef DEBUG3
(void)fprintf(stderr, "split %d/%d\n",
((uint16_t *)np)[0] / 2,
((uint16_t *)op)[0] / 2);
#endif
/* unpin both pages */
old_bufp->flags &= ~BUF_PIN;
new_bufp->flags &= ~BUF_PIN;
return (0);
}
/*
 * Called when we encounter an overflow or big key/data page during split
 * handling. This is special cased since we have to begin checking whether
 * the key/data pairs fit on their respective pages and because we may need
 * overflow pages for both the old and new pages.
 *
 * The first page might be a page with regular key/data pairs in which case
 * we have a regular overflow condition and just need to go on to the next
 * page or it might be a big key/data pair in which case we need to fix the
 * big key/data pair.
 *
 * From here on every regular pair is placed with PAIRFITS()/putpair(), and
 * __add_ovflpage() is used to extend whichever chain (old or new) is full.
 * Overflow pages that have been drained are released with
 * __free_ovflpage().
 *
 * Returns:
 * 0 ==> success
 * -1 ==> failure
 */
static int
ugly_split(
HTAB *hashp,
uint32_t obucket, /* Same as __split_page. */
BUFHEAD *old_bufp,
BUFHEAD *new_bufp,
int copyto, /* First byte on page which contains key/data values. */
int moved /* Number of pairs moved to new page. */
)
{
BUFHEAD *bufp; /* Buffer header for ino */
uint16_t *ino; /* Page keys come off of */
uint16_t *np; /* New page */
uint16_t *op; /* Page keys go on to if they aren't moving */
size_t temp;
BUFHEAD *last_bfp; /* Last buf header OVFL needing to be freed */
DBT key, val;
SPLIT_RETURN ret;
uint16_t n, off, ov_addr, scopyto;
char *cino; /* Character value of ino */
bufp = old_bufp;
ino = (uint16_t *)(void *)old_bufp->page;
np = (uint16_t *)(void *)new_bufp->page;
op = (uint16_t *)(void *)old_bufp->page;
last_bfp = NULL;
scopyto = (uint16_t)copyto; /* ANSI */
/* Start at the last entry pair of the page being drained. */
n = ino[0] - 1;
while (n < ino[0]) {
if (ino[2] < REAL_KEY && ino[2] != OVFLPAGE) {
/* Big key/data pair: __big_split reports the pages to continue with. */
if (__big_split(hashp, old_bufp,
new_bufp, bufp, (int)bufp->addr, obucket, &ret))
return (-1);
old_bufp = ret.oldp;
if (!old_bufp)
return (-1);
op = (uint16_t *)(void *)old_bufp->page;
new_bufp = ret.newp;
if (!new_bufp)
return (-1);
np = (uint16_t *)(void *)new_bufp->page;
bufp = ret.nextp;
/* No next page: nothing is left to redistribute. */
if (!bufp)
return (0);
cino = (char *)bufp->page;
ino = (uint16_t *)(void *)cino;
last_bfp = ret.nextp;
} else if (ino[n + 1] == OVFLPAGE) {
ov_addr = ino[n];
/*
 * Fix up the old page -- the extra 2 are the fields
 * which contained the overflow information.
 */
ino[0] -= (moved + 2);
temp = sizeof(uint16_t) * (ino[0] + 3);
_DIAGASSERT(scopyto >= temp);
FREESPACE(ino) = (uint16_t)(scopyto - temp);
OFFSET(ino) = scopyto;
/* Continue with the overflow page that the entry pointed at. */
bufp = __get_buf(hashp, (uint32_t)ov_addr, bufp, 0);
if (!bufp)
return (-1);
ino = (uint16_t *)(void *)bufp->page;
n = 1;
scopyto = hashp->BSIZE;
moved = 0;
/* The overflow page handled before this one is now empty. */
if (last_bfp)
__free_ovflpage(hashp, last_bfp);
last_bfp = bufp;
}
/* Move regular sized pairs if there are any */
off = hashp->BSIZE;
for (n = 1; (n < ino[0]) && (ino[n + 1] >= REAL_KEY); n += 2) {
cino = (char *)(void *)ino;
key.data = (uint8_t *)cino + ino[n];
key.size = off - ino[n];
val.data = (uint8_t *)cino + ino[n + 1];
val.size = ino[n] - ino[n + 1];
off = ino[n + 1];
if (__call_hash(hashp, key.data, (int)key.size) == obucket) {
/* Keep on old page */
if (PAIRFITS(op, (&key), (&val)))
putpair((char *)(void *)op, &key, &val);
else {
old_bufp =
__add_ovflpage(hashp, old_bufp);
if (!old_bufp)
return (-1);
op = (uint16_t *)(void *)old_bufp->page;
putpair((char *)(void *)op, &key, &val);
}
old_bufp->flags |= BUF_MOD;
} else {
/* Move to new page */
if (PAIRFITS(np, (&key), (&val)))
putpair((char *)(void *)np, &key, &val);
else {
new_bufp =
__add_ovflpage(hashp, new_bufp);
if (!new_bufp)
return (-1);
np = (uint16_t *)(void *)new_bufp->page;
putpair((char *)(void *)np, &key, &val);
}
new_bufp->flags |= BUF_MOD;
}
}
}
if (last_bfp)
__free_ovflpage(hashp, last_bfp);
return (0);
}
/*
 * Add the given pair to the page
 *
 * Starting at bufp, the overflow chain is followed until a page is found
 * that can take the pair.  If the pair does not fit on the last page, an
 * overflow page is added; if it does not fit on an empty overflow page
 * either, it is stored through __big_insert().  Afterwards hashp->NKEYS is
 * incremented and the table is expanded with __expand_table() when an
 * overflow page had to be added or the keys-per-bucket ratio exceeds the
 * fill factor.
 *
 * Returns:
 * 0 ==> OK
 * -1 ==> failure
 * (when the table is expanded, the return value of __expand_table())
 */
int
__addel(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val)
{
uint16_t *bp, *sop;
int do_expand;
bp = (uint16_t *)(void *)bufp->page;
do_expand = 0;
/* Walk the chain while the page starts or ends with a special entry. */
while (bp[0] && (bp[2] < REAL_KEY || bp[bp[0]] < REAL_KEY))
/* Exception case */
if (bp[2] == FULL_KEY_DATA && bp[0] == 2)
/* This is the last page of a big key/data pair
and we need to add another page */
break;
else if (bp[2] < REAL_KEY && bp[bp[0]] != OVFLPAGE) {
bufp = __get_buf(hashp, (uint32_t)bp[bp[0] - 1], bufp,
0);
if (!bufp)
return (-1);
bp = (uint16_t *)(void *)bufp->page;
} else if (bp[bp[0]] != OVFLPAGE) {
/* Short key/data pairs, no more pages */
break;
} else {
/* Try to squeeze key on this page */
if (bp[2] >= REAL_KEY &&
FREESPACE(bp) >= PAIRSIZE(key, val)) {
squeeze_key(bp, key, val);
goto stats;
} else {
bufp = __get_buf(hashp,
(uint32_t)bp[bp[0] - 1], bufp, 0);
if (!bufp)
return (-1);
bp = (uint16_t *)(void *)bufp->page;
}
}
if (PAIRFITS(bp, key, val))
putpair(bufp->page, key, val);
else {
/* Needing an overflow page always triggers a table expansion. */
do_expand = 1;
bufp = __add_ovflpage(hashp, bufp);
if (!bufp)
return (-1);
sop = (uint16_t *)(void *)bufp->page;
if (PAIRFITS(sop, key, val))
putpair((char *)(void *)sop, key, val);
else
if (__big_insert(hashp, bufp, key, val))
return (-1);
}
stats:
bufp->flags |= BUF_MOD;
/*
 * If the average number of keys per bucket exceeds the fill factor,
 * expand the table.
 */
hashp->NKEYS++;
if (do_expand ||
(hashp->NKEYS / (hashp->MAX_BUCKET + 1) > hashp->FFACTOR))
return (__expand_table(hashp));
return (0);
}
/*
 * Chain a new overflow page behind the page held in bufp.
 *
 * An overflow address is obtained from overflow_page() and a buffer for it
 * from __get_buf() (as a new page, so nothing is read from disk).  The
 * address and the OVFLPAGE marker are then appended to bufp's page as a
 * two-word entry, and both buffers are flagged BUF_MOD.
 *
 * If the fill factor is still DEF_FFACTOR it is derived here from the
 * number of entries on the page (sp[0] >> 1), but never set below
 * MIN_FFACTOR.
 *
 * Returns:
 * pointer on success
 * NULL on error
 */
BUFHEAD *
__add_ovflpage(HTAB *hashp, BUFHEAD *bufp)
{
uint16_t *sp;
uint16_t ndx, ovfl_num;
size_t temp;
#ifdef DEBUG1
int tmp1, tmp2;
#endif
sp = (uint16_t *)(void *)bufp->page;
/* Check if we are dynamically determining the fill factor */
if (hashp->FFACTOR == DEF_FFACTOR) {
hashp->FFACTOR = (uint32_t)sp[0] >> 1;
if (hashp->FFACTOR < MIN_FFACTOR)
hashp->FFACTOR = MIN_FFACTOR;
}
bufp->flags |= BUF_MOD;
ovfl_num = overflow_page(hashp);
#ifdef DEBUG1
tmp1 = bufp->addr;
tmp2 = bufp->ovfl ? bufp->ovfl->addr : 0;
#endif
/* An address of 0 from overflow_page() means no page was available. */
if (!ovfl_num || !(bufp->ovfl = __get_buf(hashp, (uint32_t)ovfl_num,
bufp, 1)))
return (NULL);
bufp->ovfl->flags |= BUF_MOD;
#ifdef DEBUG1
(void)fprintf(stderr, "ADDOVFLPAGE: %d->ovfl was %d is now %d\n",
tmp1, tmp2, bufp->ovfl->addr);
#endif
ndx = sp[0];
/*
 * Since a pair is allocated on a page only if there's room to add
 * an overflow page, we know that the OVFL information will fit on
 * the page.
 */
/* Write the new trailer words first, while the old ones are intact. */
sp[ndx + 4] = OFFSET(sp);
temp = FREESPACE(sp);
_DIAGASSERT(temp >= OVFLSIZE);
sp[ndx + 3] = (uint16_t)(temp - OVFLSIZE);
sp[ndx + 1] = ovfl_num;
sp[ndx + 2] = OVFLPAGE;
sp[0] = ndx + 2;
#ifdef HASH_STATISTICS
hash_overflows++;
#endif
return (bufp->ovfl);
}
/*
 * Fill the page buffer p with the contents of a bucket, overflow or bitmap
 * page.
 *
 * bucket    - bucket number (is_bucket != 0) or overflow address.
 * is_disk   - zero means the page does not exist on disk yet; p is then
 *             simply initialised as an empty page.  The same happens when
 *             the table has no backing file (fp == -1).
 * is_bitmap - non-zero for bitmap pages, which are never page-initialised
 *             and are byte-swapped as 32-bit words rather than 16-bit ones.
 *
 * A read that hits EOF yields an empty page; a short read sets errno to
 * EFTYPE.  Pages are byte-swapped in place when the file's byte order
 * differs from the host's.
 *
 * Returns:
 * 0 indicates SUCCESS
 * -1 indicates FAILURE
 */
int
__get_page(HTAB *hashp, char *p, uint32_t bucket, int is_bucket, int is_disk,
    int is_bitmap)
{
	uint16_t *hdr = (uint16_t *)(void *)p;
	size_t temp;		/* scratch used by PAGE_INIT */
	ssize_t got;
	int fd, page, size;
	int idx, limit;

	fd = hashp->fp;
	size = hashp->BSIZE;

	/* Nothing to read: hand back a freshly initialised page. */
	if ((fd == -1) || !is_disk) {
		PAGE_INIT(p);
		return (0);
	}

	if (is_bucket) {
		page = BUCKET_TO_PAGE(bucket);
	} else {
		page = OADDR_TO_PAGE(bucket);
	}

	got = pread(fd, p, (size_t)size, (off_t)page << hashp->BSHIFT);
	if (got == -1)
		return (-1);
	if (got == 0) {
		/* We hit the EOF, so initialize a new page */
		hdr[0] = 0;
	} else if (got != size) {
		errno = EFTYPE;
		return (-1);
	}

	/* An empty non-bitmap page just needs its header set up. */
	if (!is_bitmap && !hdr[0]) {
		PAGE_INIT(p);
		return (0);
	}

	/* Same byte order as the host: the page is usable as read. */
	if (hashp->LORDER == BYTE_ORDER)
		return (0);

	if (is_bitmap) {
		limit = (uint32_t)hashp->BSIZE >> 2;	/* divide by 4 */
		for (idx = 0; idx < limit; idx++)
			M_32_SWAP(((int *)(void *)p)[idx]);
	} else {
		/* Swap the count first; it tells how many words follow. */
		M_16_SWAP(hdr[0]);
		limit = hdr[0] + 2;
		for (idx = 1; idx <= limit; idx++)
			M_16_SWAP(hdr[idx]);
	}
	return (0);
}
/*
 * Write page p to disk
 *
 * bucket is a bucket number when is_bucket is non-zero and an overflow
 * address otherwise.  If the table has no backing file yet, one is created
 * with open_temp() first.  When the file's byte order differs from the
 * host's, the page is byte-swapped IN PLACE before being written (32-bit
 * words for bitmap pages, 16-bit words for the others), so the caller's
 * buffer is left in file byte order.  A short write sets errno to EFTYPE.
 *
 * Returns:
 * 0 ==> OK
 * -1 ==>failure
 */
int
__put_page(HTAB *hashp, char *p, uint32_t bucket, int is_bucket, int is_bitmap)
{
	uint16_t *hdr = (uint16_t *)(void *)p;
	ssize_t written;
	int fd, page, size;

	size = hashp->BSIZE;
	if ((hashp->fp == -1) && open_temp(hashp))
		return (-1);
	fd = hashp->fp;

	if (hashp->LORDER != BYTE_ORDER) {
		int idx, limit;

		if (is_bitmap) {
			limit = (uint32_t)hashp->BSIZE >> 2;	/* divide by 4 */
			for (idx = 0; idx < limit; idx++)
				M_32_SWAP(((int *)(void *)p)[idx]);
		} else {
			/* Read the count before it gets swapped itself. */
			limit = hdr[0] + 2;
			for (idx = 0; idx <= limit; idx++)
				M_16_SWAP(hdr[idx]);
		}
	}

	if (is_bucket) {
		page = BUCKET_TO_PAGE(bucket);
	} else {
		page = OADDR_TO_PAGE(bucket);
	}

	written = pwrite(fd, p, (size_t)size, (off_t)page << hashp->BSHIFT);
	if (written == -1) {
		/* Errno is set */
		return (-1);
	}
	if (written != size) {
		errno = EFTYPE;
		return (-1);
	}
	return (0);
}
#define BYTE_MASK ((1 << INT_BYTE_SHIFT) -1)
/*
 * Initialize a new bitmap page.  Bitmap pages are left in memory
 * once they are read in.
 *
 * pnum  - page address recorded in hashp->BITMAPS[ndx].
 * nbits - number of bits to leave clear at the start of the map; every
 *         bit after them is set.  Bit 0 is then set as well.
 * ndx   - slot in hashp->BITMAPS / hashp->mapp that receives the new map.
 *
 * Returns 0 on success and 1 if the page could not be allocated.
 */
int
__ibitmap(HTAB *hashp, int pnum, int nbits, int ndx)
{
	uint32_t *map;
	char *raw;
	int zero_words, zero_bytes;

	map = malloc((size_t)hashp->BSIZE);
	if (map == NULL)
		return (1);
	hashp->nmaps++;

	/* Whole words that contain at least one of the clear bits. */
	zero_words = ((uint32_t)(nbits - 1) >> INT_BYTE_SHIFT) + 1;
	zero_bytes = zero_words << INT_TO_BYTE;

	raw = (char *)(void *)map;
	(void)memset(raw, 0, (size_t)zero_bytes);
	(void)memset(raw + zero_bytes, 0xFF,
	    (size_t)(hashp->BSIZE - zero_bytes));

	/* Set the bits past nbits in the last (partially clear) word. */
	map[zero_words - 1] = ALL_SET << (nbits & BYTE_MASK);
	SETBIT(map, 0);

	hashp->mapp[ndx] = map;
	hashp->BITMAPS[ndx] = (uint16_t)pnum;
	return (0);
}
/*
 * Return the position of the lowest clear bit in map, or BITS_PER_MAP when
 * every one of the first BITS_PER_MAP bits is set.
 */
static uint32_t
first_free(uint32_t map)
{
	uint32_t pos = 0;

	while (pos < BITS_PER_MAP && (map & ((uint32_t)0x1 << pos)) != 0)
		pos++;
	return (pos);
}
/*
 * Allocate an overflow page and return its overflow address.
 *
 * The bitmaps are searched first, from the page containing
 * hashp->LAST_FREED up to the page containing the last allocated bit
 * (SPARES[OVFL_POINT] - 1), for a word that is not ALL_SET.  If one is
 * found, its first clear bit is claimed (label "found").  Otherwise a new
 * page is taken at the current split point, which may also require
 * advancing the split point and/or creating a new bitmap page with
 * __ibitmap().
 *
 * Returns 0 on failure: when a bitmap cannot be fetched or created, or
 * when the table is out of overflow pages (a message is written to stderr
 * and errno is set to EFBIG in that case).
 */
static uint16_t
overflow_page(HTAB *hashp)
{
uint32_t *freep = NULL;
int max_free, offset, splitnum;
uint16_t addr;
int bit, first_page, free_bit, free_page, i, in_use_bits, j;
#ifdef DEBUG2
int tmp1, tmp2;
#endif
splitnum = hashp->OVFL_POINT;
max_free = hashp->SPARES[splitnum];
/* Bitmap page and bit within it of the last allocated overflow page. */
free_page = (uint32_t)(max_free - 1) >> (hashp->BSHIFT + BYTE_SHIFT);
free_bit = (max_free - 1) & ((hashp->BSIZE << BYTE_SHIFT) - 1);
/* Look through all the free maps to find the first free block */
first_page = (uint32_t)hashp->LAST_FREED >>(hashp->BSHIFT + BYTE_SHIFT);
for ( i = first_page; i <= free_page; i++ ) {
if (!(freep = (uint32_t *)hashp->mapp[i]) &&
!(freep = fetch_bitmap(hashp, i)))
return (0);
if (i == free_page)
in_use_bits = free_bit;
else
in_use_bits = (hashp->BSIZE << BYTE_SHIFT) - 1;
if (i == first_page) {
/* Start at the word that holds LAST_FREED. */
bit = hashp->LAST_FREED &
((hashp->BSIZE << BYTE_SHIFT) - 1);
j = bit / BITS_PER_MAP;
bit = bit & ~(BITS_PER_MAP - 1);
} else {
bit = 0;
j = 0;
}
for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP)
if (freep[j] != ALL_SET)
goto found;
}
/* No Free Page Found */
hashp->LAST_FREED = hashp->SPARES[splitnum];
hashp->SPARES[splitnum]++;
offset = hashp->SPARES[splitnum] -
(splitnum ? hashp->SPARES[splitnum - 1] : 0);
#define OVMSG "HASH: Out of overflow pages. Increase page size\n"
/* The current split point is full: move on to the next one. */
if (offset > SPLITMASK) {
if (++splitnum >= NCACHED) {
(void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
errno = EFBIG;
return (0);
}
hashp->OVFL_POINT = splitnum;
hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1];
hashp->SPARES[splitnum-1]--;
offset = 1;
}
/* Check if we need to allocate a new bitmap page */
if (free_bit == (hashp->BSIZE << BYTE_SHIFT) - 1) {
free_page++;
if (free_page >= NCACHED) {
(void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
errno = EFBIG;
return (0);
}
/*
 * This is tricky. The 1 indicates that you want the new page
 * allocated with 1 clear bit. Actually, you are going to
 * allocate 2 pages from this map. The first is going to be
 * the map page, the second is the overflow page we were
 * looking for. The bitmap initialization routine (__ibitmap)
 * automatically sets the first bit of the map to indicate that
 * the bitmap itself is in use. We would explicitly set the
 * second bit, but don't have to if we tell __ibitmap not to
 * leave it clear in the first place.
 */
if (__ibitmap(hashp,
(int)OADDR_OF(splitnum, offset), 1, free_page))
return (0);
hashp->SPARES[splitnum]++;
#ifdef DEBUG2
free_bit = 2;
#endif
offset++;
if (offset > SPLITMASK) {
if (++splitnum >= NCACHED) {
(void)write(STDERR_FILENO, OVMSG,
sizeof(OVMSG) - 1);
errno = EFBIG;
return (0);
}
hashp->OVFL_POINT = splitnum;
hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1];
hashp->SPARES[splitnum-1]--;
offset = 0;
}
} else {
/*
 * Free_bit addresses the last used bit. Bump it to address
 * the first available bit.
 */
free_bit++;
SETBIT(freep, free_bit);
}
/* Calculate address of the new overflow page */
addr = OADDR_OF(splitnum, offset);
#ifdef DEBUG2
(void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n",
addr, free_bit, free_page);
#endif
return (addr);
found:
/* freep[j] has a clear bit; bit is the index of that word's first bit. */
bit = bit + first_free(freep[j]);
SETBIT(freep, bit);
#ifdef DEBUG2
tmp1 = bit;
tmp2 = i;
#endif
/*
 * Bits are addressed starting with 0, but overflow pages are addressed
 * beginning at 1. Bit is a bit address, so we need to increment
 * it to convert it to a page number.
 */
bit = 1 + bit + (i * (hashp->BSIZE << BYTE_SHIFT));
if (bit >= hashp->LAST_FREED)
hashp->LAST_FREED = bit - 1;
/* Calculate the split number for this page */
for (i = 0; (i < splitnum) && (bit > hashp->SPARES[i]); i++);
offset = (i ? bit - hashp->SPARES[i - 1] : bit);
if (offset >= SPLITMASK) {
(void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
errno = EFBIG;
return (0); /* Out of overflow pages */
}
addr = OADDR_OF(i, offset);
#ifdef DEBUG2
(void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n",
addr, tmp1, tmp2);
#endif
/* Allocate and return the overflow page */
return (addr);
}
/*
 * Mark an overflow page as free.
 *
 * The page's overflow address (obufp->addr) is converted to a bit number,
 * that bit is cleared in the allocation bitmap that covers it, and the
 * buffer is then passed to __reclaim_buf().
 *
 * hashp - hash table the page belongs to
 * obufp - buffer header of the overflow page being released
 */
void
__free_ovflpage(HTAB *hashp, BUFHEAD *obufp)
{
	uint32_t *map;
	uint16_t oaddr, split;
	int bitno, map_idx, map_bit;

	oaddr = obufp->addr;
#ifdef DEBUG1
	(void)fprintf(stderr, "Freeing %d\n", oaddr);
#endif

	/*
	 * The upper bits of an overflow address hold the split point, the
	 * lower bits a 1-based offset; SPARES[split - 1] is the bit number
	 * at which this split point's pages begin.
	 */
	split = (((uint32_t)oaddr) >> SPLITSHIFT);
	bitno =
	    (split ? hashp->SPARES[split - 1] : 0) + (oaddr & SPLITMASK) - 1;

	/* LAST_FREED only ever moves down to a lower freed bit. */
	if (bitno < hashp->LAST_FREED)
		hashp->LAST_FREED = bitno;

	/* Split the bit number into (bitmap index, bit within that bitmap). */
	map_idx = ((uint32_t)bitno >> (hashp->BSHIFT + BYTE_SHIFT));
	map_bit = bitno & ((hashp->BSIZE << BYTE_SHIFT) - 1);

	/* Use the cached bitmap when there is one, otherwise load it. */
	map = hashp->mapp[map_idx];
	if (map == NULL)
		map = fetch_bitmap(hashp, map_idx);

	/*
	 * This had better never happen.  It means we tried to read a bitmap
	 * that has already had overflow pages allocated off it, and we
	 * failed to read it from the file.
	 */
	_DIAGASSERT(map != NULL);
	CLRBIT(map, map_bit);
#ifdef DEBUG2
	(void)fprintf(stderr, "FREE_OVFLPAGE: ADDR: %d BIT: %d PAGE %d\n",
	    obufp->addr, map_bit, map_idx);
#endif
	__reclaim_buf(hashp, obufp);
}
/*
 * Create an unlinked temporary file and store its descriptor in hashp->fp.
 *
 * The file is created with mkstemp() from the template
 * "<dir>/_hashXXXXXX", where <dir> is $TMPDIR when that is usable and
 * _PATH_TMP otherwise.  The name is removed immediately after creation and
 * the descriptor is marked close-on-exec.
 *
 * Returns:
 *	 0 success
 *	-1 failure; errno is ENAMETOOLONG when the template does not fit in
 *	   PATH_MAX bytes, otherwise whatever the failing call left there
 */
static int
open_temp(HTAB *hashp)
{
	sigset_t set, oset;
	char *envtmp;
	char namestr[PATH_MAX];
	int len;

#ifndef __minix
	/* Do not trust the environment of a set-id program. */
	if (issetugid())
		envtmp = NULL;
	else
		envtmp = getenv("TMPDIR");
#else
	/* NOTE(review): no issetugid() check on MINIX -- confirm intended. */
	envtmp = getenv("TMPDIR");
#endif

	/*
	 * snprintf() returns the length the full string would have had, so
	 * a result >= the buffer size means the template was cut short and
	 * no longer ends in "XXXXXX".  Report that directly instead of
	 * letting mkstemp() reject the damaged template.
	 */
	len = snprintf(namestr, sizeof(namestr), "%s/_hashXXXXXX",
	    envtmp ? envtmp : _PATH_TMP);
	if (len < 0)
		return -1;
	if ((size_t)len >= sizeof(namestr)) {
		errno = ENAMETOOLONG;
		return -1;
	}

	/* Block signals; make sure file goes away at process exit. */
	(void)sigfillset(&set);
	(void)sigprocmask(SIG_BLOCK, &set, &oset);
	if ((hashp->fp = mkstemp(namestr)) != -1) {
		(void)unlink(namestr);
		(void)fcntl(hashp->fp, F_SETFD, FD_CLOEXEC);
	}
	(void)sigprocmask(SIG_SETMASK, &oset, (sigset_t *)NULL);
	return (hashp->fp != -1 ? 0 : -1);
}
/*
 * Insert a key/data pair on a page whose last index entry is an overflow
 * pair.  The new pair takes over the two index slots the overflow pair
 * occupied and the overflow pair is re-appended after it.
 *
 * The caller must already know that the pair fits on the page; that is
 * only checked through _DIAGASSERT here.
 *
 * sp  - the page, viewed as an array of 16-bit words (sp[0] = entry count)
 * key - key to insert
 * val - data to insert
 */
static void
squeeze_key(uint16_t *sp, const DBT *key, const DBT *val)
{
char *p;
uint16_t free_space, n, off, pageno;
size_t temp;
p = (char *)(void *)sp;
n = sp[0];
/*
 * FREESPACE() and OFFSET() locate their words relative to sp[0], so both
 * must be read before sp[0] is changed below.
 */
free_space = FREESPACE(sp);
off = OFFSET(sp);
/* First word of the trailing overflow pair; restored at sp[n + 1] below. */
pageno = sp[n - 1];
/* Place the key just below the current data offset; slot n - 1 gets it. */
_DIAGASSERT(off >= key->size);
off -= (uint16_t)key->size;
sp[n - 1] = off;
memmove(p + off, key->data, key->size);
/* Place the data just below the key; slot n gets its offset. */
_DIAGASSERT(off >= val->size);
off -= (uint16_t)val->size;
sp[n] = off;
memmove(p + off, val->data, val->size);
/* Two more index entries; put the overflow pair back after the new pair. */
sp[0] = n + 2;
sp[n + 1] = pageno;
sp[n + 2] = OVFLPAGE;
temp = PAIRSIZE(key, val);
_DIAGASSERT(free_space >= temp);
/* With sp[0] updated, these macros now address sp[n + 3] and sp[n + 4]. */
FREESPACE(sp) = (uint16_t)(free_space - temp);
OFFSET(sp) = off;
}
/*
 * Read allocation bitmap number ndx into a newly allocated buffer of
 * hashp->BSIZE bytes and cache the buffer in hashp->mapp[ndx].
 *
 * Returns the bitmap, or NULL when ndx is outside [0, hashp->nmaps), the
 * allocation fails, or __get_page() reports an error.
 */
static uint32_t *
fetch_bitmap(HTAB *hashp, int ndx)
{
	uint32_t *map;

	if (ndx < 0 || ndx >= hashp->nmaps)
		return (NULL);
	if ((map = malloc((size_t)hashp->BSIZE)) == NULL) {
		hashp->mapp[ndx] = NULL;
		return (NULL);
	}
	if (__get_page(hashp,
	    (char *)(void *)map, (uint32_t)hashp->BITMAPS[ndx], 0, 1, 1)) {
		/*
		 * Callers treat a non-NULL mapp[] slot as a loaded bitmap,
		 * so the slot must never be left pointing at freed memory.
		 */
		free(map);
		hashp->mapp[ndx] = NULL;
		return (NULL);
	}
	hashp->mapp[ndx] = map;
	return (map);
}
#ifdef DEBUG4
void print_chain(HTAB *, uint32_t);

/*
 * Debugging aid: write to stderr the address of the page at addr followed
 * by the addresses of the pages chained behind it, then a newline.
 *
 * The walk continues while a page is non-empty and either its last index
 * word is OVFLPAGE or it has more than two entries with bp[2] below
 * REAL_KEY.
 */
void
print_chain(HTAB *hashp, uint32_t addr)
{
	BUFHEAD *cur;
	uint16_t *pg, next;

	(void)fprintf(stderr, "%d ", addr);
	cur = __get_buf(hashp, addr, NULL, 0);
	pg = (uint16_t *)cur->page;
	for (;;) {
		if (!pg[0])
			break;
		if ((pg[pg[0]] != OVFLPAGE) &&
		    !((pg[0] > 2) && pg[2] < REAL_KEY))
			break;
		/* The word before the last one holds the next page address. */
		next = pg[pg[0] - 1];
		(void)fprintf(stderr, "%d ", (int)next);
		cur = __get_buf(hashp, (uint32_t)next, cur, 0);
		pg = (uint16_t *)cur->page;
	}
	(void)fprintf(stderr, "\n");
}
#endif

119
lib/libc/db/hash/ndbm.c Normal file
View file

@ -0,0 +1,119 @@
/* $NetBSD: ndbm.c,v 1.23 2008/09/11 12:58:00 joerg Exp $ */
/* from: NetBSD: ndbm.c,v 1.18 2004/04/27 20:03:45 kleink Exp */
/*-
* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: ndbm.c,v 1.23 2008/09/11 12:58:00 joerg Exp $");
#endif
/*
* This package provides a dbm compatible interface to the new hashing
* package described in db(3).
*/
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/param.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <ndbm.h>
#include "hash.h"
/*
 * Open the hash database stored in the file named `file' with DBM_SUFFIX
 * appended.
 *
 * file  - base path name of the database
 * flags - open(2) style flags; write-only access is turned into read/write
 * mode  - file creation mode, passed through to __hash_open()
 *
 * Returns:
 *	DBM handle on success
 *	NULL on failure; errno is ENAMETOOLONG when file plus suffix does
 *	not fit in MAXPATHLEN bytes
 */
DBM *
dbm_open(const char *file, int flags, mode_t mode)
{
	HASHINFO info;
	char path[MAXPATHLEN];
	int len;

	/*
	 * Build the real file name.  A name that does not fit is rejected:
	 * silently truncating it would open a different file than the one
	 * requested, and the previous strncpy()/strncat() pair could leave
	 * path without a terminating NUL.
	 */
	len = snprintf(path, sizeof(path), "%s%s", file, DBM_SUFFIX);
	if (len < 0)
		return (NULL);
	if ((size_t)len >= sizeof(path)) {
		errno = ENAMETOOLONG;
		return (NULL);
	}

	info.bsize = 4096;
	info.ffactor = 40;
	info.nelem = 1;
	info.cachesize = 0;
	info.hash = NULL;
	info.lorder = 0;

	/* Write-only requests are opened read/write instead. */
	if ((flags & O_ACCMODE) == O_WRONLY) {
		flags &= ~O_WRONLY;
		flags |= O_RDWR;
	}
	return ((DBM *)__hash_open(path, flags, mode, &info, 0));
}
/*
 * Close the database by calling the handle's close method.  The method's
 * result is discarded.
 */
void
dbm_close(DBM *db)
{
	(void)db->close(db);
}
/*
 * Return the error indicator kept in the hash table behind the handle.
 */
int
dbm_error(DBM *db)
{
	HTAB *table = db->internal;

	return (table->err);
}
/*
 * Reset the error indicator kept in the hash table behind the handle.
 * Always returns 0.
 */
int
dbm_clearerr(DBM *db)
{
	HTAB *table = db->internal;

	table->err = 0;
	return (0);
}
/*
 * Return the file descriptor stored in the hash table behind the handle.
 */
int
dbm_dirfno(DBM *db)
{
	HTAB *table = db->internal;

	return table->fp;
}

View file

@ -0,0 +1,162 @@
/* $NetBSD: ndbmdatum.c,v 1.4 2008/09/11 12:58:00 joerg Exp $ */
/* from: NetBSD: ndbm.c,v 1.18 2004/04/27 20:03:45 kleink Exp */
/*-
* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: ndbmdatum.c,v 1.4 2008/09/11 12:58:00 joerg Exp $");
#endif
/*
* This package provides a dbm compatible interface to the new hashing
* package described in db(3).
*/
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/param.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <ndbm.h>
#include "hash.h"
/*
 * Look up `key' through the handle's get method.
 *
 * Returns:
 *	the stored content datum on success
 *	a datum with dptr == NULL and dsize == 0 when get reports non-zero
 */
datum
dbm_fetch(DBM *db, datum key)
{
	DBT lookup, found;
	datum content;

	lookup.data = key.dptr;
	lookup.size = key.dsize;

	if ((db->get)(db, &lookup, &found, 0)) {
		content.dptr = NULL;
		content.dsize = 0;
	} else {
		content.dptr = found.data;
		content.dsize = found.size;
	}
	return (content);
}
/*
 * Position the handle's sequential scan at the first record (R_FIRST) and
 * return that record's key.
 *
 * Returns:
 *	the key datum on success
 *	a datum with dptr == NULL and dsize == 0 when seq reports non-zero
 */
datum
dbm_firstkey(DBM *db)
{
	int status;
	datum retkey;
	DBT dbtretkey, dbtretdata;

	status = (db->seq)(db, &dbtretkey, &dbtretdata, R_FIRST);
	if (status) {
		/*
		 * Clear both members: size is copied into the result below
		 * and must not be read while still uninitialized.
		 */
		dbtretkey.data = NULL;
		dbtretkey.size = 0;
	}
	retkey.dptr = dbtretkey.data;
	retkey.dsize = dbtretkey.size;
	return (retkey);
}
/*
 * Advance the handle's sequential scan (R_NEXT) and return the key of the
 * record it lands on.
 *
 * Returns:
 *	the key datum on success
 *	a datum with dptr == NULL and dsize == 0 when seq reports non-zero
 */
datum
dbm_nextkey(DBM *db)
{
	int status;
	datum retkey;
	DBT dbtretkey, dbtretdata;

	status = (db->seq)(db, &dbtretkey, &dbtretdata, R_NEXT);
	if (status) {
		/*
		 * Clear both members: size is copied into the result below
		 * and must not be read while still uninitialized.
		 */
		dbtretkey.data = NULL;
		dbtretkey.size = 0;
	}
	retkey.dptr = dbtretkey.data;
	retkey.dsize = dbtretkey.size;
	return (retkey);
}
/*
 * Remove the record stored under `key' through the handle's del method.
 *
 * Returns:
 *	 0 when del reports zero
 *	-1 when del reports anything else
 */
int
dbm_delete(DBM *db, datum key)
{
	DBT target;

	target.data = key.dptr;
	target.size = key.dsize;
	return ((db->del)(db, &target, 0) ? -1 : 0);
}
/*
 * Store `data' under `key' through the handle's put method.  A `flags'
 * value of DBM_INSERT is passed on as R_NOOVERWRITE; any other value is
 * passed on as 0.
 *
 * Returns the put method's result:
 *	 0 on success
 *	<0 failure
 *	 1 if DBM_INSERT and entry exists
 */
int
dbm_store(DBM *db, datum key, datum data, int flags)
{
	DBT k, v;
	u_int put_flags;

	k.data = key.dptr;
	k.size = key.dsize;
	v.data = data.dptr;
	v.size = data.dsize;

	if (flags == DBM_INSERT)
		put_flags = (u_int)R_NOOVERWRITE;
	else
		put_flags = (u_int)0;

	return ((db->put)(db, &k, &v, put_flags));
}

90
lib/libc/db/hash/page.h Normal file
View file

@ -0,0 +1,90 @@
/* $NetBSD: page.h,v 1.8 2008/08/26 21:18:38 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)page.h 8.2 (Berkeley) 5/31/94
*/
/*
* Definitions for hashing page file format.
*/
/*
* routines dealing with a data page
*
* page format:
* +------------------------------+
* p | n | keyoff | datoff | keyoff |
* +------------+--------+--------+
* | datoff | free | ptr | --> |
* +--------+---------------------+
* | F R E E A R E A |
* +--------------+---------------+
* | <---- - - - | data |
* +--------+-----+----+----------+
* | key | data | key |
* +--------+----------+----------+
*
* Pointer to the free space is always: p[p[0] + 2]
* Amount of free space on the page is: p[p[0] + 1]
*/
/*
* How many bytes required for this pair?
* 2 shorts in the table at the top of the page + room for the
* key and room for the data
*
* We prohibit entering a pair on a page unless there is also room to append
* an overflow page. The reason for this is that you can get in a situation
* where a single key/data pair fits on a page, but you can't append an
* overflow page and later you'd have to split the key/data and handle like
* a big pair.
* You might as well do this up front.
*/
/* Bytes a key/data pair takes on a page: two index words plus both payloads. */
#define PAIRSIZE(K,D)	(2*sizeof(uint16_t) + (K)->size + (D)->size)
/* Size of four index words. */
#define BIGOVERHEAD	(4*sizeof(uint16_t))
/*
 * Four index words plus the key bytes.  The expansion must not end in a
 * semicolon, otherwise the macro cannot be used inside an expression.
 */
#define KEYSIZE(K)	(4*sizeof(uint16_t) + (K)->size)
/* Size of the two index words of an overflow entry. */
#define OVFLSIZE	(2*sizeof(uint16_t))

/* Word holding the amount of free space: the first word after the entries. */
#define FREESPACE(P)	((P)[(P)[0]+1])
/* Word holding the offset of the free area: the second word after them. */
#define OFFSET(P)	((P)[(P)[0]+2])

/*
 * True when P[2] is at least REAL_KEY and the pair fits in the free space
 * with room left over for an overflow entry.
 */
#define PAIRFITS(P,K,D)	\
	(((P)[2] >= REAL_KEY) && \
	    (PAIRSIZE((K),(D)) + OVFLSIZE) <= FREESPACE((P)))

/* Bytes of index words on a page with N entries: N plus three more words. */
#define PAGE_META(N)	(((N)+3) * sizeof(uint16_t))
/*
 * Three buffer headers and one 16-bit page address grouped into a single
 * value.
 * NOTE(review): the code that fills this structure in is not part of this
 * header; the per-field notes are inferred from the names and should be
 * confirmed against the users of SPLIT_RETURN.
 */
typedef struct {
BUFHEAD *newp; /* presumably the buffer of the new page -- confirm */
BUFHEAD *oldp; /* presumably the buffer of the old page -- confirm */
BUFHEAD *nextp; /* presumably the buffer of the following page -- confirm */
uint16_t next_addr; /* presumably the address belonging to nextp -- confirm */
} SPLIT_RETURN;

View file

@ -0,0 +1,15 @@
# $NetBSD: Makefile.inc,v 1.10 2004/04/30 21:13:23 kleink Exp $
# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93
# Look for the manual page sources in the db/man directory.
.PATH: ${.CURDIR}/db/man
# Manual pages that have their own source file.
MAN+= btree.3 dbm_clearerr.3 dbopen.3 hash.3 recno.3 mpool.3
# MLINKS entries come in pairs: a page listed in MAN followed by an
# additional name under which that page is made available.
# Additional names for the ndbm page, dbm_clearerr.3.
MLINKS+= dbm_clearerr.3 dbm_close.3 dbm_clearerr.3 dbm_delete.3
MLINKS+= dbm_clearerr.3 dbm_dirfno.3 dbm_clearerr.3 dbm_error.3
MLINKS+= dbm_clearerr.3 dbm_fetch.3 dbm_clearerr.3 dbm_firstkey.3
MLINKS+= dbm_clearerr.3 dbm_nextkey.3 dbm_clearerr.3 dbm_open.3
MLINKS+= dbm_clearerr.3 dbm_store.3 dbm_clearerr.3 ndbm.3
# Additional name for dbopen.3.
MLINKS+= dbopen.3 db.3
# Additional names for mpool.3.
MLINKS+= mpool.3 mpool_open.3 mpool.3 mpool_filter.3 mpool.3 mpool_new.3
MLINKS+= mpool.3 mpool_get.3 mpool.3 mpool_put.3 mpool.3 mpool_sync.3
MLINKS+= mpool.3 mpool_close.3

255
lib/libc/db/man/btree.3 Normal file
View file

@ -0,0 +1,255 @@
.\" $NetBSD: btree.3,v 1.12 2010/03/22 19:30:53 joerg Exp $
.\"
.\" Copyright (c) 1990, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" @(#)btree.3 8.4 (Berkeley) 8/18/94
.\"
.Dd April 17, 2003
.Dt BTREE 3
.Os
.Sh NAME
.Nm btree
.Nd btree database access method
.Sh SYNOPSIS
.In sys/types.h
.In db.h
.Sh DESCRIPTION
The routine
.Fn dbopen
is the library interface to database files.
One of the supported file formats is btree files.
The general description of the database access methods is in
.Xr dbopen 3 ,
this manual page describes only the btree specific information.
.Pp
The btree data structure is a sorted, balanced tree structure storing
associated key/data pairs.
.Pp
The btree access method specific data structure provided to
.Fn dbopen
is defined in the
.In db.h
include file as follows:
.Bd -literal
typedef struct {
u_long flags;
u_int cachesize;
int maxkeypage;
int minkeypage;
u_int psize;
int (*compare)(const DBT *key1, const DBT *key2);
size_t (*prefix)(const DBT *key1, const DBT *key2);
int lorder;
} BTREEINFO;
.Ed
.Pp
The elements of this structure are as follows:
.Bl -tag -width maxkeypagex
.It Fa flags
The flag value is specified by or'ing any of the following values:
.Bl -tag -width R_DUP -offset indent
.It Dv R_DUP
Permit duplicate keys in the tree, i.e. permit insertion if the key to
be inserted already exists in the tree.
The default behavior, as described in
.Xr dbopen 3 ,
is to overwrite a matching key when inserting a new key or to fail if
the
.Dv R_NOOVERWRITE
flag is specified.
The
.Dv R_DUP
flag is overridden by the
.Dv R_NOOVERWRITE
flag, and if the
.Dv R_NOOVERWRITE
flag is specified, attempts to insert duplicate keys into the tree
will fail.
.Pp
If the database contains duplicate keys, the order of retrieval of
key/data pairs is undefined if the
.Em get
routine is used, however,
.Em seq
routine calls with the
.Dv R_CURSOR
flag set will always return the logical
.Dq first
of any group of duplicate keys.
.El
.It Fa cachesize
A suggested maximum size (in bytes) of the memory cache.
This value is
.Em only
advisory, and the access method will allocate more memory rather than
fail.
Since every search examines the root page of the tree, caching the
most recently used pages substantially improves access time.
In addition, physical writes are delayed as long as possible, so a
moderate cache can reduce the number of I/O operations significantly.
Obviously, using a cache increases (but only increases) the likelihood
of corruption or lost data if the system crashes while a tree is being
modified.
If
.Fa cachesize
is 0 (no size is specified) a default cache is used.
.It Fa maxkeypage
The maximum number of keys which will be stored on any single page.
Not currently implemented.
.\" The maximum number of keys which will be stored on any single page.
.\" Because of the way the btree data structure works,
.\" .Fa maxkeypage
.\" must always be greater than or equal to 2.
.\" If
.\" .Fa maxkeypage
.\" is 0 (no maximum number of keys is specified) the page fill factor is
.\" made as large as possible (which is almost invariably what is wanted).
.It Fa minkeypage
The minimum number of keys which will be stored on any single page.
This value is used to determine which keys will be stored on overflow
pages, i.e., if a key or data item is longer than the pagesize divided
by the
.Fa minkeypage
value, it will be stored on overflow pages instead of in the page
itself.
If
.Fa minkeypage
is 0 (no minimum number of keys is specified) a value of 2 is used.
.It Fa psize
Page size is the size (in bytes) of the pages used for nodes in the
tree.
The minimum page size is 512 bytes and the maximum page size is 64K.
If
.Fa psize
is 0 (no page size is specified) a page size is chosen based on the
underlying file system I/O block size.
.It Fa compare
Compare is the key comparison function.
It must return an integer less than, equal to, or greater than zero if
the first key argument is considered to be respectively less than,
equal to, or greater than the second key argument.
The same comparison function must be used on a given tree every time
it is opened.
If
.Fa compare
is
.Dv NULL
(no comparison function is specified), the keys are compared
lexically, with shorter keys considered less than longer keys.
.It Fa prefix
Prefix is the prefix comparison function.
If specified, this routine must return the number of bytes of the
second key argument which are necessary to determine that it is
greater than the first key argument.
If the keys are equal, the key length should be returned.
Note, the usefulness of this routine is very data dependent, but, in
some data sets can produce significantly reduced tree sizes and search
times.
If
.Fa prefix
is
.Dv NULL
(no prefix function is specified),
.Em and
no comparison function is specified, a default lexical comparison
routine is used.
If
.Fa prefix
is
.Dv NULL
and a comparison routine is specified, no prefix comparison is done.
.It Fa lorder
The byte order for integers in the stored database metadata.
The number should represent the order as an integer; for example,
big endian order would be the number 4,321.
If
.Fa lorder
is 0 (no order is specified) the current host order is used.
.El
.Pp
If the file already exists (and the
.Dv O_TRUNC
flag is not specified), the values specified for the parameters flags,
lorder and psize are ignored in favor of the values used when the tree
was created.
.Pp
Forward sequential scans of a tree are from the least key to the
greatest.
.Pp
Space freed up by deleting key/data pairs from the tree is never
reclaimed, although it is normally made available for reuse.
This means that the btree storage structure is grow-only.
The only solutions are to avoid excessive deletions, or to create a
fresh tree periodically from a scan of an existing one.
.Pp
Searches, insertions, and deletions in a btree will all complete in
O(lg base N) time, where base is the average fill factor.
Often, inserting ordered data into btrees results in a low fill
factor.
This implementation has been modified to make ordered insertion the
best case, resulting in a much better than normal page fill factor.
.Sh ERRORS
The
.Nm
access method routines may fail and set
.Va errno
for any of the errors specified for the library routine
.Xr dbopen 3 .
.Sh SEE ALSO
.Xr dbopen 3 ,
.Xr hash 3 ,
.Xr mpool 3 ,
.Xr recno 3
.Pp
.Rs
.%T "The Ubiquitous B-tree"
.%A "Douglas Comer"
.%J "ACM Comput. Surv."
.%V 11
.%N 2
.%D June 1979
.%P 121-138
.Re
.Rs
.%T "Prefix B-trees"
.%A "Bayer"
.%A "Unterauer"
.%J "ACM Transactions on Database Systems"
.%V Vol. 2
.%N 1
.%D March 1977
.%P 11-26
.Re
.Rs
.%B "The Art of Computer Programming Vol. 3: Sorting and Searching"
.%A "D.E. Knuth"
.%D 1968
.%P 471-480
.Re
.Sh BUGS
Only big and little endian byte order is supported.

View file

@ -0,0 +1,306 @@
.\" $NetBSD: dbm_clearerr.3,v 1.5 2010/05/05 06:55:57 jruoho Exp $
.\"
.\" Copyright (c) 2004 The NetBSD Foundation, Inc.
.\" All rights reserved.
.\"
.\" This code is derived from software contributed to The NetBSD Foundation
.\" by Klaus Klein.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
.Dd May 5, 2010
.Dt DBM_CLEARERR 3
.Os
.Sh NAME
.Nm dbm_clearerr ,
.Nm dbm_close ,
.Nm dbm_delete ,
.Nm dbm_dirfno ,
.Nm dbm_error ,
.Nm dbm_fetch ,
.Nm dbm_firstkey ,
.Nm dbm_nextkey ,
.Nm dbm_open ,
.Nm dbm_store ,
.Nm ndbm
.Nd database functions
.Sh LIBRARY
.Lb libc
.Sh SYNOPSIS
.In ndbm.h
.Ft int
.Fn dbm_clearerr "DBM *db"
.Ft void
.Fn dbm_close "DBM *db"
.Ft int
.Fn dbm_delete "DBM *db" "datum key"
.Ft int
.Fn dbm_dirfno "DBM *db"
.Ft int
.Fn dbm_error "DBM *db"
.Ft datum
.Fn dbm_fetch "DBM *db" "datum key"
.Ft datum
.Fn dbm_firstkey "DBM *db"
.Ft datum
.Fn dbm_nextkey "DBM *db"
.Ft DBM *
.Fn dbm_open "const char *file" "int open_flags" "mode_t file_mode"
.Ft int
.Fn dbm_store "DBM *db" "datum key" "datum content" "int store_mode"
.Sh DESCRIPTION
The
.Nm ndbm
facility provides access to hash database files.
.Pp
Two data types are fundamental to the
.Nm ndbm
facility.
.Fa DBM
serves as a handle to a database.
It is an opaque type.
.Pp
The other data type is
.Fa datum ,
which is a structure type which includes the following members:
.Bd -literal -offset indent
void * dptr
size_t dsize
.Ed
.Pp
A
.Fa datum
is thus given by
.Fa dptr
pointing at an object of
.Fa dsize
bytes in length.
.Pp
The
.Fn dbm_open
function opens a database.
The
.Fa file
argument is the pathname which the actual database file pathname
is based on.
This implementation uses a single file with the suffix
.Pa .db
appended to
.Fa file .
The
.Fa open_flags
argument has the same meaning as the
.Fa flags
argument to
.Xr open 2
except that when opening a database for write-only access the file
is opened for read/write access, and the
.Dv O_APPEND
flag must not be specified.
The
.Fa file_mode
argument has the same meaning as the
.Fa mode
argument to
.Xr open 2 .
.Pp
For the following functions, the
.Fa db
argument is a handle previously returned by a call to
.Fn dbm_open .
.Pp
The
.Fn dbm_close
function closes a database.
.Pp
The
.Fn dbm_fetch
function retrieves a record from the database.
The
.Fa key
argument is a
.Fa datum
that identifies the record to be fetched.
.Pp
The
.Fn dbm_store
function stores a record into the database.
The
.Fa key
argument is a
.Fa datum
that identifies the record to be stored.
The
.Fa content
argument is a
.Fa datum
that specifies the value of the record to be stored.
The
.Fa store_mode
argument specifies the behavior of
.Fn dbm_store
if a record matching
.Fa key
is already present in the database,
.Fa db .
.Fa store_mode
must be one of the following:
.Bl -tag -width DBM_REPLACEXX -offset indent
.It Dv DBM_INSERT
If a record matching
.Fa key
is already present, it is left unchanged.
.It Dv DBM_REPLACE
If a record matching
.Fa key
is already present, its value is replaced by
.Fa content .
.El
.Pp
If no record matching
.Fa key
is present, a new record is inserted regardless of
.Fa store_mode .
.Pp
The
.Fn dbm_delete
function deletes a record from the database.
The
.Fa key
argument is a
.Fa datum
that identifies the record to be deleted.
.Pp
The
.Fn dbm_firstkey
function returns the first key in the database.
.Pp
The
.Fn dbm_nextkey
function returns the next key in the database.
In order to be meaningful, it must be preceded by a call to
.Fn dbm_firstkey .
.Pp
The
.Fn dbm_error
function returns the error indicator of the database.
.Pp
The
.Fn dbm_clearerr
function clears the error indicator of the database.
.Pp
The
.Fn dbm_dirfno
function returns the file descriptor of the underlying database file.
.Sh IMPLEMENTATION NOTES
The
.Nm ndbm
facility is implemented on top of the
.Xr hash 3
access method of the
.Xr db 3
database facility.
.Sh RETURN VALUES
The
.Fn dbm_open
function returns a pointer to a
.Fa DBM
when successful; otherwise a null pointer is returned.
.Pp
The
.Fn dbm_close
function returns no value.
.Pp
The
.Fn dbm_fetch
function returns a content
.Fa datum ;
if no record matching
.Fa key
was found or if an error occurred, its
.Fa dptr
member is a null pointer.
.Pp
The
.Fn dbm_store
function returns 0 when the record was successfully inserted;
it returns 1 when called with
.Fa store_mode
being
.Dv DBM_INSERT
and a record matching
.Fa key
is already present;
otherwise a negative value is returned.
.Pp
The
.Fn dbm_delete
function returns 0 when the record was successfully deleted;
otherwise a negative value is returned.
.Pp
The
.Fn dbm_firstkey
and
.Fn dbm_nextkey
functions return a key
.Fa datum .
When the end of the database is reached or if an error occurred, its
.Fa dptr
member is a null pointer.
.Pp
The
.Fn dbm_error
function returns 0 if the error indicator is clear;
if the error indicator is set a non-zero value is returned.
.Pp
The
.Fn dbm_clearerr
function always returns 0.
.Pp
The
.Fn dbm_dirfno
function returns the file descriptor of the underlying database file.
.Sh ERRORS
No errors are defined.
.Sh SEE ALSO
.Xr open 2 ,
.Xr db 3 ,
.Xr hash 3
.Sh STANDARDS
The
.Fn dbm_clearerr ,
.Fn dbm_close ,
.Fn dbm_delete ,
.Fn dbm_error ,
.Fn dbm_fetch ,
.Fn dbm_firstkey ,
.Fn dbm_nextkey ,
.Fn dbm_open ,
and
.Fn dbm_store
functions conform to
.St -xpg4.2
and
.St -susv2 .
The
.Fn dbm_dirfno
function is an extension.

534
lib/libc/db/man/dbopen.3 Normal file
View file

@ -0,0 +1,534 @@
.\" $NetBSD: dbopen.3,v 1.18 2010/03/22 19:30:53 joerg Exp $
.\"
.\" Copyright (c) 1990, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" @(#)dbopen.3 8.5 (Berkeley) 1/2/94
.\"
.Dd April 17, 2003
.Dt DBOPEN 3
.Os
.Sh NAME
.Nm dbopen ,
.Nm db
.Nd database access methods
.Sh SYNOPSIS
.In sys/types.h
.In limits.h
.In db.h
.In fcntl.h
.Ft DB *
.Fn dbopen "const char *file" "int flags" "mode_t mode" \
"DBTYPE type" "const void *openinfo"
.Sh DESCRIPTION
.Nm
is the library interface to database files.
The supported file formats are btree, hashed, and UNIX file oriented.
The btree format is a representation of a sorted, balanced tree
structure.
The hashed format is an extensible, dynamic hashing scheme.
The flat-file format is a byte stream file with fixed or variable
length records.
The formats and file format specific information are described in
detail in their respective manual pages
.Xr btree 3 ,
.Xr hash 3 ,
and
.Xr recno 3 .
.Pp
.Nm
opens
.Fa file
for reading and/or writing.
Files never intended to be preserved on disk may be created by setting
the file parameter to
.Dv NULL .
.Pp
The
.Fa flags
and
.Fa mode
arguments are as specified to the
.Xr open 2
routine; however, only the
.Dv O_CREAT ,
.Dv O_EXCL ,
.Dv O_EXLOCK ,
.Dv O_NONBLOCK ,
.Dv O_RDONLY ,
.Dv O_RDWR ,
.Dv O_SHLOCK ,
and
.Dv O_TRUNC
flags are meaningful.
(Note, opening a database file
.Dv O_WRONLY
is not possible.)
.\"Three additional options may be specified by or'ing
.\"them into the
.\".Fa flags
.\"argument.
.\".Pp
.\".Dv DB_LOCK
.\"Do the necessary locking in the database to support concurrent access.
.\"If concurrent access isn't needed or the database is read-only this
.\"flag should not be set, as it tends to have an associated performance
.\"penalty.
.\".Pp
.\".Dv DB_SHMEM
.\"Place the underlying memory pool used by the database in shared
.\"memory.
.\"Necessary for concurrent access.
.\".Pp
.\".Dv DB_TXN
.\"Support transactions in the database.
.\"The
.\".Dv DB_LOCK
.\"and
.\".Dv DB_SHMEM
.\"flags must be set as well.
.Pp
The
.Fa type
argument is of type
.Vt DBTYPE
(as defined in the
.In db.h
include file) and may be set to
.Dv DB_BTREE ,
.Dv DB_HASH ,
or
.Dv DB_RECNO .
.Pp
The
.Fa openinfo
argument is a pointer to an access method specific structure described
in the access method's manual page.
If
.Fa openinfo
is
.Dv NULL ,
each access method will use defaults appropriate for the system and
the access method.
.Pp
.Nm
returns a pointer to a DB structure on success and
.Dv NULL
on error.
The DB structure is defined in the
.In db.h
include file, and contains at least the following fields:
.Bd -literal
typedef struct {
DBTYPE type;
int (*close)(const DB *db);
int (*del)(const DB *db, const DBT *key, u_int flags);
int (*fd)(const DB *db);
int (*get)(const DB *db, DBT *key, DBT *data, u_int flags);
int (*put)(const DB *db, DBT *key, const DBT *data,
u_int flags);
int (*sync)(const DB *db, u_int flags);
int (*seq)(const DB *db, DBT *key, DBT *data, u_int flags);
} DB;
.Ed
.Pp
These elements describe a database type and a set of functions
performing various actions.
These functions take a pointer to a structure as returned by
.Nm ,
and sometimes one or more pointers to key/data structures and a flag
value.
.Bl -tag -width closex
.It Fa type
The type of the underlying access method (and file format).
.It Fa close
A pointer to a routine to flush any cached information to disk, free
any allocated resources, and close the underlying file(s).
Since key/data pairs may be cached in memory, failing to sync the file
with a
.Fa close
or
.Fa sync
function may result in inconsistent or lost information.
.Fa close
routines return \-1 on error (setting
.Va errno )
and 0 on success.
.It Fa del
A pointer to a routine to remove key/data pairs from the database.
.Pp
The parameter
.Fa flags
may be set to the following value:
.Bl -tag -width R_CURSORX
.It Dv R_CURSOR
Delete the record referenced by the cursor.
The cursor must have previously been initialized.
.El
.Pp
.Fa del
routines return \-1 on error (setting
.Va errno ) ,
0 on success, and 1 if the specified
.Fa key
was not in the file.
.It Fa fd
A pointer to a routine which returns a file descriptor representative
of the underlying database.
A file descriptor referencing the same file will be returned to all
processes which call
.Nm
with the same
.Fa file
name.
This file descriptor may be safely used as an argument to the
.Xr fcntl 2
and
.Xr flock 2
locking functions.
The file descriptor is not necessarily associated with any of the
underlying files used by the access method.
No file descriptor is available for in memory databases.
.Fa fd
routines return \-1 on error (setting
.Va errno ) ,
and the file descriptor on success.
.It Fa get
A pointer to a routine which is the interface for keyed retrieval from
the database.
The address and length of the data associated with the specified
.Fa key
are returned in the structure referenced by
.Fa data .
.Fa get
routines return \-1 on error (setting
.Va errno ) ,
0 on success, and 1 if the
.Fa key
was not in the file.
.It Fa put
A pointer to a routine to store key/data pairs in the database.
.Pp
The parameter
.Fa flags
may be set to one of the following values:
.Bl -tag -width R_NOOVERWRITEX
.It Dv R_CURSOR
Replace the key/data pair referenced by the cursor.
The cursor must have previously been initialized.
.It Dv R_IAFTER
Append the data immediately after the data referenced by
.Fa key ,
creating a new key/data pair.
The record number of the appended key/data pair is returned in the
.Fa key
structure.
(Applicable only to the
.Dv DB_RECNO
access method.)
.It Dv R_IBEFORE
Insert the data immediately before the data referenced by
.Fa key ,
creating a new key/data pair.
The record number of the inserted key/data pair is returned in the
.Fa key
structure.
(Applicable only to the
.Dv DB_RECNO
access method.)
.It Dv R_NOOVERWRITE
Enter the new key/data pair only if the key does not previously
exist.
.It Dv R_SETCURSOR
Store the key/data pair, setting or initializing the position of the
cursor to reference it.
(Applicable only to the
.Dv DB_BTREE
and
.Dv DB_RECNO
access methods.)
.El
.Pp
.Dv R_SETCURSOR
is available only for the
.Dv DB_BTREE
and
.Dv DB_RECNO
access methods because it implies that the keys have an inherent order
which does not change.
.Pp
.Dv R_IAFTER
and
.Dv R_IBEFORE
are available only for the
.Dv DB_RECNO
access method because they each imply that the access method is able
to create new keys.
This is only true if the keys are ordered and independent, record
numbers for example.
.Pp
The default behavior of the
.Fa put
routines is to enter the new key/data pair, replacing any previously
existing key.
.Pp
.Fa put
routines return \-1 on error (setting
.Va errno ) ,
0 on success, and 1 if the
.Dv R_NOOVERWRITE
.Fa flag
was set and the key already exists in the file.
.It Fa seq
A pointer to a routine which is the interface for sequential
retrieval from the database.
The address and length of the key are returned in the structure
referenced by
.Fa key ,
and the address and length of the data are returned in the
structure referenced by
.Fa data .
.Pp
Sequential key/data pair retrieval may begin at any time, and the
position of the
.Dq cursor
is not affected by calls to the
.Fa del ,
.Fa get ,
.Fa put ,
or
.Fa sync
routines.
Modifications to the database during a sequential scan will be
reflected in the scan, i.e., records inserted behind the cursor will
not be returned while records inserted in front of the cursor will be
returned.
.Pp
The flag value
.Em must
be set to one of the following values:
.Bl -tag -width R_CURSORX
.It Dv R_CURSOR
The data associated with the specified key is returned.
This differs from the
.Fa get
routines in that it sets or initializes the cursor to the location of
the key as well.
(Note, for the
.Dv DB_BTREE
access method, the returned key is not necessarily an exact match for
the specified key.
The returned key is the smallest key greater than or equal to the
specified key, permitting partial key matches and range searches.)
.It Dv R_FIRST
The first key/data pair of the database is returned, and the cursor
is set or initialized to reference it.
.It Dv R_LAST
The last key/data pair of the database is returned, and the cursor
is set or initialized to reference it.
(Applicable only to the
.Dv DB_BTREE
and
.Dv DB_RECNO
access methods.)
.It Dv R_NEXT
Retrieve the key/data pair immediately after the cursor.
If the cursor is not yet set, this is the same as the
.Dv R_FIRST
flag.
.It Dv R_PREV
Retrieve the key/data pair immediately before the cursor.
If the cursor is not yet set, this is the same as the
.Dv R_LAST
flag.
(Applicable only to the
.Dv DB_BTREE
and
.Dv DB_RECNO
access methods.)
.El
.Pp
.Dv R_LAST
and
.Dv R_PREV
are available only for the
.Dv DB_BTREE
and
.Dv DB_RECNO
access methods because they each imply that the keys have an inherent
order which does not change.
.Pp
.Fa seq
routines return \-1 on error (setting
.Va errno ) ,
0 on success and 1 if there are no key/data pairs less than or greater
than the specified or current key.
If the
.Dv DB_RECNO
access method is being used, and if the database file is a character
special file and no complete key/data pairs are currently available,
the
.Fa seq
routines return 2.
.It Fa sync
A pointer to a routine to flush any cached information to disk.
If the database is in memory only, the
.Fa sync
routine has no effect and will always succeed.
.Pp
The flag value may be set to the following value:
.Bl -tag -width ".Dv R_RECNOSYNC"
.It Dv R_RECNOSYNC
If the
.Dv DB_RECNO
access method is being used, this flag causes the sync routine to
apply to the btree file which underlies the recno file, not the recno
file itself.
(See the
.Fa bfname
field of the
.Xr recno 3
manual page for more information.)
.El
.Pp
.Fa sync
routines return \-1 on error (setting
.Va errno )
and 0 on success.
.El
.Ss KEY/DATA PAIRS
Access to all file types is based on key/data pairs.
Both keys and data are represented by the following data structure:
.Bd -literal
typedef struct {
void *data;
size_t size;
} DBT;
.Ed
.Pp
The elements of the DBT structure are defined as follows:
.Bl -tag -width datax
.It Fa data
A pointer to a byte string.
.It Fa size
The length of the byte string.
.El
.Pp
Key and data byte strings may reference strings of essentially
unlimited length although any two of them must fit into available
memory at the same time.
It should be noted that the access methods provide no guarantees about
byte string alignment.
.Sh ERRORS
The
.Nm
routine may fail and set
.Va errno
for any of the errors specified for the library routines
.Xr open 2
and
.Xr malloc 3
or the following:
.Bl -tag -width Er
.It Er EFTYPE
A file is incorrectly formatted.
.It Er EINVAL
A parameter has been specified (hash function, pad byte, etc.) that is
incompatible with the current file specification or which is not
meaningful for the function (for example, use of the cursor without
prior initialization) or there is a mismatch between the version
number of the file and the software.
.It Er EFBIG
The key could not be inserted due to limitations in the DB file format
(e.g., a hash database was out of overflow pages).
.El
.Pp
The
.Fa close
routines may fail and set
.Va errno
for any of the errors specified for the library routines
.Xr close 2 ,
.Xr read 2 ,
.Xr write 2 ,
.Xr free 3 ,
or
.Xr fsync 2 .
.Pp
The
.Fa del ,
.Fa get ,
.Fa put ,
and
.Fa seq
routines may fail and set
.Va errno
for any of the errors specified for the library routines
.Xr read 2 ,
.Xr write 2 ,
.Xr free 3 ,
or
.Xr malloc 3 .
.Pp
The
.Fa fd
routines will fail and set
.Va errno
to
.Er ENOENT
for in memory databases.
.Pp
The
.Fa sync
routines may fail and set
.Va errno
for any of the errors specified for the library routine
.Xr fsync 2 .
.Sh SEE ALSO
.Xr btree 3 ,
.Xr hash 3 ,
.Xr mpool 3 ,
.Xr recno 3
.Pp
.Rs
.%T "LIBTP: Portable, Modular Transactions for UNIX"
.%A Margo Seltzer
.%A Michael Olson
.%J USENIX Proceedings
.%D Winter 1992
.Re
.Sh BUGS
The typedef DBT is a mnemonic for
.Dq data base thang ,
and was used because no one could think of a reasonable name that
wasn't already used.
.Pp
The file descriptor interface is a kludge and will be deleted in a
future version of the interface.
.Pp
None of the access methods provide any form of concurrent access,
locking, or transactions.

167
lib/libc/db/man/hash.3 Normal file
View file

@ -0,0 +1,167 @@
.\" $NetBSD: hash.3,v 1.13 2010/03/22 19:30:53 joerg Exp $
.\"
.\" Copyright (c) 1990, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" @(#)hash.3 8.6 (Berkeley) 8/18/94
.\"
.Dd September 5, 2008
.Dt HASH 3
.Os
.Sh NAME
.Nm hash
.Nd hash database access method
.Sh SYNOPSIS
.In sys/types.h
.In db.h
.Sh DESCRIPTION
The routine
.Fn dbopen
is the library interface to database files.
One of the supported file formats is hash files.
The general description of the database access methods is in
.Xr dbopen 3 ;
this manual page describes only the hash specific information.
.Pp
The hash data structure is an extensible, dynamic hashing scheme.
.Pp
The access method specific data structure provided to
.Fn dbopen
is defined in the
.In db.h
include file as follows:
.Bd -literal
typedef struct {
u_int bsize;
u_int ffactor;
u_int nelem;
u_int cachesize;
uint32_t (*hash)(const void *, size_t);
int lorder;
} HASHINFO;
.Ed
.Pp
The elements of this structure are as follows:
.Bl -tag -width cachesizex
.It Fa bsize
.Fa bsize
defines the hash table bucket size, and defaults to 4096 for in-memory tables.
If
.Fa bsize
is 0 (no bucket size is specified) a bucket size is chosen based on the
underlying file system I/O block size.
It may be preferable to increase the page size for disk-resident
tables and tables with large data items.
.It Fa ffactor
.Fa ffactor
indicates a desired density within the hash table.
It is an approximation of the number of keys allowed to accumulate in
any one bucket, determining when the hash table grows or shrinks.
The default value is 8.
.It Fa nelem
.Fa nelem
is an estimate of the final size of the hash table.
If not set or set too low, hash tables will expand gracefully as keys
are entered, although a slight performance degradation may be
noticed.
The default value is 1.
.It Fa cachesize
A suggested maximum size, in bytes, of the memory cache.
This value is
.Em only
advisory, and the access method will allocate more memory rather
than fail.
.It Fa hash
.Fa hash
is a user defined hash function.
Since no hash function performs equally well on all possible data, the
user may find that the built-in hash function does poorly on a
particular data set.
User specified hash functions must take two arguments (a pointer to a
byte string and a length) and return a 32-bit quantity to be used as
the hash value.
.It Fa lorder
The byte order for integers in the stored database metadata.
The number should represent the order as an integer; for example,
big endian order would be the number 4,321.
If
.Fa lorder
is 0 (no order is specified) the current host order is used.
If the file already exists, the specified value is ignored and the
value specified when the file was created is used.
.El
.Pp
If the file already exists (and the
.Dv O_TRUNC
flag is not specified), the values specified for the parameters
.Fa bsize ,
.Fa ffactor ,
.Fa lorder ,
and
.Fa nelem
are ignored and the values specified when the file was created are
used.
.Pp
If a hash function is specified,
.Fn hash_open
will attempt to determine if the hash function specified is the same
as the one with which the database was created, and will fail if it is
not.
.\".Pp
.\"Backward compatible interfaces to the routines described in
.\".Xr dbm 3 ,
.\"and
.\".Xr ndbm 3
.\"are provided, however these interfaces are not compatible with
.\"previous file formats.
.Sh ERRORS
The
.Nm
access method routines may fail and set
.Va errno
for any of the errors specified for the library routine
.Xr dbopen 3 .
.Sh SEE ALSO
.Xr btree 3 ,
.Xr dbopen 3 ,
.Xr mpool 3 ,
.Xr recno 3
.Pp
.Rs
.%T "Dynamic Hash Tables"
.%A Per-Ake Larson
.%J Communications of the ACM
.%D April 1988
.Re
.Rs
.%T "A New Hash Package for UNIX"
.%A Margo Seltzer
.%J USENIX Proceedings
.%D Winter 1991
.Re
.Sh BUGS
Only big and little endian byte order is supported.

226
lib/libc/db/man/mpool.3 Normal file
View file

@ -0,0 +1,226 @@
.\" $NetBSD: mpool.3,v 1.9 2003/08/07 16:42:43 agc Exp $
.\"
.\" Copyright (c) 1990, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" @(#)mpool.3 8.1 (Berkeley) 6/4/93
.\"
.Dd April 17, 2003
.Dt MPOOL 3
.Os
.Sh NAME
.Nm mpool ,
.Nm mpool_open ,
.Nm mpool_filter ,
.Nm mpool_new ,
.Nm mpool_get ,
.Nm mpool_put ,
.Nm mpool_sync ,
.Nm mpool_close
.Nd shared memory buffer pool
.Sh SYNOPSIS
.In db.h
.In mpool.h
.Ft MPOOL *
.Fn mpool_open "DBT *key" "int fd" "pgno_t pagesize" "pgno_t maxcache"
.Ft void
.Fn mpool_filter "MPOOL *mp" "void (*pgin)(void *, pgno_t, void *)" \
"void (*pgout)(void *, pgno_t, void *)" "void *pgcookie"
.Ft void *
.Fn mpool_new "MPOOL *mp" "pgno_t *pgnoaddr"
.Ft void *
.Fn mpool_get "MPOOL *mp" "pgno_t pgno" "u_int flags"
.Ft int
.Fn mpool_put "MPOOL *mp" "void *pgaddr" "u_int flags"
.Ft int
.Fn mpool_sync "MPOOL *mp"
.Ft int
.Fn mpool_close "MPOOL *mp"
.Sh DESCRIPTION
.Nm
is the library interface intended to provide page oriented buffer
management of files.
The buffers may be shared between processes.
.Pp
The function
.Nm mpool_open
initializes a memory pool.
The
.Fa key
argument is the byte string used to negotiate between multiple
processes wishing to share buffers.
If the file buffers are mapped in shared memory, all processes using
the same key will share the buffers.
If
.Fa key
is
.Dv NULL ,
the buffers are mapped into private memory.
The
.Fa fd
argument is a file descriptor for the underlying file, which must be
seekable.
If
.Fa key
is
.No non- Ns Dv NULL
and matches a file already being mapped, the
.Fa fd
argument is ignored.
.Pp
The
.Fa pagesize
argument is the size, in bytes, of the pages into which the file is
broken up.
The
.Fa maxcache
argument is the maximum number of pages from the underlying file to
cache at any one time.
This value is not relative to the number of processes which share a
file's buffers, but will be the largest value specified by any of the
processes sharing the file.
.Pp
The
.Nm mpool_filter
function is intended to make transparent input and output processing
of the pages possible.
If the
.Fa pgin
function is specified, it is called each time a buffer is read into
the memory pool from the backing file.
If the
.Fa pgout
function is specified, it is called each time a buffer is written into
the backing file.
Both functions are called with the
.Fa pgcookie
pointer, the page number and a pointer to the page being read or
written.
.Pp
The function
.Nm mpool_new
takes an MPOOL pointer and an address as arguments.
If a new page can be allocated, a pointer to the page is returned and
the page number is stored into the
.Fa pgnoaddr
address.
Otherwise,
.Dv NULL
is returned and errno is set.
.Pp
The function
.Nm mpool_get
takes an MPOOL pointer and a page number as arguments.
If the page exists, a pointer to the page is returned.
Otherwise,
.Dv NULL
is returned and errno is set.
The flags parameter is not currently used.
.Pp
The function
.Nm mpool_put
unpins the page referenced by
.Fa pgaddr .
.Fa pgaddr
must be an address previously returned by
.Nm mpool_get
or
.Nm mpool_new .
The flag value is specified by or'ing any of the following values:
.Bl -tag -width MPOOL_DIRTYX -offset indent
.It Dv MPOOL_DIRTY
The page has been modified and needs to be written to the backing
file.
.El
.Pp
.Nm mpool_put
returns 0 on success and \-1 if an error occurs.
.Pp
The function
.Nm mpool_sync
writes all modified pages associated with the MPOOL pointer to the
backing file.
.Nm mpool_sync
returns 0 on success and \-1 if an error occurs.
.Pp
The
.Nm mpool_close
function frees up any allocated memory associated with the memory pool
cookie.
Modified pages are
.Em not
written to the backing file.
.Nm mpool_close
returns 0 on success and \-1 if an error occurs.
.Sh ERRORS
The
.Nm mpool_open
function may fail and set
.Va errno
for any of the errors specified for the library routine
.Xr malloc 3 .
.Pp
The
.Nm mpool_get
function may fail and set
.Va errno
for the following:
.Bl -tag -width Er -offset indent
.It Er EINVAL
The requested record doesn't exist.
.El
.Pp
The
.Nm mpool_new
and
.Nm mpool_get
functions may fail and set
.Va errno
for any of the errors specified for the library routines
.Xr read 2 ,
.Xr write 2 ,
and
.Xr malloc 3 .
.Pp
The
.Nm mpool_sync
function may fail and set
.Va errno
for any of the errors specified for the library routine
.Xr write 2 .
.Pp
The
.Nm mpool_close
function may fail and set
.Va errno
for any of the errors specified for the library routine
.Xr free 3 .
.Sh SEE ALSO
.Xr btree 3 ,
.Xr dbopen 3 ,
.Xr hash 3 ,
.Xr recno 3

214
lib/libc/db/man/recno.3 Normal file
View file

@ -0,0 +1,214 @@
.\" $NetBSD: recno.3,v 1.11 2010/03/22 19:30:53 joerg Exp $
.\"
.\" Copyright (c) 1990, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" @(#)recno.3 8.5 (Berkeley) 8/18/94
.\"
.Dd April 17, 2003
.Dt RECNO 3
.Os
.Sh NAME
.Nm recno
.Nd record number database access method
.Sh SYNOPSIS
.In sys/types.h
.In db.h
.Sh DESCRIPTION
The routine
.Fn dbopen
is the library interface to database files.
One of the supported file formats is record number files.
The general description of the database access methods is in
.Xr dbopen 3 ;
this manual page describes only the recno specific information.
.Pp
The record number data structure is either variable or fixed-length
records stored in a flat-file format, accessed by the logical record
number.
The existence of record number five implies the existence of records
one through four, and the deletion of record number one causes
record number five to be renumbered to record number four, as well
as the cursor, if positioned after record number one, to shift down
one record.
.Pp
The recno access method specific data structure provided to
.Fn dbopen
is defined in the
.In db.h
include file as follows:
.Bd -literal
typedef struct {
u_long flags;
u_int cachesize;
u_int psize;
int lorder;
size_t reclen;
uint8_t bval;
char *bfname;
} RECNOINFO;
.Ed
.Pp
The elements of this structure are defined as follows:
.Bl -tag -width cachesizex
.It Fa flags
The flag value is specified by or'ing any of the following values:
.Bl -tag -width R_FIXEDLENX -offset indent
.It Dv R_FIXEDLEN
The records are fixed-length, not byte delimited.
The structure element
.Fa reclen
specifies the length of the record, and the structure element
.Fa bval
is used as the pad character.
Any records, inserted into the database, that are less than
.Fa reclen
bytes long are automatically padded.
.It Dv R_NOKEY
In the interface specified by
.Fn dbopen ,
the sequential record retrieval fills in both the caller's key and
data structures.
If the
.Dv R_NOKEY
flag is specified, the cursor routines are not required to fill in the
key structure.
This permits applications to retrieve records at the end of files
without reading all of the intervening records.
.It Dv R_SNAPSHOT
This flag requires that a snapshot of the file be taken when
.Fn dbopen
is called, instead of permitting any unmodified records to be read
from the original file.
.El
.It Fa cachesize
A suggested maximum size, in bytes, of the memory cache.
This value is
.Em only
advisory, and the access method will allocate more memory rather than
fail.
If
.Fa cachesize
is 0 (no size is specified) a default cache is used.
.It Fa psize
The recno access method stores the in-memory copies of its records
in a btree.
This value is the size (in bytes) of the pages used for nodes in that
tree.
If
.Fa psize
is 0 (no page size is specified) a page size is chosen based on the
underlying file system I/O block size.
See
.Xr btree 3
for more information.
.It Fa lorder
The byte order for integers in the stored database metadata.
The number should represent the order as an integer; for example,
big endian order would be the number 4,321.
If
.Fa lorder
is 0 (no order is specified) the current host order is used.
.It Fa reclen
The length of a fixed-length record.
.It Fa bval
The delimiting byte to be used to mark the end of a record for
variable-length records, and the pad character for fixed-length
records.
If no value is specified, newlines
.Pq Dq \en
are used to mark the end of variable-length records and fixed-length
records are padded with spaces.
.It Fa bfname
The recno access method stores the in-memory copies of its records
in a btree.
If bfname is
.No non- Ns Dv NULL ,
it specifies the name of the btree file, as if specified as the file
name for a
.Fn dbopen
of a btree file.
.El
.Pp
The data part of the key/data pair used by the recno access method
is the same as for other access methods.
The key is different.
The
.Fa data
field of the key should be a pointer to a memory location of type
recno_t, as defined in the
.In db.h
include file.
This type is normally the largest unsigned integral type available to
the implementation.
The
.Fa size
field of the key should be the size of that type.
.Pp
Because there can be no meta-data associated with the underlying
recno access method files, any changes made to the default values
(e.g., fixed record length or byte separator value) must be explicitly
specified each time the file is opened.
.Pp
In the interface specified by
.Fn dbopen ,
using the
.Fa put
interface to create a new record will cause the creation of multiple,
empty records if the record number is more than one greater than the
largest record currently in the database.
.Sh ERRORS
The
.Nm
access method routines may fail and set
.Va errno
for any of the errors specified for the library routine
.Xr dbopen 3
or the following:
.Bl -tag -width Er
.It Er EINVAL
An attempt was made to add a record to a fixed-length database that
was too large to fit.
.El
.Sh SEE ALSO
.Xr btree 3 ,
.Xr dbopen 3 ,
.Xr hash 3 ,
.Xr mpool 3
.Pp
.Rs
.%T "Document Processing in a Relational Database System"
.%A Michael Stonebraker
.%A Heidi Stettner
.%A Joseph Kalash
.%A Antonin Guttman
.%A Nadene Lynn
.%J Memorandum No. UCB/ERL M82/32
.%D May 1982
.Re
.Sh BUGS
Only big and little endian byte orders are supported.

View file

@ -0,0 +1,6 @@
# $NetBSD: Makefile.inc,v 1.4 1995/02/27 13:23:53 cgd Exp $
# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93
.PATH: ${.CURDIR}/db/mpool
SRCS+= mpool.c

8
lib/libc/db/mpool/README Normal file
View file

@ -0,0 +1,8 @@
# $NetBSD: README,v 1.2 1995/02/27 13:24:00 cgd Exp $
# @(#)README 8.1 (Berkeley) 6/4/93
These are the current memory pool routines.
They aren't ready for prime time, yet, and
the interface is expected to change.
--keith

466
lib/libc/db/mpool/mpool.c Normal file
View file

@ -0,0 +1,466 @@
/* $NetBSD: mpool.c,v 1.19 2009/04/22 18:44:06 christos Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: mpool.c,v 1.19 2009/04/22 18:44:06 christos Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/queue.h>
#include <sys/stat.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <db.h>
#define __MPOOLINTERFACE_PRIVATE
#include <mpool.h>
#ifdef __weak_alias
__weak_alias(mpool_close,_mpool_close)
__weak_alias(mpool_filter,_mpool_filter)
__weak_alias(mpool_get,_mpool_get)
__weak_alias(mpool_new,_mpool_new)
__weak_alias(mpool_open,_mpool_open)
__weak_alias(mpool_put,_mpool_put)
__weak_alias(mpool_sync,_mpool_sync)
#endif
static BKT *mpool_bkt(MPOOL *);
static BKT *mpool_look(MPOOL *, pgno_t);
static int mpool_write(MPOOL *, BKT *);
/*
 * mpool_open --
 *	Create a memory pool cookie for the regular file open on fd.
 *
 * Parameters:
 *	key:		unused
 *	fd:		descriptor of the backing file
 *	pagesize:	size of one page, in bytes
 *	maxcache:	number of pages to cache before buffers are recycled
 *
 * Returns:
 *	The new pool, or NULL if fstat(2) or the allocation fails, or if
 *	fd is not a regular file (errno is set to ESPIPE in that case).
 */
/*ARGSUSED*/
MPOOL *
mpool_open(void *key, int fd, pgno_t pagesize, pgno_t maxcache)
{
	struct stat st;
	MPOOL *pool;
	int i;

	/*
	 * XXX
	 * Only regular files are handled; pipes are not, although they
	 * should be.
	 */
	if (fstat(fd, &st) != 0)
		return (NULL);
	if (!S_ISREG(st.st_mode)) {
		errno = ESPIPE;
		return (NULL);
	}

	/* The cookie starts out zeroed; fill in what is known. */
	pool = calloc(1, sizeof(*pool));
	if (pool == NULL)
		return (NULL);

	/* Empty lru chain and empty hash chains. */
	CIRCLEQ_INIT(&pool->lqh);
	i = 0;
	while (i < HASHSIZE) {
		CIRCLEQ_INIT(&pool->hqh[i]);
		i++;
	}

	pool->fd = fd;
	pool->pagesize = pagesize;
	pool->maxcache = maxcache;
	/* Number of whole pages currently in the file. */
	pool->npages = (pgno_t)(st.st_size / pagesize);
	return (pool);
}
/*
 * mpool_filter --
 *	Register the page-in and page-out filter callbacks, together with
 *	the opaque cookie that is handed back to them as first argument.
 */
void
mpool_filter(MPOOL *mp, void (*pgin)(void *, pgno_t, void *),
    void (*pgout)(void *, pgno_t, void *), void *pgcookie)
{
	mp->pgcookie = pgcookie;
	mp->pgout = pgout;
	mp->pgin = pgin;
}
/*
 * mpool_new --
 *	Hand out a brand new page at the end of the file.
 *
 * Parameters:
 *	mp:		memory pool
 *	pgnoaddr:	where the number of the new page is stored
 *
 * Returns:
 *	The pinned page, or NULL if no buffer could be obtained.  Aborts
 *	if the page number space is exhausted.
 */
void *
mpool_new( MPOOL *mp, pgno_t *pgnoaddr)
{
	struct _hqh *chain;
	BKT *bkt;
	pgno_t fresh;

	if (mp->npages == MAX_PAGE_NUMBER) {
		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
		abort();
	}
#ifdef STATISTICS
	++mp->pagenew;
#endif
	/* Obtain a buffer that is on no list yet. */
	bkt = mpool_bkt(mp);
	if (bkt == NULL)
		return (NULL);

	/* The new page takes the next unused page number. */
	fresh = mp->npages++;
	bkt->pgno = fresh;
	*pgnoaddr = fresh;
	bkt->flags = MPOOL_PINNED;

	/* Head of its hash chain, tail of the lru chain. */
	chain = &mp->hqh[HASHKEY(fresh)];
	CIRCLEQ_INSERT_HEAD(chain, bkt, hq);
	CIRCLEQ_INSERT_TAIL(&mp->lqh, bkt, q);
	return (bkt->page);
}
/*
 * mpool_get --
 *	Get a page, either from the cache or by reading it from the file.
 *
 * Parameters:
 *	mp:	memory pool
 *	pgno:	number of the page wanted
 *	flags:	unused
 *
 * Returns:
 *	The pinned page, or NULL on failure.  errno is EINVAL if pgno is
 *	not below the pool's page count and EFTYPE if the file yielded
 *	fewer bytes than a full page; otherwise it is whatever the failing
 *	call left behind.
 */
/*ARGSUSED*/
void *
mpool_get(MPOOL *mp, pgno_t pgno, u_int flags)
{
	struct _hqh *head;
	BKT *bp;
	off_t off;
	ssize_t nr;
	int sverrno;

	/* Check for attempt to retrieve a non-existent page. */
	if (pgno >= mp->npages) {
		errno = EINVAL;
		return (NULL);
	}
#ifdef STATISTICS
	++mp->pageget;
#endif
	/* Check for a page that is cached. */
	if ((bp = mpool_look(mp, pgno)) != NULL) {
#ifdef DEBUG
		if (bp->flags & MPOOL_PINNED) {
			(void)fprintf(stderr,
			    "mpool_get: page %d already pinned\n", bp->pgno);
			abort();
		}
#endif
		/*
		 * Move the page to the head of the hash chain and the tail
		 * of the lru chain.
		 */
		head = &mp->hqh[HASHKEY(bp->pgno)];
		CIRCLEQ_REMOVE(head, bp, hq);
		CIRCLEQ_INSERT_HEAD(head, bp, hq);
		CIRCLEQ_REMOVE(&mp->lqh, bp, q);
		CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);

		/* Return a pinned page. */
		bp->flags |= MPOOL_PINNED;
		return (bp->page);
	}

	/* Get a page from the cache. */
	if ((bp = mpool_bkt(mp)) == NULL)
		return (NULL);

	/* Read in the contents. */
#ifdef STATISTICS
	++mp->pageread;
#endif
	/*
	 * Widen before multiplying: both operands are pgno_t, so the
	 * product would otherwise be truncated to pgno_t before it is
	 * stored in off.
	 */
	off = (off_t)mp->pagesize * pgno;
	nr = pread(mp->fd, bp->page, (size_t)mp->pagesize, off);
	if (nr != (ssize_t)mp->pagesize) {
		/*
		 * The bucket is on neither the hash nor the lru chain at
		 * this point, so nothing else can reach it: release it and
		 * take it out of the cache count instead of leaking it.
		 * free() may clobber errno, so pick the value first.
		 */
		sverrno = nr >= 0 ? EFTYPE : errno;
		free(bp);
		--mp->curcache;
		errno = sverrno;
		return (NULL);
	}

	/* Set the page number, pin the page. */
	bp->pgno = pgno;
	bp->flags = MPOOL_PINNED;

	/*
	 * Add the page to the head of the hash chain and the tail
	 * of the lru chain.
	 */
	head = &mp->hqh[HASHKEY(bp->pgno)];
	CIRCLEQ_INSERT_HEAD(head, bp, hq);
	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);

	/* Run through the user's filter. */
	if (mp->pgin != NULL)
		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
	return (bp->page);
}
/*
 * mpool_put --
 *	Give a page back to the pool: the pin is dropped and, if the caller
 *	passed MPOOL_DIRTY, the page is marked as needing a write.
 *
 * Returns:
 *	RET_SUCCESS
 */
/*ARGSUSED*/
int
mpool_put(MPOOL *mp, void *page, u_int flags)
{
	BKT *bkt;

#ifdef STATISTICS
	++mp->pageput;
#endif
	/* The bucket header sits immediately in front of the page data. */
	bkt = (BKT *)(void *)((char *)page - sizeof(BKT));
#ifdef DEBUG
	if ((bkt->flags & MPOOL_PINNED) == 0) {
		(void)fprintf(stderr,
		    "mpool_put: page %d not pinned\n", bkt->pgno);
		abort();
	}
#endif
	/* Only the dirty bit of the caller's flags is taken over. */
	bkt->flags = (bkt->flags & ~MPOOL_PINNED) | (flags & MPOOL_DIRTY);
	return (RET_SUCCESS);
}
/*
 * mpool_close --
 *	Tear down the buffer pool: every bucket on the lru chain is freed,
 *	then the pool cookie itself.  Nothing is written and the file
 *	descriptor is not touched.
 *
 * Returns:
 *	RET_SUCCESS
 */
int
mpool_close(MPOOL *mp)
{
	BKT *victim;

	/* Keep unlinking the first lru entry until the chain is empty. */
	for (;;) {
		victim = mp->lqh.cqh_first;
		if (victim == (void *)&mp->lqh)
			break;
		CIRCLEQ_REMOVE(&mp->lqh, victim, q);
		free(victim);
	}

	free(mp);
	return (RET_SUCCESS);
}
/*
 * mpool_sync --
 *	Write every dirty page on the lru chain, then fsync(2) the file.
 *
 * Returns:
 *	RET_SUCCESS, or RET_ERROR as soon as a page write or the fsync
 *	fails.
 */
int
mpool_sync(MPOOL *mp)
{
	BKT *cur;

	cur = mp->lqh.cqh_first;
	while (cur != (void *)&mp->lqh) {
		if ((cur->flags & MPOOL_DIRTY) != 0) {
			if (mpool_write(mp, cur) == RET_ERROR)
				return (RET_ERROR);
		}
		cur = cur->q.cqe_next;
	}

	/* Sync the file descriptor. */
	if (fsync(mp->fd) != 0)
		return (RET_ERROR);
	return (RET_SUCCESS);
}
/*
 * mpool_bkt
 * Get a page from the cache (or create one).
 *
 * Returns a bucket that is on neither the hash chain nor the lru chain;
 * linking it in is left to the caller.  Returns NULL if a dirty victim
 * could not be written out or if memory allocation failed.
 */
static BKT *
mpool_bkt(MPOOL *mp)
{
struct _hqh *head;
BKT *bp;
/* If under the max cached, always create a new page. */
if (mp->curcache < mp->maxcache)
goto new;
/*
 * If the cache is max'd out, walk the lru list for a buffer we
 * can flush. If we find one, write it (if necessary) and take it
 * off any lists. If we don't find anything we grow the cache anyway.
 * The cache never shrinks.
 */
for (bp = mp->lqh.cqh_first;
bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
if (!(bp->flags & MPOOL_PINNED)) {
/* Flush if dirty. */
if (bp->flags & MPOOL_DIRTY &&
mpool_write(mp, bp) == RET_ERROR)
return (NULL);
#ifdef STATISTICS
/* Counted for every recycled buffer, whether or not it was dirty. */
++mp->pageflush;
#endif
/* Remove from the hash and lru queues. */
head = &mp->hqh[HASHKEY(bp->pgno)];
CIRCLEQ_REMOVE(head, bp, hq);
CIRCLEQ_REMOVE(&mp->lqh, bp, q);
#ifdef DEBUG
{
/*
 * Fill the recycled header and page with 0xff; the page
 * pointer is saved and put back because the fill overwrites it.
 */
void *spage = bp->page;
(void)memset(bp, 0xff,
(size_t)(sizeof(BKT) + mp->pagesize));
bp->page = spage;
}
#endif
return (bp);
}
/* One allocation holds the BKT header followed by the page itself. */
new: if ((bp = calloc(1, (size_t)(sizeof(BKT) + mp->pagesize))) == NULL)
return (NULL);
#ifdef STATISTICS
++mp->pagealloc;
#endif
#if defined(DEBUG) || defined(PURIFY)
/* Replace calloc's zero fill with 0xff in these builds. */
(void)memset(bp, 0xff, (size_t)(sizeof(BKT) + mp->pagesize));
#endif
/* The page data starts right after the header. */
bp->page = (char *)(void *)bp + sizeof(BKT);
++mp->curcache;
return (bp);
}
/*
 * mpool_write --
 *	Write a page to disk.
 *
 * The page is passed through the output filter, written at its file
 * offset, and passed back through the input filter so that the cached
 * copy stays usable.  The dirty flag is cleared only if the whole page
 * was written.
 *
 * Returns:
 *	RET_SUCCESS, or RET_ERROR if pwrite(2) failed or wrote less than a
 *	full page.
 */
static int
mpool_write(MPOOL *mp, BKT *bp)
{
	off_t off;
	ssize_t nw;
	int sverrno;

#ifdef STATISTICS
	++mp->pagewrite;
#endif
	/* Run through the user's filter. */
	if (mp->pgout)
		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);

	/*
	 * Widen before multiplying: both operands are pgno_t, so the
	 * product would otherwise be truncated to pgno_t before it is
	 * stored in off.
	 */
	off = (off_t)mp->pagesize * bp->pgno;
	nw = pwrite(mp->fd, bp->page, (size_t)mp->pagesize, off);
	sverrno = errno;

	/*
	 * Re-run through the input filter since this page may soon be
	 * accessed via the cache, and whatever the user's output filter
	 * did may screw things up if we don't let the input filter
	 * restore the in-core copy.  This is done on failure as well: the
	 * page stays cached and dirty, so leaving it in its output form
	 * would hand filtered data to the next reader and filter it a
	 * second time on the next write attempt.
	 */
	if (mp->pgin)
		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);

	if (nw != (ssize_t)mp->pagesize) {
		/* Report pwrite's errno, not anything the filter set. */
		errno = sverrno;
		return (RET_ERROR);
	}
	bp->flags &= ~MPOOL_DIRTY;
	return (RET_SUCCESS);
}
/*
 * mpool_look --
 *	Search the hash chain selected by pgno for a cached page.
 *
 * Returns:
 *	The matching bucket, or NULL if the page is not cached.
 */
static BKT *
mpool_look(MPOOL *mp, pgno_t pgno)
{
	struct _hqh *chain;
	BKT *cur;

	chain = &mp->hqh[HASHKEY(pgno)];
	cur = chain->cqh_first;
	while (cur != (void *)chain) {
		if (cur->pgno == pgno) {
#ifdef STATISTICS
			++mp->cachehit;
#endif
			return (cur);
		}
		cur = cur->hq.cqe_next;
	}
#ifdef STATISTICS
	++mp->cachemiss;
#endif
	return (NULL);
}
#ifdef STATISTICS
/*
 * mpool_stat --
 *	Print out cache statistics.
 *
 * Writes the pool's counters to stderr, followed by the page number of
 * every bucket on the lru chain, ten per line.  A page number is
 * followed by "d" if the page is dirty and by "P" if it is pinned.
 * Only compiled when STATISTICS is defined.
 */
void
mpool_stat(MPOOL *mp)
{
	BKT *bp;
	int cnt;
	const char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n", (u_long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (u_long)mp->pagesize, (u_long)mp->curcache, (u_long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    mp->pageput, mp->pageget, mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    mp->pagealloc, mp->pageflush);
	/* Skip the hit rate until there has been at least one lookup. */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, mp->cachehit, mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    mp->pageread, mp->pagewrite);

	sep = "";
	cnt = 0;
	for (bp = mp->lqh.cqh_first;
	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
		/*
		 * Page numbers are unsigned: print them the same way as
		 * the page counts above instead of through %d.
		 */
		(void)fprintf(stderr, "%s%lu", sep, (u_long)bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";
	}
	(void)fprintf(stderr, "\n");
}
#endif

View file

@ -0,0 +1,7 @@
# $NetBSD: Makefile.inc,v 1.5 1996/05/03 21:38:43 cgd Exp $
# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93
.PATH: ${.CURDIR}/db/recno
SRCS+= rec_close.c rec_delete.c rec_get.c rec_open.c rec_put.c rec_search.c \
rec_seq.c rec_utils.c

View file

@ -0,0 +1,52 @@
/* $NetBSD: extern.h,v 1.8 2008/08/26 21:18:38 joerg Exp $ */
/*-
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)extern.h 8.3 (Berkeley) 6/4/94
*/
#include "../btree/extern.h"
/*
 * Prototypes shared by the recno source files.  The int-returning
 * routines defined in rec_close.c, rec_delete.c and rec_get.c report
 * their outcome as RET_SUCCESS, RET_ERROR or RET_SPECIAL.
 */
int __rec_close(DB *);
int __rec_delete(const DB *, const DBT *, u_int);
int __rec_dleaf(BTREE *, PAGE *, uint32_t);
int __rec_fd(const DB *);
/* Fixed-length record input from a file mapping (rec_get.c). */
int __rec_fmap(BTREE *, recno_t);
int __rec_fout(BTREE *);
/* Fixed-length record input from a stdio stream (rec_get.c). */
int __rec_fpipe(BTREE *, recno_t);
int __rec_get(const DB *, const DBT *, DBT *, u_int);
int __rec_iput(BTREE *, recno_t, const DBT *, u_int);
int __rec_put(const DB *dbp, DBT *, const DBT *, u_int);
int __rec_ret(BTREE *, EPG *, recno_t, DBT *, DBT *);
EPG *__rec_search(BTREE *, recno_t, enum SRCHOP);
int __rec_seq(const DB *, DBT *, DBT *, u_int);
int __rec_sync(const DB *, u_int);
/* Variable-length record input from a file mapping (rec_get.c). */
int __rec_vmap(BTREE *, recno_t);
int __rec_vout(BTREE *);
/* Variable-length record input from a stdio stream (rec_get.c). */
int __rec_vpipe(BTREE *, recno_t);

View file

@ -0,0 +1,190 @@
/* $NetBSD: rec_close.c,v 1.15 2008/09/11 12:58:00 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: rec_close.c,v 1.15 2008/09/11 12:58:00 joerg Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <unistd.h>
#include <db.h>
#include "recno.h"
/*
 * __REC_CLOSE -- Close a recno tree.
 *
 * The tree is synced first; if that fails nothing is released and
 * RET_ERROR is returned.  After that the close is carried through to the
 * end, and any failure along the way is reported once everything has
 * been attempted.
 *
 * Parameters:
 *	dbp:	pointer to access method
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS
 */
int
__rec_close(DB *dbp)
{
	BTREE *tree = dbp->internal;
	int rc = RET_SUCCESS;
	int failed;

	/* Toss any page pinned across calls. */
	if (tree->bt_pinned != NULL) {
		mpool_put(tree->bt_mp, tree->bt_pinned, 0);
		tree->bt_pinned = NULL;
	}

	if (__rec_sync(dbp, 0) == RET_ERROR)
		return (RET_ERROR);

	/* Committed to closing. */
	if (F_ISSET(tree, R_MEMMAPPED)) {
		if (munmap(tree->bt_smap, tree->bt_msize) != 0)
			rc = RET_ERROR;
	}

	/* Release the record file, through stdio if R_CLOSEFP is set. */
	if (!F_ISSET(tree, R_INMEM)) {
		if (F_ISSET(tree, R_CLOSEFP))
			failed = fclose(tree->bt_rfp);
		else
			failed = close(tree->bt_rfd);
		if (failed)
			rc = RET_ERROR;
	}

	if (__bt_close(dbp) == RET_ERROR)
		rc = RET_ERROR;
	return (rc);
}
/*
 * __REC_SYNC -- sync the recno tree to disk.
 *
 * With R_RECNOSYNC only the underlying btree is synced.  Otherwise, if
 * the tree is writable, file-backed and modified, the whole record file
 * is rewritten from the tree and truncated at the end of the last record
 * written.
 *
 * Parameters:
 *	dbp:	pointer to access method
 *	flags:	R_RECNOSYNC, or anything else for a record file sync
 *
 * Returns:
 *	RET_SUCCESS, RET_ERROR.
 */
int
__rec_sync(const DB *dbp, u_int flags)
{
	struct iovec iov[2];
	BTREE *t;
	DBT data, key;
	off_t off;
	recno_t scursor, trec;
	int status;

	t = dbp->internal;

	/* Toss any page pinned across calls. */
	if (t->bt_pinned != NULL) {
		mpool_put(t->bt_mp, t->bt_pinned, 0);
		t->bt_pinned = NULL;
	}

	if (flags == R_RECNOSYNC)
		return (__bt_sync(dbp, 0));

	if (F_ISSET(t, R_RDONLY | R_INMEM) || !F_ISSET(t, R_MODIFIED))
		return (RET_SUCCESS);

	/* Read any remaining records into the tree. */
	if (!F_ISSET(t, R_EOF) && t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR)
		return (RET_ERROR);

	/* Rewind the file descriptor. */
	if (lseek(t->bt_rfd, (off_t)0, SEEK_SET) != 0)
		return (RET_ERROR);

	/* Save the cursor. */
	scursor = t->bt_cursor.rcursor;

	key.size = sizeof(recno_t);
	key.data = &trec;

	/*
	 * A failed write leaves the loop with status set to RET_ERROR
	 * rather than returning on the spot, so that the cursor saved
	 * above is put back on that path as well.
	 */
	if (F_ISSET(t, R_FIXLEN)) {
		/*
		 * We assume that fixed length records are all fixed length.
		 * Any that aren't are either EINVAL'd or corrected by the
		 * record put code.
		 */
		status = (dbp->seq)(dbp, &key, &data, R_FIRST);
		while (status == RET_SUCCESS) {
			if (write(t->bt_rfd, data.data, data.size) !=
			    (ssize_t) data.size) {
				status = RET_ERROR;
				break;
			}
			status = (dbp->seq)(dbp, &key, &data, R_NEXT);
		}
	} else {
		/* Each record is followed by the one-byte delimiter. */
		iov[1].iov_base = &t->bt_bval;
		iov[1].iov_len = 1;

		status = (dbp->seq)(dbp, &key, &data, R_FIRST);
		while (status == RET_SUCCESS) {
			iov[0].iov_base = data.data;
			iov[0].iov_len = data.size;
			if (writev(t->bt_rfd, iov, 2) !=
			    (ssize_t) (data.size + 1)) {
				status = RET_ERROR;
				break;
			}
			status = (dbp->seq)(dbp, &key, &data, R_NEXT);
		}
	}

	/* Restore the cursor. */
	t->bt_cursor.rcursor = scursor;

	if (status == RET_ERROR)
		return (RET_ERROR);

	/* Cut the file off at the current write position. */
	if ((off = lseek(t->bt_rfd, (off_t)0, SEEK_CUR)) == -1)
		return (RET_ERROR);
	if (ftruncate(t->bt_rfd, off))
		return (RET_ERROR);
	F_CLR(t, R_MODIFIED);
	return (RET_SUCCESS);
}

View file

@ -0,0 +1,201 @@
/* $NetBSD: rec_delete.c,v 1.17 2008/09/11 12:58:00 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: rec_delete.c,v 1.17 2008/09/11 12:58:00 joerg Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/types.h>
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <db.h>
#include "recno.h"
static int rec_rdelete(BTREE *, recno_t);
/*
 * __REC_DELETE -- Delete the item(s) referenced by a key.
 *
 * Parameters:
 *	dbp:	pointer to access method
 *	key:	key to delete (a record number, counted from 1)
 *	flags:	0 to delete by key, R_CURSOR to delete what the cursor
 *		references; anything else is rejected with EINVAL
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
 */
int
__rec_delete(const DB *dbp, const DBT *key, u_int flags)
{
	BTREE *tree;
	recno_t recno;
	int rc;

	tree = dbp->internal;

	/* Toss any page pinned across calls. */
	if (tree->bt_pinned != NULL) {
		mpool_put(tree->bt_mp, tree->bt_pinned, 0);
		tree->bt_pinned = NULL;
	}

	if (flags == 0) {
		/* Record number 0 is not a legal key. */
		recno = *(recno_t *)key->data;
		if (recno == 0) {
			errno = EINVAL;
			return (RET_ERROR);
		}
		if (recno > tree->bt_nrecs)
			return (RET_SPECIAL);
		rc = rec_rdelete(tree, recno - 1);
	} else if (flags == R_CURSOR) {
		if (!F_ISSET(&tree->bt_cursor, CURS_INIT)) {
			errno = EINVAL;
			return (RET_ERROR);
		}
		if (tree->bt_nrecs == 0)
			return (RET_SPECIAL);
		rc = rec_rdelete(tree, tree->bt_cursor.rcursor - 1);
		/* Step the cursor back once its record is gone. */
		if (rc == RET_SUCCESS)
			--tree->bt_cursor.rcursor;
	} else {
		errno = EINVAL;
		return (RET_ERROR);
	}

	if (rc == RET_SUCCESS)
		F_SET(tree, B_MODIFIED | R_MODIFIED);
	return (rc);
}
/*
 * REC_RDELETE -- Delete the data matching the specified key.
 *
 * Parameters:
 *	t:	tree
 *	nrec:	record to delete
 *
 * Returns:
 *	RET_ERROR if the search fails, otherwise whatever __rec_dleaf
 *	reports.
 */
static int
rec_rdelete(BTREE *t, recno_t nrec)
{
	EPG *hit;
	PAGE *pg;
	int rc;

	/* Find the record; __rec_search pins the page. */
	hit = __rec_search(t, nrec, SDELETE);
	if (hit == NULL)
		return (RET_ERROR);

	/* Delete the record. */
	pg = hit->page;
	rc = __rec_dleaf(t, pg, (uint32_t)hit->index);

	/* The page goes back dirty only if the delete went through. */
	if (rc == RET_SUCCESS) {
		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
		return (RET_SUCCESS);
	}
	mpool_put(t->bt_mp, pg, 0);
	return (rc);
}
/*
 * __REC_DLEAF -- Delete a single record from a recno leaf page.
 *
 * Parameters:
 * t: tree
 * h: leaf page holding the record
 * idx: index on current page to delete
 *
 * Returns:
 * RET_SUCCESS, RET_ERROR.
 */
int
__rec_dleaf(BTREE *t, PAGE *h, uint32_t idx)
{
RLEAF *rl;
indx_t *ip, cnt, offset;
uint32_t nbytes;
char *from;
void *to;
size_t temp;
/*
 * Delete a record from a recno leaf page. Internal records are never
 * deleted from internal pages, regardless of the records that caused
 * them to be added being deleted. Pages made empty by deletion are
 * not reclaimed. They are, however, made available for reuse.
 *
 * Pack the remaining entries at the end of the page, shift the indices
 * down, overwriting the deleted record and its index. If the record
 * uses overflow pages, make them available for reuse.
 */
to = rl = GETRLEAF(h, idx);
if (rl->flags & P_BIGDATA && __ovfl_delete(t, rl->bytes) == RET_ERROR)
return (RET_ERROR);
/* Number of bytes the deleted record occupies on the page. */
nbytes = NRLEAF(rl);
/*
 * Compress the key/data pairs. Compress and adjust the [BR]LEAF
 * offsets. Reset the headers.
 */
/*
 * Slide everything stored between h->upper and the deleted record up
 * by nbytes, which overwrites the deleted record.
 */
from = (char *)(void *)h + h->upper;
memmove(from + nbytes, from, (size_t)((char *)to - from));
h->upper += nbytes;
/* Page offset of the deleted record; records below it were moved. */
offset = h->linp[idx];
/* Index slots in front of idx: fix the offsets of moved records. */
temp = &h->linp[idx] - (ip = &h->linp[0]);
/* NOTE(review): _DBFIT presumably checks that temp fits in uint16_t -- confirm. */
_DBFIT(temp, uint16_t);
for (cnt = (uint16_t)temp; cnt--; ++ip)
if (ip[0] < offset)
ip[0] += nbytes;
/*
 * Index slots from idx on: pull each following slot down by one,
 * fixing the offsets of moved records on the way.  The pre-decrement
 * makes the loop run one time fewer than there are slots left.
 */
temp = &h->linp[NEXTINDEX(h)] - ip;
_DBFIT(temp, uint16_t);
for (cnt = (uint16_t)temp; --cnt; ++ip)
ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
/* One index slot fewer on the page, one record fewer in the tree. */
h->lower -= sizeof(indx_t);
--t->bt_nrecs;
return (RET_SUCCESS);
}

306
lib/libc/db/recno/rec_get.c Normal file
View file

@ -0,0 +1,306 @@
/* $NetBSD: rec_get.c,v 1.16 2008/09/11 12:58:00 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: rec_get.c,v 1.16 2008/09/11 12:58:00 joerg Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/types.h>
#include <assert.h>
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <db.h>
#include "recno.h"
/*
 * __REC_GET -- Get a record from the btree.
 *
 * Parameters:
 *	dbp:	pointer to access method
 *	key:	key to find (a record number, counted from 1)
 *	data:	data to return
 *	flags:	currently unused, must be 0
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
 */
int
__rec_get(const DB *dbp, const DBT *key, DBT *data, u_int flags)
{
	BTREE *tree;
	EPG *found;
	recno_t recno;
	int rc;

	tree = dbp->internal;

	/* Toss any page pinned across calls. */
	if (tree->bt_pinned != NULL) {
		mpool_put(tree->bt_mp, tree->bt_pinned, 0);
		tree->bt_pinned = NULL;
	}

	/* Get currently doesn't take any flags... */
	if (flags != 0) {
		errno = EINVAL;
		return (RET_ERROR);
	}
	/* ...and keys of 0 are illegal. */
	recno = *(recno_t *)key->data;
	if (recno == 0) {
		errno = EINVAL;
		return (RET_ERROR);
	}

	/*
	 * A record beyond what the tree holds so far may still be in the
	 * original file, unless the file is exhausted or there is none.
	 */
	if (recno > tree->bt_nrecs) {
		if (F_ISSET(tree, R_EOF | R_INMEM))
			return (RET_SPECIAL);
		rc = tree->bt_irec(tree, recno);
		if (rc != RET_SUCCESS)
			return (rc);
	}

	found = __rec_search(tree, recno - 1, SEARCH);
	if (found == NULL)
		return (RET_ERROR);

	rc = __rec_ret(tree, found, 0, NULL, data);

	/* Release the page now, or keep it pinned until the next call. */
	if (F_ISSET(tree, B_DB_LOCK))
		mpool_put(tree->bt_mp, found->page, 0);
	else
		tree->bt_pinned = found->page;
	return (rc);
}
/*
 * __REC_FPIPE -- Get fixed length records from a pipe.
 *
 * Records are read from the tree's stdio stream until the tree holds
 * top records or the stream ends.  A record cut short by end of input is
 * padded with the tree's bval byte.
 *
 * Parameters:
 *	t:	tree
 *	top:	record count to read up to
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS, or RET_SPECIAL if the input ended before
 *	top records were available (R_EOF is set on the tree).
 */
int
__rec_fpipe(BTREE *t, recno_t top)
{
	DBT data;
	recno_t nrec;
	size_t len;
	int ch;
	uint8_t *p;
	void *nbuf;

	if (t->bt_rdata.size < t->bt_reclen) {
		/*
		 * Grow through a temporary so that a failed realloc neither
		 * leaks the old buffer nor leaves a NULL data pointer behind
		 * a non-zero size.  realloc(NULL, n) acts as malloc(n).
		 */
		nbuf = realloc(t->bt_rdata.data, t->bt_reclen);
		if (nbuf == NULL)
			return (RET_ERROR);
		t->bt_rdata.data = nbuf;
		t->bt_rdata.size = t->bt_reclen;
	}
	data.data = t->bt_rdata.data;
	data.size = t->bt_reclen;

	for (nrec = t->bt_nrecs; nrec < top;) {
		len = t->bt_reclen;
		/*
		 * len is only decremented when a byte was read, so at end
		 * of input it still holds the number of bytes to pad.
		 */
		for (p = t->bt_rdata.data;; *p++ = ch)
			if ((ch = getc(t->bt_rfp)) == EOF || !--len) {
				if (ch != EOF)
					*p = ch;
				if (len != 0)
					memset(p, t->bt_bval, len);
				if (__rec_iput(t,
				    nrec, &data, 0) != RET_SUCCESS)
					return (RET_ERROR);
				++nrec;
				break;
			}
		if (ch == EOF)
			break;
	}
	if (nrec < top) {
		F_SET(t, R_EOF);
		return (RET_SPECIAL);
	}
	return (RET_SUCCESS);
}
/*
 * __REC_VPIPE -- Get variable length records from a pipe.
 *
 * Records are read from the tree's stdio stream, each ending at the
 * tree's bval byte or at end of input, until the tree holds top records
 * or the stream ends.  The record buffer grows in steps of 256 bytes.
 *
 * Parameters:
 *	t:	tree
 *	top:	record count to read up to
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS, or RET_SPECIAL if the input ended before
 *	top records were available (R_EOF is set on the tree).
 */
int
__rec_vpipe(BTREE *t, recno_t top)
{
	DBT data;
	recno_t nrec;
	ptrdiff_t len;
	size_t sz, nsize;
	int bval, ch;
	uint8_t *p;
	void *nbuf;

	bval = t->bt_bval;
	for (nrec = t->bt_nrecs; nrec < top; ++nrec) {
		for (p = t->bt_rdata.data,
		    sz = t->bt_rdata.size;; *p++ = ch, --sz) {
			if ((ch = getc(t->bt_rfp)) == EOF || ch == bval) {
				data.data = t->bt_rdata.data;
				data.size = p - (uint8_t *)t->bt_rdata.data;
				/* Nothing read before EOF: no record. */
				if (ch == EOF && data.size == 0)
					break;
				if (__rec_iput(t, nrec, &data, 0)
				    != RET_SUCCESS)
					return (RET_ERROR);
				break;
			}
			if (sz == 0) {
				/*
				 * Out of room.  Grow through temporaries and
				 * commit the new pointer and size only on
				 * success, so that a failed realloc neither
				 * leaks the old buffer nor leaves the
				 * recorded size ahead of the allocation.
				 * realloc(NULL, n) acts as malloc(n).
				 */
				len = p - (uint8_t *)t->bt_rdata.data;
				nsize = t->bt_rdata.size + 256;
				nbuf = realloc(t->bt_rdata.data, nsize);
				if (nbuf == NULL)
					return (RET_ERROR);
				t->bt_rdata.data = nbuf;
				t->bt_rdata.size = nsize;
				sz = 256;
				p = (uint8_t *)t->bt_rdata.data + len;
			}
		}
		if (ch == EOF)
			break;
	}
	if (nrec < top) {
		F_SET(t, R_EOF);
		return (RET_SPECIAL);
	}
	return (RET_SUCCESS);
}
/*
 * __REC_FMAP -- Get fixed length records from a file.
 *
 * Records are copied out of the region between the tree's bt_cmap and
 * bt_emap pointers until the tree holds top records or the region is
 * used up.  A final short record is padded with the tree's bval byte.
 *
 * Parameters:
 *	t:	tree
 *	top:	record count to read up to
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS, or RET_SPECIAL if the region ended before
 *	top records were available (R_EOF is set on the tree).
 */
int
__rec_fmap(BTREE *t, recno_t top)
{
	DBT data;
	recno_t nrec;
	uint8_t *sp, *ep, *p;
	size_t len;
	void *nbuf;

	if (t->bt_rdata.size < t->bt_reclen) {
		/*
		 * Grow through a temporary so that a failed realloc neither
		 * leaks the old buffer nor leaves a NULL data pointer behind
		 * a non-zero size.  realloc(NULL, n) acts as malloc(n).
		 */
		nbuf = realloc(t->bt_rdata.data, t->bt_reclen);
		if (nbuf == NULL)
			return (RET_ERROR);
		t->bt_rdata.data = nbuf;
		t->bt_rdata.size = t->bt_reclen;
	}
	data.data = t->bt_rdata.data;
	data.size = t->bt_reclen;

	sp = (uint8_t *)t->bt_cmap;
	ep = (uint8_t *)t->bt_emap;
	for (nrec = t->bt_nrecs; nrec < top; ++nrec) {
		if (sp >= ep) {
			F_SET(t, R_EOF);
			return (RET_SPECIAL);
		}
		/* Copy up to one record's worth of bytes. */
		len = t->bt_reclen;
		for (p = t->bt_rdata.data;
		    sp < ep && len > 0; *p++ = *sp++, --len)
			continue;
		if (len != 0)
			memset(p, t->bt_bval, len);
		if (__rec_iput(t, nrec, &data, 0) != RET_SUCCESS)
			return (RET_ERROR);
	}
	/* Remember how far into the region we got. */
	t->bt_cmap = (caddr_t)sp;
	return (RET_SUCCESS);
}
/*
 * __REC_VMAP -- Get variable length records from a file.
 *
 * Records are taken straight from the region between the tree's bt_cmap
 * and bt_emap pointers, each one ending at the tree's bval byte or at
 * the end of the region.
 *
 * Parameters:
 *	t:	tree
 *	top:	record count to read up to
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS, or RET_SPECIAL if the region ended before
 *	top records were available (R_EOF is set on the tree).
 */
int
__rec_vmap(BTREE *t, recno_t top)
{
	DBT data;
	uint8_t *cur, *end, *start;
	recno_t recno;
	int delim;

	cur = (uint8_t *)t->bt_cmap;
	end = (uint8_t *)t->bt_emap;
	delim = t->bt_bval;

	recno = t->bt_nrecs;
	while (recno < top) {
		if (cur >= end) {
			F_SET(t, R_EOF);
			return (RET_SPECIAL);
		}

		/* Scan to the delimiter, or to the end of the region. */
		start = cur;
		while (cur < end && *cur != delim)
			++cur;
		data.data = start;
		data.size = cur - start;

		if (__rec_iput(t, recno, &data, 0) != RET_SUCCESS)
			return (RET_ERROR);

		/* Step over the delimiter. */
		++cur;
		++recno;
	}
	t->bt_cmap = (caddr_t)cur;
	return (RET_SUCCESS);
}

View file

@ -0,0 +1,250 @@
/* $NetBSD: rec_open.c,v 1.17 2008/09/11 12:58:00 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: rec_open.c,v 1.17 2008/09/11 12:58:00 joerg Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <unistd.h>
#include <db.h>
#include "recno.h"
DB *
__rec_open(const char *fname, int flags, mode_t mode, const RECNOINFO *openinfo,
int dflags)
{
BTREE *t;
BTREEINFO btopeninfo;
DB *dbp;
PAGE *h;
struct stat sb;
int rfd = -1; /* pacify gcc */
int sverrno;
dbp = NULL;
/* Open the user's file -- if this fails, we're done. */
if (fname != NULL) {
if ((rfd = open(fname, flags, mode)) == -1)
return (NULL);
if (fcntl(rfd, F_SETFD, FD_CLOEXEC) == -1)
goto err;
}
/* Create a btree in memory (backed by disk). */
if (openinfo) {
if (openinfo->flags & ~(R_FIXEDLEN | R_NOKEY | R_SNAPSHOT))
goto einval;
btopeninfo.flags = 0;
btopeninfo.cachesize = openinfo->cachesize;
btopeninfo.maxkeypage = 0;
btopeninfo.minkeypage = 0;
btopeninfo.psize = openinfo->psize;
btopeninfo.compare = NULL;
btopeninfo.prefix = NULL;
btopeninfo.lorder = openinfo->lorder;
dbp = __bt_open(openinfo->bfname,
O_RDWR, S_IRUSR | S_IWUSR, &btopeninfo, dflags);
} else
dbp = __bt_open(NULL, O_RDWR, S_IRUSR | S_IWUSR, NULL, dflags);
if (dbp == NULL)
goto err;
/*
* Some fields in the tree structure are recno specific. Fill them
* in and make the btree structure look like a recno structure. We
* don't change the bt_ovflsize value, it's close enough and slightly
* bigger.
*/
t = dbp->internal;
if (openinfo) {
if (openinfo->flags & R_FIXEDLEN) {
F_SET(t, R_FIXLEN);
t->bt_reclen = openinfo->reclen;
if (t->bt_reclen == 0)
goto einval;
}
t->bt_bval = openinfo->bval;
} else
t->bt_bval = '\n';
F_SET(t, R_RECNO);
if (fname == NULL)
F_SET(t, R_EOF | R_INMEM);
else
t->bt_rfd = rfd;
if (fname != NULL) {
/*
* In 4.4BSD, stat(2) returns true for ISSOCK on pipes.
* Unfortunately, that's not portable, so we use lseek
* and check the errno values.
*/
errno = 0;
if (lseek(rfd, (off_t)0, SEEK_CUR) == -1 && errno == ESPIPE) {
switch (flags & O_ACCMODE) {
case O_RDONLY:
F_SET(t, R_RDONLY);
break;
default:
goto einval;
}
slow: if ((t->bt_rfp = fdopen(rfd, "r")) == NULL)
goto err;
F_SET(t, R_CLOSEFP);
t->bt_irec =
F_ISSET(t, R_FIXLEN) ? __rec_fpipe : __rec_vpipe;
} else {
switch (flags & O_ACCMODE) {
case O_RDONLY:
F_SET(t, R_RDONLY);
break;
case O_RDWR:
break;
default:
goto einval;
}
if (fstat(rfd, &sb))
goto err;
/*
* Kluge -- we'd like to test to see if the file is too
* big to mmap. Since, we don't know what size or type
* off_t's or size_t's are, what the largest unsigned
* integral type is, or what random insanity the local
* C compiler will perpetrate, doing the comparison in
* a portable way is flatly impossible. Hope that mmap
* fails if the file is too large.
*/
if (sb.st_size == 0)
F_SET(t, R_EOF);
else {
#ifdef MMAP_NOT_AVAILABLE
/*
* XXX
* Mmap doesn't work correctly on many current
* systems. In particular, it can fail subtly,
* with cache coherency problems. Don't use it
* for now.
*/
t->bt_msize = sb.st_size;
if ((t->bt_smap = mmap(NULL, t->bt_msize,
PROT_READ, MAP_FILE | MAP_PRIVATE, rfd,
(off_t)0)) == (caddr_t)-1)
goto slow;
t->bt_cmap = t->bt_smap;
t->bt_emap = t->bt_smap + sb.st_size;
t->bt_irec = F_ISSET(t, R_FIXLEN) ?
__rec_fmap : __rec_vmap;
F_SET(t, R_MEMMAPPED);
#else
goto slow;
#endif
}
}
}
/* Use the recno routines. */
dbp->close = __rec_close;
dbp->del = __rec_delete;
dbp->fd = __rec_fd;
dbp->get = __rec_get;
dbp->put = __rec_put;
dbp->seq = __rec_seq;
dbp->sync = __rec_sync;
/* If the root page was created, reset the flags. */
if ((h = mpool_get(t->bt_mp, P_ROOT, 0)) == NULL)
goto err;
if ((h->flags & P_TYPE) == P_BLEAF) {
F_CLR(h, P_TYPE);
F_SET(h, P_RLEAF);
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
} else
mpool_put(t->bt_mp, h, 0);
if (openinfo && openinfo->flags & R_SNAPSHOT &&
!F_ISSET(t, R_EOF | R_INMEM) &&
t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR)
goto err;
return (dbp);
einval: errno = EINVAL;
err: sverrno = errno;
if (dbp != NULL)
(void)__bt_close(dbp);
if (fname != NULL)
(void)close(rfd);
errno = sverrno;
return (NULL);
}
/*
 * __REC_FD -- Return the file descriptor of the record file.
 *
 * Parameters:
 *	dbp:	pointer to access method
 *
 * Returns:
 *	The descriptor stored in the tree, or -1 with errno set to ENOENT
 *	when the database is in-memory only.
 */
int
__rec_fd(const DB *dbp)
{
	BTREE *tree = dbp->internal;

	/* Release whatever page an earlier call left pinned. */
	if (tree->bt_pinned != NULL) {
		mpool_put(tree->bt_mp, tree->bt_pinned, 0);
		tree->bt_pinned = NULL;
	}

	/* A file-backed database simply reports its descriptor. */
	if (!F_ISSET(tree, R_INMEM))
		return (tree->bt_rfd);

	/* In-memory database can't have a file descriptor. */
	errno = ENOENT;
	return (-1);
}

282
lib/libc/db/recno/rec_put.c Normal file
View file

@ -0,0 +1,282 @@
/* $NetBSD: rec_put.c,v 1.17 2008/09/11 12:58:00 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: rec_put.c,v 1.17 2008/09/11 12:58:00 joerg Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/types.h>
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <db.h>
#include "recno.h"
/*
 * __REC_PUT -- Add a recno item to the tree.
 *
 * Parameters:
 *	dbp:	pointer to access method
 *	key:	record number to store at (not read for R_CURSOR, which
 *		uses the cursor position); on success it is reset to
 *		reference the tree's own copy of that record number
 *	data:	data
 *	flags:	0, R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE or
 *		R_SETCURSOR
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is
 *	already in the tree and R_NOOVERWRITE specified.
 */
int
__rec_put(const DB *dbp, DBT *key, const DBT *data, u_int flags)
{
	BTREE *t;
	DBT fdata, tdata;
	recno_t nrec;
	void *p;
	int status, sverrno;

	t = dbp->internal;

	/* Toss any page pinned across calls. */
	if (t->bt_pinned != NULL) {
		mpool_put(t->bt_mp, t->bt_pinned, 0);
		t->bt_pinned = NULL;
	}

	/*
	 * If using fixed-length records, and the record is long, return
	 * EINVAL.  If it's short, pad it out.  Use the record data return
	 * memory, it's only short-term.
	 */
	if (F_ISSET(t, R_FIXLEN) && data->size != t->bt_reclen) {
		if (data->size > t->bt_reclen)
			goto einval;

		if (t->bt_rdata.size < t->bt_reclen) {
			/*
			 * Grow through a temporary so that the existing
			 * buffer is not lost if realloc() fails.
			 */
			p = t->bt_rdata.data == NULL ?
			    malloc(t->bt_reclen) :
			    realloc(t->bt_rdata.data, t->bt_reclen);
			if (p == NULL)
				return (RET_ERROR);
			t->bt_rdata.data = p;
			t->bt_rdata.size = t->bt_reclen;
		}
		memmove(t->bt_rdata.data, data->data, data->size);
		memset((char *)t->bt_rdata.data + data->size,
		    t->bt_bval, t->bt_reclen - data->size);
		fdata.data = t->bt_rdata.data;
		fdata.size = t->bt_reclen;
	} else {
		fdata.data = data->data;
		fdata.size = data->size;
	}

	/* Work out which record number the caller is addressing. */
	switch (flags) {
	case R_CURSOR:
		if (!F_ISSET(&t->bt_cursor, CURS_INIT))
			goto einval;
		nrec = t->bt_cursor.rcursor;
		break;
	case R_SETCURSOR:
		if ((nrec = *(recno_t *)key->data) == 0)
			goto einval;
		break;
	case R_IAFTER:
		/* "After record 0" means "before record 1". */
		if ((nrec = *(recno_t *)key->data) == 0) {
			nrec = 1;
			flags = R_IBEFORE;
		}
		break;
	case 0:
	case R_IBEFORE:
		if ((nrec = *(recno_t *)key->data) == 0)
			goto einval;
		break;
	case R_NOOVERWRITE:
		if ((nrec = *(recno_t *)key->data) == 0)
			goto einval;
		if (nrec <= t->bt_nrecs)
			return (RET_SPECIAL);
		break;
	default:
einval:		errno = EINVAL;
		return (RET_ERROR);
	}

	/*
	 * Make sure that records up to and including the put record are
	 * already in the database.  If skipping records, create empty ones.
	 */
	if (nrec > t->bt_nrecs) {
		if (!F_ISSET(t, R_EOF | R_INMEM) &&
		    t->bt_irec(t, nrec) == RET_ERROR)
			return (RET_ERROR);
		if (nrec > t->bt_nrecs + 1) {
			if (F_ISSET(t, R_FIXLEN)) {
				if ((tdata.data = malloc(t->bt_reclen)) == NULL)
					return (RET_ERROR);
				tdata.size = t->bt_reclen;
				memset(tdata.data, t->bt_bval, tdata.size);
			} else {
				tdata.data = NULL;
				tdata.size = 0;
			}
			while (nrec > t->bt_nrecs + 1)
				if (__rec_iput(t,
				    t->bt_nrecs, &tdata, 0) != RET_SUCCESS) {
					/*
					 * Don't leak the pad record, and keep
					 * the errno of the failed insert.
					 */
					sverrno = errno;
					free(tdata.data);
					errno = sverrno;
					return (RET_ERROR);
				}
			/* tdata.data is NULL for variable-length records. */
			free(tdata.data);
		}
	}

	if ((status = __rec_iput(t, nrec - 1, &fdata, flags)) != RET_SUCCESS)
		return (status);

	if (flags == R_SETCURSOR)
		t->bt_cursor.rcursor = nrec;

	F_SET(t, R_MODIFIED);
	return (__rec_ret(t, NULL, nrec, key, NULL));
}
/*
 * __REC_IPUT -- Add a recno item to the tree.
 *
 * Parameters:
 *	t:	tree
 *	nrec:	record number
 *	data:	data
 *	flags:	R_IAFTER or R_IBEFORE to insert next to the located slot;
 *		any other value replaces the record when nrec is below the
 *		current record count
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS
 */
int
__rec_iput(BTREE *t, recno_t nrec, const DBT *data, u_int flags)
{
	DBT ovfl_ref;
	EPG *where;
	PAGE *page;
	indx_t slot, nslots;
	pgno_t ovfl_pg;
	uint32_t need;
	enum SRCHOP op;
	int leaf_flags, rc;
	char *dest, ovfl_buf[NOVFLSIZE];

	/*
	 * Data too large for a page goes onto indirect pages; what is then
	 * stored in the leaf is the (page number, length) pair naming them.
	 *
	 * XXX
	 * If the insert fails later on, these pages aren't recovered.
	 */
	leaf_flags = 0;
	if (data->size > t->bt_ovflsize) {
		if (__ovfl_put(t, data, &ovfl_pg) == RET_ERROR)
			return (RET_ERROR);
		ovfl_ref.data = ovfl_buf;
		ovfl_ref.size = NOVFLSIZE;
		*(pgno_t *)(void *)ovfl_buf = ovfl_pg;
		_DBFIT(data->size, uint32_t);
		*(uint32_t *)(void *)(ovfl_buf + sizeof(pgno_t)) =
		    (uint32_t)data->size;
		leaf_flags = P_BIGDATA;
		data = &ovfl_ref;
	}

	/*
	 * Appending past the end and the explicit insert-before/after
	 * requests search in insert mode; everything else is a plain search.
	 */
	if (nrec > t->bt_nrecs || flags == R_IAFTER || flags == R_IBEFORE)
		op = SINSERT;
	else
		op = SEARCH;

	/* __rec_search pins the returned page. */
	where = __rec_search(t, nrec, op);
	if (where == NULL)
		return (RET_ERROR);
	page = where->page;
	slot = where->index;

	/*
	 * R_IAFTER inserts one slot past the located record and R_IBEFORE
	 * right at it.  Otherwise an existing record is removed first so
	 * that the new one takes its place.
	 *
	 * Pages are split as required.
	 */
	if (flags == R_IAFTER) {
		++slot;
	} else if (flags != R_IBEFORE) {
		if (nrec < t->bt_nrecs &&
		    __rec_dleaf(t, page, (uint32_t)slot) == RET_ERROR) {
			mpool_put(t->bt_mp, page, 0);
			return (RET_ERROR);
		}
	}

	/*
	 * If not enough room, split the page.  The split code will insert
	 * the key and data and unpin the current page.
	 */
	need = NRLEAFDBT(data->size);
	if ((uint32_t)(page->upper - page->lower) < need + sizeof(indx_t)) {
		rc = __bt_split(t, page, NULL, data, leaf_flags, need,
		    (uint32_t)slot);
		if (rc == RET_SUCCESS)
			++t->bt_nrecs;
		return (rc);
	}

	/* Inserting into the offset array: shift the later pointers up. */
	nslots = NEXTINDEX(page);
	if (slot < nslots)
		memmove(page->linp + slot + 1, page->linp + slot,
		    (nslots - slot) * sizeof(indx_t));
	page->lower += sizeof(indx_t);

	/* Carve the record out of the top of the free space and fill it. */
	page->upper -= need;
	page->linp[slot] = page->upper;
	dest = (char *)(void *)page + page->upper;
	WR_RLEAF(dest, data, leaf_flags);

	++t->bt_nrecs;
	F_SET(t, B_MODIFIED);
	mpool_put(t->bt_mp, page, MPOOL_DIRTY);

	return (RET_SUCCESS);
}

View file

@ -0,0 +1,130 @@
/* $NetBSD: rec_search.c,v 1.14 2008/09/11 12:58:00 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: rec_search.c,v 1.14 2008/09/11 12:58:00 joerg Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/types.h>
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <db.h>
#include "recno.h"
/*
 * __REC_SEARCH -- Search a btree for a key.
 *
 * Parameters:
 *	t:	tree to search
 *	recno:	key to find
 *	op:	search operation
 *
 * Returns:
 *	The EPG for matching record, if any, or the EPG for the location
 *	of the key, if it were inserted into the tree, is entered into
 *	the bt_cur field of the tree.  A pointer to the field is returned.
 *	The leaf page named by the EPG is returned still held from
 *	mpool_get().  NULL is returned if a page could not be fetched;
 *	errno is left as that failure set it.
 */
EPG *
__rec_search(BTREE *t, recno_t recno, enum SRCHOP op)
{
	PAGE *page;
	RINTERNAL *ent;
	EPGNO *up;
	pgno_t pgno;
	indx_t slot, nslots;
	recno_t skipped;
	int saved_errno;

	BT_CLR(t);
	pgno = P_ROOT;
	skipped = 0;
	for (;;) {
		page = mpool_get(t->bt_mp, pgno, 0);
		if (page == NULL)
			break;

		/* Reached a leaf: the slot is the offset within it. */
		if (page->flags & P_RLEAF) {
			t->bt_cur.page = page;
			t->bt_cur.index = recno - skipped;
			return (&t->bt_cur);
		}

		/*
		 * Walk the internal entries, accumulating the record counts
		 * of the subtrees passed over, and stop on the entry that
		 * covers recno or on the last entry of the page.
		 */
		slot = 0;
		nslots = NEXTINDEX(page);
		for (;;) {
			ent = GETRINTERNAL(page, slot);
			++slot;
			if (slot == nslots || skipped + ent->nrecs > recno)
				break;
			skipped += ent->nrecs;
		}

		/* Remember the way down, then pick up the child page. */
		BT_PUSH(t, pgno, slot - 1);
		pgno = ent->pgno;

		/* Adjust the chosen entry's record count as op requires. */
		if (op == SDELETE) {
			--ent->nrecs;
			mpool_put(t->bt_mp, page, MPOOL_DIRTY);
		} else if (op == SINSERT) {
			++ent->nrecs;
			mpool_put(t->bt_mp, page, MPOOL_DIRTY);
		} else if (op == SEARCH) {
			mpool_put(t->bt_mp, page, 0);
		}
	}

	/*
	 * Try and recover the tree: undo the count adjustments made on the
	 * pages recorded on the stack, preserving the caller-visible errno.
	 */
	saved_errno = errno;
	while (op != SEARCH && (up = BT_POP(t)) != NULL) {
		page = mpool_get(t->bt_mp, up->pgno, 0);
		if (page == NULL)
			break;
		if (op == SINSERT)
			--GETRINTERNAL(page, up->index)->nrecs;
		else
			++GETRINTERNAL(page, up->index)->nrecs;
		mpool_put(t->bt_mp, page, MPOOL_DIRTY);
	}
	errno = saved_errno;
	return (NULL);
}

135
lib/libc/db/recno/rec_seq.c Normal file
View file

@ -0,0 +1,135 @@
/* $NetBSD: rec_seq.c,v 1.14 2008/09/11 12:58:00 joerg Exp $ */
/*-
* Copyright (c) 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: rec_seq.c,v 1.14 2008/09/11 12:58:00 joerg Exp $");
#endif
#ifndef __minix
#include "namespace.h"
#endif
#include <sys/types.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <db.h>
#include "recno.h"
/*
 * __REC_SEQ -- Recno sequential scan interface.
 *
 * Parameters:
 *	dbp:	pointer to access method
 *	key:	key for positioning and return value
 *	data:	data return value
 *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
 *
 * Returns:
 *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
 */
int
__rec_seq(const DB *dbp, DBT *key, DBT *data, u_int flags)
{
	BTREE *t = dbp->internal;
	EPG *hit;
	recno_t want;
	int rc;

	/* Release whatever page an earlier call left pinned. */
	if (t->bt_pinned != NULL) {
		mpool_put(t->bt_mp, t->bt_pinned, 0);
		t->bt_pinned = NULL;
	}

	/* Turn the request into the record number to fetch. */
	switch (flags) {
	case R_CURSOR:
		want = *(recno_t *)key->data;
		if (want == 0) {
			errno = EINVAL;
			return (RET_ERROR);
		}
		break;
	case R_NEXT:
	case R_FIRST:
		/* R_NEXT with no cursor yet behaves like R_FIRST. */
		if (flags == R_NEXT && F_ISSET(&t->bt_cursor, CURS_INIT))
			want = t->bt_cursor.rcursor + 1;
		else
			want = 1;
		break;
	case R_PREV:
	case R_LAST:
		/* R_PREV with no cursor yet behaves like R_LAST. */
		if (flags == R_PREV && F_ISSET(&t->bt_cursor, CURS_INIT)) {
			want = t->bt_cursor.rcursor - 1;
			if (want == 0)
				return (RET_SPECIAL);
		} else {
			/* Pull in the rest of the input to find the end. */
			if (!F_ISSET(t, R_EOF | R_INMEM) &&
			    t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR)
				return (RET_ERROR);
			want = t->bt_nrecs;
		}
		break;
	default:
		errno = EINVAL;
		return (RET_ERROR);
	}

	/*
	 * If the record isn't in the tree yet, try reading more input; if
	 * it still isn't there afterwards, there is nothing to return.
	 */
	if (t->bt_nrecs == 0 || want > t->bt_nrecs) {
		if (!F_ISSET(t, R_EOF | R_INMEM)) {
			rc = t->bt_irec(t, want);
			if (rc != RET_SUCCESS)
				return (rc);
		}
		if (t->bt_nrecs == 0 || want > t->bt_nrecs)
			return (RET_SPECIAL);
	}

	hit = __rec_search(t, want - 1, SEARCH);
	if (hit == NULL)
		return (RET_ERROR);

	/* The cursor now sits on the record being returned. */
	F_SET(&t->bt_cursor, CURS_INIT);
	t->bt_cursor.rcursor = want;

	rc = __rec_ret(t, hit, want, key, data);

	/*
	 * With B_DB_LOCK set the page is released right away; otherwise it
	 * stays pinned until the next call into the access method.
	 */
	if (F_ISSET(t, B_DB_LOCK))
		mpool_put(t->bt_mp, hit->page, 0);
	else
		t->bt_pinned = hit->page;
	return (rc);
}

View file

@ -0,0 +1,122 @@
/* $NetBSD: rec_utils.c,v 1.12 2008/09/10 17:52:36 joerg Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef __minix
__RCSID("$NetBSD: rec_utils.c,v 1.12 2008/09/10 17:52:36 joerg Exp $");
#endif
#include <sys/param.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <db.h>
#include "recno.h"
/*
 * __rec_ret --
 *	Build return data.
 *
 * Parameters:
 *	t:	tree
 *	e:	key/data pair to be returned (only examined when data is
 *		not NULL)
 *	nrec:	record number
 *	key:	user's key structure, or NULL to skip the key
 *	data:	user's data structure, or NULL to skip the data
 *
 * Returns:
 *	RET_SUCCESS, RET_ERROR.
 */
int
__rec_ret(BTREE *t, EPG *e, recno_t nrec, DBT *key, DBT *data)
{
	RLEAF *leaf;
	void *buf;
	size_t need;

	if (key != NULL) {
		/* We have to copy the key, it's not on the page. */
		if (t->bt_rkey.size < sizeof(recno_t)) {
			if (t->bt_rkey.data == NULL)
				buf = malloc(sizeof(recno_t));
			else
				buf = realloc(t->bt_rkey.data,
				    sizeof(recno_t));
			if (buf == NULL)
				return (RET_ERROR);
			t->bt_rkey.data = buf;
			t->bt_rkey.size = sizeof(recno_t);
		}
		memmove(t->bt_rkey.data, &nrec, sizeof(recno_t));
		key->size = sizeof(recno_t);
		key->data = t->bt_rkey.data;
	}

	if (data == NULL)
		return (RET_SUCCESS);

	/*
	 * We must copy big keys/data to make them contiguous.  Otherwise,
	 * leave the page pinned and don't copy unless the user specified
	 * concurrent access.
	 */
	leaf = GETRLEAF(e->page, e->index);

	/* Big data: gather it from its overflow pages. */
	if (leaf->flags & P_BIGDATA) {
		if (__ovfl_get(t, leaf->bytes,
		    &data->size, &t->bt_rdata.data, &t->bt_rdata.size))
			return (RET_ERROR);
		data->data = t->bt_rdata.data;
		return (RET_SUCCESS);
	}

	/* No copy requested: hand back a pointer into the page itself. */
	if (!F_ISSET(t, B_DB_LOCK)) {
		data->size = leaf->dsize;
		data->data = leaf->bytes;
		return (RET_SUCCESS);
	}

	/* Use +1 in case the first record retrieved is 0 length. */
	need = leaf->dsize + 1;
	if (need > t->bt_rdata.size) {
		if (t->bt_rdata.data == NULL)
			buf = malloc(need);
		else
			buf = realloc(t->bt_rdata.data, need);
		if (buf == NULL)
			return (RET_ERROR);
		t->bt_rdata.data = buf;
		t->bt_rdata.size = need;
	}
	memmove(t->bt_rdata.data, leaf->bytes, leaf->dsize);
	data->size = leaf->dsize;
	data->data = t->bt_rdata.data;
	return (RET_SUCCESS);
}

37
lib/libc/db/recno/recno.h Normal file
View file

@ -0,0 +1,37 @@
/* $NetBSD: recno.h,v 1.6 2003/08/07 16:42:44 agc Exp $ */
/*-
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)recno.h 8.1 (Berkeley) 6/4/93
*/
/*
 * Operation passed to __rec_search().  On each internal page visited on the
 * way down, SDELETE decrements and SINSERT increments the record count of
 * the entry that is followed; SEARCH leaves the counts untouched.
 */
enum SRCHOP { SDELETE, SINSERT, SEARCH}; /* Rec_search operation. */
#include "../btree/btree.h"
#include "extern.h"