diff --git a/distrib/sets/lists/minix/mi b/distrib/sets/lists/minix/mi index 66cba42c3..60ef01fb9 100644 --- a/distrib/sets/lists/minix/mi +++ b/distrib/sets/lists/minix/mi @@ -298,6 +298,7 @@ ./usr/bin/mktemp minix-sys ./usr/bin/more minix-sys ./usr/bin/mt minix-sys +./usr/bin/nbperf minix-sys ./usr/bin/newgrp minix-sys ./usr/bin/nice minix-sys ./usr/bin/nohup minix-sys @@ -1316,6 +1317,7 @@ ./usr/man/man1/mount.1 minix-sys ./usr/man/man1/mt.1 minix-sys ./usr/man/man1/mv.1 minix-sys +./usr/man/man1/nbperf.1 minix-sys ./usr/man/man1/newgrp.1 minix-sys ./usr/man/man1/nice.1 minix-sys ./usr/man/man1/nm.1 minix-sys diff --git a/releasetools/nbsd_ports b/releasetools/nbsd_ports index 3f558b623..285e5d06a 100644 --- a/releasetools/nbsd_ports +++ b/releasetools/nbsd_ports @@ -63,6 +63,7 @@ 2012/10/17 12:00:00,tools/mkheaderlist.sh 2012/10/17 12:00:00,tools/mpc 2012/10/17 12:00:00,tools/mpfr +2012/10/17 12:00:00,tools/nbperf 2012/10/17 12:00:00,tools/sed 2012/10/17 12:00:00,tools/tic 2012/10/17 12:00:00,usr.bin/gzip/Makefile @@ -70,6 +71,7 @@ 2012/10/17 12:00:00,usr.bin/lorder 2012/10/17 12:00:00,usr.bin/Makefile 2012/10/17 12:00:00,usr.bin/Makefile.inc +2012/10/17 12:00:00,usr.bin/nbperf 2012/10/17 12:00:00,usr.bin/passwd/Makefile 2012/10/17 12:00:00,usr.bin/sort 2012/10/17 12:00:00,usr.bin/xinstall diff --git a/tools/Makefile b/tools/Makefile index 9eabe1902..d43d89b5b 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -61,7 +61,7 @@ LINT_BITS= lint lint2 SUBDIR= host-mkdep .WAIT compat .WAIT \ binstall .WAIT mktemp .WAIT sed .WAIT \ genassym \ - lorder makewhatis mkdep mtree .WAIT \ + lorder makewhatis mkdep mtree nbperf .WAIT \ m4 \ .WAIT mkfs.mfs \ .WAIT yacc \ diff --git a/tools/nbperf/Makefile b/tools/nbperf/Makefile new file mode 100644 index 000000000..aa76fb1dd --- /dev/null +++ b/tools/nbperf/Makefile @@ -0,0 +1,6 @@ +# $NetBSD: Makefile,v 1.3 2011/11/01 21:16:29 joerg Exp $ + +HOSTPROGNAME= ${_TOOL_PREFIX}perf +HOST_SRCDIR= usr.bin/nbperf + +.include 
"${.CURDIR}/../Makefile.host" diff --git a/usr.bin/Makefile b/usr.bin/Makefile index 174191110..69085dde3 100644 --- a/usr.bin/Makefile +++ b/usr.bin/Makefile @@ -15,7 +15,7 @@ SUBDIR= \ login lorder m4 \ make man \ mkdep mktemp \ - newgrp \ + nbperf newgrp \ passwd \ sed seq \ sort stat su \ diff --git a/usr.bin/nbperf/Makefile b/usr.bin/nbperf/Makefile new file mode 100644 index 000000000..e777472a3 --- /dev/null +++ b/usr.bin/nbperf/Makefile @@ -0,0 +1,8 @@ +# $NetBSD: Makefile,v 1.1 2009/08/15 16:21:04 joerg Exp $ + +PROG= nbperf +SRCS= nbperf.c +SRCS+= nbperf-bdz.c nbperf-chm.c nbperf-chm3.c +SRCS+= graph2.c graph3.c + +.include diff --git a/usr.bin/nbperf/graph2.c b/usr.bin/nbperf/graph2.c new file mode 100644 index 000000000..2c32834bf --- /dev/null +++ b/usr.bin/nbperf/graph2.c @@ -0,0 +1,211 @@ +/* $NetBSD: graph2.c,v 1.4 2011/10/21 23:47:11 joerg Exp $ */ +/*- + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Joerg Sonnenberger. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +__RCSID("$NetBSD: graph2.c,v 1.4 2011/10/21 23:47:11 joerg Exp $"); + +#include +#include +#include +#include +#include + +#include "nbperf.h" +#include "graph2.h" +/* Sentinel edge index meaning "none" in every l_/r_ link field. */ +static const uint32_t unused = 0xffffffffU; +/* Record v/e and allocate zeroed vertex, edge and output-order arrays. */ +void +graph2_setup(struct graph2 *graph, uint32_t v, uint32_t e) +{ + graph->v = v; + graph->e = e; + + graph->verts = calloc(sizeof(struct vertex2), v); + graph->edges = calloc(sizeof(struct edge2), e); + graph->output_order = calloc(sizeof(uint32_t), e); + + if (graph->verts == NULL || graph->edges == NULL || + graph->output_order == NULL) + err(1, "malloc failed"); +} +/* Release the arrays from graph2_setup() and NULL the pointers. */ +void +graph2_free(struct graph2 *graph) +{ + free(graph->verts); + free(graph->edges); + free(graph->output_order); + + graph->verts = NULL; + graph->edges = NULL; + graph->output_order = NULL; +} +/* Return -1 (and flag has_duplicates) if two edges carry the same key. */ +static int +graph2_check_duplicates(struct nbperf *nbperf, struct graph2 *graph) +{ + struct vertex2 *v; + struct edge2 *e, *e2; + uint32_t i, j; + + for (i = 0; i < graph->e; ++i) { + e = &graph->edges[i]; + v = &graph->verts[e->left]; + j = v->l_edge; /* head of the left vertex's edge list */ + e2 = &graph->edges[j]; + for (;;) { + if (i < j && e->right == e2->right && + nbperf->keylens[i] == nbperf->keylens[j] && + memcmp(nbperf->keys[i], nbperf->keys[j], + nbperf->keylens[i]) == 0) { + nbperf->has_duplicates = 1; + return -1; + } + if (e2->l_next == unused) /* end of the list */ + break; + j = e2->l_next; + e2 = &graph->edges[j]; + 
} + } + return 0; +} +/* Hash keys into edges, link them per vertex; -1 on self-loop/duplicate. */ +int +graph2_hash(struct nbperf *nbperf, struct graph2 *graph) +{ + struct vertex2 *v; + uint32_t hashes[NBPERF_MAX_HASH_SIZE]; + size_t i; + + for (i = 0; i < graph->e; ++i) { + (*nbperf->compute_hash)(nbperf, + nbperf->keys[i], nbperf->keylens[i], hashes); + graph->edges[i].left = hashes[0] % graph->v; + graph->edges[i].right = hashes[1] % graph->v; + if (graph->edges[i].left == graph->edges[i].right) + return -1; /* both ends landed on one vertex */ + } + + for (i = 0; i < graph->v; ++i) { + graph->verts[i].l_edge = unused; + graph->verts[i].r_edge = unused; + } + + for (i = 0; i < graph->e; ++i) { + v = &graph->verts[graph->edges[i].left]; + if (v->l_edge != unused) + graph->edges[v->l_edge].l_prev = i; + graph->edges[i].l_next = v->l_edge; /* push edge i at the list head */ + graph->edges[i].l_prev = unused; + v->l_edge = i; + + v = &graph->verts[graph->edges[i].right]; + if (v->r_edge != unused) + graph->edges[v->r_edge].r_prev = i; + graph->edges[i].r_next = v->r_edge; /* same for the right-end list */ + graph->edges[i].r_prev = unused; + v->r_edge = i; + } + + if (nbperf->first_round) { /* duplicate scan runs on the first call only */ + nbperf->first_round = 0; + return graph2_check_duplicates(nbperf, graph); + } + + return 0; +} +/* Peel degree-1 vertices starting at v, logging each removed edge. */ +static void +graph2_remove_vertex(struct graph2 *graph, struct vertex2 *v) +{ + struct edge2 *e; + struct vertex2 *v2; + + for (;;) { + if (v->l_edge != unused && v->r_edge != unused) + break; /* two or more edges: cannot peel */ + if (v->l_edge == unused && v->r_edge == unused) + break; /* no edges left on this vertex */ + + if (v->l_edge != unused) { + e = &graph->edges[v->l_edge]; + if (e->l_next != unused) + break; + v->l_edge = unused; /* No other elements possible! */ + v2 = &graph->verts[e->right]; + if (e->r_prev == unused) + v2->r_edge = e->r_next; + else + graph->edges[e->r_prev].r_next = e->r_next; + if (e->r_next != unused) + graph->edges[e->r_next].r_prev = e->r_prev; + v = v2; + } else { + e = &graph->edges[v->r_edge]; + if (e->r_next != unused) + break; + v->r_edge = unused; /* No other elements possible! 
*/ + v2 = &graph->verts[e->left]; + if (e->l_prev == unused) + v2->l_edge = e->l_next; + else + graph->edges[e->l_prev].l_next = e->l_next; + if (e->l_next != unused) + graph->edges[e->l_next].l_prev = e->l_prev; + v = v2; + } + + graph->output_order[--graph->output_index] = e - graph->edges; /* filled back to front */ + } +} +/* Peel every edge into output_order; 0 on success, -1 if some remain. */ +int +graph2_output_order(struct graph2 *graph) +{ + size_t i; + + graph->output_index = graph->e; + + for (i = 0; i < graph->v; ++i) + graph2_remove_vertex(graph, &graph->verts[i]); + + if (graph->output_index != 0) + return -1; + + return 0; +} diff --git a/usr.bin/nbperf/graph2.h b/usr.bin/nbperf/graph2.h new file mode 100644 index 000000000..29ba7753e --- /dev/null +++ b/usr.bin/nbperf/graph2.h @@ -0,0 +1,63 @@ +/* $NetBSD: graph2.h,v 1.1 2009/08/15 16:21:05 joerg Exp $ */ +/*- + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Joerg Sonnenberger. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Implementation of common 2-graph routines: + * - build a 2-graph with hash-pairs as edges + * - check a 2-graph for acyclicness and compute an output order + */ + +struct vertex2 { + uint32_t l_edge, r_edge; /* first edge using this vertex as left / right end */ +}; + +struct edge2 { + uint32_t left, right; /* endpoint vertex indices */ + uint32_t l_prev, l_next; /* links in the left vertex's edge list */ + uint32_t r_prev, r_next; /* links in the right vertex's edge list */ +}; + +struct graph2 { + struct vertex2 *verts; + struct edge2 *edges; + uint32_t output_index; /* counts down from e as edges are peeled */ + uint32_t *output_order; /* edge indices, filled from the end */ + uint8_t *visited; /* NOTE(review): not referenced by graph2.c in this patch */ + uint32_t e, v; /* edge and vertex counts */ +}; + +void graph2_setup(struct graph2 *, uint32_t, uint32_t); +void graph2_free(struct graph2 *); + +int graph2_hash(struct nbperf *, struct graph2 *); +int graph2_output_order(struct graph2 *graph); diff --git a/usr.bin/nbperf/graph3.c b/usr.bin/nbperf/graph3.c new file mode 100644 index 000000000..df14fe564 --- /dev/null +++ b/usr.bin/nbperf/graph3.c @@ -0,0 +1,250 @@ +/* $NetBSD: graph3.c,v 1.4 2011/10/21 23:47:11 joerg Exp $ */ +/*- + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Joerg Sonnenberger. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +__RCSID("$NetBSD: graph3.c,v 1.4 2011/10/21 23:47:11 joerg Exp $"); + +#include +#include +#include +#include +#include + +#include "nbperf.h" +#include "graph3.h" +/* Sentinel edge index meaning "none" in every l_/m_/r_ link field. */ +static const uint32_t unused = 0xffffffffU; +/* Record v/e and allocate zeroed vertex, edge and output-order arrays. */ +void +graph3_setup(struct graph3 *graph, uint32_t v, uint32_t e) +{ + graph->v = v; + graph->e = e; + + graph->verts = calloc(sizeof(struct vertex3), v); + graph->edges = calloc(sizeof(struct edge3), e); + graph->output_order = calloc(sizeof(uint32_t), e); + + if (graph->verts == NULL || graph->edges == NULL || + graph->output_order == NULL) + err(1, "malloc failed"); +} +/* Release the arrays from graph3_setup() and NULL the pointers. */ +void +graph3_free(struct graph3 *graph) +{ + free(graph->verts); + free(graph->edges); + free(graph->output_order); + + graph->verts = NULL; + graph->edges = NULL; + graph->output_order = NULL; +} +/* Return -1 (and flag has_duplicates) if two edges carry the same key. */ +static int +graph3_check_duplicates(struct nbperf *nbperf, struct graph3 *graph) +{ + struct vertex3 *v; + struct edge3 *e, *e2; + uint32_t i, j; + + for (i = 0; i < graph->e; ++i) { + e = &graph->edges[i]; + v = &graph->verts[e->left]; + j = v->l_edge; /* head of the left vertex's edge list */ + e2 = &graph->edges[j]; + for (;;) { + if (i < j && e->middle == e2->middle && + e->right == e2->right && + nbperf->keylens[i] == nbperf->keylens[j] && + memcmp(nbperf->keys[i], nbperf->keys[j], + nbperf->keylens[i]) == 0) { + nbperf->has_duplicates = 1; + return -1; + } + if (e2->l_next == unused) /* end of the list */ + break; + j = e2->l_next; + e2 = &graph->edges[j]; + } + } + return 0; +} +/* Hash keys into edges, link them per vertex; -1 on shared ends/duplicate. */ +int +graph3_hash(struct nbperf *nbperf, struct graph3 *graph) +{ + struct vertex3 *v; + uint32_t hashes[NBPERF_MAX_HASH_SIZE]; + size_t i; + + for (i = 0; i < graph->e; ++i) { + (*nbperf->compute_hash)(nbperf, + nbperf->keys[i], nbperf->keylens[i], hashes); + graph->edges[i].left = hashes[0] % graph->v; + graph->edges[i].middle = hashes[1] % graph->v; + graph->edges[i].right = hashes[2] % graph->v; + if (graph->edges[i].left == graph->edges[i].middle) + return -1; /* all three ends must be distinct vertices */ + if 
(graph->edges[i].left == graph->edges[i].right) + return -1; + if (graph->edges[i].middle == graph->edges[i].right) + return -1; + } + + for (i = 0; i < graph->v; ++i) { + graph->verts[i].l_edge = unused; + graph->verts[i].m_edge = unused; + graph->verts[i].r_edge = unused; + } + + for (i = 0; i < graph->e; ++i) { + v = &graph->verts[graph->edges[i].left]; + if (v->l_edge != unused) + graph->edges[v->l_edge].l_prev = i; + graph->edges[i].l_next = v->l_edge; /* push edge i at the list head */ + graph->edges[i].l_prev = unused; + v->l_edge = i; + + v = &graph->verts[graph->edges[i].middle]; + if (v->m_edge != unused) + graph->edges[v->m_edge].m_prev = i; + graph->edges[i].m_next = v->m_edge; + graph->edges[i].m_prev = unused; + v->m_edge = i; + + v = &graph->verts[graph->edges[i].right]; + if (v->r_edge != unused) + graph->edges[v->r_edge].r_prev = i; + graph->edges[i].r_next = v->r_edge; + graph->edges[i].r_prev = unused; + v->r_edge = i; + } + + if (nbperf->first_round) { /* duplicate scan runs on the first call only */ + nbperf->first_round = 0; + return graph3_check_duplicates(nbperf, graph); + } + + return 0; +} +/* If v has exactly one incident edge, log it and unlink it from all lists. */ +static void +graph3_remove_vertex(struct graph3 *graph, struct vertex3 *v) +{ + struct edge3 *e; + struct vertex3 *vl, *vm, *vr; + + if (v->l_edge != unused && v->m_edge != unused) + return; /* edges in two slots: degree > 1 */ + if (v->l_edge != unused && v->r_edge != unused) + return; + if (v->m_edge != unused && v->r_edge != unused) + return; + if (v->l_edge == unused && v->m_edge == unused && v->r_edge == unused) + return; /* no edges at all */ + + if (v->l_edge != unused) { + e = &graph->edges[v->l_edge]; + if (e->l_next != unused) + return; /* more than one edge in this slot */ + } else if (v->m_edge != unused) { + e = &graph->edges[v->m_edge]; + if (e->m_next != unused) + return; + } else { + if (v->r_edge == unused) + abort(); + e = &graph->edges[v->r_edge]; + if (e->r_next != unused) + return; + } + + graph->output_order[--graph->output_index] = e - graph->edges; /* filled back to front */ + + vl = &graph->verts[e->left]; + vm = &graph->verts[e->middle]; + vr = &graph->verts[e->right]; + + if (e->l_prev == unused) + vl->l_edge = e->l_next; + else 
+ graph->edges[e->l_prev].l_next = e->l_next; + if (e->l_next != unused) + graph->edges[e->l_next].l_prev = e->l_prev; + + if (e->m_prev == unused) + vm->m_edge = e->m_next; + else + graph->edges[e->m_prev].m_next = e->m_next; + if (e->m_next != unused) + graph->edges[e->m_next].m_prev = e->m_prev; + + if (e->r_prev == unused) + vr->r_edge = e->r_next; + else + graph->edges[e->r_prev].r_next = e->r_next; + if (e->r_next != unused) + graph->edges[e->r_next].r_prev = e->r_prev; +} +/* Peel every edge into output_order; 0 on success, -1 if some remain. */ +int +graph3_output_order(struct graph3 *graph) +{ + struct edge3 *e; + size_t i; + + graph->output_index = graph->e; + + for (i = 0; i < graph->v; ++i) + graph3_remove_vertex(graph, &graph->verts[i]); + + for (i = graph->e; i > 0 && i > graph->output_index;) { /* revisit the ends of each peeled edge */ + --i; + e = &graph->edges[graph->output_order[i]]; + + graph3_remove_vertex(graph, &graph->verts[e->left]); + graph3_remove_vertex(graph, &graph->verts[e->middle]); + graph3_remove_vertex(graph, &graph->verts[e->right]); + } + + if (graph->output_index != 0) + return -1; + + return 0; +} diff --git a/usr.bin/nbperf/graph3.h b/usr.bin/nbperf/graph3.h new file mode 100644 index 000000000..ce6482df2 --- /dev/null +++ b/usr.bin/nbperf/graph3.h @@ -0,0 +1,62 @@ +/* $NetBSD: graph3.h,v 1.1 2009/08/15 16:21:05 joerg Exp $ */ +/*- + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Joerg Sonnenberger. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Implementation of common 3-graph routines: + * - build a 3-graph with hash-triple as edges + * - check a 3-graph for acyclicness and compute an output order + */ + +struct vertex3 { + uint32_t l_edge, m_edge, r_edge; /* first edge using this vertex in each slot */ +}; + +struct edge3 { + uint32_t left, middle, right; /* endpoint vertex indices */ + uint32_t l_prev, m_prev, l_next; /* x_prev/x_next: links in the slot-x edge list */ + uint32_t r_prev, m_next, r_next; +}; + +struct graph3 { + struct vertex3 *verts; + struct edge3 *edges; + uint32_t output_index; /* counts down from e as edges are peeled */ + uint32_t *output_order; /* edge indices, filled from the end */ + uint32_t e, v; /* edge and vertex counts */ +}; + +void graph3_setup(struct graph3 *, uint32_t, uint32_t); +void graph3_free(struct graph3 *); + +int graph3_hash(struct nbperf *, struct graph3 *); +int graph3_output_order(struct graph3 *); diff --git a/usr.bin/nbperf/nbperf-bdz.c b/usr.bin/nbperf/nbperf-bdz.c new file mode 100644 index 000000000..726c180c3 --- /dev/null +++ b/usr.bin/nbperf/nbperf-bdz.c @@ -0,0 +1,307 @@ +/* $NetBSD: nbperf-bdz.c,v 1.5 2012/09/25 20:53:46 joerg Exp $ */ +/*- + * Copyright (c) 2009, 2012 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Joerg Sonnenberger. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +__RCSID("$NetBSD: nbperf-bdz.c,v 1.5 2012/09/25 20:53:46 joerg Exp $"); + +#include +#include +#include +#include +#include + +#include "nbperf.h" + +/* + * A full description of the algorithm can be found in: + * "Simple and Space-Efficient Minimal Perfect Hash Functions" + * by Botelho, Pagh and Ziviani, proceedings of WADS 2007. + */ + +/* + * The algorithm is based on random, acyclic 3-graphs. + * + * Each edge in the graph represents a key. The vertices are the remainder of + * the hash function mod n. n = cm with c > 1.23. 
This ensures that + * an acyclic graph can be found with a very high probability. + * + * An acyclic graph has an edge order, where at least one vertex of + * each edge hasn't been seen before. The algorithm declares the first unvisited + * vertex as authoritative for the edge and assigns a 2bit value to unvisited + * vertices, so that the sum of all vertices of the edge modulo 3 is + * the index of the authoritative vertex. + */ + +#include "graph3.h" +/* Working data for one BDZ construction attempt. */ +struct state { + struct graph3 graph; + uint32_t *visited; /* 0 = unseen, 1 = seen, 2 + j = owns edge j */ + uint32_t *holes64k; /* holes before each block of 65536 vertices */ + uint16_t *holes64; /* holes since the block start, per 64 vertices */ + uint8_t *g; /* per-vertex value 0..2, or 3 for a hole */ + uint32_t *result_map; /* per edge: owner index minus preceding holes */ +}; +/* Pick each edge's owning vertex, then fill g[], hole tables and result_map. */ +static void +assign_nodes(struct state *state) +{ + struct edge3 *e; + size_t i, j; + uint32_t t, r, holes; + + for (i = 0; i < state->graph.v; ++i) + state->g[i] = 3; /* 3 = unassigned; counted as a hole below */ + + for (i = 0; i < state->graph.e; ++i) { + j = state->graph.output_order[i]; + e = &state->graph.edges[j]; + if (!state->visited[e->left]) { + r = 0; + t = e->left; + } else if (!state->visited[e->middle]) { + r = 1; + t = e->middle; + } else { + if (state->visited[e->right]) + abort(); + r = 2; + t = e->right; + } + + state->visited[t] = 2 + j; /* t is the owner of edge j */ + if (state->visited[e->left] == 0) + state->visited[e->left] = 1; + if (state->visited[e->middle] == 0) + state->visited[e->middle] = 1; + if (state->visited[e->right] == 0) + state->visited[e->right] = 1; + + state->g[t] = (9 + r - state->g[e->left] - state->g[e->middle] + - state->g[e->right]) % 3; + } + + holes = 0; + for (i = 0; i < state->graph.v; ++i) { + if (i % 65536 == 0) + state->holes64k[i >> 16] = holes; + + if (i % 64 == 0) + state->holes64[i >> 6] = holes - state->holes64k[i >> 16]; + + if (state->visited[i] > 1) { + j = state->visited[i] - 2; + state->result_map[j] = i - holes; + } + + if (state->g[i] == 3) + ++holes; + } +} +/* Emit the C source of the lookup function and, if requested, the key map. */ +static void +print_hash(struct nbperf *nbperf, struct state *state) +{ + uint64_t sum; + size_t i; + + fprintf(nbperf->output, "#include \n"); + fprintf(nbperf->output, "#include \n\n"); + + fprintf(nbperf->output, "%suint32_t\n", + 
nbperf->static_hash ? "static " : ""); + fprintf(nbperf->output, + "%s(const void * __restrict key, size_t keylen)\n", + nbperf->hash_name); + fprintf(nbperf->output, "{\n"); + + fprintf(nbperf->output, + "\tstatic const uint64_t g1[%" PRIu32 "] = {\n", + (state->graph.v + 63) / 64); /* unsigned size: PRIu32 */ + sum = 0; + for (i = 0; i < state->graph.v; ++i) { + sum |= ((uint64_t)state->g[i] & 1) << (i & 63); /* low bit of g */ + if (i % 64 == 63) { + fprintf(nbperf->output, "%s0x%016" PRIx64 "ULL,%s", + (i / 64 % 2 == 0 ? "\t " : " "), + sum, + (i / 64 % 2 == 1 ? "\n" : "")); + sum = 0; + } + } + if (i % 64 != 0) { + fprintf(nbperf->output, "%s0x%016" PRIx64 "ULL,%s", + (i / 64 % 2 == 0 ? "\t " : " "), + sum, + (i / 64 % 2 == 1 ? "\n" : "")); + } + fprintf(nbperf->output, "%s\t};\n", ((i + 63) / 64 % 2 ? "\n" : "")); /* odd word count leaves the row open */ + + fprintf(nbperf->output, + "\tstatic const uint64_t g2[%" PRIu32 "] = {\n", + (state->graph.v + 63) / 64); + sum = 0; + for (i = 0; i < state->graph.v; ++i) { + sum |= (((uint64_t)state->g[i] & 2) >> 1) << (i & 63); /* high bit of g */ + if (i % 64 == 63) { + fprintf(nbperf->output, "%s0x%016" PRIx64 "ULL,%s", + (i / 64 % 2 == 0 ? "\t " : " "), + sum, + (i / 64 % 2 == 1 ? "\n" : "")); + sum = 0; + } + } + if (i % 64 != 0) { + fprintf(nbperf->output, "%s0x%016" PRIx64 "ULL,%s", + (i / 64 % 2 == 0 ? "\t " : " "), + sum, + (i / 64 % 2 == 1 ? "\n" : "")); + } + fprintf(nbperf->output, "%s\t};\n", ((i + 63) / 64 % 2 ? "\n" : "")); /* same rule as for g1 */ + + fprintf(nbperf->output, + "\tstatic const uint32_t holes64k[%" PRIu32 "] = {\n", + (state->graph.v + 65535) / 65536); + for (i = 0; i < state->graph.v; i += 65536) + fprintf(nbperf->output, "%s0x%08" PRIx32 ",%s", + (i / 65536 % 4 == 0 ? "\t " : " "), + state->holes64k[i >> 16], + (i / 65536 % 4 == 3 ? "\n" : "")); + fprintf(nbperf->output, "%s\t};\n", (i / 65536 % 4 ? "\n" : "")); + + fprintf(nbperf->output, + "\tstatic const uint16_t holes64[%" PRIu32 "] = {\n", + (state->graph.v + 63) / 64); + for (i = 0; i < state->graph.v; i += 64) + fprintf(nbperf->output, "%s0x%04" PRIx32 ",%s", + (i / 64 % 4 == 0 ? 
"\t " : " "), + state->holes64[i >> 6], + (i / 64 % 4 == 3 ? "\n" : "")); + fprintf(nbperf->output, "%s\t};\n", (i / 64 % 4 ? "\n" : "")); + + fprintf(nbperf->output, "\tuint64_t m;\n"); + fprintf(nbperf->output, "\tuint32_t idx, i, idx2;\n"); + fprintf(nbperf->output, "\tuint32_t h[%zu];\n\n", nbperf->hash_size); + + (*nbperf->print_hash)(nbperf, "\t", "key", "keylen", "h"); + + fprintf(nbperf->output, "\n\th[0] = h[0] %% %" PRIu32 ";\n", + state->graph.v); + fprintf(nbperf->output, "\th[1] = h[1] %% %" PRIu32 ";\n", + state->graph.v); + fprintf(nbperf->output, "\th[2] = h[2] %% %" PRIu32 ";\n", + state->graph.v); + + fprintf(nbperf->output, + "\tidx = 9 + ((g1[h[0] >> 6] >> (h[0] & 63)) &1)" + "\t + ((g1[h[1] >> 6] >> (h[1] & 63)) & 1)" + "\t + ((g1[h[2] >> 6] >> (h[2] & 63)) & 1)" + "\t - ((g2[h[0] >> 6] >> (h[0] & 63)) & 1)" + "\t - ((g2[h[1] >> 6] >> (h[1] & 63)) & 1)" + "\t - ((g2[h[2] >> 6] >> (h[2] & 63)) & 1);" + ); + + fprintf(nbperf->output, + "\tidx = h[idx %% 3];\n"); + fprintf(nbperf->output, /* shift count is masked: idx may be >= 64 */ + "\tidx2 = idx - holes64[idx >> 6] - holes64k[idx >> 16];\n" + "\tidx2 -= popcount64(g1[idx >> 6] & g2[idx >> 6]\n" + "\t & (((uint64_t)1 << (idx & 63)) - 1));\n" + "\treturn idx2;"); + + fprintf(nbperf->output, "}\n"); + + if (nbperf->map_output != NULL) { + for (i = 0; i < state->graph.e; ++i) + fprintf(nbperf->map_output, "%" PRIu32 "\n", + state->result_map[i]); + } +} +/* Run one BDZ attempt: 0 if a hash was printed, -1 if the graph was rejected. */ +int +bdz_compute(struct nbperf *nbperf) +{ + struct state state; + int retval = -1; + uint32_t v, e; + + if (nbperf->c == 0) + nbperf->c = 1.24; + if (nbperf->c < 1.24) + errx(1, "The argument for option -c must be at least 1.24"); + if (nbperf->hash_size < 3) + errx(1, "The hash function must generate at least 3 values"); + + (*nbperf->seed_hash)(nbperf); + e = nbperf->n; + v = nbperf->c * nbperf->n; + if (1.24 * nbperf->n > v) /* compensate for the truncation above */ + ++v; + if (v < 10) + v = 10; + + graph3_setup(&state.graph, v, e); + + state.holes64k = calloc(sizeof(uint32_t), (v + 65535) / 65536); + state.holes64 = 
calloc(sizeof(uint16_t), (v + 63) / 64 ); + state.g = calloc(sizeof(uint32_t), v | 63); + state.visited = calloc(sizeof(uint32_t), v); + state.result_map = calloc(sizeof(uint32_t), e); + + if (state.holes64k == NULL || state.holes64 == NULL || + state.g == NULL || state.visited == NULL || + state.result_map == NULL) + err(1, "malloc failed"); + + if (graph3_hash(nbperf, &state.graph)) + goto failed; + if (graph3_output_order(&state.graph)) + goto failed; + assign_nodes(&state); + print_hash(nbperf, &state); + + retval = 0; + +failed: + graph3_free(&state.graph); + free(state.visited); + free(state.g); + free(state.holes64k); + free(state.holes64); + free(state.result_map); + return retval; +} diff --git a/usr.bin/nbperf/nbperf-chm.c b/usr.bin/nbperf/nbperf-chm.c new file mode 100644 index 000000000..68489922b --- /dev/null +++ b/usr.bin/nbperf/nbperf-chm.c @@ -0,0 +1,272 @@ +/* $NetBSD: nbperf-chm.c,v 1.3 2011/10/21 23:47:11 joerg Exp $ */ +/*- + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Joerg Sonnenberger. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +__RCSID("$NetBSD: nbperf-chm.c,v 1.3 2011/10/21 23:47:11 joerg Exp $"); + +#include +#include +#include +#include +#include + +#include "nbperf.h" + +#ifdef BUILD_CHM3 +#include "graph3.h" +#else +#include "graph2.h" +#endif + +/* + * A full description of the algorithm can be found in: + * "An optimal algorithm for generating minimal perfect hash functions" + * by Czech, Havas and Majewski in Information Processing Letters, + * 43(5):256-264, October 1992. + */ + +/* + * The algorithm is based on random, acyclic graphs. + * + * Each edge in the graph represents a key. The vertices are the remainder of + * the hash function mod n. n = cm with c > 2, otherwise the probability + * of finding an acyclic graph is very low (for 2-graphs). The constant + * for 3-graphs is 1.24. + * + * After the hashing phase, the graph is checked for cycles. + * A cycle-free graph is either empty or has a vertex of degree 1. + * Removing the edge for this vertex doesn't change this property, + * so applying this recursively reduces the size of the graph. + * If the graph is empty at the end of the process, it was acyclic. + * + * The assignment step now sets g[i] := 0 and processes the edges + * in reverse order of removal. That ensures that at least one vertex + * is always unvisited and can be assigned. 
+ */ +/* Working state of one build attempt: the hash graph, the g[] table being filled in and a per-vertex visited flag. */ +struct state { +#ifdef BUILD_CHM3 + struct graph3 graph; +#else + struct graph2 graph; +#endif + uint32_t *g; + uint8_t *visited; +}; +/* Walk the edges in output_order; for each edge set g[] of its first unvisited endpoint (or of its last endpoint) so that the endpoints' g[] values sum to the edge index modulo the edge count. */ +static void +assign_nodes(struct state *state) +{ +#ifdef BUILD_CHM3 + struct edge3 *e; +#else + struct edge2 *e; +#endif + size_t i; + uint32_t e_idx; + + for (i = 0; i < state->graph.e; ++i) { + e_idx = state->graph.output_order[i]; + e = &state->graph.edges[e_idx]; + +#ifdef BUILD_CHM3 + if (!state->visited[e->left]) { + state->g[e->left] = (2 * state->graph.e + e_idx + - state->g[e->middle] - state->g[e->right]) + % state->graph.e; + } else if (!state->visited[e->middle]) { + state->g[e->middle] = (2 * state->graph.e + e_idx + - state->g[e->left] - state->g[e->right]) + % state->graph.e; + } else { + state->g[e->right] = (2 * state->graph.e + e_idx + - state->g[e->left] - state->g[e->middle]) + % state->graph.e; + } + state->visited[e->left] = 1; + state->visited[e->middle] = 1; + state->visited[e->right] = 1; +#else + if (!state->visited[e->left]) { + state->g[e->left] = (state->graph.e + e_idx + - state->g[e->right]) % state->graph.e; + } else { + state->g[e->right] = (state->graph.e + e_idx + - state->g[e->left]) % state->graph.e; + } + state->visited[e->left] = 1; + state->visited[e->right] = 1; +#endif + } +} +/* Write the generated C function (g[] table plus lookup expression) to nbperf->output and, when a map stream is set, one index per key to nbperf->map_output. */ +static void +print_hash(struct nbperf *nbperf, struct state *state) +{ + uint32_t i, per_line; + const char *g_type; + int g_width; + + fprintf(nbperf->output, "#include <stdlib.h>\n\n"); + + fprintf(nbperf->output, "%suint32_t\n", + nbperf->static_hash ? 
"static " : ""); + fprintf(nbperf->output, + "%s(const void * __restrict key, size_t keylen)\n", + nbperf->hash_name); + fprintf(nbperf->output, "{\n"); + if (state->graph.v >= 65536) { + g_type = "uint32_t"; + g_width = 8; + per_line = 4; + } else if (state->graph.v >= 256) { + g_type = "uint16_t"; + g_width = 4; + per_line = 8; + } else { + g_type = "uint8_t"; + g_width = 2; + per_line = 10; + } + fprintf(nbperf->output, "\tstatic const %s g[%" PRIu32 "] = {\n", + g_type, state->graph.v); + for (i = 0; i < state->graph.v; ++i) { + fprintf(nbperf->output, "%s0x%0*" PRIx32 ",%s", + (i % per_line == 0 ? "\t " : " "), + g_width, state->g[i], + (i % per_line == per_line - 1 ? "\n" : "")); + } + if (i % per_line != 0) + fprintf(nbperf->output, "\n\t};\n"); + else + fprintf(nbperf->output, "\t};\n"); + fprintf(nbperf->output, "\tuint32_t h[%zu];\n\n", nbperf->hash_size); + (*nbperf->print_hash)(nbperf, "\t", "key", "keylen", "h"); +#ifdef BUILD_CHM3 + fprintf(nbperf->output, "\treturn (g[h[0] %% %" PRIu32 "] + " + "g[h[1] %% %" PRIu32 "] + " + "g[h[2] %% %" PRIu32"]) %% %" PRIu32 ";\n", + state->graph.v, state->graph.v, state->graph.v, state->graph.e); +#else + fprintf(nbperf->output, "\treturn (g[h[0] %% %" PRIu32 "] + " + "g[h[1] %% %" PRIu32"]) %% %" PRIu32 ";\n", + state->graph.v, state->graph.v, state->graph.e); +#endif + fprintf(nbperf->output, "}\n"); + + if (nbperf->map_output != NULL) { + for (i = 0; i < state->graph.e; ++i) + fprintf(nbperf->map_output, "%" PRIu32 "\n", i); + } +} +/* One build attempt with a fresh seed: returns 0 after printing the hash function, -1 when graph hashing or ordering failed (main() then retries). */ +int +#ifdef BUILD_CHM3 +chm3_compute(struct nbperf *nbperf) +#else +chm_compute(struct nbperf *nbperf) +#endif +{ + struct state state; + int retval = -1; + uint32_t v, e; + +#ifdef BUILD_CHM3 + if (nbperf->c == 0) + nbperf-> c = 1.24; + + if (nbperf->c < 1.24) + errx(1, "The argument for option -c must be at least 1.24"); + + if (nbperf->hash_size < 3) + errx(1, "The hash function must generate at least 3 values"); +#else + if (nbperf->c == 0) + nbperf-> c = 2; + + if (nbperf->c 
< 2) + errx(1, "The argument for option -c must be at least 2"); + + if (nbperf->hash_size < 2) + errx(1, "The hash function must generate at least 2 values"); +#endif + + (*nbperf->seed_hash)(nbperf); + e = nbperf->n; + v = nbperf->c * nbperf->n; +#ifdef BUILD_CHM3 + if (v == 1.24 * nbperf->n) + ++v; + if (v < 10) + v = 10; +#else + if (v == 2 * nbperf->n) + ++v; +#endif + + state.g = calloc(v, sizeof(*state.g)); + state.visited = calloc(v, sizeof(*state.visited)); + if (state.g == NULL || state.visited == NULL) + err(1, "malloc failed"); + +#ifdef BUILD_CHM3 + graph3_setup(&state.graph, v, e); + if (graph3_hash(nbperf, &state.graph)) + goto failed; + if (graph3_output_order(&state.graph)) + goto failed; +#else + graph2_setup(&state.graph, v, e); + if (graph2_hash(nbperf, &state.graph)) + goto failed; + if (graph2_output_order(&state.graph)) + goto failed; +#endif + assign_nodes(&state); + print_hash(nbperf, &state); + + retval = 0; + +failed: +#ifdef BUILD_CHM3 + graph3_free(&state.graph); +#else + graph2_free(&state.graph); +#endif + free(state.g); + free(state.visited); + return retval; +} diff --git a/usr.bin/nbperf/nbperf-chm3.c b/usr.bin/nbperf/nbperf-chm3.c new file mode 100644 index 000000000..d2a2adab3 --- /dev/null +++ b/usr.bin/nbperf/nbperf-chm3.c @@ -0,0 +1,4 @@ +/* $NetBSD: nbperf-chm3.c,v 1.1 2009/08/15 16:21:05 joerg Exp $ */ + +#define BUILD_CHM3 +#include "nbperf-chm.c" diff --git a/usr.bin/nbperf/nbperf.1 b/usr.bin/nbperf/nbperf.1 new file mode 100644 index 000000000..5da77d11b --- /dev/null +++ b/usr.bin/nbperf/nbperf.1 @@ -0,0 +1,143 @@ +.\" $NetBSD: nbperf.1,v 1.5 2012/09/25 20:53:46 joerg Exp $ +.\" +.\" Copyright (c) 2009 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" This code is derived from software contributed to The NetBSD Foundation +.\" by Joerg Sonnenberger.
+.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd September 25, 2012 +.Dt NBPERF 1 +.Os +.Sh NAME +.Nm nbperf +.Nd compute a perfect hash function +.Sh SYNOPSIS +.Nm +.Op Fl ps +.Op Fl a Ar algorithm +.Op Fl c Ar utilisation +.Op Fl h Ar hash +.Op Fl i Ar iterations +.Op Fl m Ar map-file +.Op Fl n Ar name +.Op Fl o Ar output +.Op Ar input +.Sh DESCRIPTION +.Nm +reads a number of keys one per line from standard input or +.Ar input . +It computes a minimal perfect hash function and writes it to stdout or +.Ar output . +The default algorithm is +.Qq Sy chm . +.Pp +The +.Fl m +argument instructs +.Nm +to write the resulting key mapping to +.Ar map-file . 
+Each line gives the result of the hash function for the corresponding input +key. +.Pp +The parameter +.Ar utilisation +determines the space efficiency. +.Pp +Supported arguments for +.Fl a : +.Bl -tag -width "chm" +.It Sy chm +This results in an order preserving minimal perfect hash function. +The +.Ar utilisation +must be at least 2, the default. +The number of iterations needed grows if the utilisation is very near to 2. +.It Sy chm3 +Similar to +.Ar chm . +The resulting hash function needs three instead of two table lookups when +compared to +.Ar chm . +The +.Ar utilisation +must be at least 1.24, the default. +This makes the output for +.Ar chm3 +noticeably smaller than the output for +.Ar chm . +.It Sy bdz +This results in a non-order preserving minimal perfect hash function. +Output size is approximately 2.79 bits per key for the default value of +.Ar utilisation , +1.24. +This is also the smallest supported value. +.El +.Pp +Supported arguments for +.Fl h : +.Bl -tag -width "mi_vector_hash" +.It Sy mi_vector_hash +Platform-independent version of Jenkins parallel hash. +See +.Xr mi_vector_hash 3 . +.El +.Pp +The number of iterations can be limited with +.Fl i . +.Nm +outputs a function matching +.Ft uint32_t +.Fn hash "const void * restrict" "size_t" +to stdout. +The function expects the key length as second argument, for strings not +including the terminating NUL. +It is the responsibility of the caller to pass in only valid keys or compare +the resulting index to the key. +The function name can be changed using +.Fl n Ar name . +If the +.Fl s +flag is specified, it will be static. +.Pp +After each failing iteration, a dot is written to stderr. +.Pp +.Nm +checks for duplicate keys on the first iteration that passed +basic hash distribution tests. +In that case, an error message is printed and the program terminates. +.Pp +If the +.Fl p +flag is specified, the hash function is seeded in a stable way.
+This may take longer than the normal random seed, but ensures +that the output is the same for repeated invocations as long as +the input is constant. +.Sh EXIT STATUS +.Ex -std +.Sh SEE ALSO +.Xr mi_vector_hash 3 +.Sh AUTHORS +.An J\(:org Sonnenberger diff --git a/usr.bin/nbperf/nbperf.c b/usr.bin/nbperf/nbperf.c new file mode 100644 index 000000000..dcb096203 --- /dev/null +++ b/usr.bin/nbperf/nbperf.c @@ -0,0 +1,258 @@ +/* $NetBSD: nbperf.c,v 1.4 2011/10/21 23:47:11 joerg Exp $ */ +/*- + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Joerg Sonnenberger. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +__RCSID("$NetBSD: nbperf.c,v 1.4 2011/10/21 23:47:11 joerg Exp $"); + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nbperf.h" + +static int predictable; + +static __dead +void usage(void) +{ + fprintf(stderr, + "%s [-ps] [-c utilisation] [-i iterations] [-n name] " + "[-o output] input\n", + getprogname()); + exit(1); +} + +#if HAVE_NBTOOL_CONFIG_H && !defined(__NetBSD__) +#define arc4random() rand() +#endif + +static void +mi_vector_hash_seed_hash(struct nbperf *nbperf) +{ + static uint32_t predictable_counter; + if (predictable) + nbperf->seed[0] = predictable_counter++; + else + nbperf->seed[0] = arc4random(); +} + +static void +mi_vector_hash_compute(struct nbperf *nbperf, const void *key, size_t keylen, + uint32_t *hashes) +{ + mi_vector_hash(key, keylen, nbperf->seed[0], hashes); +} + +static void +mi_vector_hash_print_hash(struct nbperf *nbperf, const char *indent, + const char *key, const char *keylen, const char *hash) +{ + fprintf(nbperf->output, + "%smi_vector_hash(%s, %s, 0x%08" PRIx32 "U, %s);\n", + indent, key, keylen, nbperf->seed[0], hash); +} + +static void +set_hash(struct nbperf *nbperf, const char *arg) +{ + if (strcmp(arg, "mi_vector_hash") == 0) { + nbperf->hash_size = 3; + nbperf->seed_hash = mi_vector_hash_seed_hash; + nbperf->compute_hash = mi_vector_hash_compute; + 
nbperf->print_hash = mi_vector_hash_print_hash; + return; + } + if (nbperf->hash_size > NBPERF_MAX_HASH_SIZE) + errx(1, "Hash function creates too many output values"); + errx(1, "Unknown hash function: %s", arg); +} + +int +main(int argc, char **argv) +{ + struct nbperf nbperf = { + .c = 0, + .hash_name = "hash", + .map_output = NULL, + .output = NULL, + .static_hash = 0, + .first_round = 1, + .has_duplicates = 0, + }; + FILE *input; + size_t curlen = 0, curalloc = 0; + char *line, *eos; + ssize_t line_len; + size_t line_allocated; + const void **keys = NULL; + size_t *keylens = NULL; + uint32_t max_iterations = 0xffffffU; + long long tmp; + int looped, ch; + int (*build_hash)(struct nbperf *) = chm_compute; + + set_hash(&nbperf, "mi_vector_hash"); + + while ((ch = getopt(argc, argv, "a:c:h:i:m:n:o:ps")) != -1) { + switch (ch) { + case 'a': + if (strcmp(optarg, "chm") == 0) + build_hash = chm_compute; + else if (strcmp(optarg, "chm3") == 0) + build_hash = chm3_compute; + else if (strcmp(optarg, "bdz") == 0) + build_hash = bdz_compute; + else + errx(1, "Unsupport algorithm: %s", optarg); + break; + case 'c': + errno = 0; + nbperf.c = strtod(optarg, &eos); + if (errno || eos[0] || !nbperf.c) + errx(2, "Invalid argument for -c"); + break; + case 'h': + set_hash(&nbperf, optarg); + break; + case 'i': + errno = 0; + tmp = strtoll(optarg, &eos, 0); + if (errno || eos == optarg || eos[0] || + tmp < 0 || tmp > 0xffffffffU) + errx(2, "Iteration count must be " + "a 32bit integer"); + max_iterations = (uint32_t)tmp; + break; + case 'm': + if (nbperf.map_output) + fclose(nbperf.map_output); + nbperf.map_output = fopen(optarg, "w"); + if (nbperf.map_output == NULL) + err(2, "cannot open map file"); + break; + case 'n': + nbperf.hash_name = optarg; + break; + case 'o': + if (nbperf.output) + fclose(nbperf.output); + nbperf.output = fopen(optarg, "w"); + if (nbperf.output == NULL) + err(2, "cannot open output file"); + break; + case 'p': + predictable = 1; + break; + case 's': 
+ nbperf.static_hash = 1; + break; + default: + usage(); + } + } + + argc -= optind; + argv += optind; + + if (argc > 1) + usage(); + + if (argc == 1) { + input = fopen(argv[0], "r"); + if (input == NULL) + err(1, "can't open input file"); + } else + input = stdin; + + if (nbperf.output == NULL) + nbperf.output = stdout; + + line = NULL; + line_allocated = 0; + while ((line_len = getline(&line, &line_allocated, input)) != -1) { + if (line_len && line[line_len - 1] == '\n') + --line_len; + if (curlen == curalloc) { + if (curalloc < 256) + curalloc = 256; + else + curalloc += curalloc; + keys = realloc(keys, curalloc * sizeof(*keys)); + if (keys == NULL) + err(1, "realloc failed"); + keylens = realloc(keylens, + curalloc * sizeof(*keylens)); + if (keylens == NULL) + err(1, "realloc failed"); + } + if ((keys[curlen] = strndup(line, line_len)) == NULL) + err(1, "malloc failed"); + keylens[curlen] = line_len; + ++curlen; + } + free(line); + + if (input != stdin) + fclose(input); + + nbperf.n = curlen; + nbperf.keys = keys; + nbperf.keylens = keylens; + + looped = 0; + while ((*build_hash)(&nbperf)) { + if (nbperf.has_duplicates) + errx(1, "Duplicate keys detected"); + fputc('.', stderr); + looped = 1; + if (max_iterations == 0xffffffffU) + continue; + if (--max_iterations == 0) { + fputc('\n', stderr); + errx(1, "Iteration count reached"); + } + } + if (looped) + fputc('\n', stderr); + + return 0; +} diff --git a/usr.bin/nbperf/nbperf.h b/usr.bin/nbperf/nbperf.h new file mode 100644 index 000000000..392287485 --- /dev/null +++ b/usr.bin/nbperf/nbperf.h @@ -0,0 +1,59 @@ +/* $NetBSD: nbperf.h,v 1.3 2010/03/03 01:55:04 joerg Exp $ */ +/*- + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Joerg Sonnenberger. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* Upper bound on nbperf.hash_size; set_hash() compares against it. */ +#define NBPERF_MAX_HASH_SIZE 3 +/* State shared by one nbperf run: output streams, options, the key set and the key hash callbacks. */ +struct nbperf { + FILE *output; /* stream the generated function is written to */ + FILE *map_output; /* optional key map stream (-m); NULL when unused */ + const char *hash_name; /* name of the generated function (-n) */ + int static_hash; /* non-zero: emit the function as static (-s) */ + size_t n; /* number of keys */ + const void * __restrict * keys; /* the keys, one per input line */ + const size_t *keylens; /* length of each key, newline excluded */ + int first_round, has_duplicates; /* main() starts with 1 and 0 and aborts once has_duplicates is set; presumably maintained by the graph hashing code -- confirm */ + + double c; /* utilisation (-c); 0 selects the algorithm default */ + + size_t hash_size; /* number of uint32_t values the key hash yields */ + void (*seed_hash)(struct nbperf *); /* pick a new seed */ + void (*print_hash)(struct nbperf *, const char *, const char *, const char *, + const char *); /* emit the key hash call: indent, key, keylen, result array */ + void (*compute_hash)(struct nbperf *, const void *, size_t, + uint32_t *); /* hash one key into the result array */ + uint32_t seed[1]; /* current seed of the key hash */ +}; +/* Algorithm entry points; main() calls the selected one again while it returns non-zero. */ +int chm_compute(struct nbperf *); +int chm3_compute(struct nbperf *); +int bdz_compute(struct nbperf *);