Upgrading sort, which is needed by lorder

Change-Id: I64ac0509f4360c947a677600db77e7612a7cbebd
This commit is contained in:
Lionel Sambuc 2012-12-05 15:36:48 +01:00
parent a7ab29bf57
commit 0fbbaa43e9
21 changed files with 3416 additions and 1307 deletions

View file

@ -23,7 +23,7 @@ SUBDIR= add_route arp ash at backup banner basename btrace cal \
ramdisk rarpd rawspeed rcp rdate readclock \
reboot remsync rev rget rlogin \
rotate rsh rshd service setup shar acksize \
sleep slip sort spell split sprofalyze sprofdiff srccrc \
sleep slip spell split sprofalyze sprofdiff srccrc \
stty svclog svrctl swifi sync synctree sysenv \
syslogd tail tcpd tcpdp tcpstat tee telnet \
telnetd term termcap tget time touch tr \

View file

@ -1,4 +0,0 @@
PROG= sort
MAN=
.include <bsd.prog.mk>

File diff suppressed because it is too large Load diff

View file

@ -15,7 +15,7 @@ MAN= ash.1 at.1 banner.1 basename.1 \
paste.1 ping.1 playwave.1 pr.1 prep.1 \
profile.1 ps.1 pwd.1 rcp.1 recwave.1 \
ref.1 remsync.1 rget.1 rlogin.1 rsh.1 rz.1 \
shar.1 acksize.1 sleep.1 sort.1 spell.1 \
shar.1 acksize.1 sleep.1 spell.1 \
split.1 stty.1 svc.1 svrctl.1 \
synctree.1 sysenv.1 sz.1 tail.1 tee.1 telnet.1 template.1 \
term.1 termcap.1 tget.1 time.1 tr.1 true.1 \

View file

@ -1,83 +0,0 @@
.TH SORT 1
.SH NAME
sort \- sort a file of ASCII lines
.SH SYNOPSIS
\fBsort\fR [\fB\-bcdf\&imnru\fR]\fR [\fB\-t\fIc\fR] [\fB\-o \fIname\fR] [\fB+\fIpos1\fR] [\fB\-\fIpos2\fR] \fIfile\fR ...\fR
.br
.de FL
.TP
\\fB\\$1\\fR
\\$2
..
.de EX
.TP 20
\\fB\\$1\\fR
# \\$2
..
.SH OPTIONS
.TP 5
.B \-b
# Skip leading blanks when making comparisons
.TP 5
.B \-c
# Check to see if a file is sorted
.TP 5
.B \-d
# Dictionary order: ignore punctuation
.TP 5
.B \-f
# Fold upper case onto lower case
.TP 5
.B \-i
# Ignore nonASCII characters
.TP 5
.B \-m
# Merge presorted files
.TP 5
.B \-n
# Numeric sort order (decimal)
.TP 5
.B \-x
# Numeric sort order (hex)
.TP 5
.B \-o
# Next argument is output file
.TP 5
.B \-r
# Reverse the sort order
.TP 5
.B \-t
# Following character is field separator
.TP 5
.B \-u
# Unique mode (delete duplicate lines)
.SH EXAMPLES
.TP 20
.B sort \-nr file
# Sort keys numerically, reversed
.TP 20
.B sort +2 \-4 file
# Sort using fields 2 and 3 as key
.TP 20
.B sort +2 \-t: \-o out
# Field separator is \fI:\fP
.TP 20
.B sort +.3 \-.6
# Characters 3 through 5 form the key
.SH DESCRIPTION
.PP
.I Sort
sorts one or more files.
If no files are specified, \fIstdin\fR is sorted.
Output is written on standard output, unless \fB\-o\fP is specified.
The options \fB+\fIpos1 \fB\-\fIpos2\fR use only fields \fIpos1\fR
up to but not including \fIpos2\fR as the sort key, where a field is a
string of characters delimited by spaces and tabs, unless a different field
delimiter is specified with \fB\-t\fR.
Both \fIpos1\fR and \fIpos2\fR have the form \fIm.n\fR where \fIm\fR tells
the number of fields and \fIn\fR tells the number of characters.
Either \fIm\fR or \fIn\fR may be omitted.
.SH "SEE ALSO"
.BR comm (1),
.BR grep (1),
.BR uniq (1).

View file

@ -69,6 +69,7 @@
2012/10/17 12:00:00,usr.bin/Makefile
2012/10/17 12:00:00,usr.bin/Makefile.inc
2012/10/17 12:00:00,usr.bin/passwd/Makefile
2012/10/17 12:00:00,usr.bin/sort
2012/10/17 12:00:00,usr.bin/xinstall
2012/10/17 12:00:00,usr.sbin/Makefile
2012/10/17 12:00:00,usr.sbin/Makefile.inc

View file

@ -18,7 +18,7 @@ SUBDIR= \
newgrp \
passwd \
sed seq \
stat su \
sort stat su \
tic \
uniq \
xinstall

15
usr.bin/sort/Makefile Normal file
View file

@ -0,0 +1,15 @@
# $NetBSD: Makefile,v 1.8 2009/09/10 22:02:40 dsl Exp $
# from: @(#)Makefile 8.1 (Berkeley) 6/6/93
PROG= sort
SRCS= append.c fields.c files.c fsort.c init.c msort.c sort.c tmp.c
SRCS+= radix_sort.c
LDADD+=-lutil
DPADD+=${LIBUTIL}
.if defined(__MINIX)
CPPFLAGS+= -Dlchown=chown -Dlchmod=chmod
.endif # defined(__MINIX)
.include <bsd.prog.mk>

94
usr.bin/sort/append.c Normal file
View file

@ -0,0 +1,94 @@
/* $NetBSD: append.c,v 1.23 2009/11/06 18:34:22 joerg Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Ben Harris and Jaromir Dolecek.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "sort.h"
__RCSID("$NetBSD: append.c,v 1.23 2009/11/06 18:34:22 joerg Exp $");
#include <stdlib.h>
/*
* copy sorted lines to output
* Ignore duplicates (marked with -ve keylen)
*/
void
append(RECHEADER **keylist, int nelem, FILE *fp, put_func_t put)
{
RECHEADER **cpos, **lastkey;
RECHEADER *crec;
lastkey = keylist + nelem;
if (REVERSE) {
for (cpos = lastkey; cpos-- > keylist;) {
crec = *cpos;
if (crec->keylen >= 0)
put(crec, fp);
}
} else {
for (cpos = keylist; cpos < lastkey; cpos++) {
crec = *cpos;
if (crec->keylen >= 0)
put(crec, fp);
}
}
}

377
usr.bin/sort/fields.c Normal file
View file

@ -0,0 +1,377 @@
/* $NetBSD: fields.c,v 1.32 2010/12/18 23:09:48 christos Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Ben Harris and Jaromir Dolecek.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* Subroutines to generate sort keys. */
#include "sort.h"
__RCSID("$NetBSD: fields.c,v 1.32 2010/12/18 23:09:48 christos Exp $");
#define SKIP_BLANKS(ptr) { \
if (BLANK & d_mask[*(ptr)]) \
while (BLANK & d_mask[*(++(ptr))]); \
}
#define NEXTCOL(pos) { \
if (!SEP_FLAG) \
while (BLANK & l_d_mask[*(++pos)]); \
while ((*(pos+1) != '\0') && !((FLD_D | REC_D_F) & l_d_mask[*++pos]));\
}
static u_char *enterfield(u_char *, const u_char *, struct field *, int);
static u_char *number(u_char *, const u_char *, u_char *, u_char *, int);
static u_char *length(u_char *, const u_char *, u_char *, u_char *, int);
#define DECIMAL_POINT '.'
/*
* constructs sort key with leading recheader, followed by the key,
* followed by the original line.
*/
length_t
enterkey(RECHEADER *keybuf, const u_char *keybuf_end, u_char *line_data,
size_t line_size, struct field fieldtable[])
/* keybuf: pointer to start of key */
{
int i;
u_char *l_d_mask;
u_char *lineend, *pos;
const u_char *endkey;
u_char *keypos;
struct coldesc *clpos;
int col = 1;
struct field *ftpos;
l_d_mask = d_mask;
pos = line_data - 1;
lineend = line_data + line_size-1;
/* don't include rec_delimiter */
for (i = 0; i < ncols; i++) {
clpos = clist + i;
for (; (col < clpos->num) && (pos < lineend); col++) {
NEXTCOL(pos);
}
if (pos >= lineend)
break;
clpos->start = SEP_FLAG ? pos + 1 : pos;
NEXTCOL(pos);
clpos->end = pos;
col++;
if (pos >= lineend) {
clpos->end = lineend;
i++;
break;
}
}
for (; i <= ncols; i++)
clist[i].start = clist[i].end = lineend;
if (clist[0].start < line_data)
clist[0].start++;
/*
* We write the sort keys (concatenated) followed by the
* original line data (for output) as the 'keybuf' data.
* keybuf->length is the number of key bytes + data bytes.
* keybuf->offset is the number of key bytes.
* We add a record separator weight after the key in case
* (as is usual) we need to preserve the order of equal lines,
* and for 'sort -u'.
* The key itself will have had the correct weight applied.
*/
keypos = keybuf->data;
endkey = keybuf_end - line_size - 1;
if (endkey <= keypos)
/* No room for any key bytes */
return 1;
for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++) {
if ((keypos = enterfield(keypos, endkey, ftpos,
fieldtable->flags)) == NULL)
return (1);
}
keybuf->offset = keypos - keybuf->data;
keybuf->length = keybuf->offset + line_size;
/*
* Posix requires that equal keys be further sorted by the
* entire original record.
* NetBSD has (at least for some time) kept equal keys in
* their original order.
* For 'sort -u' posix_sort is unset.
*/
keybuf->keylen = posix_sort ? keybuf->length : keybuf->offset;
memcpy(keypos, line_data, line_size);
return (0);
}
/*
* constructs a field (as defined by -k) within a key
*/
static u_char *
enterfield(u_char *tablepos, const u_char *endkey, struct field *cur_fld,
int gflags)
{
u_char *start, *end, *lineend, *mask, *lweight;
struct column icol, tcol;
u_int flags;
icol = cur_fld->icol;
tcol = cur_fld->tcol;
flags = cur_fld->flags;
start = icol.p->start;
lineend = clist[ncols].end;
if (flags & BI)
SKIP_BLANKS(start);
start += icol.indent;
start = min(start, lineend);
if (!tcol.num)
end = lineend;
else {
if (tcol.indent) {
end = tcol.p->start;
if (flags & BT)
SKIP_BLANKS(end);
end += tcol.indent;
end = min(end, lineend);
} else
end = tcol.p->end;
}
if (flags & L)
return length(tablepos, endkey, start, end, flags);
if (flags & N)
return number(tablepos, endkey, start, end, flags);
/* Bound check space - assuming nothing is skipped */
if (tablepos + (end - start) + 1 >= endkey)
return NULL;
mask = cur_fld->mask;
lweight = cur_fld->weights;
for (; start < end; start++) {
if (!mask || mask[*start]) {
*tablepos++ = lweight[*start];
}
}
/* Add extra byte (absent from lweight) to sort short keys correctly */
*tablepos++ = lweight[REC_D];
return tablepos;
}
/*
* Numbers are converted to a floating point format (exponent & mantissa)
* so that they compare correctly as sequence of unsigned bytes.
* Bytes 0x00 and 0xff are used to terminate positive and negative numbers
* to ensure that 0.123 sorts after 0.12 and -0.123 sorts before -0.12.
*
* The first byte contain the overall sign, exponent sign and some of the
* exponent. These have to be ordered (-ve value, decreasing exponent),
* zero, (+ve value, increasing exponent).
*
* The first byte is 0x80 for zero, 0xc0 for +ve with exponent 0.
* -ve values are the 1's compliments (so 0x7f isn't used!).
*
* This only leaves 63 byte values for +ve exponents - which isn't enough.
* The largest 4 exponent values are used to hold a byte count of the
* number of following bytes that contain 8 exponent bits per byte,
* This lets us sort exponents from -2^31 to +2^31.
*
* The mantissa is stored 2 digits per byte offset by 0x40, for negative
* numbers the order must be reversed (they are bit inverted).
*
* Reverse sorts are done by inverting the sign of the number.
*/
#define MAX_EXP_ENC ((int)sizeof(int))
static u_char *
number(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend,
int reverse)
{
int exponent = -1;
int had_dp = 0;
u_char *tline;
char ch;
unsigned int val;
u_char *last_nz_pos;
u_char negate;
if (reverse & R)
negate = 0xff;
else
negate = 0;
/* Give ourselves space for the key terminator */
bufend--;
/* Ensure we have enough space for the exponent */
if (pos + 1 + MAX_EXP_ENC > bufend)
return (NULL);
SKIP_BLANKS(line);
if (*line == '-') { /* set the sign */
negate ^= 0xff;
line++;
}
/* eat initial zeroes */
for (; *line == '0' && line < lineend; line++)
continue;
/* calculate exponents */
if (*line == DECIMAL_POINT) {
/* Decimal fraction */
had_dp = 1;
while (*++line == '0' && line < lineend)
exponent--;
} else {
/* Large (absolute) value, count digits */
for (tline = line; *tline >= '0' &&
*tline <= '9' && tline < lineend; tline++)
exponent++;
}
/* If the first/next character isn't a digit, value is zero */
if (*line < '1' || *line > '9' || line >= lineend) {
/* This may be "0", "0.00", "000" or "fubar" but sorts as 0 */
/* XXX what about NaN, NAN, inf and INF */
*pos++ = 0x80;
return pos;
}
/* Maybe here we should allow for e+12 (etc) */
if (exponent < 0x40 - MAX_EXP_ENC && -exponent < 0x40 - MAX_EXP_ENC) {
/* Value ok for simple encoding */
/* exponent 0 is 0xc0 for +ve numbers and 0x40 for -ve ones */
exponent += 0xc0;
*pos++ = negate ^ exponent;
} else {
/* Out or range for a single byte */
int c, t;
t = exponent > 0 ? exponent : -exponent;
/* Count how many 8-bit bytes are needed */
for (c = 0; ; c++) {
t >>= 8;
if (t == 0)
break;
}
/* 'c' better be 0..3 here - but probably 0..1 */
/* Offset just outside valid range */
t = c + 0x40 - MAX_EXP_ENC;
if (exponent < 0)
t = -t;
*pos++ = negate ^ (t + 0xc0);
/* now add each byte, most significant first */
for (; c >= 0; c--)
*pos++ = negate ^ (exponent >> (c * 8));
}
/* Finally add mantissa, 2 digits per byte */
for (last_nz_pos = pos; line < lineend; ) {
if (pos >= bufend)
return NULL;
ch = *line++;
val = (ch - '0') * 10;
if (val > 90) {
if (ch == DECIMAL_POINT && !had_dp) {
had_dp = 1;
continue;
}
break;
}
while (line < lineend) {
ch = *line++;
if (ch == DECIMAL_POINT && !had_dp) {
had_dp = 1;
continue;
}
if (ch < '0' || ch > '9')
line = lineend;
else
val += ch - '0';
break;
}
*pos++ = negate ^ (val + 0x40);
if (val != 0)
last_nz_pos = pos;
}
/* Add key terminator, deleting any trailing "00" */
*last_nz_pos++ = negate;
return (last_nz_pos);
}
static u_char *
length(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend,
int flag)
{
u_char buf[32];
int l;
SKIP_BLANKS(line);
l = snprintf((char *)buf, sizeof(buf), "%td", lineend - line);
return number(pos, bufend, buf, buf + l, flag);
}

276
usr.bin/sort/files.c Normal file
View file

@ -0,0 +1,276 @@
/* $NetBSD: files.c,v 1.41 2009/11/06 18:34:22 joerg Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Ben Harris and Jaromir Dolecek.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "sort.h"
#include "fsort.h"
__RCSID("$NetBSD: files.c,v 1.41 2009/11/06 18:34:22 joerg Exp $");
#include <string.h>
/* Align records in temporary files to avoid misaligned copies */
#define REC_ROUNDUP(n) (((n) + sizeof (long) - 1) & ~(sizeof (long) - 1))
static ssize_t seq(FILE *, u_char **);
/*
* this is called when there is no special key. It's only called
* in the first fsort pass.
*/
static u_char *opos;
static size_t osz;
void
makeline_copydown(RECHEADER *recbuf)
{
memmove(recbuf->data, opos, osz);
}
int
makeline(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *dummy2)
{
u_char *pos;
int c;
pos = recbuf->data;
if (osz != 0) {
/*
* Buffer shortage is solved by either of two ways:
* o flush previous buffered data and start using the
* buffer from start.
* makeline_copydown() above must be called.
* o realloc buffer
*
* This code has relied on realloc changing 'bufend',
* but that isn't necessarily true.
*/
pos += osz;
osz = 0;
}
while (pos < bufend) {
c = getc(fp);
if (c == EOF) {
if (pos == recbuf->data) {
FCLOSE(fp);
return EOF;
}
/* Add terminator to partial line */
c = REC_D;
}
*pos++ = c;
if (c == REC_D) {
recbuf->offset = 0;
recbuf->length = pos - recbuf->data;
recbuf->keylen = recbuf->length - 1;
return (0);
}
}
/* Ran out of buffer space... */
if (recbuf->data < bufend) {
/* Remember where the partial record is */
osz = pos - recbuf->data;
opos = recbuf->data;
}
return (BUFFEND);
}
/*
* This generates keys. It's only called in the first fsort pass
*/
int
makekey(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *ftbl)
{
static u_char *line_data;
static ssize_t line_size;
static int overflow = 0;
/* We get re-entered after returning BUFFEND - save old data */
if (overflow) {
overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
return overflow ? BUFFEND : 0;
}
line_size = seq(fp, &line_data);
if (line_size == 0) {
FCLOSE(fp);
return EOF;
}
if (line_size > bufend - recbuf->data) {
overflow = 1;
} else {
overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
}
return overflow ? BUFFEND : 0;
}
/*
* get a line of input from fp
*/
static ssize_t
seq(FILE *fp, u_char **line)
{
static u_char *buf;
static size_t buf_size = DEFLLEN;
u_char *end, *pos;
int c;
u_char *new_buf;
if (!buf) {
/* one-time initialization */
buf = malloc(buf_size);
if (!buf)
err(2, "malloc of linebuf for %zu bytes failed",
buf_size);
}
end = buf + buf_size;
pos = buf;
while ((c = getc(fp)) != EOF) {
*pos++ = c;
if (c == REC_D) {
*line = buf;
return pos - buf;
}
if (pos == end) {
/* Long line - double size of buffer */
/* XXX: Check here for stupidly long lines */
buf_size *= 2;
new_buf = realloc(buf, buf_size);
if (!new_buf)
err(2, "realloc of linebuf to %zu bytes failed",
buf_size);
end = new_buf + buf_size;
pos = new_buf + (pos - buf);
buf = new_buf;
}
}
if (pos != buf) {
/* EOF part way through line - add line terminator */
*pos++ = REC_D;
*line = buf;
return pos - buf;
}
return 0;
}
/*
* write a key/line pair to a temporary file
*/
void
putrec(const RECHEADER *rec, FILE *fp)
{
EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length), fp);
}
/*
* write a line to output
*/
void
putline(const RECHEADER *rec, FILE *fp)
{
EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
}
/*
* write dump of key to output (for -Dk)
*/
void
putkeydump(const RECHEADER *rec, FILE *fp)
{
EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->offset), fp);
}
/*
* get a record from a temporary file. (Used by merge sort.)
*/
int
geteasy(FILE *fp, RECHEADER *rec, u_char *end, struct field *dummy2)
{
length_t file_len;
int i;
(void)sizeof (char[offsetof(RECHEADER, length) == 0 ? 1 : -1]);
if ((u_char *)(rec + 1) > end)
return (BUFFEND);
if (!fread(&rec->length, 1, sizeof rec->length, fp)) {
fclose(fp);
return (EOF);
}
file_len = REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length);
if (end - rec->data < (ptrdiff_t)file_len) {
for (i = sizeof rec->length - 1; i >= 0; i--)
ungetc(*((char *) rec + i), fp);
return (BUFFEND);
}
fread(&rec->length + 1, file_len - sizeof rec->length, 1, fp);
return (0);
}

214
usr.bin/sort/fsort.c Normal file
View file

@ -0,0 +1,214 @@
/* $NetBSD: fsort.c,v 1.47 2010/02/05 21:58:41 enami Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Ben Harris and Jaromir Dolecek.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Read in a block of records (until 'enough').
* sort, write to temp file.
* Merge sort temp files into output file
* Small files miss out the temp file stage.
* Large files might get multiple merges.
*/
#include "sort.h"
#include "fsort.h"
__RCSID("$NetBSD: fsort.c,v 1.47 2010/02/05 21:58:41 enami Exp $");
#include <stdlib.h>
#include <string.h>
#define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1))
void
fsort(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl)
{
RECHEADER **keylist;
RECHEADER **keypos, **keyp;
RECHEADER *buffer;
size_t bufsize = DEFBUFSIZE;
u_char *bufend;
int mfct = 0;
int c, nelem;
get_func_t get;
RECHEADER *crec;
RECHEADER *nbuffer;
FILE *fp, *tmp_fp;
int file_no;
int max_recs = DEBUG('m') ? 16 : MAXNUM;
buffer = allocrec(NULL, bufsize);
bufend = (u_char *)buffer + bufsize;
/* Allocate double length keymap for radix_sort */
keylist = malloc(2 * max_recs * sizeof(*keylist));
if (buffer == NULL || keylist == NULL)
err(2, "failed to malloc initial buffer or keylist");
if (SINGL_FLD)
/* Key and data are one! */
get = makeline;
else
/* Key (merged key fields) added before data */
get = makekey;
file_no = 0;
#if defined(__minix)
/* LSC FIXME: Not very pretty, but reduce the diff */
#include "pathnames.h"
if (!strcmp(filelist->names[0], _PATH_STDIN))
fp = stdin;
else
#endif /* defined(__minix) */
fp = fopen(filelist->names[0], "r");
if (fp == NULL)
err(2, "%s", filelist->names[0]);
/* Loop through reads of chunk of input files that get sorted
* and then merged together. */
for (;;) {
keypos = keylist;
nelem = 0;
crec = buffer;
makeline_copydown(crec);
/* Loop reading records */
for (;;) {
c = get(fp, crec, bufend, ftbl);
/* 'c' is 0, EOF or BUFFEND */
if (c == 0) {
/* Save start of key in input buffer */
*keypos++ = crec;
if (++nelem == max_recs) {
c = BUFFEND;
break;
}
crec = (RECHEADER *)(crec->data + SALIGN(crec->length));
continue;
}
if (c == EOF) {
/* try next file */
if (++file_no >= nfiles)
/* no more files */
break;
#if defined(__minix)
if (!strcmp(filelist->names[0], _PATH_STDIN))
fp = stdin;
else
#endif /* defined(__minix) */
fp = fopen(filelist->names[file_no], "r");
if (fp == NULL)
err(2, "%s", filelist->names[file_no]);
continue;
}
if (nelem >= max_recs
|| (bufsize >= MAXBUFSIZE && nelem > 8))
/* Need to sort and save this lot of data */
break;
/* c == BUFFEND, and we can process more data */
/* Allocate a larger buffer for this lot of data */
bufsize *= 2;
nbuffer = allocrec(buffer, bufsize);
if (!nbuffer) {
err(2, "failed to realloc buffer to %zu bytes",
bufsize);
}
/* patch up keylist[] */
for (keyp = &keypos[-1]; keyp >= keylist; keyp--)
*keyp = nbuffer + (*keyp - buffer);
crec = nbuffer + (crec - buffer);
buffer = nbuffer;
bufend = (u_char *)buffer + bufsize;
}
/* Sort this set of records */
radix_sort(keylist, keylist + max_recs, nelem);
if (c == EOF && mfct == 0) {
/* all the data is (sorted) in the buffer */
append(keylist, nelem, outfp,
DEBUG('k') ? putkeydump : putline);
break;
}
/* Save current data to a temporary file for a later merge */
if (nelem != 0) {
tmp_fp = ftmp();
append(keylist, nelem, tmp_fp, putrec);
save_for_merge(tmp_fp, geteasy, ftbl);
}
mfct = 1;
if (c == EOF) {
/* merge to output file */
merge_sort(outfp,
DEBUG('k') ? putkeydump : putline, ftbl);
break;
}
}
free(keylist);
keylist = NULL;
free(buffer);
buffer = NULL;
}

78
usr.bin/sort/fsort.h Normal file
View file

@ -0,0 +1,78 @@
/* $NetBSD: fsort.h,v 1.17 2009/09/26 21:16:55 dsl Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Ben Harris and Jaromir Dolecek.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)fsort.h 8.1 (Berkeley) 6/6/93
*/
#define BUFSIZE (1<<20)
#define MAXNUM 131072 /* low guess at average record count */
#define BUFFEND (EOF-2)
#define MAXFCT 1000
#define DEFLLEN 65536
/*
* Default (initial) and maximum size of record buffer for fsort().
* Note that no more than MAXNUM records are stored in the buffer,
* even if the buffer is not full yet.
*/
#define DEFBUFSIZE (1 << 20) /* 1MB */
#define MAXBUFSIZE (8 << 20) /* 10 MB */

448
usr.bin/sort/init.c Normal file
View file

@ -0,0 +1,448 @@
/* $NetBSD: init.c,v 1.28 2010/12/18 23:09:48 christos Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Ben Harris and Jaromir Dolecek.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "sort.h"
__RCSID("$NetBSD: init.c,v 1.28 2010/12/18 23:09:48 christos Exp $");
#include <ctype.h>
#include <string.h>
static void insertcol(struct field *);
static const char *setcolumn(const char *, struct field *);
/*
* masks of ignored characters.
*/
static u_char dtable[NBINS], itable[NBINS];
/*
* parsed key options
*/
struct coldesc *clist = NULL;
int ncols = 0;
/*
* clist (list of columns which correspond to one or more icol or tcol)
* is in increasing order of columns.
* Fields are kept in increasing order of fields.
*/
/*
* keep clist in order--inserts a column in a sorted array
*/
static void
insertcol(struct field *field)
{
int i;
struct coldesc *p;
/* Make space for new item */
p = realloc(clist, (ncols + 2) * sizeof(*clist));
if (!p)
err(1, "realloc");
clist = p;
memset(&clist[ncols], 0, sizeof(clist[ncols]));
for (i = 0; i < ncols; i++)
if (field->icol.num <= clist[i].num)
break;
if (field->icol.num != clist[i].num) {
memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
clist[i].num = field->icol.num;
ncols++;
}
if (field->tcol.num && field->tcol.num != field->icol.num) {
for (i = 0; i < ncols; i++)
if (field->tcol.num <= clist[i].num)
break;
if (field->tcol.num != clist[i].num) {
memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
clist[i].num = field->tcol.num;
ncols++;
}
}
}
/*
* matches fields with the appropriate columns--n^2 but who cares?
*/
void
fldreset(struct field *fldtab)
{
int i;
fldtab[0].tcol.p = clist + ncols - 1;
for (++fldtab; fldtab->icol.num; ++fldtab) {
for (i = 0; fldtab->icol.num != clist[i].num; i++)
;
fldtab->icol.p = clist + i;
if (!fldtab->tcol.num)
continue;
for (i = 0; fldtab->tcol.num != clist[i].num; i++)
;
fldtab->tcol.p = clist + i;
}
}
/*
* interprets a column in a -k field
*/
static const char *
setcolumn(const char *pos, struct field *cur_fld)
{
struct column *col;
char *npos;
int tmp;
col = cur_fld->icol.num ? (&cur_fld->tcol) : (&cur_fld->icol);
col->num = (int) strtol(pos, &npos, 10);
pos = npos;
if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
errx(2, "field numbers must be positive");
if (*pos == '.') {
if (!col->num)
errx(2, "cannot indent end of line");
++pos;
col->indent = (int) strtol(pos, &npos, 10);
pos = npos;
if (&cur_fld->icol == col)
col->indent--;
if (col->indent < 0)
errx(2, "illegal offset");
}
for(; (tmp = optval(*pos, cur_fld->tcol.num)); pos++)
cur_fld->flags |= tmp;
if (cur_fld->icol.num == 0)
cur_fld->icol.num = 1;
return (pos);
}
int
setfield(const char *pos, struct field *cur_fld, int gflag)
{
cur_fld->mask = NULL;
pos = setcolumn(pos, cur_fld);
if (*pos == '\0') /* key extends to EOL. */
cur_fld->tcol.num = 0;
else {
if (*pos != ',')
errx(2, "illegal field descriptor");
setcolumn((++pos), cur_fld);
}
if (!cur_fld->flags)
cur_fld->flags = gflag;
if (REVERSE)
/* A local 'r' doesn't invert the global one */
cur_fld->flags &= ~R;
/* Assign appropriate mask table and weight table. */
cur_fld->weights = weight_tables[cur_fld->flags & (R | F)];
if (cur_fld->flags & I)
cur_fld->mask = itable;
else if (cur_fld->flags & D)
cur_fld->mask = dtable;
cur_fld->flags |= (gflag & (BI | BT));
if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */
cur_fld->flags &= ~BT;
if (cur_fld->tcol.num
&& !(!(cur_fld->flags & BI) && cur_fld->flags & BT)
&& (cur_fld->tcol.num <= cur_fld->icol.num
/* indent if 0 -> end of field, i.e. okay */
&& cur_fld->tcol.indent != 0
&& cur_fld->tcol.indent < cur_fld->icol.indent))
errx(2, "fields out of order");
insertcol(cur_fld);
return (cur_fld->tcol.num);
}
int
optval(int desc, int tcolflag)
{
switch(desc) {
case 'b':
if (!tcolflag)
return BI;
else
return BT;
case 'd': return D;
case 'f': return F;
case 'i': return I;
case 'l': return L;
case 'n': return N;
case 'r': return R;
default: return 0;
}
}
/*
* Return true if the options found in ARG, according to the getopt
* spec in OPTS, require an additional argv word as an option
* argument.
*/
static int
options_need_argument(const char *arg, const char *opts)
{
size_t pos;
const char *s;
/*assert(arg[0] == '-');*/
pos = 1;
while (arg[pos]) {
s = strchr(opts, arg[pos]);
if (s == NULL) {
/* invalid option */
return 0;
}
if (s[1] == ':') {
/* option requires argument */
if (arg[pos+1] == '\0') {
/* no argument in this arg */
return 1;
}
else {
/* argument is in this arg; no more options */
return 0;
}
}
pos++;
}
return 0;
}
/*
* Replace historic +SPEC arguments with appropriate -kSPEC.
*
* The form can be either a single +SPEC or a pair +SPEC -SPEC.
* The following -SPEC is not recognized unless it follows
* immediately.
*/
void
fixit(int *argc, char **argv, const char *opts)
{
int i, j, sawplus;
char *vpos, *tpos, spec[20];
int col, indent;
size_t sz;
sawplus = 0;
for (i = 1; i < *argc; i++) {
/*
* This loop must stop exactly where getopt will stop.
* Otherwise it turns e.g. "sort x +3" into "sort x
* -k4.1", which will croak if +3 was in fact really a
* file name. In order to do this reliably we need to
* be able to identify argv words that are option
* arguments.
*/
if (!strcmp(argv[i], "--")) {
/* End of options; stop. */
break;
}
if (argv[i][0] == '+') {
/* +POS argument */
sawplus = 1;
} else if (argv[i][0] == '-' && sawplus &&
isdigit((unsigned char)argv[i][1])) {
/* -POS argument */
sawplus = 0;
} else if (argv[i][0] == '-') {
/* other option */
sawplus = 0;
if (options_need_argument(argv[i], opts)) {
/* skip over the argument */
i++;
}
continue;
} else {
/* not an option at all; stop */
sawplus = 0;
break;
}
/*
* At this point argv[i] is an old-style spec. The
* sawplus flag used by the above loop logic also
* tells us if it's a +SPEC or -SPEC.
*/
/* parse spec */
tpos = argv[i]+1;
col = (int)strtol(tpos, &tpos, 10);
if (*tpos == '.') {
++tpos;
indent = (int) strtol(tpos, &tpos, 10);
} else
indent = 0;
/* tpos now points to the optional flags */
/*
* In the traditional form, x.0 means beginning of line;
* in the new form, x.0 means end of line. Adjust the
* value of INDENT accordingly.
*/
if (sawplus) {
/* +POS */
col += 1;
indent += 1;
} else {
/* -POS */
if (indent > 0)
col += 1;
}
/* make the new style spec */
sz = snprintf(spec, sizeof(spec), "%d.%d%s", col, indent,
tpos);
if (sawplus) {
/* Replace the +POS argument with new-style -kSPEC */
asprintf(&vpos, "-k%s", spec);
argv[i] = vpos;
} else {
/*
* Append the spec to the one from the
* preceding +POS argument, and remove the
* current argv element entirely.
*/
asprintf(&vpos, "%s,%s", argv[i-1], spec);
free(argv[i-1]);
argv[i-1] = vpos;
for (j=i; j < *argc; j++)
argv[j] = argv[j+1];
*argc -= 1;
i--;
}
}
}
/*
* ascii, Rascii, Ftable, and RFtable map
*
* Sorting 'weight' tables.
* Convert 'ascii' characters into their sort order.
* The 'F' variants fold lower case to upper equivalent
* The 'R' variants are for reverse sorting.
*
* The record separator (REC_D) never needs a weight, this frees one
* byte value as an 'end of key' marker. This must be 0 for normal
* weight tables, and 0xff for reverse weight tables - and is used
* to terminate keys so that short keys sort before (after if reverse)
* longer keys.
*
* The field separator has a normal weight - although it cannot occur
* within a key unless it is the default (space+tab).
*
* All other bytes map to the appropriate value for the sort order.
* Numeric sorts don't need any tables, they are reversed by negation.
*
* Global reverse sorts are done by writing the sorted keys in reverse
* order - the sort itself is stil forwards.
* This means that weights are only ever used when generating keys, any
* sort of the original data bytes is always forwards and unweighted.
*
* Note: this is only good for ASCII sorting. For different LC 's,
* all bets are off.
*
* itable[] and dtable[] are the masks for -i (ignore non-printables)
* and -d (only sort blank and alphanumerics).
*/
void
settables(void)
{
int i;
int next_weight = 1;
int rev_weight = 254;
ascii[REC_D] = 0;
Rascii[REC_D] = 255;
Ftable[REC_D] = 0;
RFtable[REC_D] = 255;
for (i = 0; i < 256; i++) {
if (i == REC_D)
continue;
ascii[i] = next_weight;
Rascii[i] = rev_weight;
if (Ftable[i] == 0) {
Ftable[i] = next_weight;
RFtable[i] = rev_weight;
Ftable[tolower(i)] = next_weight;
RFtable[tolower(i)] = rev_weight;
}
next_weight++;
rev_weight--;
if (i == '\n' || isprint(i))
itable[i] = 1;
if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
dtable[i] = 1;
}
}

439
usr.bin/sort/msort.c Normal file
View file

@ -0,0 +1,439 @@
/* $NetBSD: msort.c,v 1.30 2010/02/05 21:58:42 enami Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Ben Harris and Jaromir Dolecek.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "sort.h"
#include "fsort.h"
__RCSID("$NetBSD: msort.c,v 1.30 2010/02/05 21:58:42 enami Exp $");
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <util.h>
/* Subroutines using comparisons: merge sort and check order */
#define DELETE (1)
typedef struct mfile {
FILE *fp;
get_func_t get;
RECHEADER *rec;
u_char *end;
} MFILE;
static int cmp(RECHEADER *, RECHEADER *);
static int insert(struct mfile **, struct mfile *, int, int);
static void merge_sort_fstack(FILE *, put_func_t, struct field *);
/*
* Number of files merge() can merge in one pass.
*/
#define MERGE_FNUM 16
static struct mfile fstack[MERGE_FNUM];
static struct mfile fstack_1[MERGE_FNUM];
static struct mfile fstack_2[MERGE_FNUM];
static int fstack_count, fstack_1_count, fstack_2_count;
void
save_for_merge(FILE *fp, get_func_t get, struct field *ftbl)
{
FILE *mfp, *mfp1, *mfp2;
if (fstack_count == MERGE_FNUM) {
/* Must reduce the number of temporary files */
mfp = ftmp();
merge_sort_fstack(mfp, putrec, ftbl);
/* Save output in next layer */
if (fstack_1_count == MERGE_FNUM) {
mfp1 = ftmp();
memcpy(fstack, fstack_1, sizeof fstack);
merge_sort_fstack(mfp1, putrec, ftbl);
if (fstack_2_count == MERGE_FNUM) {
/* More than 4096 files! */
mfp2 = ftmp();
memcpy(fstack, fstack_2, sizeof fstack);
merge_sort_fstack(mfp2, putrec, ftbl);
fstack_2[0].fp = mfp2;
fstack_2_count = 1;
}
fstack_2[fstack_2_count].fp = mfp1;
fstack_2[fstack_2_count].get = geteasy;
fstack_2_count++;
fstack_1_count = 0;
}
fstack_1[fstack_1_count].fp = mfp;
fstack_1[fstack_1_count].get = geteasy;
fstack_1_count++;
fstack_count = 0;
}
fstack[fstack_count].fp = fp;
fstack[fstack_count++].get = get;
}
void
fmerge(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl)
{
get_func_t get = SINGL_FLD ? makeline : makekey;
FILE *fp;
int i;
for (i = 0; i < nfiles; i++) {
#if defined(__minix)
/* LSC FIXME: Not very pretty, but reduce the diff */
#include "pathnames.h"
if (!strcmp(filelist->names[0], _PATH_STDIN))
fp = stdin;
else
#endif /* defined(__minix) */
fp = fopen(filelist->names[i], "r");
if (fp == NULL)
err(2, "%s", filelist->names[i]);
save_for_merge(fp, get, ftbl);
}
merge_sort(outfp, putline, ftbl);
}
void
merge_sort(FILE *outfp, put_func_t put, struct field *ftbl)
{
int count = fstack_1_count + fstack_2_count;
FILE *mfp;
int i;
if (count == 0) {
/* All files in initial array */
merge_sort_fstack(outfp, put, ftbl);
return;
}
count += fstack_count;
/* Too many files for one merge sort */
for (;;) {
/* Sort latest 16 files */
i = count;
if (i > MERGE_FNUM)
i = MERGE_FNUM;
while (fstack_count > 0)
fstack[--i] = fstack[--fstack_count];
while (i > 0 && fstack_1_count > 0)
fstack[--i] = fstack_1[--fstack_1_count];
while (i > 0)
fstack[--i] = fstack_2[--fstack_2_count];
if (count <= MERGE_FNUM) {
/* Got all the data */
fstack_count = count;
merge_sort_fstack(outfp, put, ftbl);
return;
}
mfp = ftmp();
fstack_count = count > MERGE_FNUM ? MERGE_FNUM : count;
merge_sort_fstack(mfp, putrec, ftbl);
fstack[0].fp = mfp;
fstack[0].get = geteasy;
fstack_count = 1;
count -= MERGE_FNUM - 1;
}
}
static void
merge_sort_fstack(FILE *outfp, put_func_t put, struct field *ftbl)
{
struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile;
RECHEADER *new_rec;
u_char *new_end;
void *tmp;
int c, i, nfiles;
size_t sz;
/* Read one record from each file (read again if a duplicate) */
for (nfiles = i = 0; i < fstack_count; i++) {
cfile = &fstack[i];
if (cfile->rec == NULL) {
cfile->rec = allocrec(NULL, DEFLLEN);
cfile->end = (u_char *)cfile->rec + DEFLLEN;
}
rewind(cfile->fp);
for (;;) {
c = cfile->get(cfile->fp, cfile->rec, cfile->end, ftbl);
if (c == EOF)
break;
if (c == BUFFEND) {
/* Double buffer size */
sz = (cfile->end - (u_char *)cfile->rec) * 2;
cfile->rec = allocrec(cfile->rec, sz);
cfile->end = (u_char *)cfile->rec + sz;
continue;
}
if (nfiles != 0) {
if (insert(flist, cfile, nfiles, !DELETE))
/* Duplicate removed */
continue;
} else
flist[0] = cfile;
nfiles++;
break;
}
}
if (nfiles == 0)
return;
/*
* We now loop reading a new record from the file with the
* 'sorted first' existing record.
* As each record is added, the 'first' record is written to the
* output file - maintaining one record from each file in the sorted
* list.
*/
new_rec = allocrec(NULL, DEFLLEN);
new_end = (u_char *)new_rec + DEFLLEN;
for (;;) {
cfile = flist[0];
c = cfile->get(cfile->fp, new_rec, new_end, ftbl);
if (c == EOF) {
/* Write out last record from now-empty input */
put(cfile->rec, outfp);
if (--nfiles == 0)
break;
/* Replace from file with now-first sorted record. */
/* (Moving base 'flist' saves copying everything!) */
flist++;
continue;
}
if (c == BUFFEND) {
/* Buffer not large enough - double in size */
sz = (new_end - (u_char *)new_rec) * 2;
new_rec = allocrec(new_rec, sz);
new_end = (u_char *)new_rec +sz;
continue;
}
/* Swap in new buffer, saving old */
tmp = cfile->rec;
cfile->rec = new_rec;
new_rec = tmp;
tmp = cfile->end;
cfile->end = new_end;
new_end = tmp;
/* Add into sort, removing the original first entry */
c = insert(flist, cfile, nfiles, DELETE);
if (c != 0 || (UNIQUE && cfile == flist[0]
&& cmp(new_rec, cfile->rec) == 0)) {
/* Was an unwanted duplicate, restore buffer */
tmp = cfile->rec;
cfile->rec = new_rec;
new_rec = tmp;
tmp = cfile->end;
cfile->end = new_end;
new_end = tmp;
continue;
}
/* Write out 'old' record */
put(new_rec, outfp);
}
free(new_rec);
}
/*
* if delete: inserts rec in flist, deletes flist[0];
* otherwise just inserts *rec in flist.
* Returns 1 if record is a duplicate to be ignored.
*/
static int
insert(struct mfile **flist, struct mfile *rec, int ttop, int delete)
{
int mid, top = ttop, bot = 0, cmpv = 1;
for (mid = top / 2; bot + 1 != top; mid = (bot + top) / 2) {
cmpv = cmp(rec->rec, flist[mid]->rec);
if (cmpv == 0 ) {
if (UNIQUE)
/* Duplicate key, read another record */
/* NB: This doesn't guarantee to keep any
* particular record. */
return 1;
/*
* Apply sort by input file order.
* We could truncate the sort is the fileno are
* adjacent - but that is all too hard!
* The fileno cannot be equal, since we only have one
* record from each file (+ flist[0] which never
* comes here).
*/
cmpv = rec < flist[mid] ? -1 : 1;
if (REVERSE)
cmpv = -cmpv;
}
if (cmpv < 0)
top = mid;
else
bot = mid;
}
/* At this point we haven't yet compared against flist[0] */
if (delete) {
/* flist[0] is ourselves, only the caller knows the old data */
if (bot != 0) {
memmove(flist, flist + 1, bot * sizeof(MFILE *));
flist[bot] = rec;
}
return 0;
}
/* Inserting original set of records */
if (bot == 0 && cmpv != 0) {
/* Doesn't match flist[1], must compare with flist[0] */
cmpv = cmp(rec->rec, flist[0]->rec);
if (cmpv == 0 && UNIQUE)
return 1;
/* Add matching keys in file order (ie new is later) */
if (cmpv < 0)
bot = -1;
}
bot++;
memmove(flist + bot + 1, flist + bot, (ttop - bot) * sizeof(MFILE *));
flist[bot] = rec;
return 0;
}
/*
* check order on one file
*/
void
order(struct filelist *filelist, struct field *ftbl)
{
get_func_t get = SINGL_FLD ? makeline : makekey;
RECHEADER *crec, *prec, *trec;
u_char *crec_end, *prec_end, *trec_end;
FILE *fp;
int c;
#if defined(__minix)
if (!strcmp(filelist->names[0], _PATH_STDIN))
fp = stdin;
else
#endif /* defined(__minix) */
fp = fopen(filelist->names[0], "r");
if (fp == NULL)
err(2, "%s", filelist->names[0]);
crec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
crec_end = crec->data + DEFLLEN;
prec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
prec_end = prec->data + DEFLLEN;
/* XXX this does exit(0) for overlong lines */
if (get(fp, prec, prec_end, ftbl) != 0)
exit(0);
while (get(fp, crec, crec_end, ftbl) == 0) {
if (0 < (c = cmp(prec, crec))) {
crec->data[crec->length-1] = 0;
errx(1, "found disorder: %s", crec->data+crec->offset);
}
if (UNIQUE && !c) {
crec->data[crec->length-1] = 0;
errx(1, "found non-uniqueness: %s",
crec->data+crec->offset);
}
/*
* Swap pointers so that this record is on place pointed
* to by prec and new record is read to place pointed to by
* crec.
*/
trec = prec;
prec = crec;
crec = trec;
trec_end = prec_end;
prec_end = crec_end;
crec_end = trec_end;
}
exit(0);
}
static int
cmp(RECHEADER *rec1, RECHEADER *rec2)
{
int len;
int r;
/* key is weights */
len = min(rec1->keylen, rec2->keylen);
r = memcmp(rec1->data, rec2->data, len);
if (r == 0)
r = rec1->keylen - rec2->keylen;
if (REVERSE)
r = -r;
return r;
}

66
usr.bin/sort/pathnames.h Normal file
View file

@ -0,0 +1,66 @@
/* $NetBSD: pathnames.h,v 1.6 2008/04/28 20:24:15 martin Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Ben Harris and Jaromir Dolecek.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)pathnames.h 8.1 (Berkeley) 6/6/93
*/
#define _PATH_STDIN "/dev/stdin"

217
usr.bin/sort/radix_sort.c Normal file
View file

@ -0,0 +1,217 @@
/* $NetBSD: radix_sort.c,v 1.4 2009/09/19 16:18:00 dsl Exp $ */
/*-
* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy and by Dan Bernstein at New York University,
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#if defined(LIBC_SCCS) && !defined(lint)
#if 0
static char sccsid[] = "@(#)radixsort.c 8.2 (Berkeley) 4/28/95";
#else
__RCSID("$NetBSD: radix_sort.c,v 1.4 2009/09/19 16:18:00 dsl Exp $");
#endif
#endif /* LIBC_SCCS and not lint */
/*
* 'stable' radix sort initially from libc/stdlib/radixsort.c
*/
#include <sys/types.h>
#include <assert.h>
#include <errno.h>
#include <util.h>
#include "sort.h"
typedef struct {
RECHEADER **sa; /* Base of saved area */
int sn; /* Number of entries */
int si; /* index into data for compare */
} stack;
static void simplesort(RECHEADER **, int, int);
#define THRESHOLD 20 /* Divert to simplesort(). */
#define empty(s) (s >= sp)
#define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si
#define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i
#define swap(a, b, t) t = a, a = b, b = t
void
radix_sort(RECHEADER **a, RECHEADER **ta, int n)
{
u_int count[256], nc, bmin;
u_int c;
RECHEADER **ak, **tai, **lim;
RECHEADER *hdr;
int stack_size = 512;
stack *s, *sp, *sp0, *sp1, temp;
RECHEADER **top[256];
u_int *cp, bigc;
int data_index = 0;
if (n < THRESHOLD && !DEBUG('r')) {
simplesort(a, n, 0);
return;
}
s = emalloc(stack_size * sizeof *s);
memset(&count, 0, sizeof count);
/* Technically 'top' doesn't need zeroing */
memset(&top, 0, sizeof top);
sp = s;
push(a, n, data_index);
while (!empty(s)) {
pop(a, n, data_index);
if (n < THRESHOLD && !DEBUG('r')) {
simplesort(a, n, data_index);
continue;
}
/* Count number of times each 'next byte' occurs */
nc = 0;
bmin = 255;
lim = a + n;
for (ak = a, tai = ta; ak < lim; ak++) {
hdr = *ak;
if (data_index >= hdr->keylen) {
/* Short key, copy to start of output */
if (UNIQUE && a != sp->sa)
/* Stop duplicate being written out */
hdr->keylen = -1;
*a++ = hdr;
n--;
continue;
}
/* Save in temp buffer for distribute */
*tai++ = hdr;
c = hdr->data[data_index];
if (++count[c] == 1) {
if (c < bmin)
bmin = c;
nc++;
}
}
/*
* We need save the bounds for each 'next byte' that
* occurs more so we can sort each block.
*/
if (sp + nc > s + stack_size) {
stack_size *= 2;
sp1 = erealloc(s, stack_size * sizeof *s);
sp = sp1 + (sp - s);
s = sp1;
}
/* Minor optimisation to do the largest set last */
sp0 = sp1 = sp;
bigc = 2;
/* Convert 'counts' positions, saving bounds for later sorts */
ak = a;
for (cp = count + bmin; nc > 0; cp++) {
while (*cp == 0)
cp++;
if ((c = *cp) > 1) {
if (c > bigc) {
bigc = c;
sp1 = sp;
}
push(ak, c, data_index+1);
}
ak += c;
top[cp-count] = ak;
*cp = 0; /* Reset count[]. */
nc--;
}
swap(*sp0, *sp1, temp);
for (ak = ta+n; --ak >= ta;) /* Deal to piles. */
*--top[(*ak)->data[data_index]] = *ak;
}
free(s);
}
/* insertion sort, short records are sorted before long ones */
static void
simplesort(RECHEADER **a, int n, int data_index)
{
RECHEADER **ak, **ai;
RECHEADER *akh;
RECHEADER **lim = a + n;
const u_char *s, *t;
int s_len, t_len;
int i;
int r;
if (n <= 1)
return;
for (ak = a+1; ak < lim; ak++) {
akh = *ak;
s = akh->data;
s_len = akh->keylen;
for (ai = ak; ;) {
ai--;
t_len = (*ai)->keylen;
if (t_len != -1) {
t = (*ai)->data;
for (i = data_index; ; i++) {
if (i >= s_len || i >= t_len) {
r = s_len - t_len;
break;
}
r = s[i] - t[i];
if (r != 0)
break;
}
if (r >= 0) {
if (r == 0 && UNIQUE) {
/* Put record below existing */
ai[1] = ai[0];
/* Mark as duplicate - ignore */
akh->keylen = -1;
} else {
ai++;
}
break;
}
}
ai[1] = ai[0];
if (ai == a)
break;
}
ai[0] = akh;
}
}

462
usr.bin/sort/sort.1 Normal file
View file

@ -0,0 +1,462 @@
.\" $NetBSD: sort.1,v 1.32 2010/12/18 23:36:23 wiz Exp $
.\"
.\" Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
.\" All rights reserved.
.\"
.\" This code is derived from software contributed to The NetBSD Foundation
.\" by Ben Harris and Jaromir Dolecek.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
.\" Copyright (c) 1991, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" This code is derived from software contributed to Berkeley by
.\" the Institute of Electrical and Electronics Engineers, Inc.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" @(#)sort.1 8.1 (Berkeley) 6/6/93
.\"
.Dd December 18, 2010
.Dt SORT 1
.Os
.Sh NAME
.Nm sort
.Nd sort or merge text files
.Sh SYNOPSIS
.Nm sort
.Op Fl bcdfHilmnrSsu
.Oo
.Fl k
.Ar field1 Ns Op Li \&, Ns Ar field2
.Oc
.Op Fl o Ar output
.Op Fl R Ar char
.Op Fl T Ar dir
.Op Fl t Ar char
.Op Ar
.Sh DESCRIPTION
The
.Nm
utility sorts text files by lines.
Comparisons are based on one or more sort keys extracted
from each line of input, and are performed lexicographically.
By default, if keys are not given,
.Nm
regards each input line as a single field.
.Pp
The following options are available:
.Bl -tag -width Fl
.It Fl c
Check that the single input file is sorted.
If the file is not sorted,
.Nm
produces the appropriate error messages and exits with code 1; otherwise,
.Nm
returns 0.
.Nm
.Fl c
produces no output.
.It Fl H
Ignored for compatibility with earlier versions of
.Nm .
.It Fl m
Merge only; the input files are assumed to be pre-sorted.
.It Fl o Ar output
The argument given is the name of an
.Ar output
file to be used instead of the standard output.
This file can be the same as one of the input files.
.It Fl S
Don't use stable sort.
Default is to use stable sort.
.It Fl s
Use stable sort, keeps records with equal keys in their original order.
This is the default.
Provided for compatibility with other
.Nm
implementations only.
.It Fl T Ar dir
Use
.Ar dir
as the directory for temporary files.
The default is the value specified in the environment variable
.Ev TMPDIR or
.Pa /tmp
if
.Ev TMPDIR
is not defined.
.It Fl u
Unique: suppress all but one in each set of lines having equal keys.
If used with the
.Fl c
option, check that there are no lines with duplicate keys.
.El
.Pp
The following options override the default ordering rules.
When ordering options appear independent of key field
specifications, the requested field ordering rules are
applied globally to all sort keys.
When attached to a specific key (see
.Fl k ) ,
the ordering options override
all global ordering options for that key.
.Bl -tag -width Fl
.It Fl d
Only blank space and alphanumeric characters
.\" according
.\" to the current setting of LC_CTYPE
are used
in making comparisons.
.It Fl f
Considers all lowercase characters that have uppercase
equivalents to be the same for purposes of comparison.
.It Fl i
Ignore all non-printable characters.
.It Fl l
Sort by the string length of the field, not by the field itself.
.It Fl n
An initial numeric string, consisting of optional blank space, optional
minus sign, and zero or more digits (including decimal point)
.\" with
.\" optional radix character and thousands
.\" separator
.\" (as defined in the current locale),
is sorted by arithmetic value.
(The
.Fl n
option no longer implies the
.Fl b
option.)
.It Fl r
Reverse the sense of comparisons.
.El
.Pp
The treatment of field separators can be altered using these options:
.Bl -tag -width Fl
.It Fl b
Ignores leading blank space when determining the start
and end of a restricted sort key.
A
.Fl b
option specified before the first
.Fl k
option applies globally to all
.Fl k
options.
Otherwise, the
.Fl b
option can be attached independently to each
.Ar field
argument of the
.Fl k
option (see below).
Note that the
.Fl b
option has no effect unless key fields are specified.
.It Fl t Ar char
.Ar char
is used as the field separator character.
The initial
.Ar char
is not considered to be part of a field when determining
key offsets (see below).
Each occurrence of
.Ar char
is significant (for example,
.Dq Ar charchar
delimits an empty field).
If
.Fl t
is not specified, the default field separator is a sequence of
blank-space characters, and consecutive blank spaces do
.Em not
delimit an empty field; further, the initial blank space
.Em is
considered part of a field when determining key offsets.
.It Fl R Ar char
.Ar char
is used as the record separator character.
This should be used with discretion;
.Fl R Ar \*[Lt]alphanumeric\*[Gt]
usually produces undesirable results.
The default record separator is newline.
.It Fl k Ar field1 Ns Op Li \&, Ns Ar field2
Designates the starting position,
.Ar field1 ,
and optional ending position,
.Ar field2 ,
of a key field.
The
.Fl k
option replaces the obsolescent options
.Cm \(pl Ns Ar pos1
and
.Fl Ns Ar pos2 .
.El
.Pp
The following operands are available:
.Bl -tag -width Ar
.It Ar file
The pathname of a file to be sorted, merged, or checked.
If no
.Ar file
operands are specified, or if
a
.Ar file
operand is
.Fl ,
the standard input is used.
.El
.Pp
A field is defined as a minimal sequence of characters followed by a
field separator or a newline character.
By default, the first
blank space of a sequence of blank spaces acts as the field separator.
All blank spaces in a sequence of blank spaces are considered
as part of the next field; for example, all blank spaces at
the beginning of a line are considered to be part of the
first field.
.Pp
Fields are specified
by the
.Fl k
.Ar field1 Ns Op \&, Ns Ar field2
argument.
A missing
.Ar field2
argument defaults to the end of a line.
.Pp
The arguments
.Ar field1
and
.Ar field2
have the form
.Ar m Ns Li \&. Ns Ar n
and can be followed by one or more of the letters
.Cm b , d , f , i ,
.Cm l , n ,
and
.Cm r ,
which correspond to the options discussed above.
A
.Ar field1
position specified by
.Ar m Ns Li \&. Ns Ar n
.Pq Ar m , n No \*[Gt] 0
is interpreted as the
.Ar n Ns th
character in the
.Ar m Ns th
field.
A missing
.Li \&. Ns Ar n
in
.Ar field1
means
.Ql \&.1 ,
indicating the first character of the
.Ar m Ns th
field; if the
.Fl b
option is in effect,
.Ar n
is counted from the first non-blank character in the
.Ar m Ns th
field;
.Ar m Ns Li \&.1b
refers to the first non-blank character in the
.Ar m Ns th
field.
.Pp
A
.Ar field2
position specified by
.Ar m Ns Li \&. Ns Ar n
is interpreted as
the
.Ar n Ns th
character (including separators) of the
.Ar m Ns th
field.
A missing
.Li \&. Ns Ar n
indicates the last character of the
.Ar m Ns th
field;
.Ar m
= \&0
designates the end of a line.
Thus the option
.Fl k
.Sm off
.Xo
.Ar v Li \&. Ar x Li \&,
.Ar w Li \&. Ar y
.Xc
.Sm on
is synonymous with the obsolescent option
.Sm off
.Cm \(pl Ar v-\&1 Li \&. Ar x-\&1
.Fl Ar w-\&1 Li \&. Ar y ;
.Sm on
when
.Ar y
is omitted,
.Fl k
.Sm off
.Ar v Li \&. Ar x Li \&, Ar w
.Sm on
is synonymous with
.Sm off
.Cm \(pl Ar v-\&1 Li \&. Ar x-\&1
.Fl Ar w+1 Li \&.0 .
.Sm on
The obsolescent
.Cm \(pl Ns Ar pos1
.Fl Ns Ar pos2
option is still supported, except for
.Fl Ns Ar w Ns Li \&.0b ,
which has no
.Fl k
equivalent.
.Sh ENVIRONMENT
If the following environment variable exists, it is used by
.Nm .
.Bl -tag -width Ev
.It Ev TMPDIR
.Nm
uses the contents of the
.Ev TMPDIR
environment variable as the path in which to store
temporary files.
.El
.Sh FILES
.Bl -tag -width outputNUMBER+some -compact
.It Pa /tmp/sort.*
Default temporary files.
.It Ar output Ns NUMBER
Temporary file which is used for output if
.Ar output
already exists.
Once sorting is finished, this file replaces
.Ar output
(via
.Xr link 2
and
.Xr unlink 2 ) .
.El
.Sh EXIT STATUS
Sort exits with one of the following values:
.Bl -tag -width flag -compact
.It 0
Normal behavior.
.It 1
On disorder (or non-uniqueness) with the
.Fl c
option
.It 2
An error occurred.
.El
.Sh SEE ALSO
.Xr comm 1 ,
.Xr join 1 ,
.Xr uniq 1 ,
.Xr qsort 3 ,
.Xr radixsort 3
.Sh HISTORY
A
.Nm
command appeared in
.At v5 .
This
.Nm
implementation appeared in
.Bx 4.4
and is used since
.Nx 1.6 .
.Sh BUGS
Posix requires the locale's thousands separator be ignored in numbers.
It may be faster to sort very large files in pieces and then explicitly
merge them.
.Sh NOTES
This
.Nm
has no limits on input line length (other than imposed by available
memory) or any restrictions on bytes allowed within lines.
.Pp
To protect data
.Nm
.Fl o
calls
.Xr link 2
and
.Xr unlink 2 ,
and thus fails on protected directories.
.Pp
Input files should be text files.
If file doesn't end with record separator (which is typically newline), the
.Nm
utility silently supplies one.
.Pp
The current
.Nm
uses lexicographic radix sorting, which requires
that sort keys be kept in memory (as opposed to previous versions which used quick
and merge sorts and did not.)
Thus performance depends highly on efficient choice of sort keys, and the
.Fl b
option and the
.Ar field2
argument of the
.Fl k
option should be used whenever possible.
Similarly,
.Nm
.Fl k1f
is equivalent to
.Nm
.Fl f
and may take twice as long.

419
usr.bin/sort/sort.c Normal file
View file

@ -0,0 +1,419 @@
/* $NetBSD: sort.c,v 1.61 2011/09/16 15:39:29 joerg Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Ben Harris and Jaromir Dolecek.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* Sort sorts a file using an optional user-defined key.
* Sort uses radix sort for internal sorting, and allows
* a choice of merge sort and radix sort for external sorting.
*/
#include <util.h>
#include "sort.h"
#include "fsort.h"
#include "pathnames.h"
#ifndef lint
__COPYRIGHT("@(#) Copyright (c) 1993\
The Regents of the University of California. All rights reserved.");
#endif /* not lint */
__RCSID("$NetBSD: sort.c,v 1.61 2011/09/16 15:39:29 joerg Exp $");
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <paths.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <locale.h>
int REC_D = '\n';
u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */
/*
* weight tables. Gweights is one of ascii, Rascii..
* modified to weight rec_d = 0 (or 255)
*/
u_char *const weight_tables[4] = { ascii, Rascii, Ftable, RFtable };
u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS];
int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0;
int REVERSE = 0;
int posix_sort;
unsigned int debug_flags = 0;
static char toutpath[MAXPATHLEN];
const char *tmpdir; /* where temporary files should be put */
static void cleanup(void);
static void onsignal(int);
__dead static void usage(const char *);
int
main(int argc, char *argv[])
{
int ch, i, stdinflag = 0;
char cflag = 0, mflag = 0;
char *outfile, *outpath = 0;
struct field *fldtab;
size_t fldtab_sz, fld_cnt;
struct filelist filelist;
int num_input_files;
FILE *outfp = NULL;
#if !defined(__minix)
struct rlimit rl;
#endif /* !defined(__minix) */
struct stat st;
setlocale(LC_ALL, "");
#if !defined(__minix)
/* bump RLIMIT_NOFILE to maximum our hard limit allows */
if (getrlimit(RLIMIT_NOFILE, &rl) < 0)
err(2, "getrlimit");
rl.rlim_cur = rl.rlim_max;
if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
err(2, "setrlimit");
#endif /* !defined(__minix) */
d_mask[REC_D = '\n'] = REC_D_F;
d_mask['\t'] = d_mask[' '] = BLANK | FLD_D;
/* fldtab[0] is the global options. */
fldtab_sz = 3;
fld_cnt = 0;
fldtab = emalloc(fldtab_sz * sizeof(*fldtab));
memset(fldtab, 0, fldtab_sz * sizeof(*fldtab));
#define SORT_OPTS "bcdD:fHik:lmno:rR:sSt:T:ux"
/* Convert "+field" args to -f format */
fixit(&argc, argv, SORT_OPTS);
if (!(tmpdir = getenv("TMPDIR")))
tmpdir = _PATH_TMP;
while ((ch = getopt(argc, argv, SORT_OPTS)) != -1) {
switch (ch) {
case 'b':
fldtab[0].flags |= BI | BT;
break;
case 'c':
cflag = 1;
break;
case 'D': /* Debug flags */
for (i = 0; optarg[i]; i++)
debug_flags |= 1 << (optarg[i] & 31);
break;
case 'd': case 'f': case 'i': case 'n': case 'l':
fldtab[0].flags |= optval(ch, 0);
break;
case 'H':
/* -H was ; use merge sort for blocks of large files' */
/* That is now the default. */
break;
case 'k':
fldtab = erealloc(fldtab, (fldtab_sz + 1) * sizeof(*fldtab));
memset(&fldtab[fldtab_sz], 0, sizeof(fldtab[0]));
fldtab_sz++;
setfield(optarg, &fldtab[++fld_cnt], fldtab[0].flags);
break;
case 'm':
mflag = 1;
break;
case 'o':
outpath = optarg;
break;
case 'r':
REVERSE = 1;
break;
case 's':
/*
* Nominally 'stable sort', keep lines with equal keys
* in input file order. (Default for NetBSD)
* (-s for GNU sort compatibility.)
*/
posix_sort = 0;
break;
case 'S':
/*
* Reverse of -s!
* This needs to enforce a POSIX sort where records
* with equal keys are then sorted by the raw data.
* Currently not implemented!
* (using libc radixsort() v sradixsort() doesn't
* have the desired effect.)
*/
posix_sort = 1;
break;
case 't':
if (SEP_FLAG)
usage("multiple field delimiters");
SEP_FLAG = 1;
d_mask[' '] &= ~FLD_D;
d_mask['\t'] &= ~FLD_D;
d_mask[(u_char)*optarg] |= FLD_D;
if (d_mask[(u_char)*optarg] & REC_D_F)
errx(2, "record/field delimiter clash");
break;
case 'R':
if (REC_D != '\n')
usage("multiple record delimiters");
REC_D = *optarg;
if (REC_D == '\n')
break;
if (optarg[1] != '\0') {
char *ep;
int t = 0;
if (optarg[0] == '\\')
optarg++, t = 8;
REC_D = (int)strtol(optarg, &ep, t);
if (*ep != '\0' || REC_D < 0 ||
REC_D >= (int)__arraycount(d_mask))
errx(2, "invalid record delimiter %s",
optarg);
}
d_mask['\n'] = d_mask[' '];
d_mask[REC_D] = REC_D_F;
break;
case 'T':
/* -T tmpdir */
tmpdir = optarg;
break;
case 'u':
UNIQUE = 1;
break;
case '?':
default:
usage(NULL);
}
}
if (UNIQUE)
/* Don't sort on raw record if keys match */
posix_sort = 0;
if (cflag && argc > optind+1)
errx(2, "too many input files for -c option");
if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) {
outpath = argv[argc-1];
argc -= 2;
}
if (mflag && argc - optind > (MAXFCT - (16+1))*16)
errx(2, "too many input files for -m option");
for (i = optind; i < argc; i++) {
/* allow one occurrence of /dev/stdin */
if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) {
if (stdinflag)
warnx("ignoring extra \"%s\" in file list",
argv[i]);
else
stdinflag = 1;
/* change to /dev/stdin if '-' */
if (argv[i][0] == '-') {
static char path_stdin[] = _PATH_STDIN;
argv[i] = path_stdin;
}
} else if ((ch = access(argv[i], R_OK)))
err(2, "%s", argv[i]);
}
if (fldtab[1].icol.num == 0) {
/* No sort key specified */
if (fldtab[0].flags & (I|D|F|N|L)) {
/* Modified - generate a key that covers the line */
fldtab[0].flags &= ~(BI|BT);
setfield("1", &fldtab[++fld_cnt], fldtab->flags);
fldreset(fldtab);
} else {
/* Unmodified, just compare the line */
SINGL_FLD = 1;
fldtab[0].icol.num = 1;
}
} else {
fldreset(fldtab);
}
settables();
if (optind == argc) {
static const char * const names[] = { _PATH_STDIN, NULL };
filelist.names = names;
num_input_files = 1;
} else {
filelist.names = (const char * const *) &argv[optind];
num_input_files = argc - optind;
}
if (cflag) {
order(&filelist, fldtab);
/* NOT REACHED */
}
if (!outpath) {
toutpath[0] = '\0'; /* path not used in this case */
outfile = outpath = toutpath;
outfp = stdout;
} else if (lstat(outpath, &st) == 0
&& !S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
/* output file exists and isn't character or block device */
struct sigaction act;
static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE,
#if defined(__minix)
SIGVTALRM, SIGPROF, 0};
#else
SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0};
#endif /* defined(__minix) */
int outfd;
errno = 0;
if (access(outpath, W_OK))
err(2, "%s", outpath);
(void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX",
outpath);
if ((outfd = mkstemp(toutpath)) == -1)
err(2, "Cannot create temporary file `%s'", toutpath);
(void)atexit(cleanup);
act.sa_handler = onsignal;
(void) sigemptyset(&act.sa_mask);
act.sa_flags = SA_RESTART | SA_RESETHAND;
for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */
sigaction(sigtable[i], &act, 0);
outfile = toutpath;
if ((outfp = fdopen(outfd, "w")) == NULL)
err(2, "Cannot open temporary file `%s'", toutpath);
} else {
outfile = outpath;
if ((outfp = fopen(outfile, "w")) == NULL)
err(2, "output file %s", outfile);
}
if (mflag)
fmerge(&filelist, num_input_files, outfp, fldtab);
else
fsort(&filelist, num_input_files, outfp, fldtab);
if (outfile != outpath) {
if (access(outfile, F_OK))
err(2, "%s", outfile);
/*
* Copy file permissions bits of the original file.
* st is initialized above, when we create the
* temporary spool file.
*/
if (lchmod(outfile, st.st_mode & ALLPERMS) != 0) {
err(2, "cannot chmod %s: output left in %s",
outpath, outfile);
}
(void)unlink(outpath);
if (link(outfile, outpath))
err(2, "cannot link %s: output left in %s",
outpath, outfile);
(void)unlink(outfile);
toutpath[0] = 0;
}
exit(0);
}
static void
onsignal(int sig)
{
cleanup();
}
static void
cleanup(void)
{
if (toutpath[0])
(void)unlink(toutpath);
}
static void
usage(const char *msg)
{
if (msg != NULL)
(void)fprintf(stderr, "%s: %s\n", getprogname(), msg);
(void)fprintf(stderr,
"usage: %s [-bcdfHilmnrSsu] [-k field1[,field2]] [-o output]"
" [-R char] [-T dir]", getprogname());
(void)fprintf(stderr,
" [-t char] [file ...]\n");
exit(2);
}
RECHEADER *
allocrec(RECHEADER *rec, size_t size)
{
return (erealloc(rec, size + sizeof(long) - 1));
}

201
usr.bin/sort/sort.h Normal file
View file

@ -0,0 +1,201 @@
/* $NetBSD: sort.h,v 1.34 2011/09/16 15:39:29 joerg Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Ben Harris and Jaromir Dolecek.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)sort.h 8.1 (Berkeley) 6/6/93
*/
#include <sys/param.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define NBINS 256
/* values for masks, weights, and other flags. */
/* R and F get used to index weight_tables[] */
#define R 0x01 /* Field is reversed */
#define F 0x02 /* weight lower and upper case the same */
#define I 0x04 /* mask out non-printable characters */
#define D 0x08 /* sort alphanumeric characters only */
#define N 0x10 /* Field is a number */
#define BI 0x20 /* ignore blanks in icol */
#define BT 0x40 /* ignore blanks in tcol */
#define L 0x80 /* Sort by field length */
/* masks for delimiters: blanks, fields, and termination. */
#define BLANK 1 /* ' ', '\t'; '\n' if -R is invoked */
#define FLD_D 2 /* ' ', '\t' default; from -t otherwise */
#define REC_D_F 4 /* '\n' default; from -R otherwise */
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
#define FCLOSE(file) { \
if (EOF == fclose(file)) \
err(2, "%p", file); \
}
#define EWRITE(ptr, size, n, f) { \
if (!fwrite(ptr, size, n, f)) \
err(2, NULL); \
}
/* Records are limited to MAXBUFSIZE (8MB) and less if you want to sort
* in a sane way.
* Anyone who wants to sort data records longer than 2GB definitely needs a
* different program! */
typedef unsigned int length_t;
/* A record is a key/line pair starting at rec.data. It has a total length
* and an offset to the start of the line half of the pair.
*/
typedef struct recheader {
length_t length; /* total length of key and line */
length_t offset; /* to line */
int keylen; /* length of key */
u_char data[]; /* key then line */
} RECHEADER;
/* This is the column as seen by struct field. It is used by enterfield.
* They are matched with corresponding coldescs during initialization.
*/
struct column {
struct coldesc *p;
int num;
int indent;
};
/* a coldesc has a number and pointers to the beginning and end of the
* corresponding column in the current line. This is determined in enterkey.
*/
typedef struct coldesc {
u_char *start;
u_char *end;
int num;
} COLDESC;
/* A field has an initial and final column; an omitted final column
* implies the end of the line. Flags regulate omission of blanks and
* numerical sorts; mask determines which characters are ignored (from -i, -d);
* weights determines the sort weights of a character (from -f, -r).
*
* The first field contain the global flags etc.
* The list terminates when icol = 0.
*/
struct field {
struct column icol;
struct column tcol;
u_int flags;
u_char *mask;
u_char *weights;
};
struct filelist {
const char * const * names;
};
typedef int (*get_func_t)(FILE *, RECHEADER *, u_char *, struct field *);
typedef void (*put_func_t)(const RECHEADER *, FILE *);
extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
extern u_char *const weight_tables[4]; /* ascii, Rascii, Ftable, RFtable */
extern u_char d_mask[NBINS];
extern int SINGL_FLD, SEP_FLAG, UNIQUE, REVERSE;
extern int posix_sort;
extern int REC_D;
extern const char *tmpdir;
extern struct coldesc *clist;
extern int ncols;
#define DEBUG(ch) (debug_flags & (1 << ((ch) & 31)))
extern unsigned int debug_flags;
RECHEADER *allocrec(RECHEADER *, size_t);
void append(RECHEADER **, int, FILE *, void (*)(const RECHEADER *, FILE *));
void concat(FILE *, FILE *);
length_t enterkey(RECHEADER *, const u_char *, u_char *, size_t, struct field *);
void fixit(int *, char **, const char *);
void fldreset(struct field *);
FILE *ftmp(void);
void fmerge(struct filelist *, int, FILE *, struct field *);
void save_for_merge(FILE *, get_func_t, struct field *);
void merge_sort(FILE *, put_func_t, struct field *);
void fsort(struct filelist *, int, FILE *, struct field *);
int geteasy(FILE *, RECHEADER *, u_char *, struct field *);
int makekey(FILE *, RECHEADER *, u_char *, struct field *);
int makeline(FILE *, RECHEADER *, u_char *, struct field *);
void makeline_copydown(RECHEADER *);
int optval(int, int);
__dead void order(struct filelist *, struct field *);
void putline(const RECHEADER *, FILE *);
void putrec(const RECHEADER *, FILE *);
void putkeydump(const RECHEADER *, FILE *);
void rd_append(int, int, int, FILE *, u_char *, u_char *);
void radix_sort(RECHEADER **, RECHEADER **, int);
int setfield(const char *, struct field *, int);
void settables(void);

106
usr.bin/sort/tmp.c Normal file
View file

@ -0,0 +1,106 @@
/* $NetBSD: tmp.c,v 1.16 2009/11/06 18:34:22 joerg Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Ben Harris and Jaromir Dolecek.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__RCSID("$NetBSD: tmp.c,v 1.16 2009/11/06 18:34:22 joerg Exp $");
#include <sys/param.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "sort.h"
#include "pathnames.h"
#define _NAME_TMP "sort.XXXXXXXX"
FILE *
ftmp(void)
{
sigset_t set, oset;
FILE *fp;
int fd;
char path[MAXPATHLEN];
(void)snprintf(path, sizeof(path), "%s%s%s", tmpdir,
(tmpdir[strlen(tmpdir)-1] != '/') ? "/" : "", _NAME_TMP);
sigfillset(&set);
(void)sigprocmask(SIG_BLOCK, &set, &oset);
if ((fd = mkstemp(path)) < 0)
err(2, "ftmp: mkstemp(\"%s\")", path);
if (!(fp = fdopen(fd, "w+")))
err(2, "ftmp: fdopen(\"%s\")", path);
if (!DEBUG('t'))
(void)unlink(path);
(void)sigprocmask(SIG_SETMASK, &oset, NULL);
return (fp);
}