Upgrading sort, which is needed by lorder
Change-Id: I64ac0509f4360c947a677600db77e7612a7cbebd
This commit is contained in:
parent
a7ab29bf57
commit
0fbbaa43e9
21 changed files with 3416 additions and 1307 deletions
|
@ -23,7 +23,7 @@ SUBDIR= add_route arp ash at backup banner basename btrace cal \
|
|||
ramdisk rarpd rawspeed rcp rdate readclock \
|
||||
reboot remsync rev rget rlogin \
|
||||
rotate rsh rshd service setup shar acksize \
|
||||
sleep slip sort spell split sprofalyze sprofdiff srccrc \
|
||||
sleep slip spell split sprofalyze sprofdiff srccrc \
|
||||
stty svclog svrctl swifi sync synctree sysenv \
|
||||
syslogd tail tcpd tcpdp tcpstat tee telnet \
|
||||
telnetd term termcap tget time touch tr \
|
||||
|
|
|
@ -1,4 +0,0 @@
|
|||
PROG= sort
|
||||
MAN=
|
||||
|
||||
.include <bsd.prog.mk>
|
1217
commands/sort/sort.c
1217
commands/sort/sort.c
File diff suppressed because it is too large
Load diff
|
@ -15,7 +15,7 @@ MAN= ash.1 at.1 banner.1 basename.1 \
|
|||
paste.1 ping.1 playwave.1 pr.1 prep.1 \
|
||||
profile.1 ps.1 pwd.1 rcp.1 recwave.1 \
|
||||
ref.1 remsync.1 rget.1 rlogin.1 rsh.1 rz.1 \
|
||||
shar.1 acksize.1 sleep.1 sort.1 spell.1 \
|
||||
shar.1 acksize.1 sleep.1 spell.1 \
|
||||
split.1 stty.1 svc.1 svrctl.1 \
|
||||
synctree.1 sysenv.1 sz.1 tail.1 tee.1 telnet.1 template.1 \
|
||||
term.1 termcap.1 tget.1 time.1 tr.1 true.1 \
|
||||
|
|
|
@ -1,83 +0,0 @@
|
|||
.TH SORT 1
|
||||
.SH NAME
|
||||
sort \- sort a file of ASCII lines
|
||||
.SH SYNOPSIS
|
||||
\fBsort\fR [\fB\-bcdf\&imnru\fR]\fR [\fB\-t\fIc\fR] [\fB\-o \fIname\fR] [\fB+\fIpos1\fR] [\fB\-\fIpos2\fR] \fIfile\fR ...\fR
|
||||
.br
|
||||
.de FL
|
||||
.TP
|
||||
\\fB\\$1\\fR
|
||||
\\$2
|
||||
..
|
||||
.de EX
|
||||
.TP 20
|
||||
\\fB\\$1\\fR
|
||||
# \\$2
|
||||
..
|
||||
.SH OPTIONS
|
||||
.TP 5
|
||||
.B \-b
|
||||
# Skip leading blanks when making comparisons
|
||||
.TP 5
|
||||
.B \-c
|
||||
# Check to see if a file is sorted
|
||||
.TP 5
|
||||
.B \-d
|
||||
# Dictionary order: ignore punctuation
|
||||
.TP 5
|
||||
.B \-f
|
||||
# Fold upper case onto lower case
|
||||
.TP 5
|
||||
.B \-i
|
||||
# Ignore non-ASCII characters
|
||||
.TP 5
|
||||
.B \-m
|
||||
# Merge presorted files
|
||||
.TP 5
|
||||
.B \-n
|
||||
# Numeric sort order (decimal)
|
||||
.TP 5
|
||||
.B \-x
|
||||
# Numeric sort order (hex)
|
||||
.TP 5
|
||||
.B \-o
|
||||
# Next argument is output file
|
||||
.TP 5
|
||||
.B \-r
|
||||
# Reverse the sort order
|
||||
.TP 5
|
||||
.B \-t
|
||||
# Following character is field separator
|
||||
.TP 5
|
||||
.B \-u
|
||||
# Unique mode (delete duplicate lines)
|
||||
.SH EXAMPLES
|
||||
.TP 20
|
||||
.B sort \-nr file
|
||||
# Sort keys numerically, reversed
|
||||
.TP 20
|
||||
.B sort +2 \-4 file
|
||||
# Sort using fields 2 and 3 as key
|
||||
.TP 20
|
||||
.B sort +2 \-t: \-o out
|
||||
# Field separator is \fI:\fP
|
||||
.TP 20
|
||||
.B sort +.3 \-.6
|
||||
# Characters 3 through 5 form the key
|
||||
.SH DESCRIPTION
|
||||
.PP
|
||||
.I Sort
|
||||
sorts one or more files.
|
||||
If no files are specified, \fIstdin\fR is sorted.
|
||||
Output is written on standard output, unless \fB\-o\fP is specified.
|
||||
The options \fB+\fIpos1 \fB\-\fIpos2\fR use only fields \fIpos1\fR
|
||||
up to but not including \fIpos2\fR as the sort key, where a field is a
|
||||
string of characters delimited by spaces and tabs, unless a different field
|
||||
delimiter is specified with \fB\-t\fR.
|
||||
Both \fIpos1\fR and \fIpos2\fR have the form \fIm.n\fR where \fIm\fR tells
|
||||
the number of fields and \fIn\fR tells the number of characters.
|
||||
Either \fIm\fR or \fIn\fR may be omitted.
|
||||
.SH "SEE ALSO"
|
||||
.BR comm (1),
|
||||
.BR grep (1),
|
||||
.BR uniq (1).
|
|
@ -69,6 +69,7 @@
|
|||
2012/10/17 12:00:00,usr.bin/Makefile
|
||||
2012/10/17 12:00:00,usr.bin/Makefile.inc
|
||||
2012/10/17 12:00:00,usr.bin/passwd/Makefile
|
||||
2012/10/17 12:00:00,usr.bin/sort
|
||||
2012/10/17 12:00:00,usr.bin/xinstall
|
||||
2012/10/17 12:00:00,usr.sbin/Makefile
|
||||
2012/10/17 12:00:00,usr.sbin/Makefile.inc
|
||||
|
|
|
@ -18,7 +18,7 @@ SUBDIR= \
|
|||
newgrp \
|
||||
passwd \
|
||||
sed seq \
|
||||
stat su \
|
||||
sort stat su \
|
||||
tic \
|
||||
uniq \
|
||||
xinstall
|
||||
|
|
15
usr.bin/sort/Makefile
Normal file
15
usr.bin/sort/Makefile
Normal file
|
@ -0,0 +1,15 @@
|
|||
# $NetBSD: Makefile,v 1.8 2009/09/10 22:02:40 dsl Exp $
|
||||
# from: @(#)Makefile 8.1 (Berkeley) 6/6/93
|
||||
|
||||
PROG= sort
|
||||
SRCS= append.c fields.c files.c fsort.c init.c msort.c sort.c tmp.c
|
||||
SRCS+= radix_sort.c
|
||||
|
||||
LDADD+=-lutil
|
||||
DPADD+=${LIBUTIL}
|
||||
|
||||
.if defined(__MINIX)
|
||||
CPPFLAGS+= -Dlchown=chown -Dlchmod=chmod
|
||||
.endif # defined(__MINIX)
|
||||
|
||||
.include <bsd.prog.mk>
|
94
usr.bin/sort/append.c
Normal file
94
usr.bin/sort/append.c
Normal file
|
@ -0,0 +1,94 @@
|
|||
/* $NetBSD: append.c,v 1.23 2009/11/06 18:34:22 joerg Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Ben Harris and Jaromir Dolecek.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "sort.h"
|
||||
|
||||
__RCSID("$NetBSD: append.c,v 1.23 2009/11/06 18:34:22 joerg Exp $");
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/*
|
||||
* copy sorted lines to output
|
||||
* Ignore duplicates (marked with -ve keylen)
|
||||
*/
|
||||
void
|
||||
append(RECHEADER **keylist, int nelem, FILE *fp, put_func_t put)
|
||||
{
|
||||
RECHEADER **cpos, **lastkey;
|
||||
RECHEADER *crec;
|
||||
|
||||
lastkey = keylist + nelem;
|
||||
if (REVERSE) {
|
||||
for (cpos = lastkey; cpos-- > keylist;) {
|
||||
crec = *cpos;
|
||||
if (crec->keylen >= 0)
|
||||
put(crec, fp);
|
||||
}
|
||||
} else {
|
||||
for (cpos = keylist; cpos < lastkey; cpos++) {
|
||||
crec = *cpos;
|
||||
if (crec->keylen >= 0)
|
||||
put(crec, fp);
|
||||
}
|
||||
}
|
||||
}
|
377
usr.bin/sort/fields.c
Normal file
377
usr.bin/sort/fields.c
Normal file
|
@ -0,0 +1,377 @@
|
|||
/* $NetBSD: fields.c,v 1.32 2010/12/18 23:09:48 christos Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Ben Harris and Jaromir Dolecek.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Subroutines to generate sort keys. */
|
||||
|
||||
#include "sort.h"
|
||||
|
||||
__RCSID("$NetBSD: fields.c,v 1.32 2010/12/18 23:09:48 christos Exp $");
|
||||
|
||||
/*
 * If the byte at 'ptr' has the BLANK bit set in d_mask[], advance 'ptr'
 * to the first following byte that does not.
 *
 * 'ptr' must be a modifiable lvalue and is evaluated more than once.
 * No end-of-buffer check is made: the scan stops only at a non-blank byte.
 *
 * Wrapped in do { } while (0) so that the mandatory trailing ';' makes the
 * expansion a single statement (safe in an unbraced if/else).
 */
#define SKIP_BLANKS(ptr) do {						\
	if (BLANK & d_mask[*(ptr)])					\
		while (BLANK & d_mask[*(++(ptr))])			\
			continue;					\
} while (/*CONSTCOND*/0)
|
||||
|
||||
/*
 * Advance 'pos' to the delimiter that ends the next column.
 *
 * Unless SEP_FLAG is set, 'pos' is first pre-incremented past any bytes
 * flagged BLANK.  It is then pre-incremented until it rests on a byte
 * flagged FLD_D or REC_D_F, or until the byte after it is '\0'.
 *
 * Uses the caller's local 'l_d_mask' table.  'pos' must be a modifiable
 * lvalue and is evaluated more than once.
 *
 * Wrapped in do { } while (0) and with the argument parenthesized so the
 * expansion is a single statement and safe for any lvalue expression.
 */
#define NEXTCOL(pos) do {						\
	if (!SEP_FLAG)							\
		while (BLANK & l_d_mask[*(++(pos))])			\
			continue;					\
	while ((*((pos) + 1) != '\0') &&				\
	    !((FLD_D | REC_D_F) & l_d_mask[*++(pos)]))			\
		continue;						\
} while (/*CONSTCOND*/0)
|
||||
|
||||
static u_char *enterfield(u_char *, const u_char *, struct field *, int);
|
||||
static u_char *number(u_char *, const u_char *, u_char *, u_char *, int);
|
||||
static u_char *length(u_char *, const u_char *, u_char *, u_char *, int);
|
||||
|
||||
#define DECIMAL_POINT '.'
|
||||
|
||||
/*
|
||||
* constructs sort key with leading recheader, followed by the key,
|
||||
* followed by the original line.
|
||||
*/
|
||||
length_t
|
||||
enterkey(RECHEADER *keybuf, const u_char *keybuf_end, u_char *line_data,
|
||||
size_t line_size, struct field fieldtable[])
|
||||
/* keybuf: pointer to start of key */
|
||||
{
|
||||
int i;
|
||||
u_char *l_d_mask;
|
||||
u_char *lineend, *pos;
|
||||
const u_char *endkey;
|
||||
u_char *keypos;
|
||||
struct coldesc *clpos;
|
||||
int col = 1;
|
||||
struct field *ftpos;
|
||||
|
||||
l_d_mask = d_mask;
|
||||
pos = line_data - 1;
|
||||
lineend = line_data + line_size-1;
|
||||
/* don't include rec_delimiter */
|
||||
|
||||
for (i = 0; i < ncols; i++) {
|
||||
clpos = clist + i;
|
||||
for (; (col < clpos->num) && (pos < lineend); col++) {
|
||||
NEXTCOL(pos);
|
||||
}
|
||||
if (pos >= lineend)
|
||||
break;
|
||||
clpos->start = SEP_FLAG ? pos + 1 : pos;
|
||||
NEXTCOL(pos);
|
||||
clpos->end = pos;
|
||||
col++;
|
||||
if (pos >= lineend) {
|
||||
clpos->end = lineend;
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (; i <= ncols; i++)
|
||||
clist[i].start = clist[i].end = lineend;
|
||||
if (clist[0].start < line_data)
|
||||
clist[0].start++;
|
||||
|
||||
/*
|
||||
* We write the sort keys (concatenated) followed by the
|
||||
* original line data (for output) as the 'keybuf' data.
|
||||
* keybuf->length is the number of key bytes + data bytes.
|
||||
* keybuf->offset is the number of key bytes.
|
||||
* We add a record separator weight after the key in case
|
||||
* (as is usual) we need to preserve the order of equal lines,
|
||||
* and for 'sort -u'.
|
||||
* The key itself will have had the correct weight applied.
|
||||
*/
|
||||
keypos = keybuf->data;
|
||||
endkey = keybuf_end - line_size - 1;
|
||||
if (endkey <= keypos)
|
||||
/* No room for any key bytes */
|
||||
return 1;
|
||||
|
||||
for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++) {
|
||||
if ((keypos = enterfield(keypos, endkey, ftpos,
|
||||
fieldtable->flags)) == NULL)
|
||||
return (1);
|
||||
}
|
||||
|
||||
keybuf->offset = keypos - keybuf->data;
|
||||
keybuf->length = keybuf->offset + line_size;
|
||||
|
||||
/*
|
||||
* Posix requires that equal keys be further sorted by the
|
||||
* entire original record.
|
||||
* NetBSD has (at least for some time) kept equal keys in
|
||||
* their original order.
|
||||
* For 'sort -u' posix_sort is unset.
|
||||
*/
|
||||
keybuf->keylen = posix_sort ? keybuf->length : keybuf->offset;
|
||||
|
||||
memcpy(keypos, line_data, line_size);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* constructs a field (as defined by -k) within a key
|
||||
*/
|
||||
static u_char *
|
||||
enterfield(u_char *tablepos, const u_char *endkey, struct field *cur_fld,
|
||||
int gflags)
|
||||
{
|
||||
u_char *start, *end, *lineend, *mask, *lweight;
|
||||
struct column icol, tcol;
|
||||
u_int flags;
|
||||
|
||||
icol = cur_fld->icol;
|
||||
tcol = cur_fld->tcol;
|
||||
flags = cur_fld->flags;
|
||||
start = icol.p->start;
|
||||
lineend = clist[ncols].end;
|
||||
if (flags & BI)
|
||||
SKIP_BLANKS(start);
|
||||
start += icol.indent;
|
||||
start = min(start, lineend);
|
||||
|
||||
if (!tcol.num)
|
||||
end = lineend;
|
||||
else {
|
||||
if (tcol.indent) {
|
||||
end = tcol.p->start;
|
||||
if (flags & BT)
|
||||
SKIP_BLANKS(end);
|
||||
end += tcol.indent;
|
||||
end = min(end, lineend);
|
||||
} else
|
||||
end = tcol.p->end;
|
||||
}
|
||||
|
||||
if (flags & L)
|
||||
return length(tablepos, endkey, start, end, flags);
|
||||
if (flags & N)
|
||||
return number(tablepos, endkey, start, end, flags);
|
||||
|
||||
/* Bound check space - assuming nothing is skipped */
|
||||
if (tablepos + (end - start) + 1 >= endkey)
|
||||
return NULL;
|
||||
|
||||
mask = cur_fld->mask;
|
||||
lweight = cur_fld->weights;
|
||||
for (; start < end; start++) {
|
||||
if (!mask || mask[*start]) {
|
||||
*tablepos++ = lweight[*start];
|
||||
}
|
||||
}
|
||||
/* Add extra byte (absent from lweight) to sort short keys correctly */
|
||||
*tablepos++ = lweight[REC_D];
|
||||
return tablepos;
|
||||
}
|
||||
|
||||
/*
|
||||
* Numbers are converted to a floating point format (exponent & mantissa)
|
||||
* so that they compare correctly as sequence of unsigned bytes.
|
||||
* Bytes 0x00 and 0xff are used to terminate positive and negative numbers
|
||||
* to ensure that 0.123 sorts after 0.12 and -0.123 sorts before -0.12.
|
||||
*
|
||||
* The first byte contains the overall sign, exponent sign and some of the
|
||||
* exponent. These have to be ordered (-ve value, decreasing exponent),
|
||||
* zero, (+ve value, increasing exponent).
|
||||
*
|
||||
* The first byte is 0x80 for zero, 0xc0 for +ve with exponent 0.
|
||||
* -ve values are the 1's complements (so 0x7f isn't used!).
|
||||
*
|
||||
* This only leaves 63 byte values for +ve exponents - which isn't enough.
|
||||
* The largest 4 exponent values are used to hold a byte count of the
|
||||
* number of following bytes that contain 8 exponent bits per byte,
|
||||
* This lets us sort exponents from -2^31 to +2^31.
|
||||
*
|
||||
* The mantissa is stored 2 digits per byte offset by 0x40, for negative
|
||||
* numbers the order must be reversed (they are bit inverted).
|
||||
*
|
||||
* Reverse sorts are done by inverting the sign of the number.
|
||||
*/
|
||||
#define MAX_EXP_ENC ((int)sizeof(int))
|
||||
|
||||
static u_char *
|
||||
number(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend,
|
||||
int reverse)
|
||||
{
|
||||
int exponent = -1;
|
||||
int had_dp = 0;
|
||||
u_char *tline;
|
||||
char ch;
|
||||
unsigned int val;
|
||||
u_char *last_nz_pos;
|
||||
u_char negate;
|
||||
|
||||
if (reverse & R)
|
||||
negate = 0xff;
|
||||
else
|
||||
negate = 0;
|
||||
|
||||
/* Give ourselves space for the key terminator */
|
||||
bufend--;
|
||||
|
||||
/* Ensure we have enough space for the exponent */
|
||||
if (pos + 1 + MAX_EXP_ENC > bufend)
|
||||
return (NULL);
|
||||
|
||||
SKIP_BLANKS(line);
|
||||
if (*line == '-') { /* set the sign */
|
||||
negate ^= 0xff;
|
||||
line++;
|
||||
}
|
||||
/* eat initial zeroes */
|
||||
for (; *line == '0' && line < lineend; line++)
|
||||
continue;
|
||||
|
||||
/* calculate exponents */
|
||||
if (*line == DECIMAL_POINT) {
|
||||
/* Decimal fraction */
|
||||
had_dp = 1;
|
||||
while (*++line == '0' && line < lineend)
|
||||
exponent--;
|
||||
} else {
|
||||
/* Large (absolute) value, count digits */
|
||||
for (tline = line; *tline >= '0' &&
|
||||
*tline <= '9' && tline < lineend; tline++)
|
||||
exponent++;
|
||||
}
|
||||
|
||||
/* If the first/next character isn't a digit, value is zero */
|
||||
if (*line < '1' || *line > '9' || line >= lineend) {
|
||||
/* This may be "0", "0.00", "000" or "fubar" but sorts as 0 */
|
||||
/* XXX what about NaN, NAN, inf and INF */
|
||||
*pos++ = 0x80;
|
||||
return pos;
|
||||
}
|
||||
|
||||
/* Maybe here we should allow for e+12 (etc) */
|
||||
|
||||
if (exponent < 0x40 - MAX_EXP_ENC && -exponent < 0x40 - MAX_EXP_ENC) {
|
||||
/* Value ok for simple encoding */
|
||||
/* exponent 0 is 0xc0 for +ve numbers and 0x40 for -ve ones */
|
||||
exponent += 0xc0;
|
||||
*pos++ = negate ^ exponent;
|
||||
} else {
|
||||
/* Out or range for a single byte */
|
||||
int c, t;
|
||||
t = exponent > 0 ? exponent : -exponent;
|
||||
/* Count how many 8-bit bytes are needed */
|
||||
for (c = 0; ; c++) {
|
||||
t >>= 8;
|
||||
if (t == 0)
|
||||
break;
|
||||
}
|
||||
/* 'c' better be 0..3 here - but probably 0..1 */
|
||||
/* Offset just outside valid range */
|
||||
t = c + 0x40 - MAX_EXP_ENC;
|
||||
if (exponent < 0)
|
||||
t = -t;
|
||||
*pos++ = negate ^ (t + 0xc0);
|
||||
/* now add each byte, most significant first */
|
||||
for (; c >= 0; c--)
|
||||
*pos++ = negate ^ (exponent >> (c * 8));
|
||||
}
|
||||
|
||||
/* Finally add mantissa, 2 digits per byte */
|
||||
for (last_nz_pos = pos; line < lineend; ) {
|
||||
if (pos >= bufend)
|
||||
return NULL;
|
||||
ch = *line++;
|
||||
val = (ch - '0') * 10;
|
||||
if (val > 90) {
|
||||
if (ch == DECIMAL_POINT && !had_dp) {
|
||||
had_dp = 1;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
while (line < lineend) {
|
||||
ch = *line++;
|
||||
if (ch == DECIMAL_POINT && !had_dp) {
|
||||
had_dp = 1;
|
||||
continue;
|
||||
}
|
||||
if (ch < '0' || ch > '9')
|
||||
line = lineend;
|
||||
else
|
||||
val += ch - '0';
|
||||
break;
|
||||
}
|
||||
*pos++ = negate ^ (val + 0x40);
|
||||
if (val != 0)
|
||||
last_nz_pos = pos;
|
||||
}
|
||||
|
||||
/* Add key terminator, deleting any trailing "00" */
|
||||
*last_nz_pos++ = negate;
|
||||
|
||||
return (last_nz_pos);
|
||||
}
|
||||
|
||||
static u_char *
|
||||
length(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend,
|
||||
int flag)
|
||||
{
|
||||
u_char buf[32];
|
||||
int l;
|
||||
SKIP_BLANKS(line);
|
||||
l = snprintf((char *)buf, sizeof(buf), "%td", lineend - line);
|
||||
return number(pos, bufend, buf, buf + l, flag);
|
||||
}
|
276
usr.bin/sort/files.c
Normal file
276
usr.bin/sort/files.c
Normal file
|
@ -0,0 +1,276 @@
|
|||
/* $NetBSD: files.c,v 1.41 2009/11/06 18:34:22 joerg Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Ben Harris and Jaromir Dolecek.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "sort.h"
|
||||
#include "fsort.h"
|
||||
|
||||
__RCSID("$NetBSD: files.c,v 1.41 2009/11/06 18:34:22 joerg Exp $");
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/* Align records in temporary files to avoid misaligned copies */
|
||||
#define REC_ROUNDUP(n) (((n) + sizeof (long) - 1) & ~(sizeof (long) - 1))
|
||||
|
||||
static ssize_t seq(FILE *, u_char **);
|
||||
|
||||
/*
|
||||
* this is called when there is no special key. It's only called
|
||||
* in the first fsort pass.
|
||||
*/
|
||||
|
||||
static u_char *opos;
|
||||
static size_t osz;
|
||||
|
||||
void
|
||||
makeline_copydown(RECHEADER *recbuf)
|
||||
{
|
||||
memmove(recbuf->data, opos, osz);
|
||||
}
|
||||
|
||||
int
|
||||
makeline(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *dummy2)
|
||||
{
|
||||
u_char *pos;
|
||||
int c;
|
||||
|
||||
pos = recbuf->data;
|
||||
if (osz != 0) {
|
||||
/*
|
||||
* Buffer shortage is solved by either of two ways:
|
||||
* o flush previous buffered data and start using the
|
||||
* buffer from start.
|
||||
* makeline_copydown() above must be called.
|
||||
* o realloc buffer
|
||||
*
|
||||
* This code has relied on realloc changing 'bufend',
|
||||
* but that isn't necessarily true.
|
||||
*/
|
||||
pos += osz;
|
||||
osz = 0;
|
||||
}
|
||||
|
||||
while (pos < bufend) {
|
||||
c = getc(fp);
|
||||
if (c == EOF) {
|
||||
if (pos == recbuf->data) {
|
||||
FCLOSE(fp);
|
||||
return EOF;
|
||||
}
|
||||
/* Add terminator to partial line */
|
||||
c = REC_D;
|
||||
}
|
||||
*pos++ = c;
|
||||
if (c == REC_D) {
|
||||
recbuf->offset = 0;
|
||||
recbuf->length = pos - recbuf->data;
|
||||
recbuf->keylen = recbuf->length - 1;
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
/* Ran out of buffer space... */
|
||||
if (recbuf->data < bufend) {
|
||||
/* Remember where the partial record is */
|
||||
osz = pos - recbuf->data;
|
||||
opos = recbuf->data;
|
||||
}
|
||||
return (BUFFEND);
|
||||
}
|
||||
|
||||
/*
|
||||
* This generates keys. It's only called in the first fsort pass
|
||||
*/
|
||||
int
|
||||
makekey(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *ftbl)
|
||||
{
|
||||
static u_char *line_data;
|
||||
static ssize_t line_size;
|
||||
static int overflow = 0;
|
||||
|
||||
/* We get re-entered after returning BUFFEND - save old data */
|
||||
if (overflow) {
|
||||
overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
|
||||
return overflow ? BUFFEND : 0;
|
||||
}
|
||||
|
||||
line_size = seq(fp, &line_data);
|
||||
if (line_size == 0) {
|
||||
FCLOSE(fp);
|
||||
return EOF;
|
||||
}
|
||||
|
||||
if (line_size > bufend - recbuf->data) {
|
||||
overflow = 1;
|
||||
} else {
|
||||
overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
|
||||
}
|
||||
return overflow ? BUFFEND : 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* get a line of input from fp
|
||||
*/
|
||||
static ssize_t
|
||||
seq(FILE *fp, u_char **line)
|
||||
{
|
||||
static u_char *buf;
|
||||
static size_t buf_size = DEFLLEN;
|
||||
u_char *end, *pos;
|
||||
int c;
|
||||
u_char *new_buf;
|
||||
|
||||
if (!buf) {
|
||||
/* one-time initialization */
|
||||
buf = malloc(buf_size);
|
||||
if (!buf)
|
||||
err(2, "malloc of linebuf for %zu bytes failed",
|
||||
buf_size);
|
||||
}
|
||||
|
||||
end = buf + buf_size;
|
||||
pos = buf;
|
||||
while ((c = getc(fp)) != EOF) {
|
||||
*pos++ = c;
|
||||
if (c == REC_D) {
|
||||
*line = buf;
|
||||
return pos - buf;
|
||||
}
|
||||
if (pos == end) {
|
||||
/* Long line - double size of buffer */
|
||||
/* XXX: Check here for stupidly long lines */
|
||||
buf_size *= 2;
|
||||
new_buf = realloc(buf, buf_size);
|
||||
if (!new_buf)
|
||||
err(2, "realloc of linebuf to %zu bytes failed",
|
||||
buf_size);
|
||||
|
||||
end = new_buf + buf_size;
|
||||
pos = new_buf + (pos - buf);
|
||||
buf = new_buf;
|
||||
}
|
||||
}
|
||||
|
||||
if (pos != buf) {
|
||||
/* EOF part way through line - add line terminator */
|
||||
*pos++ = REC_D;
|
||||
*line = buf;
|
||||
return pos - buf;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* write a key/line pair to a temporary file
|
||||
*/
|
||||
void
|
||||
putrec(const RECHEADER *rec, FILE *fp)
|
||||
{
|
||||
EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length), fp);
|
||||
}
|
||||
|
||||
/*
|
||||
* write a line to output
|
||||
*/
|
||||
void
|
||||
putline(const RECHEADER *rec, FILE *fp)
|
||||
{
|
||||
EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
|
||||
}
|
||||
|
||||
/*
|
||||
* write dump of key to output (for -Dk)
|
||||
*/
|
||||
void
|
||||
putkeydump(const RECHEADER *rec, FILE *fp)
|
||||
{
|
||||
EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->offset), fp);
|
||||
}
|
||||
|
||||
/*
|
||||
* get a record from a temporary file. (Used by merge sort.)
|
||||
*/
|
||||
int
|
||||
geteasy(FILE *fp, RECHEADER *rec, u_char *end, struct field *dummy2)
|
||||
{
|
||||
length_t file_len;
|
||||
int i;
|
||||
|
||||
(void)sizeof (char[offsetof(RECHEADER, length) == 0 ? 1 : -1]);
|
||||
|
||||
if ((u_char *)(rec + 1) > end)
|
||||
return (BUFFEND);
|
||||
if (!fread(&rec->length, 1, sizeof rec->length, fp)) {
|
||||
fclose(fp);
|
||||
return (EOF);
|
||||
}
|
||||
file_len = REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length);
|
||||
if (end - rec->data < (ptrdiff_t)file_len) {
|
||||
for (i = sizeof rec->length - 1; i >= 0; i--)
|
||||
ungetc(*((char *) rec + i), fp);
|
||||
return (BUFFEND);
|
||||
}
|
||||
|
||||
fread(&rec->length + 1, file_len - sizeof rec->length, 1, fp);
|
||||
return (0);
|
||||
}
|
214
usr.bin/sort/fsort.c
Normal file
214
usr.bin/sort/fsort.c
Normal file
|
@ -0,0 +1,214 @@
|
|||
/* $NetBSD: fsort.c,v 1.47 2010/02/05 21:58:41 enami Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Ben Harris and Jaromir Dolecek.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Read in a block of records (until 'enough').
|
||||
* sort, write to temp file.
|
||||
* Merge sort temp files into output file
|
||||
* Small files miss out the temp file stage.
|
||||
* Large files might get multiple merges.
|
||||
*/
|
||||
#include "sort.h"
|
||||
#include "fsort.h"
|
||||
|
||||
__RCSID("$NetBSD: fsort.c,v 1.47 2010/02/05 21:58:41 enami Exp $");
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Round n up to the next multiple of sizeof(length_t) */
#define SALIGN(n) (((n)+sizeof(length_t)-1) & ~(sizeof(length_t)-1))
|
||||
|
||||
void
|
||||
fsort(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl)
|
||||
{
|
||||
RECHEADER **keylist;
|
||||
RECHEADER **keypos, **keyp;
|
||||
RECHEADER *buffer;
|
||||
size_t bufsize = DEFBUFSIZE;
|
||||
u_char *bufend;
|
||||
int mfct = 0;
|
||||
int c, nelem;
|
||||
get_func_t get;
|
||||
RECHEADER *crec;
|
||||
RECHEADER *nbuffer;
|
||||
FILE *fp, *tmp_fp;
|
||||
int file_no;
|
||||
int max_recs = DEBUG('m') ? 16 : MAXNUM;
|
||||
|
||||
buffer = allocrec(NULL, bufsize);
|
||||
bufend = (u_char *)buffer + bufsize;
|
||||
/* Allocate double length keymap for radix_sort */
|
||||
keylist = malloc(2 * max_recs * sizeof(*keylist));
|
||||
if (buffer == NULL || keylist == NULL)
|
||||
err(2, "failed to malloc initial buffer or keylist");
|
||||
|
||||
if (SINGL_FLD)
|
||||
/* Key and data are one! */
|
||||
get = makeline;
|
||||
else
|
||||
/* Key (merged key fields) added before data */
|
||||
get = makekey;
|
||||
|
||||
file_no = 0;
|
||||
#if defined(__minix)
|
||||
/* LSC FIXME: Not very pretty, but reduce the diff */
|
||||
#include "pathnames.h"
|
||||
if (!strcmp(filelist->names[0], _PATH_STDIN))
|
||||
fp = stdin;
|
||||
else
|
||||
#endif /* defined(__minix) */
|
||||
fp = fopen(filelist->names[0], "r");
|
||||
if (fp == NULL)
|
||||
err(2, "%s", filelist->names[0]);
|
||||
|
||||
/* Loop through reads of chunk of input files that get sorted
|
||||
* and then merged together. */
|
||||
for (;;) {
|
||||
keypos = keylist;
|
||||
nelem = 0;
|
||||
crec = buffer;
|
||||
makeline_copydown(crec);
|
||||
|
||||
/* Loop reading records */
|
||||
for (;;) {
|
||||
c = get(fp, crec, bufend, ftbl);
|
||||
/* 'c' is 0, EOF or BUFFEND */
|
||||
if (c == 0) {
|
||||
/* Save start of key in input buffer */
|
||||
*keypos++ = crec;
|
||||
if (++nelem == max_recs) {
|
||||
c = BUFFEND;
|
||||
break;
|
||||
}
|
||||
crec = (RECHEADER *)(crec->data + SALIGN(crec->length));
|
||||
continue;
|
||||
}
|
||||
if (c == EOF) {
|
||||
/* try next file */
|
||||
if (++file_no >= nfiles)
|
||||
/* no more files */
|
||||
break;
|
||||
#if defined(__minix)
|
||||
if (!strcmp(filelist->names[0], _PATH_STDIN))
|
||||
fp = stdin;
|
||||
else
|
||||
#endif /* defined(__minix) */
|
||||
fp = fopen(filelist->names[file_no], "r");
|
||||
if (fp == NULL)
|
||||
err(2, "%s", filelist->names[file_no]);
|
||||
continue;
|
||||
}
|
||||
if (nelem >= max_recs
|
||||
|| (bufsize >= MAXBUFSIZE && nelem > 8))
|
||||
/* Need to sort and save this lot of data */
|
||||
break;
|
||||
|
||||
/* c == BUFFEND, and we can process more data */
|
||||
/* Allocate a larger buffer for this lot of data */
|
||||
bufsize *= 2;
|
||||
nbuffer = allocrec(buffer, bufsize);
|
||||
if (!nbuffer) {
|
||||
err(2, "failed to realloc buffer to %zu bytes",
|
||||
bufsize);
|
||||
}
|
||||
|
||||
/* patch up keylist[] */
|
||||
for (keyp = &keypos[-1]; keyp >= keylist; keyp--)
|
||||
*keyp = nbuffer + (*keyp - buffer);
|
||||
|
||||
crec = nbuffer + (crec - buffer);
|
||||
buffer = nbuffer;
|
||||
bufend = (u_char *)buffer + bufsize;
|
||||
}
|
||||
|
||||
/* Sort this set of records */
|
||||
radix_sort(keylist, keylist + max_recs, nelem);
|
||||
|
||||
if (c == EOF && mfct == 0) {
|
||||
/* all the data is (sorted) in the buffer */
|
||||
append(keylist, nelem, outfp,
|
||||
DEBUG('k') ? putkeydump : putline);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Save current data to a temporary file for a later merge */
|
||||
if (nelem != 0) {
|
||||
tmp_fp = ftmp();
|
||||
append(keylist, nelem, tmp_fp, putrec);
|
||||
save_for_merge(tmp_fp, geteasy, ftbl);
|
||||
}
|
||||
mfct = 1;
|
||||
|
||||
if (c == EOF) {
|
||||
/* merge to output file */
|
||||
merge_sort(outfp,
|
||||
DEBUG('k') ? putkeydump : putline, ftbl);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
free(keylist);
|
||||
keylist = NULL;
|
||||
free(buffer);
|
||||
buffer = NULL;
|
||||
}
|
78
usr.bin/sort/fsort.h
Normal file
78
usr.bin/sort/fsort.h
Normal file
|
@ -0,0 +1,78 @@
|
|||
/* $NetBSD: fsort.h,v 1.17 2009/09/26 21:16:55 dsl Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Ben Harris and Jaromir Dolecek.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)fsort.h 8.1 (Berkeley) 6/6/93
|
||||
*/
|
||||
|
||||
#define BUFSIZE (1<<20)
|
||||
#define MAXNUM 131072 /* low guess at average record count */
|
||||
#define BUFFEND (EOF-2)
|
||||
#define MAXFCT 1000
|
||||
#define DEFLLEN 65536
|
||||
|
||||
/*
|
||||
* Default (initial) and maximum size of record buffer for fsort().
|
||||
* Note that no more than MAXNUM records are stored in the buffer,
|
||||
* even if the buffer is not full yet.
|
||||
*/
|
||||
#define DEFBUFSIZE (1 << 20) /* 1MB */
|
||||
#define MAXBUFSIZE (8 << 20) /* 8 MB */
|
448
usr.bin/sort/init.c
Normal file
448
usr.bin/sort/init.c
Normal file
|
@ -0,0 +1,448 @@
|
|||
/* $NetBSD: init.c,v 1.28 2010/12/18 23:09:48 christos Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Ben Harris and Jaromir Dolecek.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "sort.h"
|
||||
|
||||
__RCSID("$NetBSD: init.c,v 1.28 2010/12/18 23:09:48 christos Exp $");
|
||||
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
|
||||
static void insertcol(struct field *);
|
||||
static const char *setcolumn(const char *, struct field *);
|
||||
|
||||
/*
|
||||
* masks of ignored characters.
|
||||
*/
|
||||
static u_char dtable[NBINS], itable[NBINS];
|
||||
|
||||
/*
|
||||
* parsed key options
|
||||
*/
|
||||
struct coldesc *clist = NULL;
|
||||
int ncols = 0;
|
||||
|
||||
/*
|
||||
* clist (list of columns which correspond to one or more icol or tcol)
|
||||
* is in increasing order of columns.
|
||||
* Fields are kept in increasing order of fields.
|
||||
*/
|
||||
|
||||
/*
|
||||
* keep clist in order--inserts a column in a sorted array
|
||||
*/
|
||||
static void
|
||||
insertcol(struct field *field)
|
||||
{
|
||||
int i;
|
||||
struct coldesc *p;
|
||||
|
||||
/* Make space for new item */
|
||||
p = realloc(clist, (ncols + 2) * sizeof(*clist));
|
||||
if (!p)
|
||||
err(1, "realloc");
|
||||
clist = p;
|
||||
memset(&clist[ncols], 0, sizeof(clist[ncols]));
|
||||
|
||||
for (i = 0; i < ncols; i++)
|
||||
if (field->icol.num <= clist[i].num)
|
||||
break;
|
||||
if (field->icol.num != clist[i].num) {
|
||||
memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
|
||||
clist[i].num = field->icol.num;
|
||||
ncols++;
|
||||
}
|
||||
if (field->tcol.num && field->tcol.num != field->icol.num) {
|
||||
for (i = 0; i < ncols; i++)
|
||||
if (field->tcol.num <= clist[i].num)
|
||||
break;
|
||||
if (field->tcol.num != clist[i].num) {
|
||||
memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
|
||||
clist[i].num = field->tcol.num;
|
||||
ncols++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* matches fields with the appropriate columns--n^2 but who cares?
|
||||
*/
|
||||
void
|
||||
fldreset(struct field *fldtab)
|
||||
{
|
||||
int i;
|
||||
|
||||
fldtab[0].tcol.p = clist + ncols - 1;
|
||||
for (++fldtab; fldtab->icol.num; ++fldtab) {
|
||||
for (i = 0; fldtab->icol.num != clist[i].num; i++)
|
||||
;
|
||||
fldtab->icol.p = clist + i;
|
||||
if (!fldtab->tcol.num)
|
||||
continue;
|
||||
for (i = 0; fldtab->tcol.num != clist[i].num; i++)
|
||||
;
|
||||
fldtab->tcol.p = clist + i;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* interprets a column in a -k field
|
||||
*/
|
||||
static const char *
|
||||
setcolumn(const char *pos, struct field *cur_fld)
|
||||
{
|
||||
struct column *col;
|
||||
char *npos;
|
||||
int tmp;
|
||||
col = cur_fld->icol.num ? (&cur_fld->tcol) : (&cur_fld->icol);
|
||||
col->num = (int) strtol(pos, &npos, 10);
|
||||
pos = npos;
|
||||
if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
|
||||
errx(2, "field numbers must be positive");
|
||||
if (*pos == '.') {
|
||||
if (!col->num)
|
||||
errx(2, "cannot indent end of line");
|
||||
++pos;
|
||||
col->indent = (int) strtol(pos, &npos, 10);
|
||||
pos = npos;
|
||||
if (&cur_fld->icol == col)
|
||||
col->indent--;
|
||||
if (col->indent < 0)
|
||||
errx(2, "illegal offset");
|
||||
}
|
||||
for(; (tmp = optval(*pos, cur_fld->tcol.num)); pos++)
|
||||
cur_fld->flags |= tmp;
|
||||
if (cur_fld->icol.num == 0)
|
||||
cur_fld->icol.num = 1;
|
||||
return (pos);
|
||||
}
|
||||
|
||||
int
|
||||
setfield(const char *pos, struct field *cur_fld, int gflag)
|
||||
{
|
||||
cur_fld->mask = NULL;
|
||||
|
||||
pos = setcolumn(pos, cur_fld);
|
||||
if (*pos == '\0') /* key extends to EOL. */
|
||||
cur_fld->tcol.num = 0;
|
||||
else {
|
||||
if (*pos != ',')
|
||||
errx(2, "illegal field descriptor");
|
||||
setcolumn((++pos), cur_fld);
|
||||
}
|
||||
if (!cur_fld->flags)
|
||||
cur_fld->flags = gflag;
|
||||
if (REVERSE)
|
||||
/* A local 'r' doesn't invert the global one */
|
||||
cur_fld->flags &= ~R;
|
||||
|
||||
/* Assign appropriate mask table and weight table. */
|
||||
cur_fld->weights = weight_tables[cur_fld->flags & (R | F)];
|
||||
if (cur_fld->flags & I)
|
||||
cur_fld->mask = itable;
|
||||
else if (cur_fld->flags & D)
|
||||
cur_fld->mask = dtable;
|
||||
|
||||
cur_fld->flags |= (gflag & (BI | BT));
|
||||
if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */
|
||||
cur_fld->flags &= ~BT;
|
||||
|
||||
if (cur_fld->tcol.num
|
||||
&& !(!(cur_fld->flags & BI) && cur_fld->flags & BT)
|
||||
&& (cur_fld->tcol.num <= cur_fld->icol.num
|
||||
/* indent if 0 -> end of field, i.e. okay */
|
||||
&& cur_fld->tcol.indent != 0
|
||||
&& cur_fld->tcol.indent < cur_fld->icol.indent))
|
||||
errx(2, "fields out of order");
|
||||
|
||||
insertcol(cur_fld);
|
||||
return (cur_fld->tcol.num);
|
||||
}
|
||||
|
||||
int
|
||||
optval(int desc, int tcolflag)
|
||||
{
|
||||
switch(desc) {
|
||||
case 'b':
|
||||
if (!tcolflag)
|
||||
return BI;
|
||||
else
|
||||
return BT;
|
||||
case 'd': return D;
|
||||
case 'f': return F;
|
||||
case 'i': return I;
|
||||
case 'l': return L;
|
||||
case 'n': return N;
|
||||
case 'r': return R;
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Return true if the options found in ARG, according to the getopt
 * spec in OPTS, require an additional argv word as an option
 * argument.
 *
 * ARG is expected to start with '-'; scanning begins at ARG[1] and
 * stops at the first letter not found in OPTS or at the first option
 * that takes an argument.
 */
static int
options_need_argument(const char *arg, const char *opts)
{
	const char *cp;
	const char *match;

	for (cp = arg + 1; *cp != '\0'; cp++) {
		match = strchr(opts, *cp);
		if (match == NULL)
			return 0;	/* invalid option */
		if (match[1] == ':') {
			/*
			 * Option takes an argument: it is in the next argv
			 * word only if nothing follows it in this one.
			 */
			return cp[1] == '\0';
		}
	}
	return 0;
}
|
||||
|
||||
/*
 * Replace historic +SPEC arguments with appropriate -kSPEC.
 *
 * The form can be either a single +SPEC or a pair +SPEC -SPEC.
 * The following -SPEC is not recognized unless it follows
 * immediately.
 *
 * argv is rewritten in place: a +SPEC word is replaced by a newly
 * allocated "-k..." string, and a following -SPEC word is folded into
 * it and removed, decrementing *argc.  'opts' is the getopt option
 * string, used to skip over option arguments.  Allocation failure is
 * fatal.
 */
void
fixit(int *argc, char **argv, const char *opts)
{
	int i, j, sawplus;
	char *vpos, *tpos;
	int col, indent;

	sawplus = 0;
	for (i = 1; i < *argc; i++) {
		/*
		 * This loop must stop exactly where getopt will stop.
		 * Otherwise it turns e.g. "sort x +3" into "sort x
		 * -k4.1", which will croak if +3 was in fact really a
		 * file name. In order to do this reliably we need to
		 * be able to identify argv words that are option
		 * arguments.
		 */

		if (!strcmp(argv[i], "--")) {
			/* End of options; stop. */
			break;
		}

		if (argv[i][0] == '+') {
			/* +POS argument */
			sawplus = 1;
		} else if (argv[i][0] == '-' && sawplus &&
		    isdigit((unsigned char)argv[i][1])) {
			/* -POS argument */
			sawplus = 0;
		} else if (argv[i][0] == '-') {
			/* other option */
			sawplus = 0;
			if (options_need_argument(argv[i], opts)) {
				/* skip over the argument */
				i++;
			}
			continue;
		} else {
			/* not an option at all; stop */
			sawplus = 0;
			break;
		}

		/*
		 * At this point argv[i] is an old-style spec. The
		 * sawplus flag used by the above loop logic also
		 * tells us if it's a +SPEC or -SPEC.
		 */

		/* parse spec */
		tpos = argv[i] + 1;
		col = (int)strtol(tpos, &tpos, 10);
		if (*tpos == '.') {
			++tpos;
			indent = (int) strtol(tpos, &tpos, 10);
		} else
			indent = 0;
		/* tpos now points to the optional flags */

		/*
		 * In the traditional form, x.0 means beginning of line;
		 * in the new form, x.0 means end of line. Adjust the
		 * value of INDENT accordingly.
		 */
		if (sawplus) {
			/* +POS */
			col += 1;
			indent += 1;
		} else {
			/* -POS */
			if (indent > 0)
				col += 1;
		}

		/*
		 * Build the new style spec directly into the allocated
		 * string, so that a long flag list is never truncated.
		 */
		if (sawplus) {
			/* Replace the +POS argument with new-style -kSPEC */
			if (asprintf(&vpos, "-k%d.%d%s", col, indent,
			    tpos) == -1)
				err(2, "asprintf");
			argv[i] = vpos;
		} else {
			/*
			 * Append the spec to the one from the
			 * preceding +POS argument, and remove the
			 * current argv element entirely.
			 */
			if (asprintf(&vpos, "%s,%d.%d%s", argv[i-1], col,
			    indent, tpos) == -1)
				err(2, "asprintf");
			/* argv[i-1] was allocated above for the +POS word */
			free(argv[i-1]);
			argv[i-1] = vpos;
			/* shift down the rest, including argv[*argc] */
			for (j = i; j < *argc; j++)
				argv[j] = argv[j+1];
			*argc -= 1;
			i--;
		}
	}
}
|
||||
|
||||
/*
|
||||
* ascii, Rascii, Ftable, and RFtable map
|
||||
*
|
||||
* Sorting 'weight' tables.
|
||||
* Convert 'ascii' characters into their sort order.
|
||||
* The 'F' variants fold lower case to upper equivalent
|
||||
* The 'R' variants are for reverse sorting.
|
||||
*
|
||||
* The record separator (REC_D) never needs a weight, this frees one
|
||||
* byte value as an 'end of key' marker. This must be 0 for normal
|
||||
* weight tables, and 0xff for reverse weight tables - and is used
|
||||
* to terminate keys so that short keys sort before (after if reverse)
|
||||
* longer keys.
|
||||
*
|
||||
* The field separator has a normal weight - although it cannot occur
|
||||
* within a key unless it is the default (space+tab).
|
||||
*
|
||||
* All other bytes map to the appropriate value for the sort order.
|
||||
* Numeric sorts don't need any tables, they are reversed by negation.
|
||||
*
|
||||
* Global reverse sorts are done by writing the sorted keys in reverse
|
||||
* order - the sort itself is still forwards.
|
||||
* This means that weights are only ever used when generating keys, any
|
||||
* sort of the original data bytes is always forwards and unweighted.
|
||||
*
|
||||
* Note: this is only good for ASCII sorting. For different LC 's,
|
||||
* all bets are off.
|
||||
*
|
||||
* itable[] and dtable[] are the masks for -i (ignore non-printables)
|
||||
* and -d (only sort blank and alphanumerics).
|
||||
*/
|
||||
void
|
||||
settables(void)
|
||||
{
|
||||
int i;
|
||||
int next_weight = 1;
|
||||
int rev_weight = 254;
|
||||
|
||||
ascii[REC_D] = 0;
|
||||
Rascii[REC_D] = 255;
|
||||
Ftable[REC_D] = 0;
|
||||
RFtable[REC_D] = 255;
|
||||
|
||||
for (i = 0; i < 256; i++) {
|
||||
if (i == REC_D)
|
||||
continue;
|
||||
ascii[i] = next_weight;
|
||||
Rascii[i] = rev_weight;
|
||||
if (Ftable[i] == 0) {
|
||||
Ftable[i] = next_weight;
|
||||
RFtable[i] = rev_weight;
|
||||
Ftable[tolower(i)] = next_weight;
|
||||
RFtable[tolower(i)] = rev_weight;
|
||||
}
|
||||
next_weight++;
|
||||
rev_weight--;
|
||||
|
||||
if (i == '\n' || isprint(i))
|
||||
itable[i] = 1;
|
||||
|
||||
if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
|
||||
dtable[i] = 1;
|
||||
}
|
||||
}
|
439
usr.bin/sort/msort.c
Normal file
439
usr.bin/sort/msort.c
Normal file
|
@ -0,0 +1,439 @@
|
|||
/* $NetBSD: msort.c,v 1.30 2010/02/05 21:58:42 enami Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Ben Harris and Jaromir Dolecek.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "sort.h"
|
||||
#include "fsort.h"
|
||||
|
||||
__RCSID("$NetBSD: msort.c,v 1.30 2010/02/05 21:58:42 enami Exp $");
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <util.h>
|
||||
|
||||
/* Subroutines using comparisons: merge sort and check order */
|
||||
#define DELETE (1)
|
||||
|
||||
/* One input of a merge: a stream plus the function that reads it. */
typedef struct mfile {
|
||||
FILE *fp; /* stream the records come from */
|
||||
get_func_t get; /* reader used to fetch the next record from fp */
|
||||
RECHEADER *rec; /* NOTE(review): presumably the current record - confirm in merge code */
|
||||
u_char *end; /* NOTE(review): presumably the end of rec's buffer - confirm */
|
||||
} MFILE;
|
||||
|
||||
static int cmp(RECHEADER *, RECHEADER *);
|
||||
static int insert(struct mfile **, struct mfile *, int, int);
|
||||
static void merge_sort_fstack(FILE *, put_func_t, struct field *);
|
||||
|
||||
/*
|
||||
* Number of files merge() can merge in one pass.
|
||||
*/
|
||||
#define MERGE_FNUM 16
|
||||
|
||||
static struct mfile fstack[MERGE_FNUM];
|
||||
static struct mfile fstack_1[MERGE_FNUM];
|
||||
static struct mfile fstack_2[MERGE_FNUM];
|
||||
static int fstack_count, fstack_1_count, fstack_2_count;
|
||||
|
||||
void
|
||||
save_for_merge(FILE *fp, get_func_t get, struct field *ftbl)
|
||||
{
|
||||
FILE *mfp, *mfp1, *mfp2;
|
||||
|
||||
if (fstack_count == MERGE_FNUM) {
|
||||
/* Must reduce the number of temporary files */
|
||||
mfp = ftmp();
|
||||
merge_sort_fstack(mfp, putrec, ftbl);
|
||||
/* Save output in next layer */
|
||||
if (fstack_1_count == MERGE_FNUM) {
|
||||
mfp1 = ftmp();
|
||||
memcpy(fstack, fstack_1, sizeof fstack);
|
||||
merge_sort_fstack(mfp1, putrec, ftbl);
|
||||
if (fstack_2_count == MERGE_FNUM) {
|
||||
/* More than 4096 files! */
|
||||
mfp2 = ftmp();
|
||||
memcpy(fstack, fstack_2, sizeof fstack);
|
||||
merge_sort_fstack(mfp2, putrec, ftbl);
|
||||
fstack_2[0].fp = mfp2;
|
||||
fstack_2_count = 1;
|
||||
}
|
||||
fstack_2[fstack_2_count].fp = mfp1;
|
||||
fstack_2[fstack_2_count].get = geteasy;
|
||||
fstack_2_count++;
|
||||
fstack_1_count = 0;
|
||||
}
|
||||
fstack_1[fstack_1_count].fp = mfp;
|
||||
fstack_1[fstack_1_count].get = geteasy;
|
||||
fstack_1_count++;
|
||||
fstack_count = 0;
|
||||
}
|
||||
|
||||
fstack[fstack_count].fp = fp;
|
||||
fstack[fstack_count++].get = get;
|
||||
}
|
||||
|
||||
void
|
||||
fmerge(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl)
|
||||
{
|
||||
get_func_t get = SINGL_FLD ? makeline : makekey;
|
||||
FILE *fp;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nfiles; i++) {
|
||||
#if defined(__minix)
|
||||
/* LSC FIXME: Not very pretty, but reduce the diff */
|
||||
#include "pathnames.h"
|
||||
if (!strcmp(filelist->names[0], _PATH_STDIN))
|
||||
fp = stdin;
|
||||
else
|
||||
#endif /* defined(__minix) */
|
||||
fp = fopen(filelist->names[i], "r");
|
||||
if (fp == NULL)
|
||||
err(2, "%s", filelist->names[i]);
|
||||
save_for_merge(fp, get, ftbl);
|
||||
}
|
||||
|
||||
merge_sort(outfp, putline, ftbl);
|
||||
}
|
||||
|
||||
void
|
||||
merge_sort(FILE *outfp, put_func_t put, struct field *ftbl)
|
||||
{
|
||||
int count = fstack_1_count + fstack_2_count;
|
||||
FILE *mfp;
|
||||
int i;
|
||||
|
||||
if (count == 0) {
|
||||
/* All files in initial array */
|
||||
merge_sort_fstack(outfp, put, ftbl);
|
||||
return;
|
||||
}
|
||||
|
||||
count += fstack_count;
|
||||
|
||||
/* Too many files for one merge sort */
|
||||
for (;;) {
|
||||
/* Sort latest 16 files */
|
||||
i = count;
|
||||
if (i > MERGE_FNUM)
|
||||
i = MERGE_FNUM;
|
||||
while (fstack_count > 0)
|
||||
fstack[--i] = fstack[--fstack_count];
|
||||
while (i > 0 && fstack_1_count > 0)
|
||||
fstack[--i] = fstack_1[--fstack_1_count];
|
||||
while (i > 0)
|
||||
fstack[--i] = fstack_2[--fstack_2_count];
|
||||
if (count <= MERGE_FNUM) {
|
||||
/* Got all the data */
|
||||
fstack_count = count;
|
||||
merge_sort_fstack(outfp, put, ftbl);
|
||||
return;
|
||||
}
|
||||
mfp = ftmp();
|
||||
fstack_count = count > MERGE_FNUM ? MERGE_FNUM : count;
|
||||
merge_sort_fstack(mfp, putrec, ftbl);
|
||||
fstack[0].fp = mfp;
|
||||
fstack[0].get = geteasy;
|
||||
fstack_count = 1;
|
||||
count -= MERGE_FNUM - 1;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
merge_sort_fstack(FILE *outfp, put_func_t put, struct field *ftbl)
|
||||
{
|
||||
struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile;
|
||||
RECHEADER *new_rec;
|
||||
u_char *new_end;
|
||||
void *tmp;
|
||||
int c, i, nfiles;
|
||||
size_t sz;
|
||||
|
||||
/* Read one record from each file (read again if a duplicate) */
|
||||
for (nfiles = i = 0; i < fstack_count; i++) {
|
||||
cfile = &fstack[i];
|
||||
if (cfile->rec == NULL) {
|
||||
cfile->rec = allocrec(NULL, DEFLLEN);
|
||||
cfile->end = (u_char *)cfile->rec + DEFLLEN;
|
||||
}
|
||||
rewind(cfile->fp);
|
||||
|
||||
for (;;) {
|
||||
c = cfile->get(cfile->fp, cfile->rec, cfile->end, ftbl);
|
||||
if (c == EOF)
|
||||
break;
|
||||
|
||||
if (c == BUFFEND) {
|
||||
/* Double buffer size */
|
||||
sz = (cfile->end - (u_char *)cfile->rec) * 2;
|
||||
cfile->rec = allocrec(cfile->rec, sz);
|
||||
cfile->end = (u_char *)cfile->rec + sz;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (nfiles != 0) {
|
||||
if (insert(flist, cfile, nfiles, !DELETE))
|
||||
/* Duplicate removed */
|
||||
continue;
|
||||
} else
|
||||
flist[0] = cfile;
|
||||
nfiles++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (nfiles == 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* We now loop reading a new record from the file with the
|
||||
* 'sorted first' existing record.
|
||||
* As each record is added, the 'first' record is written to the
|
||||
* output file - maintaining one record from each file in the sorted
|
||||
* list.
|
||||
*/
|
||||
new_rec = allocrec(NULL, DEFLLEN);
|
||||
new_end = (u_char *)new_rec + DEFLLEN;
|
||||
for (;;) {
|
||||
cfile = flist[0];
|
||||
c = cfile->get(cfile->fp, new_rec, new_end, ftbl);
|
||||
if (c == EOF) {
|
||||
/* Write out last record from now-empty input */
|
||||
put(cfile->rec, outfp);
|
||||
if (--nfiles == 0)
|
||||
break;
|
||||
/* Replace from file with now-first sorted record. */
|
||||
/* (Moving base 'flist' saves copying everything!) */
|
||||
flist++;
|
||||
continue;
|
||||
}
|
||||
if (c == BUFFEND) {
|
||||
/* Buffer not large enough - double in size */
|
||||
sz = (new_end - (u_char *)new_rec) * 2;
|
||||
new_rec = allocrec(new_rec, sz);
|
||||
new_end = (u_char *)new_rec +sz;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Swap in new buffer, saving old */
|
||||
tmp = cfile->rec;
|
||||
cfile->rec = new_rec;
|
||||
new_rec = tmp;
|
||||
tmp = cfile->end;
|
||||
cfile->end = new_end;
|
||||
new_end = tmp;
|
||||
|
||||
/* Add into sort, removing the original first entry */
|
||||
c = insert(flist, cfile, nfiles, DELETE);
|
||||
if (c != 0 || (UNIQUE && cfile == flist[0]
|
||||
&& cmp(new_rec, cfile->rec) == 0)) {
|
||||
/* Was an unwanted duplicate, restore buffer */
|
||||
tmp = cfile->rec;
|
||||
cfile->rec = new_rec;
|
||||
new_rec = tmp;
|
||||
tmp = cfile->end;
|
||||
cfile->end = new_end;
|
||||
new_end = tmp;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Write out 'old' record */
|
||||
put(new_rec, outfp);
|
||||
}
|
||||
|
||||
free(new_rec);
|
||||
}
|
||||
|
||||
/*
|
||||
* if delete: inserts rec in flist, deletes flist[0];
|
||||
* otherwise just inserts *rec in flist.
|
||||
* Returns 1 if record is a duplicate to be ignored.
|
||||
*/
|
||||
static int
|
||||
insert(struct mfile **flist, struct mfile *rec, int ttop, int delete)
|
||||
{
|
||||
int mid, top = ttop, bot = 0, cmpv = 1;
|
||||
|
||||
for (mid = top / 2; bot + 1 != top; mid = (bot + top) / 2) {
|
||||
cmpv = cmp(rec->rec, flist[mid]->rec);
|
||||
if (cmpv == 0 ) {
|
||||
if (UNIQUE)
|
||||
/* Duplicate key, read another record */
|
||||
/* NB: This doesn't guarantee to keep any
|
||||
* particular record. */
|
||||
return 1;
|
||||
/*
|
||||
* Apply sort by input file order.
|
||||
* We could truncate the sort is the fileno are
|
||||
* adjacent - but that is all too hard!
|
||||
* The fileno cannot be equal, since we only have one
|
||||
* record from each file (+ flist[0] which never
|
||||
* comes here).
|
||||
*/
|
||||
cmpv = rec < flist[mid] ? -1 : 1;
|
||||
if (REVERSE)
|
||||
cmpv = -cmpv;
|
||||
}
|
||||
if (cmpv < 0)
|
||||
top = mid;
|
||||
else
|
||||
bot = mid;
|
||||
}
|
||||
|
||||
/* At this point we haven't yet compared against flist[0] */
|
||||
|
||||
if (delete) {
|
||||
/* flist[0] is ourselves, only the caller knows the old data */
|
||||
if (bot != 0) {
|
||||
memmove(flist, flist + 1, bot * sizeof(MFILE *));
|
||||
flist[bot] = rec;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Inserting original set of records */
|
||||
|
||||
if (bot == 0 && cmpv != 0) {
|
||||
/* Doesn't match flist[1], must compare with flist[0] */
|
||||
cmpv = cmp(rec->rec, flist[0]->rec);
|
||||
if (cmpv == 0 && UNIQUE)
|
||||
return 1;
|
||||
/* Add matching keys in file order (ie new is later) */
|
||||
if (cmpv < 0)
|
||||
bot = -1;
|
||||
}
|
||||
bot++;
|
||||
memmove(flist + bot + 1, flist + bot, (ttop - bot) * sizeof(MFILE *));
|
||||
flist[bot] = rec;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* check order on one file
|
||||
*/
|
||||
void
|
||||
order(struct filelist *filelist, struct field *ftbl)
|
||||
{
|
||||
get_func_t get = SINGL_FLD ? makeline : makekey;
|
||||
RECHEADER *crec, *prec, *trec;
|
||||
u_char *crec_end, *prec_end, *trec_end;
|
||||
FILE *fp;
|
||||
int c;
|
||||
|
||||
#if defined(__minix)
|
||||
if (!strcmp(filelist->names[0], _PATH_STDIN))
|
||||
fp = stdin;
|
||||
else
|
||||
#endif /* defined(__minix) */
|
||||
fp = fopen(filelist->names[0], "r");
|
||||
if (fp == NULL)
|
||||
err(2, "%s", filelist->names[0]);
|
||||
|
||||
crec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
|
||||
crec_end = crec->data + DEFLLEN;
|
||||
prec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
|
||||
prec_end = prec->data + DEFLLEN;
|
||||
|
||||
/* XXX this does exit(0) for overlong lines */
|
||||
if (get(fp, prec, prec_end, ftbl) != 0)
|
||||
exit(0);
|
||||
while (get(fp, crec, crec_end, ftbl) == 0) {
|
||||
if (0 < (c = cmp(prec, crec))) {
|
||||
crec->data[crec->length-1] = 0;
|
||||
errx(1, "found disorder: %s", crec->data+crec->offset);
|
||||
}
|
||||
if (UNIQUE && !c) {
|
||||
crec->data[crec->length-1] = 0;
|
||||
errx(1, "found non-uniqueness: %s",
|
||||
crec->data+crec->offset);
|
||||
}
|
||||
/*
|
||||
* Swap pointers so that this record is on place pointed
|
||||
* to by prec and new record is read to place pointed to by
|
||||
* crec.
|
||||
*/
|
||||
trec = prec;
|
||||
prec = crec;
|
||||
crec = trec;
|
||||
trec_end = prec_end;
|
||||
prec_end = crec_end;
|
||||
crec_end = trec_end;
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
|
||||
static int
|
||||
cmp(RECHEADER *rec1, RECHEADER *rec2)
|
||||
{
|
||||
int len;
|
||||
int r;
|
||||
|
||||
/* key is weights */
|
||||
len = min(rec1->keylen, rec2->keylen);
|
||||
r = memcmp(rec1->data, rec2->data, len);
|
||||
if (r == 0)
|
||||
r = rec1->keylen - rec2->keylen;
|
||||
if (REVERSE)
|
||||
r = -r;
|
||||
return r;
|
||||
}
|
66
usr.bin/sort/pathnames.h
Normal file
66
usr.bin/sort/pathnames.h
Normal file
|
@ -0,0 +1,66 @@
|
|||
/* $NetBSD: pathnames.h,v 1.6 2008/04/28 20:24:15 martin Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Ben Harris and Jaromir Dolecek.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)pathnames.h 8.1 (Berkeley) 6/6/93
|
||||
*/
|
||||
|
||||
#define _PATH_STDIN "/dev/stdin"
|
217
usr.bin/sort/radix_sort.c
Normal file
217
usr.bin/sort/radix_sort.c
Normal file
|
@ -0,0 +1,217 @@
|
|||
/* $NetBSD: radix_sort.c,v 1.4 2009/09/19 16:18:00 dsl Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1990, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy and by Dan Bernstein at New York University,
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
#if defined(LIBC_SCCS) && !defined(lint)
|
||||
#if 0
|
||||
static char sccsid[] = "@(#)radixsort.c 8.2 (Berkeley) 4/28/95";
|
||||
#else
|
||||
__RCSID("$NetBSD: radix_sort.c,v 1.4 2009/09/19 16:18:00 dsl Exp $");
|
||||
#endif
|
||||
#endif /* LIBC_SCCS and not lint */
|
||||
|
||||
/*
|
||||
* 'stable' radix sort initially from libc/stdlib/radixsort.c
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <util.h>
|
||||
#include "sort.h"
|
||||
|
||||
/*
 * One pending work item of radix_sort(): a run of record pointers that
 * still has to be sorted, together with the key byte offset at which
 * the records of that run start to differ.
 */
typedef struct {
|
||||
RECHEADER **sa; /* Base of saved area */
|
||||
int sn; /* Number of entries */
|
||||
int si; /* index into data for compare */
|
||||
} stack;
|
||||
|
||||
static void simplesort(RECHEADER **, int, int);
|
||||
|
||||
#define THRESHOLD 20 /* Divert to simplesort(). */
|
||||
|
||||
#define empty(s) (s >= sp)
|
||||
#define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si
|
||||
#define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i
|
||||
#define swap(a, b, t) t = a, a = b, b = t
|
||||
|
||||
void
|
||||
radix_sort(RECHEADER **a, RECHEADER **ta, int n)
|
||||
{
|
||||
u_int count[256], nc, bmin;
|
||||
u_int c;
|
||||
RECHEADER **ak, **tai, **lim;
|
||||
RECHEADER *hdr;
|
||||
int stack_size = 512;
|
||||
stack *s, *sp, *sp0, *sp1, temp;
|
||||
RECHEADER **top[256];
|
||||
u_int *cp, bigc;
|
||||
int data_index = 0;
|
||||
|
||||
if (n < THRESHOLD && !DEBUG('r')) {
|
||||
simplesort(a, n, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
s = emalloc(stack_size * sizeof *s);
|
||||
memset(&count, 0, sizeof count);
|
||||
/* Technically 'top' doesn't need zeroing */
|
||||
memset(&top, 0, sizeof top);
|
||||
|
||||
sp = s;
|
||||
push(a, n, data_index);
|
||||
while (!empty(s)) {
|
||||
pop(a, n, data_index);
|
||||
if (n < THRESHOLD && !DEBUG('r')) {
|
||||
simplesort(a, n, data_index);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Count number of times each 'next byte' occurs */
|
||||
nc = 0;
|
||||
bmin = 255;
|
||||
lim = a + n;
|
||||
for (ak = a, tai = ta; ak < lim; ak++) {
|
||||
hdr = *ak;
|
||||
if (data_index >= hdr->keylen) {
|
||||
/* Short key, copy to start of output */
|
||||
if (UNIQUE && a != sp->sa)
|
||||
/* Stop duplicate being written out */
|
||||
hdr->keylen = -1;
|
||||
*a++ = hdr;
|
||||
n--;
|
||||
continue;
|
||||
}
|
||||
/* Save in temp buffer for distribute */
|
||||
*tai++ = hdr;
|
||||
c = hdr->data[data_index];
|
||||
if (++count[c] == 1) {
|
||||
if (c < bmin)
|
||||
bmin = c;
|
||||
nc++;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* We need save the bounds for each 'next byte' that
|
||||
* occurs more so we can sort each block.
|
||||
*/
|
||||
if (sp + nc > s + stack_size) {
|
||||
stack_size *= 2;
|
||||
sp1 = erealloc(s, stack_size * sizeof *s);
|
||||
sp = sp1 + (sp - s);
|
||||
s = sp1;
|
||||
}
|
||||
|
||||
/* Minor optimisation to do the largest set last */
|
||||
sp0 = sp1 = sp;
|
||||
bigc = 2;
|
||||
/* Convert 'counts' positions, saving bounds for later sorts */
|
||||
ak = a;
|
||||
for (cp = count + bmin; nc > 0; cp++) {
|
||||
while (*cp == 0)
|
||||
cp++;
|
||||
if ((c = *cp) > 1) {
|
||||
if (c > bigc) {
|
||||
bigc = c;
|
||||
sp1 = sp;
|
||||
}
|
||||
push(ak, c, data_index+1);
|
||||
}
|
||||
ak += c;
|
||||
top[cp-count] = ak;
|
||||
*cp = 0; /* Reset count[]. */
|
||||
nc--;
|
||||
}
|
||||
swap(*sp0, *sp1, temp);
|
||||
|
||||
for (ak = ta+n; --ak >= ta;) /* Deal to piles. */
|
||||
*--top[(*ak)->data[data_index]] = *ak;
|
||||
}
|
||||
|
||||
free(s);
|
||||
}
|
||||
|
||||
/* insertion sort, short records are sorted before long ones */
|
||||
static void
|
||||
simplesort(RECHEADER **a, int n, int data_index)
|
||||
{
|
||||
RECHEADER **ak, **ai;
|
||||
RECHEADER *akh;
|
||||
RECHEADER **lim = a + n;
|
||||
const u_char *s, *t;
|
||||
int s_len, t_len;
|
||||
int i;
|
||||
int r;
|
||||
|
||||
if (n <= 1)
|
||||
return;
|
||||
|
||||
for (ak = a+1; ak < lim; ak++) {
|
||||
akh = *ak;
|
||||
s = akh->data;
|
||||
s_len = akh->keylen;
|
||||
for (ai = ak; ;) {
|
||||
ai--;
|
||||
t_len = (*ai)->keylen;
|
||||
if (t_len != -1) {
|
||||
t = (*ai)->data;
|
||||
for (i = data_index; ; i++) {
|
||||
if (i >= s_len || i >= t_len) {
|
||||
r = s_len - t_len;
|
||||
break;
|
||||
}
|
||||
r = s[i] - t[i];
|
||||
if (r != 0)
|
||||
break;
|
||||
}
|
||||
if (r >= 0) {
|
||||
if (r == 0 && UNIQUE) {
|
||||
/* Put record below existing */
|
||||
ai[1] = ai[0];
|
||||
/* Mark as duplicate - ignore */
|
||||
akh->keylen = -1;
|
||||
} else {
|
||||
ai++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
ai[1] = ai[0];
|
||||
if (ai == a)
|
||||
break;
|
||||
}
|
||||
ai[0] = akh;
|
||||
}
|
||||
}
|
462
usr.bin/sort/sort.1
Normal file
462
usr.bin/sort/sort.1
Normal file
|
@ -0,0 +1,462 @@
|
|||
.\" $NetBSD: sort.1,v 1.32 2010/12/18 23:36:23 wiz Exp $
|
||||
.\"
|
||||
.\" Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||
.\" All rights reserved.
|
||||
.\"
|
||||
.\" This code is derived from software contributed to The NetBSD Foundation
|
||||
.\" by Ben Harris and Jaromir Dolecek.
|
||||
.\"
|
||||
.\" Redistribution and use in source and binary forms, with or without
|
||||
.\" modification, are permitted provided that the following conditions
|
||||
.\" are met:
|
||||
.\" 1. Redistributions of source code must retain the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer.
|
||||
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer in the
|
||||
.\" documentation and/or other materials provided with the distribution.
|
||||
.\"
|
||||
.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
.\" POSSIBILITY OF SUCH DAMAGE.
|
||||
.\"
|
||||
.\" Copyright (c) 1991, 1993
|
||||
.\" The Regents of the University of California. All rights reserved.
|
||||
.\"
|
||||
.\" This code is derived from software contributed to Berkeley by
|
||||
.\" the Institute of Electrical and Electronics Engineers, Inc.
|
||||
.\"
|
||||
.\" Redistribution and use in source and binary forms, with or without
|
||||
.\" modification, are permitted provided that the following conditions
|
||||
.\" are met:
|
||||
.\" 1. Redistributions of source code must retain the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer.
|
||||
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer in the
|
||||
.\" documentation and/or other materials provided with the distribution.
|
||||
.\" 3. Neither the name of the University nor the names of its contributors
|
||||
.\" may be used to endorse or promote products derived from this software
|
||||
.\" without specific prior written permission.
|
||||
.\"
|
||||
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.\" @(#)sort.1 8.1 (Berkeley) 6/6/93
|
||||
.\"
|
||||
.Dd December 18, 2010
|
||||
.Dt SORT 1
|
||||
.Os
|
||||
.Sh NAME
|
||||
.Nm sort
|
||||
.Nd sort or merge text files
|
||||
.Sh SYNOPSIS
|
||||
.Nm sort
|
||||
.Op Fl bcdfHilmnrSsu
|
||||
.Oo
|
||||
.Fl k
|
||||
.Ar field1 Ns Op Li \&, Ns Ar field2
|
||||
.Oc
|
||||
.Op Fl o Ar output
|
||||
.Op Fl R Ar char
|
||||
.Op Fl T Ar dir
|
||||
.Op Fl t Ar char
|
||||
.Op Ar
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Nm
|
||||
utility sorts text files by lines.
|
||||
Comparisons are based on one or more sort keys extracted
|
||||
from each line of input, and are performed lexicographically.
|
||||
By default, if keys are not given,
|
||||
.Nm
|
||||
regards each input line as a single field.
|
||||
.Pp
|
||||
The following options are available:
|
||||
.Bl -tag -width Fl
|
||||
.It Fl c
|
||||
Check that the single input file is sorted.
|
||||
If the file is not sorted,
|
||||
.Nm
|
||||
produces the appropriate error messages and exits with code 1; otherwise,
|
||||
.Nm
|
||||
returns 0.
|
||||
.Nm
|
||||
.Fl c
|
||||
produces no output.
|
||||
.It Fl H
|
||||
Ignored for compatibility with earlier versions of
|
||||
.Nm .
|
||||
.It Fl m
|
||||
Merge only; the input files are assumed to be pre-sorted.
|
||||
.It Fl o Ar output
|
||||
The argument given is the name of an
|
||||
.Ar output
|
||||
file to be used instead of the standard output.
|
||||
This file can be the same as one of the input files.
|
||||
.It Fl S
|
||||
Don't use stable sort.
|
||||
Default is to use stable sort.
|
||||
.It Fl s
|
||||
Use stable sort, keeps records with equal keys in their original order.
|
||||
This is the default.
|
||||
Provided for compatibility with other
|
||||
.Nm
|
||||
implementations only.
|
||||
.It Fl T Ar dir
|
||||
Use
|
||||
.Ar dir
|
||||
as the directory for temporary files.
|
||||
The default is the value specified in the environment variable
|
||||
.Ev TMPDIR No or
|
||||
.Pa /tmp
|
||||
if
|
||||
.Ev TMPDIR
|
||||
is not defined.
|
||||
.It Fl u
|
||||
Unique: suppress all but one in each set of lines having equal keys.
|
||||
If used with the
|
||||
.Fl c
|
||||
option, check that there are no lines with duplicate keys.
|
||||
.El
|
||||
.Pp
|
||||
The following options override the default ordering rules.
|
||||
When ordering options appear independent of key field
|
||||
specifications, the requested field ordering rules are
|
||||
applied globally to all sort keys.
|
||||
When attached to a specific key (see
|
||||
.Fl k ) ,
|
||||
the ordering options override
|
||||
all global ordering options for that key.
|
||||
.Bl -tag -width Fl
|
||||
.It Fl d
|
||||
Only blank space and alphanumeric characters
|
||||
.\" according
|
||||
.\" to the current setting of LC_CTYPE
|
||||
are used
|
||||
in making comparisons.
|
||||
.It Fl f
|
||||
Considers all lowercase characters that have uppercase
|
||||
equivalents to be the same for purposes of comparison.
|
||||
.It Fl i
|
||||
Ignore all non-printable characters.
|
||||
.It Fl l
|
||||
Sort by the string length of the field, not by the field itself.
|
||||
.It Fl n
|
||||
An initial numeric string, consisting of optional blank space, optional
|
||||
minus sign, and zero or more digits (including decimal point)
|
||||
.\" with
|
||||
.\" optional radix character and thousands
|
||||
.\" separator
|
||||
.\" (as defined in the current locale),
|
||||
is sorted by arithmetic value.
|
||||
(The
|
||||
.Fl n
|
||||
option no longer implies the
|
||||
.Fl b
|
||||
option.)
|
||||
.It Fl r
|
||||
Reverse the sense of comparisons.
|
||||
.El
|
||||
.Pp
|
||||
The treatment of field separators can be altered using these options:
|
||||
.Bl -tag -width Fl
|
||||
.It Fl b
|
||||
Ignores leading blank space when determining the start
|
||||
and end of a restricted sort key.
|
||||
A
|
||||
.Fl b
|
||||
option specified before the first
|
||||
.Fl k
|
||||
option applies globally to all
|
||||
.Fl k
|
||||
options.
|
||||
Otherwise, the
|
||||
.Fl b
|
||||
option can be attached independently to each
|
||||
.Ar field
|
||||
argument of the
|
||||
.Fl k
|
||||
option (see below).
|
||||
Note that the
|
||||
.Fl b
|
||||
option has no effect unless key fields are specified.
|
||||
.It Fl t Ar char
|
||||
.Ar char
|
||||
is used as the field separator character.
|
||||
The initial
|
||||
.Ar char
|
||||
is not considered to be part of a field when determining
|
||||
key offsets (see below).
|
||||
Each occurrence of
|
||||
.Ar char
|
||||
is significant (for example,
|
||||
.Dq Ar charchar
|
||||
delimits an empty field).
|
||||
If
|
||||
.Fl t
|
||||
is not specified, the default field separator is a sequence of
|
||||
blank-space characters, and consecutive blank spaces do
|
||||
.Em not
|
||||
delimit an empty field; further, the initial blank space
|
||||
.Em is
|
||||
considered part of a field when determining key offsets.
|
||||
.It Fl R Ar char
|
||||
.Ar char
|
||||
is used as the record separator character.
|
||||
This should be used with discretion;
|
||||
.Fl R Ar \*[Lt]alphanumeric\*[Gt]
|
||||
usually produces undesirable results.
|
||||
The default record separator is newline.
|
||||
.It Fl k Ar field1 Ns Op Li \&, Ns Ar field2
|
||||
Designates the starting position,
|
||||
.Ar field1 ,
|
||||
and optional ending position,
|
||||
.Ar field2 ,
|
||||
of a key field.
|
||||
The
|
||||
.Fl k
|
||||
option replaces the obsolescent options
|
||||
.Cm \(pl Ns Ar pos1
|
||||
and
|
||||
.Fl Ns Ar pos2 .
|
||||
.El
|
||||
.Pp
|
||||
The following operands are available:
|
||||
.Bl -tag -width Ar
|
||||
.It Ar file
|
||||
The pathname of a file to be sorted, merged, or checked.
|
||||
If no
|
||||
.Ar file
|
||||
operands are specified, or if
|
||||
a
|
||||
.Ar file
|
||||
operand is
|
||||
.Fl ,
|
||||
the standard input is used.
|
||||
.El
|
||||
.Pp
|
||||
A field is defined as a minimal sequence of characters followed by a
|
||||
field separator or a newline character.
|
||||
By default, the first
|
||||
blank space of a sequence of blank spaces acts as the field separator.
|
||||
All blank spaces in a sequence of blank spaces are considered
|
||||
as part of the next field; for example, all blank spaces at
|
||||
the beginning of a line are considered to be part of the
|
||||
first field.
|
||||
.Pp
|
||||
Fields are specified
|
||||
by the
|
||||
.Fl k
|
||||
.Ar field1 Ns Op \&, Ns Ar field2
|
||||
argument.
|
||||
A missing
|
||||
.Ar field2
|
||||
argument defaults to the end of a line.
|
||||
.Pp
|
||||
The arguments
|
||||
.Ar field1
|
||||
and
|
||||
.Ar field2
|
||||
have the form
|
||||
.Ar m Ns Li \&. Ns Ar n
|
||||
and can be followed by one or more of the letters
|
||||
.Cm b , d , f , i ,
|
||||
.Cm l , n ,
|
||||
and
|
||||
.Cm r ,
|
||||
which correspond to the options discussed above.
|
||||
A
|
||||
.Ar field1
|
||||
position specified by
|
||||
.Ar m Ns Li \&. Ns Ar n
|
||||
.Pq Ar m , n No \*[Gt] 0
|
||||
is interpreted as the
|
||||
.Ar n Ns th
|
||||
character in the
|
||||
.Ar m Ns th
|
||||
field.
|
||||
A missing
|
||||
.Li \&. Ns Ar n
|
||||
in
|
||||
.Ar field1
|
||||
means
|
||||
.Ql \&.1 ,
|
||||
indicating the first character of the
|
||||
.Ar m Ns th
|
||||
field; if the
|
||||
.Fl b
|
||||
option is in effect,
|
||||
.Ar n
|
||||
is counted from the first non-blank character in the
|
||||
.Ar m Ns th
|
||||
field;
|
||||
.Ar m Ns Li \&.1b
|
||||
refers to the first non-blank character in the
|
||||
.Ar m Ns th
|
||||
field.
|
||||
.Pp
|
||||
A
|
||||
.Ar field2
|
||||
position specified by
|
||||
.Ar m Ns Li \&. Ns Ar n
|
||||
is interpreted as
|
||||
the
|
||||
.Ar n Ns th
|
||||
character (including separators) of the
|
||||
.Ar m Ns th
|
||||
field.
|
||||
A missing
|
||||
.Li \&. Ns Ar n
|
||||
indicates the last character of the
|
||||
.Ar m Ns th
|
||||
field;
|
||||
.Ar m
|
||||
= \&0
|
||||
designates the end of a line.
|
||||
Thus the option
|
||||
.Fl k
|
||||
.Sm off
|
||||
.Xo
|
||||
.Ar v Li \&. Ar x Li \&,
|
||||
.Ar w Li \&. Ar y
|
||||
.Xc
|
||||
.Sm on
|
||||
is synonymous with the obsolescent option
|
||||
.Sm off
|
||||
.Cm \(pl Ar v-\&1 Li \&. Ar x-\&1
|
||||
.Fl Ar w-\&1 Li \&. Ar y ;
|
||||
.Sm on
|
||||
when
|
||||
.Ar y
|
||||
is omitted,
|
||||
.Fl k
|
||||
.Sm off
|
||||
.Ar v Li \&. Ar x Li \&, Ar w
|
||||
.Sm on
|
||||
is synonymous with
|
||||
.Sm off
|
||||
.Cm \(pl Ar v-\&1 Li \&. Ar x-\&1
|
||||
.Fl Ar w+1 Li \&.0 .
|
||||
.Sm on
|
||||
The obsolescent
|
||||
.Cm \(pl Ns Ar pos1
|
||||
.Fl Ns Ar pos2
|
||||
option is still supported, except for
|
||||
.Fl Ns Ar w Ns Li \&.0b ,
|
||||
which has no
|
||||
.Fl k
|
||||
equivalent.
|
||||
.Sh ENVIRONMENT
|
||||
If the following environment variable exists, it is used by
|
||||
.Nm .
|
||||
.Bl -tag -width Ev
|
||||
.It Ev TMPDIR
|
||||
.Nm
|
||||
uses the contents of the
|
||||
.Ev TMPDIR
|
||||
environment variable as the path in which to store
|
||||
temporary files.
|
||||
.El
|
||||
.Sh FILES
|
||||
.Bl -tag -width outputNUMBER+some -compact
|
||||
.It Pa /tmp/sort.*
|
||||
Default temporary files.
|
||||
.It Ar output Ns NUMBER
|
||||
Temporary file which is used for output if
|
||||
.Ar output
|
||||
already exists.
|
||||
Once sorting is finished, this file replaces
|
||||
.Ar output
|
||||
(via
|
||||
.Xr link 2
|
||||
and
|
||||
.Xr unlink 2 ) .
|
||||
.El
|
||||
.Sh EXIT STATUS
|
||||
Sort exits with one of the following values:
|
||||
.Bl -tag -width flag -compact
|
||||
.It 0
|
||||
Normal behavior.
|
||||
.It 1
|
||||
On disorder (or non-uniqueness) with the
|
||||
.Fl c
|
||||
option
|
||||
.It 2
|
||||
An error occurred.
|
||||
.El
|
||||
.Sh SEE ALSO
|
||||
.Xr comm 1 ,
|
||||
.Xr join 1 ,
|
||||
.Xr uniq 1 ,
|
||||
.Xr qsort 3 ,
|
||||
.Xr radixsort 3
|
||||
.Sh HISTORY
|
||||
A
|
||||
.Nm
|
||||
command appeared in
|
||||
.At v5 .
|
||||
This
|
||||
.Nm
|
||||
implementation appeared in
|
||||
.Bx 4.4
|
||||
and has been used since
|
||||
.Nx 1.6 .
|
||||
.Sh BUGS
|
||||
POSIX requires the locale's thousands separator be ignored in numbers.
|
||||
It may be faster to sort very large files in pieces and then explicitly
|
||||
merge them.
|
||||
.Sh NOTES
|
||||
This
|
||||
.Nm
|
||||
has no limits on input line length (other than imposed by available
|
||||
memory) or any restrictions on bytes allowed within lines.
|
||||
.Pp
|
||||
To protect data
|
||||
.Nm
|
||||
.Fl o
|
||||
calls
|
||||
.Xr link 2
|
||||
and
|
||||
.Xr unlink 2 ,
|
||||
and thus fails on protected directories.
|
||||
.Pp
|
||||
Input files should be text files.
|
||||
If a file doesn't end with a record separator (which is typically newline), the
|
||||
.Nm
|
||||
utility silently supplies one.
|
||||
.Pp
|
||||
The current
|
||||
.Nm
|
||||
uses lexicographic radix sorting, which requires
|
||||
that sort keys be kept in memory (as opposed to previous versions which used quick
|
||||
and merge sorts and did not).
|
||||
Thus performance depends highly on efficient choice of sort keys, and the
|
||||
.Fl b
|
||||
option and the
|
||||
.Ar field2
|
||||
argument of the
|
||||
.Fl k
|
||||
option should be used whenever possible.
|
||||
Similarly,
|
||||
.Nm
|
||||
.Fl k1f
|
||||
is equivalent to
|
||||
.Nm
|
||||
.Fl f
|
||||
and may take twice as long.
|
419
usr.bin/sort/sort.c
Normal file
419
usr.bin/sort/sort.c
Normal file
|
@ -0,0 +1,419 @@
|
|||
/* $NetBSD: sort.c,v 1.61 2011/09/16 15:39:29 joerg Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Ben Harris and Jaromir Dolecek.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Sort sorts a file using an optional user-defined key.
|
||||
* Sort uses radix sort for internal sorting, and allows
|
||||
* a choice of merge sort and radix sort for external sorting.
|
||||
*/
|
||||
|
||||
#include <util.h>
|
||||
#include "sort.h"
|
||||
#include "fsort.h"
|
||||
#include "pathnames.h"
|
||||
|
||||
#ifndef lint
|
||||
__COPYRIGHT("@(#) Copyright (c) 1993\
|
||||
The Regents of the University of California. All rights reserved.");
|
||||
#endif /* not lint */
|
||||
|
||||
__RCSID("$NetBSD: sort.c,v 1.61 2011/09/16 15:39:29 joerg Exp $");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
|
||||
#include <paths.h>
|
||||
#include <signal.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <locale.h>
|
||||
|
||||
int REC_D = '\n';
|
||||
u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */
|
||||
|
||||
/*
|
||||
* weight tables. Gweights is one of ascii, Rascii..
|
||||
* modified to weight rec_d = 0 (or 255)
|
||||
*/
|
||||
u_char *const weight_tables[4] = { ascii, Rascii, Ftable, RFtable };
|
||||
u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS];
|
||||
|
||||
int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0;
|
||||
int REVERSE = 0;
|
||||
int posix_sort;
|
||||
|
||||
unsigned int debug_flags = 0;
|
||||
|
||||
static char toutpath[MAXPATHLEN];
|
||||
|
||||
const char *tmpdir; /* where temporary files should be put */
|
||||
|
||||
static void cleanup(void);
|
||||
static void onsignal(int);
|
||||
__dead static void usage(const char *);
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int ch, i, stdinflag = 0;
|
||||
char cflag = 0, mflag = 0;
|
||||
char *outfile, *outpath = 0;
|
||||
struct field *fldtab;
|
||||
size_t fldtab_sz, fld_cnt;
|
||||
struct filelist filelist;
|
||||
int num_input_files;
|
||||
FILE *outfp = NULL;
|
||||
#if !defined(__minix)
|
||||
struct rlimit rl;
|
||||
#endif /* !defined(__minix) */
|
||||
struct stat st;
|
||||
|
||||
setlocale(LC_ALL, "");
|
||||
|
||||
#if !defined(__minix)
|
||||
/* bump RLIMIT_NOFILE to maximum our hard limit allows */
|
||||
if (getrlimit(RLIMIT_NOFILE, &rl) < 0)
|
||||
err(2, "getrlimit");
|
||||
rl.rlim_cur = rl.rlim_max;
|
||||
if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
|
||||
err(2, "setrlimit");
|
||||
#endif /* !defined(__minix) */
|
||||
|
||||
d_mask[REC_D = '\n'] = REC_D_F;
|
||||
d_mask['\t'] = d_mask[' '] = BLANK | FLD_D;
|
||||
|
||||
/* fldtab[0] is the global options. */
|
||||
fldtab_sz = 3;
|
||||
fld_cnt = 0;
|
||||
fldtab = emalloc(fldtab_sz * sizeof(*fldtab));
|
||||
memset(fldtab, 0, fldtab_sz * sizeof(*fldtab));
|
||||
|
||||
#define SORT_OPTS "bcdD:fHik:lmno:rR:sSt:T:ux"
|
||||
|
||||
/* Convert "+field" args to -f format */
|
||||
fixit(&argc, argv, SORT_OPTS);
|
||||
|
||||
if (!(tmpdir = getenv("TMPDIR")))
|
||||
tmpdir = _PATH_TMP;
|
||||
|
||||
while ((ch = getopt(argc, argv, SORT_OPTS)) != -1) {
|
||||
switch (ch) {
|
||||
case 'b':
|
||||
fldtab[0].flags |= BI | BT;
|
||||
break;
|
||||
case 'c':
|
||||
cflag = 1;
|
||||
break;
|
||||
case 'D': /* Debug flags */
|
||||
for (i = 0; optarg[i]; i++)
|
||||
debug_flags |= 1 << (optarg[i] & 31);
|
||||
break;
|
||||
case 'd': case 'f': case 'i': case 'n': case 'l':
|
||||
fldtab[0].flags |= optval(ch, 0);
|
||||
break;
|
||||
case 'H':
|
||||
/* -H was ; use merge sort for blocks of large files' */
|
||||
/* That is now the default. */
|
||||
break;
|
||||
case 'k':
|
||||
fldtab = erealloc(fldtab, (fldtab_sz + 1) * sizeof(*fldtab));
|
||||
memset(&fldtab[fldtab_sz], 0, sizeof(fldtab[0]));
|
||||
fldtab_sz++;
|
||||
|
||||
setfield(optarg, &fldtab[++fld_cnt], fldtab[0].flags);
|
||||
break;
|
||||
case 'm':
|
||||
mflag = 1;
|
||||
break;
|
||||
case 'o':
|
||||
outpath = optarg;
|
||||
break;
|
||||
case 'r':
|
||||
REVERSE = 1;
|
||||
break;
|
||||
case 's':
|
||||
/*
|
||||
* Nominally 'stable sort', keep lines with equal keys
|
||||
* in input file order. (Default for NetBSD)
|
||||
* (-s for GNU sort compatibility.)
|
||||
*/
|
||||
posix_sort = 0;
|
||||
break;
|
||||
case 'S':
|
||||
/*
|
||||
* Reverse of -s!
|
||||
* This needs to enforce a POSIX sort where records
|
||||
* with equal keys are then sorted by the raw data.
|
||||
* Currently not implemented!
|
||||
* (using libc radixsort() v sradixsort() doesn't
|
||||
* have the desired effect.)
|
||||
*/
|
||||
posix_sort = 1;
|
||||
break;
|
||||
case 't':
|
||||
if (SEP_FLAG)
|
||||
usage("multiple field delimiters");
|
||||
SEP_FLAG = 1;
|
||||
d_mask[' '] &= ~FLD_D;
|
||||
d_mask['\t'] &= ~FLD_D;
|
||||
d_mask[(u_char)*optarg] |= FLD_D;
|
||||
if (d_mask[(u_char)*optarg] & REC_D_F)
|
||||
errx(2, "record/field delimiter clash");
|
||||
break;
|
||||
case 'R':
|
||||
if (REC_D != '\n')
|
||||
usage("multiple record delimiters");
|
||||
REC_D = *optarg;
|
||||
if (REC_D == '\n')
|
||||
break;
|
||||
if (optarg[1] != '\0') {
|
||||
char *ep;
|
||||
int t = 0;
|
||||
if (optarg[0] == '\\')
|
||||
optarg++, t = 8;
|
||||
REC_D = (int)strtol(optarg, &ep, t);
|
||||
if (*ep != '\0' || REC_D < 0 ||
|
||||
REC_D >= (int)__arraycount(d_mask))
|
||||
errx(2, "invalid record delimiter %s",
|
||||
optarg);
|
||||
}
|
||||
d_mask['\n'] = d_mask[' '];
|
||||
d_mask[REC_D] = REC_D_F;
|
||||
break;
|
||||
case 'T':
|
||||
/* -T tmpdir */
|
||||
tmpdir = optarg;
|
||||
break;
|
||||
case 'u':
|
||||
UNIQUE = 1;
|
||||
break;
|
||||
case '?':
|
||||
default:
|
||||
usage(NULL);
|
||||
}
|
||||
}
|
||||
|
||||
if (UNIQUE)
|
||||
/* Don't sort on raw record if keys match */
|
||||
posix_sort = 0;
|
||||
|
||||
if (cflag && argc > optind+1)
|
||||
errx(2, "too many input files for -c option");
|
||||
if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) {
|
||||
outpath = argv[argc-1];
|
||||
argc -= 2;
|
||||
}
|
||||
if (mflag && argc - optind > (MAXFCT - (16+1))*16)
|
||||
errx(2, "too many input files for -m option");
|
||||
|
||||
for (i = optind; i < argc; i++) {
|
||||
/* allow one occurrence of /dev/stdin */
|
||||
if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) {
|
||||
if (stdinflag)
|
||||
warnx("ignoring extra \"%s\" in file list",
|
||||
argv[i]);
|
||||
else
|
||||
stdinflag = 1;
|
||||
|
||||
/* change to /dev/stdin if '-' */
|
||||
if (argv[i][0] == '-') {
|
||||
static char path_stdin[] = _PATH_STDIN;
|
||||
argv[i] = path_stdin;
|
||||
}
|
||||
|
||||
} else if ((ch = access(argv[i], R_OK)))
|
||||
err(2, "%s", argv[i]);
|
||||
}
|
||||
|
||||
if (fldtab[1].icol.num == 0) {
|
||||
/* No sort key specified */
|
||||
if (fldtab[0].flags & (I|D|F|N|L)) {
|
||||
/* Modified - generate a key that covers the line */
|
||||
fldtab[0].flags &= ~(BI|BT);
|
||||
setfield("1", &fldtab[++fld_cnt], fldtab->flags);
|
||||
fldreset(fldtab);
|
||||
} else {
|
||||
/* Unmodified, just compare the line */
|
||||
SINGL_FLD = 1;
|
||||
fldtab[0].icol.num = 1;
|
||||
}
|
||||
} else {
|
||||
fldreset(fldtab);
|
||||
}
|
||||
|
||||
settables();
|
||||
|
||||
if (optind == argc) {
|
||||
static const char * const names[] = { _PATH_STDIN, NULL };
|
||||
filelist.names = names;
|
||||
num_input_files = 1;
|
||||
} else {
|
||||
filelist.names = (const char * const *) &argv[optind];
|
||||
num_input_files = argc - optind;
|
||||
}
|
||||
|
||||
if (cflag) {
|
||||
order(&filelist, fldtab);
|
||||
/* NOT REACHED */
|
||||
}
|
||||
|
||||
if (!outpath) {
|
||||
toutpath[0] = '\0'; /* path not used in this case */
|
||||
outfile = outpath = toutpath;
|
||||
outfp = stdout;
|
||||
} else if (lstat(outpath, &st) == 0
|
||||
&& !S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
|
||||
/* output file exists and isn't character or block device */
|
||||
struct sigaction act;
|
||||
static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE,
|
||||
#if defined(__minix)
|
||||
SIGVTALRM, SIGPROF, 0};
|
||||
#else
|
||||
SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0};
|
||||
#endif /* defined(__minix) */
|
||||
int outfd;
|
||||
errno = 0;
|
||||
if (access(outpath, W_OK))
|
||||
err(2, "%s", outpath);
|
||||
(void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX",
|
||||
outpath);
|
||||
if ((outfd = mkstemp(toutpath)) == -1)
|
||||
err(2, "Cannot create temporary file `%s'", toutpath);
|
||||
(void)atexit(cleanup);
|
||||
act.sa_handler = onsignal;
|
||||
(void) sigemptyset(&act.sa_mask);
|
||||
act.sa_flags = SA_RESTART | SA_RESETHAND;
|
||||
for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */
|
||||
sigaction(sigtable[i], &act, 0);
|
||||
outfile = toutpath;
|
||||
if ((outfp = fdopen(outfd, "w")) == NULL)
|
||||
err(2, "Cannot open temporary file `%s'", toutpath);
|
||||
} else {
|
||||
outfile = outpath;
|
||||
|
||||
if ((outfp = fopen(outfile, "w")) == NULL)
|
||||
err(2, "output file %s", outfile);
|
||||
}
|
||||
|
||||
if (mflag)
|
||||
fmerge(&filelist, num_input_files, outfp, fldtab);
|
||||
else
|
||||
fsort(&filelist, num_input_files, outfp, fldtab);
|
||||
|
||||
if (outfile != outpath) {
|
||||
if (access(outfile, F_OK))
|
||||
err(2, "%s", outfile);
|
||||
|
||||
/*
|
||||
* Copy file permissions bits of the original file.
|
||||
* st is initialized above, when we create the
|
||||
* temporary spool file.
|
||||
*/
|
||||
if (lchmod(outfile, st.st_mode & ALLPERMS) != 0) {
|
||||
err(2, "cannot chmod %s: output left in %s",
|
||||
outpath, outfile);
|
||||
}
|
||||
|
||||
(void)unlink(outpath);
|
||||
if (link(outfile, outpath))
|
||||
err(2, "cannot link %s: output left in %s",
|
||||
outpath, outfile);
|
||||
(void)unlink(outfile);
|
||||
toutpath[0] = 0;
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/*
 * Signal handler installed by main() while a temporary output file
 * exists: remove that file, then let the signal terminate the process.
 *
 * Simply returning would let the sort carry on writing into the
 * already-unlinked temporary file and fail only at the very end, so the
 * default disposition is restored and the signal is raised again.
 * unlink(), signal() and raise() are async-signal-safe.
 */
static void
onsignal(int sig)
{
	cleanup();

	(void)signal(sig, SIG_DFL);
	(void)raise(sig);
}
|
||||
|
||||
static void
|
||||
cleanup(void)
|
||||
{
|
||||
if (toutpath[0])
|
||||
(void)unlink(toutpath);
|
||||
}
|
||||
|
||||
/*
 * Print an optional diagnostic (prefixed with the program name) followed
 * by the usage synopsis on stderr, then exit with status 2.
 *
 * msg: diagnostic text, or NULL to print the synopsis only.
 */
static void
usage(const char *msg)
{
	const char *prog = getprogname();

	if (msg != NULL)
		(void)fprintf(stderr, "%s: %s\n", prog, msg);

	(void)fprintf(stderr,
	    "usage: %s [-bcdfHilmnrSsu] [-k field1[,field2]] [-o output]"
	    " [-R char] [-T dir]", prog);
	(void)fputs(" [-t char] [file ...]\n", stderr);

	exit(2);
}
|
||||
|
||||
/*
 * (Re)allocate a record buffer through erealloc().
 *
 * rec:  existing buffer to resize (passed straight to erealloc()).
 * size: requested size in bytes; sizeof(long) - 1 extra bytes are added,
 *       presumably as slack for rounding up to a long boundary
 *       (NOTE(review): confirm against the callers).
 *
 * Returns whatever erealloc() returns.
 */
RECHEADER *
allocrec(RECHEADER *rec, size_t size)
{
	const size_t padded = size + sizeof(long) - 1;

	return (erealloc(rec, padded));
}
|
201
usr.bin/sort/sort.h
Normal file
201
usr.bin/sort/sort.h
Normal file
|
@ -0,0 +1,201 @@
|
|||
/* $NetBSD: sort.h,v 1.34 2011/09/16 15:39:29 joerg Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Ben Harris and Jaromir Dolecek.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)sort.h 8.1 (Berkeley) 6/6/93
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
|
||||
#include <err.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <limits.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define NBINS 256
|
||||
|
||||
/* values for masks, weights, and other flags. */
|
||||
/* R and F get used to index weight_tables[] */
|
||||
#define R 0x01 /* Field is reversed */
|
||||
#define F 0x02 /* weight lower and upper case the same */
|
||||
#define I 0x04 /* mask out non-printable characters */
|
||||
#define D 0x08 /* sort alphanumeric characters only */
|
||||
#define N 0x10 /* Field is a number */
|
||||
#define BI 0x20 /* ignore blanks in icol */
|
||||
#define BT 0x40 /* ignore blanks in tcol */
|
||||
#define L 0x80 /* Sort by field length */
|
||||
|
||||
/* masks for delimiters: blanks, fields, and termination. */
|
||||
#define BLANK 1 /* ' ', '\t'; '\n' if -R is invoked */
|
||||
#define FLD_D 2 /* ' ', '\t' default; from -t otherwise */
|
||||
#define REC_D_F 4 /* '\n' default; from -R otherwise */
|
||||
|
||||
#define min(a, b) ((a) < (b) ? (a) : (b))
|
||||
#define max(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
/*
 * Close a stdio stream; exit with status 2 if fclose() reports an error.
 * The stream pointer is cast to void * because that is what %p expects.
 * Note that `file' is evaluated twice.
 */
#define FCLOSE(file) {						\
	if (EOF == fclose(file))				\
		err(2, "%p", (void *)(file));			\
}
|
||||
|
||||
/*
 * Write n items of `size' bytes each from ptr to stream f and exit with
 * status 2 unless every item was written.  Comparing against n (rather
 * than testing for a zero return) also catches short writes, and does not
 * treat a legitimate zero-item write as an error.
 */
#define EWRITE(ptr, size, n, f) {				\
	const size_t ewrite_n_ = (n);				\
	if (fwrite((ptr), (size), ewrite_n_, (f)) != ewrite_n_)	\
		err(2, NULL);					\
}
|
||||
|
||||
/* Records are limited to MAXBUFSIZE (8MB) and less if you want to sort
|
||||
* in a sane way.
|
||||
* Anyone who wants to sort data records longer than 2GB definitely needs a
|
||||
* different program! */
|
||||
typedef unsigned int length_t;
|
||||
|
||||
/* A record is a key/line pair starting at rec.data. It has a total length
|
||||
* and an offset to the start of the line half of the pair.
|
||||
*/
|
||||
typedef struct recheader {
|
||||
length_t length; /* total length of key and line */
|
||||
length_t offset; /* to line */
|
||||
int keylen; /* length of key */
|
||||
u_char data[]; /* key then line */
|
||||
} RECHEADER;
|
||||
|
||||
/* This is the column as seen by struct field. It is used by enterfield.
|
||||
* They are matched with corresponding coldescs during initialization.
|
||||
*/
|
||||
struct column {
|
||||
struct coldesc *p;
|
||||
int num;
|
||||
int indent;
|
||||
};
|
||||
|
||||
/* a coldesc has a number and pointers to the beginning and end of the
|
||||
* corresponding column in the current line. This is determined in enterkey.
|
||||
*/
|
||||
typedef struct coldesc {
|
||||
u_char *start;
|
||||
u_char *end;
|
||||
int num;
|
||||
} COLDESC;
|
||||
|
||||
/* A field has an initial and final column; an omitted final column
|
||||
* implies the end of the line. Flags regulate omission of blanks and
|
||||
* numerical sorts; mask determines which characters are ignored (from -i, -d);
|
||||
* weights determines the sort weights of a character (from -f, -r).
|
||||
*
|
||||
* The first field contains the global flags etc.
|
||||
* The list terminates when icol = 0.
|
||||
*/
|
||||
struct field {
|
||||
struct column icol;
|
||||
struct column tcol;
|
||||
u_int flags;
|
||||
u_char *mask;
|
||||
u_char *weights;
|
||||
};
|
||||
|
||||
struct filelist {
|
||||
const char * const * names;
|
||||
};
|
||||
|
||||
typedef int (*get_func_t)(FILE *, RECHEADER *, u_char *, struct field *);
|
||||
typedef void (*put_func_t)(const RECHEADER *, FILE *);
|
||||
|
||||
extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
|
||||
extern u_char *const weight_tables[4]; /* ascii, Rascii, Ftable, RFtable */
|
||||
extern u_char d_mask[NBINS];
|
||||
extern int SINGL_FLD, SEP_FLAG, UNIQUE, REVERSE;
|
||||
extern int posix_sort;
|
||||
extern int REC_D;
|
||||
extern const char *tmpdir;
|
||||
extern struct coldesc *clist;
|
||||
extern int ncols;
|
||||
|
||||
/* Non-zero when the debug flag for letter `ch' is set; unsigned shift so bit 31 is well defined. */
#define DEBUG(ch) (debug_flags & (1U << ((ch) & 31)))
|
||||
extern unsigned int debug_flags;
|
||||
|
||||
RECHEADER *allocrec(RECHEADER *, size_t);
|
||||
void append(RECHEADER **, int, FILE *, void (*)(const RECHEADER *, FILE *));
|
||||
void concat(FILE *, FILE *);
|
||||
length_t enterkey(RECHEADER *, const u_char *, u_char *, size_t, struct field *);
|
||||
void fixit(int *, char **, const char *);
|
||||
void fldreset(struct field *);
|
||||
FILE *ftmp(void);
|
||||
void fmerge(struct filelist *, int, FILE *, struct field *);
|
||||
void save_for_merge(FILE *, get_func_t, struct field *);
|
||||
void merge_sort(FILE *, put_func_t, struct field *);
|
||||
void fsort(struct filelist *, int, FILE *, struct field *);
|
||||
int geteasy(FILE *, RECHEADER *, u_char *, struct field *);
|
||||
int makekey(FILE *, RECHEADER *, u_char *, struct field *);
|
||||
int makeline(FILE *, RECHEADER *, u_char *, struct field *);
|
||||
void makeline_copydown(RECHEADER *);
|
||||
int optval(int, int);
|
||||
__dead void order(struct filelist *, struct field *);
|
||||
void putline(const RECHEADER *, FILE *);
|
||||
void putrec(const RECHEADER *, FILE *);
|
||||
void putkeydump(const RECHEADER *, FILE *);
|
||||
void rd_append(int, int, int, FILE *, u_char *, u_char *);
|
||||
void radix_sort(RECHEADER **, RECHEADER **, int);
|
||||
int setfield(const char *, struct field *, int);
|
||||
void settables(void);
|
106
usr.bin/sort/tmp.c
Normal file
106
usr.bin/sort/tmp.c
Normal file
|
@ -0,0 +1,106 @@
|
|||
/* $NetBSD: tmp.c,v 1.16 2009/11/06 18:34:22 joerg Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Ben Harris and Jaromir Dolecek.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
|
||||
__RCSID("$NetBSD: tmp.c,v 1.16 2009/11/06 18:34:22 joerg Exp $");
|
||||
|
||||
#include <sys/param.h>
|
||||
|
||||
#include <err.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "sort.h"
|
||||
#include "pathnames.h"
|
||||
|
||||
#define _NAME_TMP "sort.XXXXXXXX"
|
||||
|
||||
FILE *
|
||||
ftmp(void)
|
||||
{
|
||||
sigset_t set, oset;
|
||||
FILE *fp;
|
||||
int fd;
|
||||
char path[MAXPATHLEN];
|
||||
|
||||
(void)snprintf(path, sizeof(path), "%s%s%s", tmpdir,
|
||||
(tmpdir[strlen(tmpdir)-1] != '/') ? "/" : "", _NAME_TMP);
|
||||
|
||||
sigfillset(&set);
|
||||
(void)sigprocmask(SIG_BLOCK, &set, &oset);
|
||||
if ((fd = mkstemp(path)) < 0)
|
||||
err(2, "ftmp: mkstemp(\"%s\")", path);
|
||||
if (!(fp = fdopen(fd, "w+")))
|
||||
err(2, "ftmp: fdopen(\"%s\")", path);
|
||||
if (!DEBUG('t'))
|
||||
(void)unlink(path);
|
||||
|
||||
(void)sigprocmask(SIG_SETMASK, &oset, NULL);
|
||||
return (fp);
|
||||
}
|
Loading…
Reference in a new issue