Upgrading sort, which is needed by lorder
Change-Id: I64ac0509f4360c947a677600db77e7612a7cbebd
This commit is contained in:
parent
a7ab29bf57
commit
0fbbaa43e9
21 changed files with 3416 additions and 1307 deletions
|
@ -23,7 +23,7 @@ SUBDIR= add_route arp ash at backup banner basename btrace cal \
|
||||||
ramdisk rarpd rawspeed rcp rdate readclock \
|
ramdisk rarpd rawspeed rcp rdate readclock \
|
||||||
reboot remsync rev rget rlogin \
|
reboot remsync rev rget rlogin \
|
||||||
rotate rsh rshd service setup shar acksize \
|
rotate rsh rshd service setup shar acksize \
|
||||||
sleep slip sort spell split sprofalyze sprofdiff srccrc \
|
sleep slip spell split sprofalyze sprofdiff srccrc \
|
||||||
stty svclog svrctl swifi sync synctree sysenv \
|
stty svclog svrctl swifi sync synctree sysenv \
|
||||||
syslogd tail tcpd tcpdp tcpstat tee telnet \
|
syslogd tail tcpd tcpdp tcpstat tee telnet \
|
||||||
telnetd term termcap tget time touch tr \
|
telnetd term termcap tget time touch tr \
|
||||||
|
|
|
@ -1,4 +0,0 @@
|
||||||
PROG= sort
|
|
||||||
MAN=
|
|
||||||
|
|
||||||
.include <bsd.prog.mk>
|
|
1217
commands/sort/sort.c
1217
commands/sort/sort.c
File diff suppressed because it is too large
Load diff
|
@ -15,7 +15,7 @@ MAN= ash.1 at.1 banner.1 basename.1 \
|
||||||
paste.1 ping.1 playwave.1 pr.1 prep.1 \
|
paste.1 ping.1 playwave.1 pr.1 prep.1 \
|
||||||
profile.1 ps.1 pwd.1 rcp.1 recwave.1 \
|
profile.1 ps.1 pwd.1 rcp.1 recwave.1 \
|
||||||
ref.1 remsync.1 rget.1 rlogin.1 rsh.1 rz.1 \
|
ref.1 remsync.1 rget.1 rlogin.1 rsh.1 rz.1 \
|
||||||
shar.1 acksize.1 sleep.1 sort.1 spell.1 \
|
shar.1 acksize.1 sleep.1 spell.1 \
|
||||||
split.1 stty.1 svc.1 svrctl.1 \
|
split.1 stty.1 svc.1 svrctl.1 \
|
||||||
synctree.1 sysenv.1 sz.1 tail.1 tee.1 telnet.1 template.1 \
|
synctree.1 sysenv.1 sz.1 tail.1 tee.1 telnet.1 template.1 \
|
||||||
term.1 termcap.1 tget.1 time.1 tr.1 true.1 \
|
term.1 termcap.1 tget.1 time.1 tr.1 true.1 \
|
||||||
|
|
|
@ -1,83 +0,0 @@
|
||||||
.TH SORT 1
|
|
||||||
.SH NAME
|
|
||||||
sort \- sort a file of ASCII lines
|
|
||||||
.SH SYNOPSIS
|
|
||||||
\fBsort\fR [\fB\-bcdf\&imnru\fR]\fR [\fB\-t\fIc\fR] [\fB\-o \fIname\fR] [\fB+\fIpos1\fR] [\fB\-\fIpos2\fR] \fIfile\fR ...\fR
|
|
||||||
.br
|
|
||||||
.de FL
|
|
||||||
.TP
|
|
||||||
\\fB\\$1\\fR
|
|
||||||
\\$2
|
|
||||||
..
|
|
||||||
.de EX
|
|
||||||
.TP 20
|
|
||||||
\\fB\\$1\\fR
|
|
||||||
# \\$2
|
|
||||||
..
|
|
||||||
.SH OPTIONS
|
|
||||||
.TP 5
|
|
||||||
.B \-b
|
|
||||||
# Skip leading blanks when making comparisons
|
|
||||||
.TP 5
|
|
||||||
.B \-c
|
|
||||||
# Check to see if a file is sorted
|
|
||||||
.TP 5
|
|
||||||
.B \-d
|
|
||||||
# Dictionary order: ignore punctuation
|
|
||||||
.TP 5
|
|
||||||
.B \-f
|
|
||||||
# Fold upper case onto lower case
|
|
||||||
.TP 5
|
|
||||||
.B \-i
|
|
||||||
# Ignore nonASCII characters
|
|
||||||
.TP 5
|
|
||||||
.B \-m
|
|
||||||
# Merge presorted files
|
|
||||||
.TP 5
|
|
||||||
.B \-n
|
|
||||||
# Numeric sort order (decimal)
|
|
||||||
.TP 5
|
|
||||||
.B \-x
|
|
||||||
# Numeric sort order (hex)
|
|
||||||
.TP 5
|
|
||||||
.B \-o
|
|
||||||
# Next argument is output file
|
|
||||||
.TP 5
|
|
||||||
.B \-r
|
|
||||||
# Reverse the sort order
|
|
||||||
.TP 5
|
|
||||||
.B \-t
|
|
||||||
# Following character is field separator
|
|
||||||
.TP 5
|
|
||||||
.B \-u
|
|
||||||
# Unique mode (delete duplicate lines)
|
|
||||||
.SH EXAMPLES
|
|
||||||
.TP 20
|
|
||||||
.B sort \-nr file
|
|
||||||
# Sort keys numerically, reversed
|
|
||||||
.TP 20
|
|
||||||
.B sort +2 \-4 file
|
|
||||||
# Sort using fields 2 and 3 as key
|
|
||||||
.TP 20
|
|
||||||
.B sort +2 \-t: \-o out
|
|
||||||
# Field separator is \fI:\fP
|
|
||||||
.TP 20
|
|
||||||
.B sort +.3 \-.6
|
|
||||||
# Characters 3 through 5 form the key
|
|
||||||
.SH DESCRIPTION
|
|
||||||
.PP
|
|
||||||
.I Sort
|
|
||||||
sorts one or more files.
|
|
||||||
If no files are specified, \fIstdin\fR is sorted.
|
|
||||||
Output is written on standard output, unless \fB\-o\fP is specified.
|
|
||||||
The options \fB+\fIpos1 \fB\-\fIpos2\fR use only fields \fIpos1\fR
|
|
||||||
up to but not including \fIpos2\fR as the sort key, where a field is a
|
|
||||||
string of characters delimited by spaces and tabs, unless a different field
|
|
||||||
delimiter is specified with \fB\-t\fR.
|
|
||||||
Both \fIpos1\fR and \fIpos2\fR have the form \fIm.n\fR where \fIm\fR tells
|
|
||||||
the number of fields and \fIn\fR tells the number of characters.
|
|
||||||
Either \fIm\fR or \fIn\fR may be omitted.
|
|
||||||
.SH "SEE ALSO"
|
|
||||||
.BR comm (1),
|
|
||||||
.BR grep (1),
|
|
||||||
.BR uniq (1).
|
|
|
@ -69,6 +69,7 @@
|
||||||
2012/10/17 12:00:00,usr.bin/Makefile
|
2012/10/17 12:00:00,usr.bin/Makefile
|
||||||
2012/10/17 12:00:00,usr.bin/Makefile.inc
|
2012/10/17 12:00:00,usr.bin/Makefile.inc
|
||||||
2012/10/17 12:00:00,usr.bin/passwd/Makefile
|
2012/10/17 12:00:00,usr.bin/passwd/Makefile
|
||||||
|
2012/10/17 12:00:00,usr.bin/sort
|
||||||
2012/10/17 12:00:00,usr.bin/xinstall
|
2012/10/17 12:00:00,usr.bin/xinstall
|
||||||
2012/10/17 12:00:00,usr.sbin/Makefile
|
2012/10/17 12:00:00,usr.sbin/Makefile
|
||||||
2012/10/17 12:00:00,usr.sbin/Makefile.inc
|
2012/10/17 12:00:00,usr.sbin/Makefile.inc
|
||||||
|
|
|
@ -18,7 +18,7 @@ SUBDIR= \
|
||||||
newgrp \
|
newgrp \
|
||||||
passwd \
|
passwd \
|
||||||
sed seq \
|
sed seq \
|
||||||
stat su \
|
sort stat su \
|
||||||
tic \
|
tic \
|
||||||
uniq \
|
uniq \
|
||||||
xinstall
|
xinstall
|
||||||
|
|
15
usr.bin/sort/Makefile
Normal file
15
usr.bin/sort/Makefile
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
# $NetBSD: Makefile,v 1.8 2009/09/10 22:02:40 dsl Exp $
|
||||||
|
# from: @(#)Makefile 8.1 (Berkeley) 6/6/93
|
||||||
|
|
||||||
|
PROG= sort
|
||||||
|
SRCS= append.c fields.c files.c fsort.c init.c msort.c sort.c tmp.c
|
||||||
|
SRCS+= radix_sort.c
|
||||||
|
|
||||||
|
LDADD+=-lutil
|
||||||
|
DPADD+=${LIBUTIL}
|
||||||
|
|
||||||
|
.if defined(__MINIX)
|
||||||
|
CPPFLAGS+= -Dlchown=chown -Dlchmod=chmod
|
||||||
|
.endif # defined(__MINIX)
|
||||||
|
|
||||||
|
.include <bsd.prog.mk>
|
94
usr.bin/sort/append.c
Normal file
94
usr.bin/sort/append.c
Normal file
|
@ -0,0 +1,94 @@
|
||||||
|
/* $NetBSD: append.c,v 1.23 2009/11/06 18:34:22 joerg Exp $ */
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to The NetBSD Foundation
|
||||||
|
* by Ben Harris and Jaromir Dolecek.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||||
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1993
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Peter McIlroy.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "sort.h"
|
||||||
|
|
||||||
|
__RCSID("$NetBSD: append.c,v 1.23 2009/11/06 18:34:22 joerg Exp $");
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* copy sorted lines to output
|
||||||
|
* Ignore duplicates (marked with -ve keylen)
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
append(RECHEADER **keylist, int nelem, FILE *fp, put_func_t put)
|
||||||
|
{
|
||||||
|
RECHEADER **cpos, **lastkey;
|
||||||
|
RECHEADER *crec;
|
||||||
|
|
||||||
|
lastkey = keylist + nelem;
|
||||||
|
if (REVERSE) {
|
||||||
|
for (cpos = lastkey; cpos-- > keylist;) {
|
||||||
|
crec = *cpos;
|
||||||
|
if (crec->keylen >= 0)
|
||||||
|
put(crec, fp);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (cpos = keylist; cpos < lastkey; cpos++) {
|
||||||
|
crec = *cpos;
|
||||||
|
if (crec->keylen >= 0)
|
||||||
|
put(crec, fp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
377
usr.bin/sort/fields.c
Normal file
377
usr.bin/sort/fields.c
Normal file
|
@ -0,0 +1,377 @@
|
||||||
|
/* $NetBSD: fields.c,v 1.32 2010/12/18 23:09:48 christos Exp $ */
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to The NetBSD Foundation
|
||||||
|
* by Ben Harris and Jaromir Dolecek.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||||
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1993
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Peter McIlroy.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Subroutines to generate sort keys. */
|
||||||
|
|
||||||
|
#include "sort.h"
|
||||||
|
|
||||||
|
__RCSID("$NetBSD: fields.c,v 1.32 2010/12/18 23:09:48 christos Exp $");
|
||||||
|
|
||||||
|
/*
 * Advance 'ptr' past a run of bytes flagged BLANK in d_mask[].
 * 'ptr' must be a modifiable lvalue and is evaluated more than once.
 * Wrapped in do/while(0) so the macro behaves as a single statement.
 */
#define SKIP_BLANKS(ptr) do {						\
	if (BLANK & d_mask[*(ptr)])					\
		while (BLANK & d_mask[*(++(ptr))])			\
			continue;					\
} while (/*CONSTCOND*/0)

/*
 * Advance 'pos' to the byte that ends the next column.
 * When no field separator is in force (SEP_FLAG clear) leading blanks
 * are skipped first.  The scan stops on a byte flagged FLD_D or REC_D_F,
 * or when the following byte is NUL.
 * 'pos' is pre-incremented, so it must point one byte before the first
 * byte to examine.  A variable named 'l_d_mask' must be in scope where
 * the macro is expanded.
 */
#define NEXTCOL(pos) do {						\
	if (!SEP_FLAG)							\
		while (BLANK & l_d_mask[*(++(pos))])			\
			continue;					\
	while ((*((pos) + 1) != '\0') &&				\
	    !((FLD_D | REC_D_F) & l_d_mask[*++(pos)]))			\
		continue;						\
} while (/*CONSTCOND*/0)
|
||||||
|
|
||||||
|
static u_char *enterfield(u_char *, const u_char *, struct field *, int);
|
||||||
|
static u_char *number(u_char *, const u_char *, u_char *, u_char *, int);
|
||||||
|
static u_char *length(u_char *, const u_char *, u_char *, u_char *, int);
|
||||||
|
|
||||||
|
#define DECIMAL_POINT '.'
|
||||||
|
|
||||||
|
/*
|
||||||
|
* constructs sort key with leading recheader, followed by the key,
|
||||||
|
* followed by the original line.
|
||||||
|
*/
|
||||||
|
length_t
|
||||||
|
enterkey(RECHEADER *keybuf, const u_char *keybuf_end, u_char *line_data,
|
||||||
|
size_t line_size, struct field fieldtable[])
|
||||||
|
/* keybuf: pointer to start of key */
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
u_char *l_d_mask;
|
||||||
|
u_char *lineend, *pos;
|
||||||
|
const u_char *endkey;
|
||||||
|
u_char *keypos;
|
||||||
|
struct coldesc *clpos;
|
||||||
|
int col = 1;
|
||||||
|
struct field *ftpos;
|
||||||
|
|
||||||
|
l_d_mask = d_mask;
|
||||||
|
pos = line_data - 1;
|
||||||
|
lineend = line_data + line_size-1;
|
||||||
|
/* don't include rec_delimiter */
|
||||||
|
|
||||||
|
for (i = 0; i < ncols; i++) {
|
||||||
|
clpos = clist + i;
|
||||||
|
for (; (col < clpos->num) && (pos < lineend); col++) {
|
||||||
|
NEXTCOL(pos);
|
||||||
|
}
|
||||||
|
if (pos >= lineend)
|
||||||
|
break;
|
||||||
|
clpos->start = SEP_FLAG ? pos + 1 : pos;
|
||||||
|
NEXTCOL(pos);
|
||||||
|
clpos->end = pos;
|
||||||
|
col++;
|
||||||
|
if (pos >= lineend) {
|
||||||
|
clpos->end = lineend;
|
||||||
|
i++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (; i <= ncols; i++)
|
||||||
|
clist[i].start = clist[i].end = lineend;
|
||||||
|
if (clist[0].start < line_data)
|
||||||
|
clist[0].start++;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We write the sort keys (concatenated) followed by the
|
||||||
|
* original line data (for output) as the 'keybuf' data.
|
||||||
|
* keybuf->length is the number of key bytes + data bytes.
|
||||||
|
* keybuf->offset is the number of key bytes.
|
||||||
|
* We add a record separator weight after the key in case
|
||||||
|
* (as is usual) we need to preserve the order of equal lines,
|
||||||
|
* and for 'sort -u'.
|
||||||
|
* The key itself will have had the correct weight applied.
|
||||||
|
*/
|
||||||
|
keypos = keybuf->data;
|
||||||
|
endkey = keybuf_end - line_size - 1;
|
||||||
|
if (endkey <= keypos)
|
||||||
|
/* No room for any key bytes */
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++) {
|
||||||
|
if ((keypos = enterfield(keypos, endkey, ftpos,
|
||||||
|
fieldtable->flags)) == NULL)
|
||||||
|
return (1);
|
||||||
|
}
|
||||||
|
|
||||||
|
keybuf->offset = keypos - keybuf->data;
|
||||||
|
keybuf->length = keybuf->offset + line_size;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Posix requires that equal keys be further sorted by the
|
||||||
|
* entire original record.
|
||||||
|
* NetBSD has (at least for some time) kept equal keys in
|
||||||
|
* their original order.
|
||||||
|
* For 'sort -u' posix_sort is unset.
|
||||||
|
*/
|
||||||
|
keybuf->keylen = posix_sort ? keybuf->length : keybuf->offset;
|
||||||
|
|
||||||
|
memcpy(keypos, line_data, line_size);
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* constructs a field (as defined by -k) within a key
|
||||||
|
*/
|
||||||
|
static u_char *
|
||||||
|
enterfield(u_char *tablepos, const u_char *endkey, struct field *cur_fld,
|
||||||
|
int gflags)
|
||||||
|
{
|
||||||
|
u_char *start, *end, *lineend, *mask, *lweight;
|
||||||
|
struct column icol, tcol;
|
||||||
|
u_int flags;
|
||||||
|
|
||||||
|
icol = cur_fld->icol;
|
||||||
|
tcol = cur_fld->tcol;
|
||||||
|
flags = cur_fld->flags;
|
||||||
|
start = icol.p->start;
|
||||||
|
lineend = clist[ncols].end;
|
||||||
|
if (flags & BI)
|
||||||
|
SKIP_BLANKS(start);
|
||||||
|
start += icol.indent;
|
||||||
|
start = min(start, lineend);
|
||||||
|
|
||||||
|
if (!tcol.num)
|
||||||
|
end = lineend;
|
||||||
|
else {
|
||||||
|
if (tcol.indent) {
|
||||||
|
end = tcol.p->start;
|
||||||
|
if (flags & BT)
|
||||||
|
SKIP_BLANKS(end);
|
||||||
|
end += tcol.indent;
|
||||||
|
end = min(end, lineend);
|
||||||
|
} else
|
||||||
|
end = tcol.p->end;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (flags & L)
|
||||||
|
return length(tablepos, endkey, start, end, flags);
|
||||||
|
if (flags & N)
|
||||||
|
return number(tablepos, endkey, start, end, flags);
|
||||||
|
|
||||||
|
/* Bound check space - assuming nothing is skipped */
|
||||||
|
if (tablepos + (end - start) + 1 >= endkey)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
mask = cur_fld->mask;
|
||||||
|
lweight = cur_fld->weights;
|
||||||
|
for (; start < end; start++) {
|
||||||
|
if (!mask || mask[*start]) {
|
||||||
|
*tablepos++ = lweight[*start];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Add extra byte (absent from lweight) to sort short keys correctly */
|
||||||
|
*tablepos++ = lweight[REC_D];
|
||||||
|
return tablepos;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Numbers are converted to a floating point format (exponent & mantissa)
|
||||||
|
* so that they compare correctly as sequence of unsigned bytes.
|
||||||
|
* Bytes 0x00 and 0xff are used to terminate positive and negative numbers
|
||||||
|
* to ensure that 0.123 sorts after 0.12 and -0.123 sorts before -0.12.
|
||||||
|
*
|
||||||
|
* The first byte contains the overall sign, exponent sign and some of the
|
||||||
|
* exponent. These have to be ordered (-ve value, decreasing exponent),
|
||||||
|
* zero, (+ve value, increasing exponent).
|
||||||
|
*
|
||||||
|
* The first byte is 0x80 for zero, 0xc0 for +ve with exponent 0.
|
||||||
|
* -ve values are the 1's complements (so 0x7f isn't used!).
|
||||||
|
*
|
||||||
|
* This only leaves 63 byte values for +ve exponents - which isn't enough.
|
||||||
|
* The largest 4 exponent values are used to hold a byte count of the
|
||||||
|
* number of following bytes that contain 8 exponent bits per byte,
|
||||||
|
* This lets us sort exponents from -2^31 to +2^31.
|
||||||
|
*
|
||||||
|
* The mantissa is stored 2 digits per byte offset by 0x40, for negative
|
||||||
|
* numbers the order must be reversed (they are bit inverted).
|
||||||
|
*
|
||||||
|
* Reverse sorts are done by inverting the sign of the number.
|
||||||
|
*/
|
||||||
|
#define MAX_EXP_ENC ((int)sizeof(int))
|
||||||
|
|
||||||
|
static u_char *
|
||||||
|
number(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend,
|
||||||
|
int reverse)
|
||||||
|
{
|
||||||
|
int exponent = -1;
|
||||||
|
int had_dp = 0;
|
||||||
|
u_char *tline;
|
||||||
|
char ch;
|
||||||
|
unsigned int val;
|
||||||
|
u_char *last_nz_pos;
|
||||||
|
u_char negate;
|
||||||
|
|
||||||
|
if (reverse & R)
|
||||||
|
negate = 0xff;
|
||||||
|
else
|
||||||
|
negate = 0;
|
||||||
|
|
||||||
|
/* Give ourselves space for the key terminator */
|
||||||
|
bufend--;
|
||||||
|
|
||||||
|
/* Ensure we have enough space for the exponent */
|
||||||
|
if (pos + 1 + MAX_EXP_ENC > bufend)
|
||||||
|
return (NULL);
|
||||||
|
|
||||||
|
SKIP_BLANKS(line);
|
||||||
|
if (*line == '-') { /* set the sign */
|
||||||
|
negate ^= 0xff;
|
||||||
|
line++;
|
||||||
|
}
|
||||||
|
/* eat initial zeroes */
|
||||||
|
for (; *line == '0' && line < lineend; line++)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* calculate exponents */
|
||||||
|
if (*line == DECIMAL_POINT) {
|
||||||
|
/* Decimal fraction */
|
||||||
|
had_dp = 1;
|
||||||
|
while (*++line == '0' && line < lineend)
|
||||||
|
exponent--;
|
||||||
|
} else {
|
||||||
|
/* Large (absolute) value, count digits */
|
||||||
|
for (tline = line; *tline >= '0' &&
|
||||||
|
*tline <= '9' && tline < lineend; tline++)
|
||||||
|
exponent++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the first/next character isn't a digit, value is zero */
|
||||||
|
if (*line < '1' || *line > '9' || line >= lineend) {
|
||||||
|
/* This may be "0", "0.00", "000" or "fubar" but sorts as 0 */
|
||||||
|
/* XXX what about NaN, NAN, inf and INF */
|
||||||
|
*pos++ = 0x80;
|
||||||
|
return pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Maybe here we should allow for e+12 (etc) */
|
||||||
|
|
||||||
|
if (exponent < 0x40 - MAX_EXP_ENC && -exponent < 0x40 - MAX_EXP_ENC) {
|
||||||
|
/* Value ok for simple encoding */
|
||||||
|
/* exponent 0 is 0xc0 for +ve numbers and 0x40 for -ve ones */
|
||||||
|
exponent += 0xc0;
|
||||||
|
*pos++ = negate ^ exponent;
|
||||||
|
} else {
|
||||||
|
/* Out or range for a single byte */
|
||||||
|
int c, t;
|
||||||
|
t = exponent > 0 ? exponent : -exponent;
|
||||||
|
/* Count how many 8-bit bytes are needed */
|
||||||
|
for (c = 0; ; c++) {
|
||||||
|
t >>= 8;
|
||||||
|
if (t == 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* 'c' better be 0..3 here - but probably 0..1 */
|
||||||
|
/* Offset just outside valid range */
|
||||||
|
t = c + 0x40 - MAX_EXP_ENC;
|
||||||
|
if (exponent < 0)
|
||||||
|
t = -t;
|
||||||
|
*pos++ = negate ^ (t + 0xc0);
|
||||||
|
/* now add each byte, most significant first */
|
||||||
|
for (; c >= 0; c--)
|
||||||
|
*pos++ = negate ^ (exponent >> (c * 8));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Finally add mantissa, 2 digits per byte */
|
||||||
|
for (last_nz_pos = pos; line < lineend; ) {
|
||||||
|
if (pos >= bufend)
|
||||||
|
return NULL;
|
||||||
|
ch = *line++;
|
||||||
|
val = (ch - '0') * 10;
|
||||||
|
if (val > 90) {
|
||||||
|
if (ch == DECIMAL_POINT && !had_dp) {
|
||||||
|
had_dp = 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
while (line < lineend) {
|
||||||
|
ch = *line++;
|
||||||
|
if (ch == DECIMAL_POINT && !had_dp) {
|
||||||
|
had_dp = 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (ch < '0' || ch > '9')
|
||||||
|
line = lineend;
|
||||||
|
else
|
||||||
|
val += ch - '0';
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
*pos++ = negate ^ (val + 0x40);
|
||||||
|
if (val != 0)
|
||||||
|
last_nz_pos = pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add key terminator, deleting any trailing "00" */
|
||||||
|
*last_nz_pos++ = negate;
|
||||||
|
|
||||||
|
return (last_nz_pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
static u_char *
|
||||||
|
length(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend,
|
||||||
|
int flag)
|
||||||
|
{
|
||||||
|
u_char buf[32];
|
||||||
|
int l;
|
||||||
|
SKIP_BLANKS(line);
|
||||||
|
l = snprintf((char *)buf, sizeof(buf), "%td", lineend - line);
|
||||||
|
return number(pos, bufend, buf, buf + l, flag);
|
||||||
|
}
|
276
usr.bin/sort/files.c
Normal file
276
usr.bin/sort/files.c
Normal file
|
@ -0,0 +1,276 @@
|
||||||
|
/* $NetBSD: files.c,v 1.41 2009/11/06 18:34:22 joerg Exp $ */
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to The NetBSD Foundation
|
||||||
|
* by Ben Harris and Jaromir Dolecek.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||||
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1993
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Peter McIlroy.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "sort.h"
|
||||||
|
#include "fsort.h"
|
||||||
|
|
||||||
|
__RCSID("$NetBSD: files.c,v 1.41 2009/11/06 18:34:22 joerg Exp $");
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
/* Align records in temporary files to avoid misaligned copies */
|
||||||
|
#define REC_ROUNDUP(n) (((n) + sizeof (long) - 1) & ~(sizeof (long) - 1))
|
||||||
|
|
||||||
|
static ssize_t seq(FILE *, u_char **);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* this is called when there is no special key. It's only called
|
||||||
|
* in the first fsort pass.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static u_char *opos;
|
||||||
|
static size_t osz;
|
||||||
|
|
||||||
|
void
|
||||||
|
makeline_copydown(RECHEADER *recbuf)
|
||||||
|
{
|
||||||
|
memmove(recbuf->data, opos, osz);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
makeline(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *dummy2)
|
||||||
|
{
|
||||||
|
u_char *pos;
|
||||||
|
int c;
|
||||||
|
|
||||||
|
pos = recbuf->data;
|
||||||
|
if (osz != 0) {
|
||||||
|
/*
|
||||||
|
* Buffer shortage is solved by either of two ways:
|
||||||
|
* o flush previous buffered data and start using the
|
||||||
|
* buffer from start.
|
||||||
|
* makeline_copydown() above must be called.
|
||||||
|
* o realloc buffer
|
||||||
|
*
|
||||||
|
* This code has relied on realloc changing 'bufend',
|
||||||
|
* but that isn't necessarily true.
|
||||||
|
*/
|
||||||
|
pos += osz;
|
||||||
|
osz = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (pos < bufend) {
|
||||||
|
c = getc(fp);
|
||||||
|
if (c == EOF) {
|
||||||
|
if (pos == recbuf->data) {
|
||||||
|
FCLOSE(fp);
|
||||||
|
return EOF;
|
||||||
|
}
|
||||||
|
/* Add terminator to partial line */
|
||||||
|
c = REC_D;
|
||||||
|
}
|
||||||
|
*pos++ = c;
|
||||||
|
if (c == REC_D) {
|
||||||
|
recbuf->offset = 0;
|
||||||
|
recbuf->length = pos - recbuf->data;
|
||||||
|
recbuf->keylen = recbuf->length - 1;
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Ran out of buffer space... */
|
||||||
|
if (recbuf->data < bufend) {
|
||||||
|
/* Remember where the partial record is */
|
||||||
|
osz = pos - recbuf->data;
|
||||||
|
opos = recbuf->data;
|
||||||
|
}
|
||||||
|
return (BUFFEND);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This generates keys. It's only called in the first fsort pass
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
makekey(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *ftbl)
|
||||||
|
{
|
||||||
|
static u_char *line_data;
|
||||||
|
static ssize_t line_size;
|
||||||
|
static int overflow = 0;
|
||||||
|
|
||||||
|
/* We get re-entered after returning BUFFEND - save old data */
|
||||||
|
if (overflow) {
|
||||||
|
overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
|
||||||
|
return overflow ? BUFFEND : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
line_size = seq(fp, &line_data);
|
||||||
|
if (line_size == 0) {
|
||||||
|
FCLOSE(fp);
|
||||||
|
return EOF;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (line_size > bufend - recbuf->data) {
|
||||||
|
overflow = 1;
|
||||||
|
} else {
|
||||||
|
overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
|
||||||
|
}
|
||||||
|
return overflow ? BUFFEND : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* get a line of input from fp
|
||||||
|
*/
|
||||||
|
static ssize_t
|
||||||
|
seq(FILE *fp, u_char **line)
|
||||||
|
{
|
||||||
|
static u_char *buf;
|
||||||
|
static size_t buf_size = DEFLLEN;
|
||||||
|
u_char *end, *pos;
|
||||||
|
int c;
|
||||||
|
u_char *new_buf;
|
||||||
|
|
||||||
|
if (!buf) {
|
||||||
|
/* one-time initialization */
|
||||||
|
buf = malloc(buf_size);
|
||||||
|
if (!buf)
|
||||||
|
err(2, "malloc of linebuf for %zu bytes failed",
|
||||||
|
buf_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
end = buf + buf_size;
|
||||||
|
pos = buf;
|
||||||
|
while ((c = getc(fp)) != EOF) {
|
||||||
|
*pos++ = c;
|
||||||
|
if (c == REC_D) {
|
||||||
|
*line = buf;
|
||||||
|
return pos - buf;
|
||||||
|
}
|
||||||
|
if (pos == end) {
|
||||||
|
/* Long line - double size of buffer */
|
||||||
|
/* XXX: Check here for stupidly long lines */
|
||||||
|
buf_size *= 2;
|
||||||
|
new_buf = realloc(buf, buf_size);
|
||||||
|
if (!new_buf)
|
||||||
|
err(2, "realloc of linebuf to %zu bytes failed",
|
||||||
|
buf_size);
|
||||||
|
|
||||||
|
end = new_buf + buf_size;
|
||||||
|
pos = new_buf + (pos - buf);
|
||||||
|
buf = new_buf;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pos != buf) {
|
||||||
|
/* EOF part way through line - add line terminator */
|
||||||
|
*pos++ = REC_D;
|
||||||
|
*line = buf;
|
||||||
|
return pos - buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* write a key/line pair to a temporary file
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
putrec(const RECHEADER *rec, FILE *fp)
|
||||||
|
{
|
||||||
|
EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length), fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* write a line to output
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
putline(const RECHEADER *rec, FILE *fp)
|
||||||
|
{
|
||||||
|
EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* write dump of key to output (for -Dk)
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
putkeydump(const RECHEADER *rec, FILE *fp)
|
||||||
|
{
|
||||||
|
EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->offset), fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* get a record from a temporary file. (Used by merge sort.)
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
geteasy(FILE *fp, RECHEADER *rec, u_char *end, struct field *dummy2)
|
||||||
|
{
|
||||||
|
length_t file_len;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
(void)sizeof (char[offsetof(RECHEADER, length) == 0 ? 1 : -1]);
|
||||||
|
|
||||||
|
if ((u_char *)(rec + 1) > end)
|
||||||
|
return (BUFFEND);
|
||||||
|
if (!fread(&rec->length, 1, sizeof rec->length, fp)) {
|
||||||
|
fclose(fp);
|
||||||
|
return (EOF);
|
||||||
|
}
|
||||||
|
file_len = REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length);
|
||||||
|
if (end - rec->data < (ptrdiff_t)file_len) {
|
||||||
|
for (i = sizeof rec->length - 1; i >= 0; i--)
|
||||||
|
ungetc(*((char *) rec + i), fp);
|
||||||
|
return (BUFFEND);
|
||||||
|
}
|
||||||
|
|
||||||
|
fread(&rec->length + 1, file_len - sizeof rec->length, 1, fp);
|
||||||
|
return (0);
|
||||||
|
}
|
214
usr.bin/sort/fsort.c
Normal file
214
usr.bin/sort/fsort.c
Normal file
|
@ -0,0 +1,214 @@
|
||||||
|
/* $NetBSD: fsort.c,v 1.47 2010/02/05 21:58:41 enami Exp $ */
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to The NetBSD Foundation
|
||||||
|
* by Ben Harris and Jaromir Dolecek.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||||
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1993
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Peter McIlroy.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Read in a block of records (until 'enough').
|
||||||
|
* sort, write to temp file.
|
||||||
|
* Merge sort temp files into output file
|
||||||
|
* Small files miss out the temp file stage.
|
||||||
|
* Large files might get multiple merges.
|
||||||
|
*/
|
||||||
|
#include "sort.h"
|
||||||
|
#include "fsort.h"
|
||||||
|
|
||||||
|
__RCSID("$NetBSD: fsort.c,v 1.47 2010/02/05 21:58:41 enami Exp $");
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
/* Round n up to the next multiple of sizeof(length_t) */
#define SALIGN(n) (((n) + sizeof(length_t) - 1) & ~(sizeof(length_t) - 1))
|
||||||
|
|
||||||
|
void
|
||||||
|
fsort(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl)
|
||||||
|
{
|
||||||
|
RECHEADER **keylist;
|
||||||
|
RECHEADER **keypos, **keyp;
|
||||||
|
RECHEADER *buffer;
|
||||||
|
size_t bufsize = DEFBUFSIZE;
|
||||||
|
u_char *bufend;
|
||||||
|
int mfct = 0;
|
||||||
|
int c, nelem;
|
||||||
|
get_func_t get;
|
||||||
|
RECHEADER *crec;
|
||||||
|
RECHEADER *nbuffer;
|
||||||
|
FILE *fp, *tmp_fp;
|
||||||
|
int file_no;
|
||||||
|
int max_recs = DEBUG('m') ? 16 : MAXNUM;
|
||||||
|
|
||||||
|
buffer = allocrec(NULL, bufsize);
|
||||||
|
bufend = (u_char *)buffer + bufsize;
|
||||||
|
/* Allocate double length keymap for radix_sort */
|
||||||
|
keylist = malloc(2 * max_recs * sizeof(*keylist));
|
||||||
|
if (buffer == NULL || keylist == NULL)
|
||||||
|
err(2, "failed to malloc initial buffer or keylist");
|
||||||
|
|
||||||
|
if (SINGL_FLD)
|
||||||
|
/* Key and data are one! */
|
||||||
|
get = makeline;
|
||||||
|
else
|
||||||
|
/* Key (merged key fields) added before data */
|
||||||
|
get = makekey;
|
||||||
|
|
||||||
|
file_no = 0;
|
||||||
|
#if defined(__minix)
|
||||||
|
/* LSC FIXME: Not very pretty, but reduce the diff */
|
||||||
|
#include "pathnames.h"
|
||||||
|
if (!strcmp(filelist->names[0], _PATH_STDIN))
|
||||||
|
fp = stdin;
|
||||||
|
else
|
||||||
|
#endif /* defined(__minix) */
|
||||||
|
fp = fopen(filelist->names[0], "r");
|
||||||
|
if (fp == NULL)
|
||||||
|
err(2, "%s", filelist->names[0]);
|
||||||
|
|
||||||
|
/* Loop through reads of chunk of input files that get sorted
|
||||||
|
* and then merged together. */
|
||||||
|
for (;;) {
|
||||||
|
keypos = keylist;
|
||||||
|
nelem = 0;
|
||||||
|
crec = buffer;
|
||||||
|
makeline_copydown(crec);
|
||||||
|
|
||||||
|
/* Loop reading records */
|
||||||
|
for (;;) {
|
||||||
|
c = get(fp, crec, bufend, ftbl);
|
||||||
|
/* 'c' is 0, EOF or BUFFEND */
|
||||||
|
if (c == 0) {
|
||||||
|
/* Save start of key in input buffer */
|
||||||
|
*keypos++ = crec;
|
||||||
|
if (++nelem == max_recs) {
|
||||||
|
c = BUFFEND;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
crec = (RECHEADER *)(crec->data + SALIGN(crec->length));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (c == EOF) {
|
||||||
|
/* try next file */
|
||||||
|
if (++file_no >= nfiles)
|
||||||
|
/* no more files */
|
||||||
|
break;
|
||||||
|
#if defined(__minix)
|
||||||
|
if (!strcmp(filelist->names[0], _PATH_STDIN))
|
||||||
|
fp = stdin;
|
||||||
|
else
|
||||||
|
#endif /* defined(__minix) */
|
||||||
|
fp = fopen(filelist->names[file_no], "r");
|
||||||
|
if (fp == NULL)
|
||||||
|
err(2, "%s", filelist->names[file_no]);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (nelem >= max_recs
|
||||||
|
|| (bufsize >= MAXBUFSIZE && nelem > 8))
|
||||||
|
/* Need to sort and save this lot of data */
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* c == BUFFEND, and we can process more data */
|
||||||
|
/* Allocate a larger buffer for this lot of data */
|
||||||
|
bufsize *= 2;
|
||||||
|
nbuffer = allocrec(buffer, bufsize);
|
||||||
|
if (!nbuffer) {
|
||||||
|
err(2, "failed to realloc buffer to %zu bytes",
|
||||||
|
bufsize);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* patch up keylist[] */
|
||||||
|
for (keyp = &keypos[-1]; keyp >= keylist; keyp--)
|
||||||
|
*keyp = nbuffer + (*keyp - buffer);
|
||||||
|
|
||||||
|
crec = nbuffer + (crec - buffer);
|
||||||
|
buffer = nbuffer;
|
||||||
|
bufend = (u_char *)buffer + bufsize;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sort this set of records */
|
||||||
|
radix_sort(keylist, keylist + max_recs, nelem);
|
||||||
|
|
||||||
|
if (c == EOF && mfct == 0) {
|
||||||
|
/* all the data is (sorted) in the buffer */
|
||||||
|
append(keylist, nelem, outfp,
|
||||||
|
DEBUG('k') ? putkeydump : putline);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Save current data to a temporary file for a later merge */
|
||||||
|
if (nelem != 0) {
|
||||||
|
tmp_fp = ftmp();
|
||||||
|
append(keylist, nelem, tmp_fp, putrec);
|
||||||
|
save_for_merge(tmp_fp, geteasy, ftbl);
|
||||||
|
}
|
||||||
|
mfct = 1;
|
||||||
|
|
||||||
|
if (c == EOF) {
|
||||||
|
/* merge to output file */
|
||||||
|
merge_sort(outfp,
|
||||||
|
DEBUG('k') ? putkeydump : putline, ftbl);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
free(keylist);
|
||||||
|
keylist = NULL;
|
||||||
|
free(buffer);
|
||||||
|
buffer = NULL;
|
||||||
|
}
|
78
usr.bin/sort/fsort.h
Normal file
78
usr.bin/sort/fsort.h
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
/* $NetBSD: fsort.h,v 1.17 2009/09/26 21:16:55 dsl Exp $ */
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to The NetBSD Foundation
|
||||||
|
* by Ben Harris and Jaromir Dolecek.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||||
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1993
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Peter McIlroy.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)fsort.h 8.1 (Berkeley) 6/6/93
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define BUFSIZE (1<<20)
|
||||||
|
#define MAXNUM 131072 /* low guess at average record count */
|
||||||
|
#define BUFFEND (EOF-2)
|
||||||
|
#define MAXFCT 1000
|
||||||
|
#define DEFLLEN 65536
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Default (initial) and maximum size of record buffer for fsort().
|
||||||
|
* Note that no more than MAXNUM records are stored in the buffer,
|
||||||
|
* even if the buffer is not full yet.
|
||||||
|
*/
|
||||||
|
#define DEFBUFSIZE (1 << 20) /* 1MB */
|
||||||
|
#define MAXBUFSIZE (8 << 20) /* 8 MB */
|
448
usr.bin/sort/init.c
Normal file
448
usr.bin/sort/init.c
Normal file
|
@ -0,0 +1,448 @@
|
||||||
|
/* $NetBSD: init.c,v 1.28 2010/12/18 23:09:48 christos Exp $ */
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to The NetBSD Foundation
|
||||||
|
* by Ben Harris and Jaromir Dolecek.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||||
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1993
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Peter McIlroy.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "sort.h"
|
||||||
|
|
||||||
|
__RCSID("$NetBSD: init.c,v 1.28 2010/12/18 23:09:48 christos Exp $");
|
||||||
|
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
static void insertcol(struct field *);
|
||||||
|
static const char *setcolumn(const char *, struct field *);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* masks of ignored characters.
|
||||||
|
*/
|
||||||
|
static u_char dtable[NBINS], itable[NBINS];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* parsed key options
|
||||||
|
*/
|
||||||
|
struct coldesc *clist = NULL;
|
||||||
|
int ncols = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* clist (list of columns which correspond to one or more icol or tcol)
|
||||||
|
* is in increasing order of columns.
|
||||||
|
* Fields are kept in increasing order of fields.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* keep clist in order--inserts a column in a sorted array
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
insertcol(struct field *field)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
struct coldesc *p;
|
||||||
|
|
||||||
|
/* Make space for new item */
|
||||||
|
p = realloc(clist, (ncols + 2) * sizeof(*clist));
|
||||||
|
if (!p)
|
||||||
|
err(1, "realloc");
|
||||||
|
clist = p;
|
||||||
|
memset(&clist[ncols], 0, sizeof(clist[ncols]));
|
||||||
|
|
||||||
|
for (i = 0; i < ncols; i++)
|
||||||
|
if (field->icol.num <= clist[i].num)
|
||||||
|
break;
|
||||||
|
if (field->icol.num != clist[i].num) {
|
||||||
|
memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
|
||||||
|
clist[i].num = field->icol.num;
|
||||||
|
ncols++;
|
||||||
|
}
|
||||||
|
if (field->tcol.num && field->tcol.num != field->icol.num) {
|
||||||
|
for (i = 0; i < ncols; i++)
|
||||||
|
if (field->tcol.num <= clist[i].num)
|
||||||
|
break;
|
||||||
|
if (field->tcol.num != clist[i].num) {
|
||||||
|
memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
|
||||||
|
clist[i].num = field->tcol.num;
|
||||||
|
ncols++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* matches fields with the appropriate columns--n^2 but who cares?
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
fldreset(struct field *fldtab)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
fldtab[0].tcol.p = clist + ncols - 1;
|
||||||
|
for (++fldtab; fldtab->icol.num; ++fldtab) {
|
||||||
|
for (i = 0; fldtab->icol.num != clist[i].num; i++)
|
||||||
|
;
|
||||||
|
fldtab->icol.p = clist + i;
|
||||||
|
if (!fldtab->tcol.num)
|
||||||
|
continue;
|
||||||
|
for (i = 0; fldtab->tcol.num != clist[i].num; i++)
|
||||||
|
;
|
||||||
|
fldtab->tcol.p = clist + i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* interprets a column in a -k field
|
||||||
|
*/
|
||||||
|
static const char *
|
||||||
|
setcolumn(const char *pos, struct field *cur_fld)
|
||||||
|
{
|
||||||
|
struct column *col;
|
||||||
|
char *npos;
|
||||||
|
int tmp;
|
||||||
|
col = cur_fld->icol.num ? (&cur_fld->tcol) : (&cur_fld->icol);
|
||||||
|
col->num = (int) strtol(pos, &npos, 10);
|
||||||
|
pos = npos;
|
||||||
|
if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
|
||||||
|
errx(2, "field numbers must be positive");
|
||||||
|
if (*pos == '.') {
|
||||||
|
if (!col->num)
|
||||||
|
errx(2, "cannot indent end of line");
|
||||||
|
++pos;
|
||||||
|
col->indent = (int) strtol(pos, &npos, 10);
|
||||||
|
pos = npos;
|
||||||
|
if (&cur_fld->icol == col)
|
||||||
|
col->indent--;
|
||||||
|
if (col->indent < 0)
|
||||||
|
errx(2, "illegal offset");
|
||||||
|
}
|
||||||
|
for(; (tmp = optval(*pos, cur_fld->tcol.num)); pos++)
|
||||||
|
cur_fld->flags |= tmp;
|
||||||
|
if (cur_fld->icol.num == 0)
|
||||||
|
cur_fld->icol.num = 1;
|
||||||
|
return (pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
setfield(const char *pos, struct field *cur_fld, int gflag)
|
||||||
|
{
|
||||||
|
cur_fld->mask = NULL;
|
||||||
|
|
||||||
|
pos = setcolumn(pos, cur_fld);
|
||||||
|
if (*pos == '\0') /* key extends to EOL. */
|
||||||
|
cur_fld->tcol.num = 0;
|
||||||
|
else {
|
||||||
|
if (*pos != ',')
|
||||||
|
errx(2, "illegal field descriptor");
|
||||||
|
setcolumn((++pos), cur_fld);
|
||||||
|
}
|
||||||
|
if (!cur_fld->flags)
|
||||||
|
cur_fld->flags = gflag;
|
||||||
|
if (REVERSE)
|
||||||
|
/* A local 'r' doesn't invert the global one */
|
||||||
|
cur_fld->flags &= ~R;
|
||||||
|
|
||||||
|
/* Assign appropriate mask table and weight table. */
|
||||||
|
cur_fld->weights = weight_tables[cur_fld->flags & (R | F)];
|
||||||
|
if (cur_fld->flags & I)
|
||||||
|
cur_fld->mask = itable;
|
||||||
|
else if (cur_fld->flags & D)
|
||||||
|
cur_fld->mask = dtable;
|
||||||
|
|
||||||
|
cur_fld->flags |= (gflag & (BI | BT));
|
||||||
|
if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */
|
||||||
|
cur_fld->flags &= ~BT;
|
||||||
|
|
||||||
|
if (cur_fld->tcol.num
|
||||||
|
&& !(!(cur_fld->flags & BI) && cur_fld->flags & BT)
|
||||||
|
&& (cur_fld->tcol.num <= cur_fld->icol.num
|
||||||
|
/* indent if 0 -> end of field, i.e. okay */
|
||||||
|
&& cur_fld->tcol.indent != 0
|
||||||
|
&& cur_fld->tcol.indent < cur_fld->icol.indent))
|
||||||
|
errx(2, "fields out of order");
|
||||||
|
|
||||||
|
insertcol(cur_fld);
|
||||||
|
return (cur_fld->tcol.num);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
optval(int desc, int tcolflag)
|
||||||
|
{
|
||||||
|
switch(desc) {
|
||||||
|
case 'b':
|
||||||
|
if (!tcolflag)
|
||||||
|
return BI;
|
||||||
|
else
|
||||||
|
return BT;
|
||||||
|
case 'd': return D;
|
||||||
|
case 'f': return F;
|
||||||
|
case 'i': return I;
|
||||||
|
case 'l': return L;
|
||||||
|
case 'n': return N;
|
||||||
|
case 'r': return R;
|
||||||
|
default: return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Return true if the options found in ARG, according to the getopt
 * spec in OPTS, require an additional argv word as an option
 * argument.
 *
 * ARG is expected to start with '-'; scanning begins at its second
 * character.  An unknown option letter ends the scan with "false".
 */
static int
options_need_argument(const char *arg, const char *opts)
{
	const char *cp;
	const char *hit;

	/*assert(arg[0] == '-');*/

	for (cp = arg + 1; *cp != '\0'; cp++) {
		hit = strchr(opts, *cp);
		if (hit == NULL) {
			/* invalid option */
			return 0;
		}
		if (hit[1] == ':') {
			/*
			 * Option takes an argument: it is in the next argv
			 * word only if this word ends right here.
			 */
			return cp[1] == '\0';
		}
	}
	return 0;
}
|
||||||
|
|
||||||
|
/*
 * Replace historic +SPEC arguments with appropriate -kSPEC.
 *
 * The form can be either a single +SPEC or a pair +SPEC -SPEC.
 * The following -SPEC is not recognized unless it follows
 * immediately.
 *
 *	argc	in/out: argument count, reduced for every -SPEC that is
 *		folded into the preceding argument
 *	argv	in/out: argument vector; rewritten entries point to
 *		newly allocated strings
 *	opts	getopt option string, used to skip option arguments
 *
 * Exits via err() if a replacement string cannot be allocated.
 */
void
fixit(int *argc, char **argv, const char *opts)
{
	int i, j, sawplus;
	char *vpos, *tpos;
	int col, indent;

	sawplus = 0;
	for (i = 1; i < *argc; i++) {
		/*
		 * This loop must stop exactly where getopt will stop.
		 * Otherwise it turns e.g. "sort x +3" into "sort x
		 * -k4.1", which will croak if +3 was in fact really a
		 * file name. In order to do this reliably we need to
		 * be able to identify argv words that are option
		 * arguments.
		 */

		if (!strcmp(argv[i], "--")) {
			/* End of options; stop. */
			break;
		}

		if (argv[i][0] == '+') {
			/* +POS argument */
			sawplus = 1;
		} else if (argv[i][0] == '-' && sawplus &&
			   isdigit((unsigned char)argv[i][1])) {
			/* -POS argument */
			sawplus = 0;
		} else if (argv[i][0] == '-') {
			/* other option */
			sawplus = 0;
			if (options_need_argument(argv[i], opts)) {
				/* skip over the argument */
				i++;
			}
			continue;
		} else {
			/* not an option at all; stop */
			sawplus = 0;
			break;
		}

		/*
		 * At this point argv[i] is an old-style spec. The
		 * sawplus flag used by the above loop logic also
		 * tells us if it's a +SPEC or -SPEC.
		 */

		/* parse spec */
		tpos = argv[i]+1;
		col = (int)strtol(tpos, &tpos, 10);
		if (*tpos == '.') {
			++tpos;
			indent = (int) strtol(tpos, &tpos, 10);
		} else
			indent = 0;
		/* tpos now points to the optional flags */

		/*
		 * In the traditional form, x.0 means beginning of line;
		 * in the new form, x.0 means end of line. Adjust the
		 * value of INDENT accordingly.
		 */
		if (sawplus) {
			/* +POS */
			col += 1;
			indent += 1;
		} else {
			/* -POS */
			if (indent > 0)
				col += 1;
		}

		/*
		 * Build the new style spec straight into the replacement
		 * string, so no fixed-size intermediate buffer can cut
		 * off a long run of flags.
		 */
		if (sawplus) {
			/* Replace the +POS argument with new-style -kSPEC */
			if (asprintf(&vpos, "-k%d.%d%s", col, indent,
			    tpos) < 0)
				err(2, "asprintf");
			argv[i] = vpos;
		} else {
			/*
			 * Append the spec to the one from the
			 * preceding +POS argument, and remove the
			 * current argv element entirely.
			 */
			if (asprintf(&vpos, "%s,%d.%d%s", argv[i-1], col,
			    indent, tpos) < 0)
				err(2, "asprintf");
			free(argv[i-1]);
			argv[i-1] = vpos;
			/* Shift the rest down, terminating NULL included */
			for (j=i; j < *argc; j++)
				argv[j] = argv[j+1];
			*argc -= 1;
			i--;
		}
	}
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ascii, Rascii, Ftable, and RFtable map
|
||||||
|
*
|
||||||
|
* Sorting 'weight' tables.
|
||||||
|
* Convert 'ascii' characters into their sort order.
|
||||||
|
* The 'F' variants fold lower case to upper equivalent
|
||||||
|
* The 'R' variants are for reverse sorting.
|
||||||
|
*
|
||||||
|
* The record separator (REC_D) never needs a weight, this frees one
|
||||||
|
* byte value as an 'end of key' marker. This must be 0 for normal
|
||||||
|
* weight tables, and 0xff for reverse weight tables - and is used
|
||||||
|
* to terminate keys so that short keys sort before (after if reverse)
|
||||||
|
* longer keys.
|
||||||
|
*
|
||||||
|
* The field separator has a normal weight - although it cannot occur
|
||||||
|
* within a key unless it is the default (space+tab).
|
||||||
|
*
|
||||||
|
* All other bytes map to the appropriate value for the sort order.
|
||||||
|
* Numeric sorts don't need any tables, they are reversed by negation.
|
||||||
|
*
|
||||||
|
* Global reverse sorts are done by writing the sorted keys in reverse
|
||||||
|
* order - the sort itself is still forwards.
|
||||||
|
* This means that weights are only ever used when generating keys, any
|
||||||
|
* sort of the original data bytes is always forwards and unweighted.
|
||||||
|
*
|
||||||
|
* Note: this is only good for ASCII sorting. For different LC 's,
|
||||||
|
* all bets are off.
|
||||||
|
*
|
||||||
|
* itable[] and dtable[] are the masks for -i (ignore non-printables)
|
||||||
|
* and -d (only sort blank and alphanumerics).
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
settables(void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
int next_weight = 1;
|
||||||
|
int rev_weight = 254;
|
||||||
|
|
||||||
|
ascii[REC_D] = 0;
|
||||||
|
Rascii[REC_D] = 255;
|
||||||
|
Ftable[REC_D] = 0;
|
||||||
|
RFtable[REC_D] = 255;
|
||||||
|
|
||||||
|
for (i = 0; i < 256; i++) {
|
||||||
|
if (i == REC_D)
|
||||||
|
continue;
|
||||||
|
ascii[i] = next_weight;
|
||||||
|
Rascii[i] = rev_weight;
|
||||||
|
if (Ftable[i] == 0) {
|
||||||
|
Ftable[i] = next_weight;
|
||||||
|
RFtable[i] = rev_weight;
|
||||||
|
Ftable[tolower(i)] = next_weight;
|
||||||
|
RFtable[tolower(i)] = rev_weight;
|
||||||
|
}
|
||||||
|
next_weight++;
|
||||||
|
rev_weight--;
|
||||||
|
|
||||||
|
if (i == '\n' || isprint(i))
|
||||||
|
itable[i] = 1;
|
||||||
|
|
||||||
|
if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
|
||||||
|
dtable[i] = 1;
|
||||||
|
}
|
||||||
|
}
|
439
usr.bin/sort/msort.c
Normal file
439
usr.bin/sort/msort.c
Normal file
|
@ -0,0 +1,439 @@
|
||||||
|
/* $NetBSD: msort.c,v 1.30 2010/02/05 21:58:42 enami Exp $ */
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to The NetBSD Foundation
|
||||||
|
* by Ben Harris and Jaromir Dolecek.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||||
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1993
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Peter McIlroy.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "sort.h"
|
||||||
|
#include "fsort.h"
|
||||||
|
|
||||||
|
__RCSID("$NetBSD: msort.c,v 1.30 2010/02/05 21:58:42 enami Exp $");
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <util.h>
|
||||||
|
|
||||||
|
/* Subroutines using comparisons: merge sort and check order */
|
||||||
|
#define DELETE (1)
|
||||||
|
|
||||||
|
typedef struct mfile {
|
||||||
|
FILE *fp;	/* stream the records of this input are read from */
|
||||||
|
get_func_t get;	/* reader called to fetch the next record from fp */
|
||||||
|
RECHEADER *rec;	/* buffer holding this input's current record (NULL until first merged) */
|
||||||
|
u_char *end;	/* rec plus its allocated size; passed to get as the buffer limit */
|
||||||
|
} MFILE;
|
||||||
|
|
||||||
|
static int cmp(RECHEADER *, RECHEADER *);
|
||||||
|
static int insert(struct mfile **, struct mfile *, int, int);
|
||||||
|
static void merge_sort_fstack(FILE *, put_func_t, struct field *);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Number of files merge() can merge in one pass.
|
||||||
|
*/
|
||||||
|
#define MERGE_FNUM 16
|
||||||
|
|
||||||
|
static struct mfile fstack[MERGE_FNUM];
|
||||||
|
static struct mfile fstack_1[MERGE_FNUM];
|
||||||
|
static struct mfile fstack_2[MERGE_FNUM];
|
||||||
|
static int fstack_count, fstack_1_count, fstack_2_count;
|
||||||
|
|
||||||
|
void
|
||||||
|
save_for_merge(FILE *fp, get_func_t get, struct field *ftbl)
|
||||||
|
{
|
||||||
|
FILE *mfp, *mfp1, *mfp2;
|
||||||
|
|
||||||
|
if (fstack_count == MERGE_FNUM) {
|
||||||
|
/* Must reduce the number of temporary files */
|
||||||
|
mfp = ftmp();
|
||||||
|
merge_sort_fstack(mfp, putrec, ftbl);
|
||||||
|
/* Save output in next layer */
|
||||||
|
if (fstack_1_count == MERGE_FNUM) {
|
||||||
|
mfp1 = ftmp();
|
||||||
|
memcpy(fstack, fstack_1, sizeof fstack);
|
||||||
|
merge_sort_fstack(mfp1, putrec, ftbl);
|
||||||
|
if (fstack_2_count == MERGE_FNUM) {
|
||||||
|
/* More than 4096 files! */
|
||||||
|
mfp2 = ftmp();
|
||||||
|
memcpy(fstack, fstack_2, sizeof fstack);
|
||||||
|
merge_sort_fstack(mfp2, putrec, ftbl);
|
||||||
|
fstack_2[0].fp = mfp2;
|
||||||
|
fstack_2_count = 1;
|
||||||
|
}
|
||||||
|
fstack_2[fstack_2_count].fp = mfp1;
|
||||||
|
fstack_2[fstack_2_count].get = geteasy;
|
||||||
|
fstack_2_count++;
|
||||||
|
fstack_1_count = 0;
|
||||||
|
}
|
||||||
|
fstack_1[fstack_1_count].fp = mfp;
|
||||||
|
fstack_1[fstack_1_count].get = geteasy;
|
||||||
|
fstack_1_count++;
|
||||||
|
fstack_count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
fstack[fstack_count].fp = fp;
|
||||||
|
fstack[fstack_count++].get = get;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
fmerge(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl)
|
||||||
|
{
|
||||||
|
get_func_t get = SINGL_FLD ? makeline : makekey;
|
||||||
|
FILE *fp;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < nfiles; i++) {
|
||||||
|
#if defined(__minix)
|
||||||
|
/* LSC FIXME: Not very pretty, but reduce the diff */
|
||||||
|
#include "pathnames.h"
|
||||||
|
if (!strcmp(filelist->names[0], _PATH_STDIN))
|
||||||
|
fp = stdin;
|
||||||
|
else
|
||||||
|
#endif /* defined(__minix) */
|
||||||
|
fp = fopen(filelist->names[i], "r");
|
||||||
|
if (fp == NULL)
|
||||||
|
err(2, "%s", filelist->names[i]);
|
||||||
|
save_for_merge(fp, get, ftbl);
|
||||||
|
}
|
||||||
|
|
||||||
|
merge_sort(outfp, putline, ftbl);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
merge_sort(FILE *outfp, put_func_t put, struct field *ftbl)
|
||||||
|
{
|
||||||
|
int count = fstack_1_count + fstack_2_count;
|
||||||
|
FILE *mfp;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (count == 0) {
|
||||||
|
/* All files in initial array */
|
||||||
|
merge_sort_fstack(outfp, put, ftbl);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
count += fstack_count;
|
||||||
|
|
||||||
|
/* Too many files for one merge sort */
|
||||||
|
for (;;) {
|
||||||
|
/* Sort latest 16 files */
|
||||||
|
i = count;
|
||||||
|
if (i > MERGE_FNUM)
|
||||||
|
i = MERGE_FNUM;
|
||||||
|
while (fstack_count > 0)
|
||||||
|
fstack[--i] = fstack[--fstack_count];
|
||||||
|
while (i > 0 && fstack_1_count > 0)
|
||||||
|
fstack[--i] = fstack_1[--fstack_1_count];
|
||||||
|
while (i > 0)
|
||||||
|
fstack[--i] = fstack_2[--fstack_2_count];
|
||||||
|
if (count <= MERGE_FNUM) {
|
||||||
|
/* Got all the data */
|
||||||
|
fstack_count = count;
|
||||||
|
merge_sort_fstack(outfp, put, ftbl);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
mfp = ftmp();
|
||||||
|
fstack_count = count > MERGE_FNUM ? MERGE_FNUM : count;
|
||||||
|
merge_sort_fstack(mfp, putrec, ftbl);
|
||||||
|
fstack[0].fp = mfp;
|
||||||
|
fstack[0].get = geteasy;
|
||||||
|
fstack_count = 1;
|
||||||
|
count -= MERGE_FNUM - 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
merge_sort_fstack(FILE *outfp, put_func_t put, struct field *ftbl)
|
||||||
|
{
|
||||||
|
struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile;
|
||||||
|
RECHEADER *new_rec;
|
||||||
|
u_char *new_end;
|
||||||
|
void *tmp;
|
||||||
|
int c, i, nfiles;
|
||||||
|
size_t sz;
|
||||||
|
|
||||||
|
/* Read one record from each file (read again if a duplicate) */
|
||||||
|
for (nfiles = i = 0; i < fstack_count; i++) {
|
||||||
|
cfile = &fstack[i];
|
||||||
|
if (cfile->rec == NULL) {
|
||||||
|
cfile->rec = allocrec(NULL, DEFLLEN);
|
||||||
|
cfile->end = (u_char *)cfile->rec + DEFLLEN;
|
||||||
|
}
|
||||||
|
rewind(cfile->fp);
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
c = cfile->get(cfile->fp, cfile->rec, cfile->end, ftbl);
|
||||||
|
if (c == EOF)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (c == BUFFEND) {
|
||||||
|
/* Double buffer size */
|
||||||
|
sz = (cfile->end - (u_char *)cfile->rec) * 2;
|
||||||
|
cfile->rec = allocrec(cfile->rec, sz);
|
||||||
|
cfile->end = (u_char *)cfile->rec + sz;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nfiles != 0) {
|
||||||
|
if (insert(flist, cfile, nfiles, !DELETE))
|
||||||
|
/* Duplicate removed */
|
||||||
|
continue;
|
||||||
|
} else
|
||||||
|
flist[0] = cfile;
|
||||||
|
nfiles++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nfiles == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We now loop reading a new record from the file with the
|
||||||
|
* 'sorted first' existing record.
|
||||||
|
* As each record is added, the 'first' record is written to the
|
||||||
|
* output file - maintaining one record from each file in the sorted
|
||||||
|
* list.
|
||||||
|
*/
|
||||||
|
new_rec = allocrec(NULL, DEFLLEN);
|
||||||
|
new_end = (u_char *)new_rec + DEFLLEN;
|
||||||
|
for (;;) {
|
||||||
|
cfile = flist[0];
|
||||||
|
c = cfile->get(cfile->fp, new_rec, new_end, ftbl);
|
||||||
|
if (c == EOF) {
|
||||||
|
/* Write out last record from now-empty input */
|
||||||
|
put(cfile->rec, outfp);
|
||||||
|
if (--nfiles == 0)
|
||||||
|
break;
|
||||||
|
/* Replace from file with now-first sorted record. */
|
||||||
|
/* (Moving base 'flist' saves copying everything!) */
|
||||||
|
flist++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (c == BUFFEND) {
|
||||||
|
/* Buffer not large enough - double in size */
|
||||||
|
sz = (new_end - (u_char *)new_rec) * 2;
|
||||||
|
new_rec = allocrec(new_rec, sz);
|
||||||
|
new_end = (u_char *)new_rec +sz;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Swap in new buffer, saving old */
|
||||||
|
tmp = cfile->rec;
|
||||||
|
cfile->rec = new_rec;
|
||||||
|
new_rec = tmp;
|
||||||
|
tmp = cfile->end;
|
||||||
|
cfile->end = new_end;
|
||||||
|
new_end = tmp;
|
||||||
|
|
||||||
|
/* Add into sort, removing the original first entry */
|
||||||
|
c = insert(flist, cfile, nfiles, DELETE);
|
||||||
|
if (c != 0 || (UNIQUE && cfile == flist[0]
|
||||||
|
&& cmp(new_rec, cfile->rec) == 0)) {
|
||||||
|
/* Was an unwanted duplicate, restore buffer */
|
||||||
|
tmp = cfile->rec;
|
||||||
|
cfile->rec = new_rec;
|
||||||
|
new_rec = tmp;
|
||||||
|
tmp = cfile->end;
|
||||||
|
cfile->end = new_end;
|
||||||
|
new_end = tmp;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Write out 'old' record */
|
||||||
|
put(new_rec, outfp);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(new_rec);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* if delete: inserts rec in flist, deletes flist[0];
|
||||||
|
* otherwise just inserts *rec in flist.
|
||||||
|
* Returns 1 if record is a duplicate to be ignored.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
insert(struct mfile **flist, struct mfile *rec, int ttop, int delete)
|
||||||
|
{
|
||||||
|
int mid, top = ttop, bot = 0, cmpv = 1;
|
||||||
|
|
||||||
|
for (mid = top / 2; bot + 1 != top; mid = (bot + top) / 2) {
|
||||||
|
cmpv = cmp(rec->rec, flist[mid]->rec);
|
||||||
|
if (cmpv == 0 ) {
|
||||||
|
if (UNIQUE)
|
||||||
|
/* Duplicate key, read another record */
|
||||||
|
/* NB: This doesn't guarantee to keep any
|
||||||
|
* particular record. */
|
||||||
|
return 1;
|
||||||
|
/*
|
||||||
|
* Apply sort by input file order.
|
||||||
|
* We could truncate the sort is the fileno are
|
||||||
|
* adjacent - but that is all too hard!
|
||||||
|
* The fileno cannot be equal, since we only have one
|
||||||
|
* record from each file (+ flist[0] which never
|
||||||
|
* comes here).
|
||||||
|
*/
|
||||||
|
cmpv = rec < flist[mid] ? -1 : 1;
|
||||||
|
if (REVERSE)
|
||||||
|
cmpv = -cmpv;
|
||||||
|
}
|
||||||
|
if (cmpv < 0)
|
||||||
|
top = mid;
|
||||||
|
else
|
||||||
|
bot = mid;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* At this point we haven't yet compared against flist[0] */
|
||||||
|
|
||||||
|
if (delete) {
|
||||||
|
/* flist[0] is ourselves, only the caller knows the old data */
|
||||||
|
if (bot != 0) {
|
||||||
|
memmove(flist, flist + 1, bot * sizeof(MFILE *));
|
||||||
|
flist[bot] = rec;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Inserting original set of records */
|
||||||
|
|
||||||
|
if (bot == 0 && cmpv != 0) {
|
||||||
|
/* Doesn't match flist[1], must compare with flist[0] */
|
||||||
|
cmpv = cmp(rec->rec, flist[0]->rec);
|
||||||
|
if (cmpv == 0 && UNIQUE)
|
||||||
|
return 1;
|
||||||
|
/* Add matching keys in file order (ie new is later) */
|
||||||
|
if (cmpv < 0)
|
||||||
|
bot = -1;
|
||||||
|
}
|
||||||
|
bot++;
|
||||||
|
memmove(flist + bot + 1, flist + bot, (ttop - bot) * sizeof(MFILE *));
|
||||||
|
flist[bot] = rec;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* check order on one file
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
order(struct filelist *filelist, struct field *ftbl)
|
||||||
|
{
|
||||||
|
get_func_t get = SINGL_FLD ? makeline : makekey;
|
||||||
|
RECHEADER *crec, *prec, *trec;
|
||||||
|
u_char *crec_end, *prec_end, *trec_end;
|
||||||
|
FILE *fp;
|
||||||
|
int c;
|
||||||
|
|
||||||
|
#if defined(__minix)
|
||||||
|
if (!strcmp(filelist->names[0], _PATH_STDIN))
|
||||||
|
fp = stdin;
|
||||||
|
else
|
||||||
|
#endif /* defined(__minix) */
|
||||||
|
fp = fopen(filelist->names[0], "r");
|
||||||
|
if (fp == NULL)
|
||||||
|
err(2, "%s", filelist->names[0]);
|
||||||
|
|
||||||
|
crec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
|
||||||
|
crec_end = crec->data + DEFLLEN;
|
||||||
|
prec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
|
||||||
|
prec_end = prec->data + DEFLLEN;
|
||||||
|
|
||||||
|
/* XXX this does exit(0) for overlong lines */
|
||||||
|
if (get(fp, prec, prec_end, ftbl) != 0)
|
||||||
|
exit(0);
|
||||||
|
while (get(fp, crec, crec_end, ftbl) == 0) {
|
||||||
|
if (0 < (c = cmp(prec, crec))) {
|
||||||
|
crec->data[crec->length-1] = 0;
|
||||||
|
errx(1, "found disorder: %s", crec->data+crec->offset);
|
||||||
|
}
|
||||||
|
if (UNIQUE && !c) {
|
||||||
|
crec->data[crec->length-1] = 0;
|
||||||
|
errx(1, "found non-uniqueness: %s",
|
||||||
|
crec->data+crec->offset);
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Swap pointers so that this record is on place pointed
|
||||||
|
* to by prec and new record is read to place pointed to by
|
||||||
|
* crec.
|
||||||
|
*/
|
||||||
|
trec = prec;
|
||||||
|
prec = crec;
|
||||||
|
crec = trec;
|
||||||
|
trec_end = prec_end;
|
||||||
|
prec_end = crec_end;
|
||||||
|
crec_end = trec_end;
|
||||||
|
}
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
cmp(RECHEADER *rec1, RECHEADER *rec2)
|
||||||
|
{
|
||||||
|
int len;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
/* key is weights */
|
||||||
|
len = min(rec1->keylen, rec2->keylen);
|
||||||
|
r = memcmp(rec1->data, rec2->data, len);
|
||||||
|
if (r == 0)
|
||||||
|
r = rec1->keylen - rec2->keylen;
|
||||||
|
if (REVERSE)
|
||||||
|
r = -r;
|
||||||
|
return r;
|
||||||
|
}
|
66
usr.bin/sort/pathnames.h
Normal file
66
usr.bin/sort/pathnames.h
Normal file
|
@ -0,0 +1,66 @@
|
||||||
|
/* $NetBSD: pathnames.h,v 1.6 2008/04/28 20:24:15 martin Exp $ */
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to The NetBSD Foundation
|
||||||
|
* by Ben Harris and Jaromir Dolecek.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||||
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1993
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Peter McIlroy.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)pathnames.h 8.1 (Berkeley) 6/6/93
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define _PATH_STDIN "/dev/stdin"
|
217
usr.bin/sort/radix_sort.c
Normal file
217
usr.bin/sort/radix_sort.c
Normal file
|
@ -0,0 +1,217 @@
|
||||||
|
/* $NetBSD: radix_sort.c,v 1.4 2009/09/19 16:18:00 dsl Exp $ */
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1990, 1993
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Peter McIlroy and by Dan Bernstein at New York University,
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/cdefs.h>
|
||||||
|
#if defined(LIBC_SCCS) && !defined(lint)
|
||||||
|
#if 0
|
||||||
|
static char sccsid[] = "@(#)radixsort.c 8.2 (Berkeley) 4/28/95";
|
||||||
|
#else
|
||||||
|
__RCSID("$NetBSD: radix_sort.c,v 1.4 2009/09/19 16:18:00 dsl Exp $");
|
||||||
|
#endif
|
||||||
|
#endif /* LIBC_SCCS and not lint */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* 'stable' radix sort initially from libc/stdlib/radixsort.c
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <util.h>
|
||||||
|
#include "sort.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
RECHEADER **sa; /* Base of saved area */
|
||||||
|
int sn; /* Number of entries */
|
||||||
|
int si; /* index into data for compare */
|
||||||
|
} stack;
|
||||||
|
|
||||||
|
static void simplesort(RECHEADER **, int, int);
|
||||||
|
|
||||||
|
#define THRESHOLD 20 /* Divert to simplesort(). */
|
||||||
|
|
||||||
|
#define empty(s) (s >= sp)
|
||||||
|
#define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si
|
||||||
|
#define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i
|
||||||
|
#define swap(a, b, t) t = a, a = b, b = t
|
||||||
|
|
||||||
|
void
|
||||||
|
radix_sort(RECHEADER **a, RECHEADER **ta, int n)
|
||||||
|
{
|
||||||
|
u_int count[256], nc, bmin;
|
||||||
|
u_int c;
|
||||||
|
RECHEADER **ak, **tai, **lim;
|
||||||
|
RECHEADER *hdr;
|
||||||
|
int stack_size = 512;
|
||||||
|
stack *s, *sp, *sp0, *sp1, temp;
|
||||||
|
RECHEADER **top[256];
|
||||||
|
u_int *cp, bigc;
|
||||||
|
int data_index = 0;
|
||||||
|
|
||||||
|
if (n < THRESHOLD && !DEBUG('r')) {
|
||||||
|
simplesort(a, n, 0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
s = emalloc(stack_size * sizeof *s);
|
||||||
|
memset(&count, 0, sizeof count);
|
||||||
|
/* Technically 'top' doesn't need zeroing */
|
||||||
|
memset(&top, 0, sizeof top);
|
||||||
|
|
||||||
|
sp = s;
|
||||||
|
push(a, n, data_index);
|
||||||
|
while (!empty(s)) {
|
||||||
|
pop(a, n, data_index);
|
||||||
|
if (n < THRESHOLD && !DEBUG('r')) {
|
||||||
|
simplesort(a, n, data_index);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Count number of times each 'next byte' occurs */
|
||||||
|
nc = 0;
|
||||||
|
bmin = 255;
|
||||||
|
lim = a + n;
|
||||||
|
for (ak = a, tai = ta; ak < lim; ak++) {
|
||||||
|
hdr = *ak;
|
||||||
|
if (data_index >= hdr->keylen) {
|
||||||
|
/* Short key, copy to start of output */
|
||||||
|
if (UNIQUE && a != sp->sa)
|
||||||
|
/* Stop duplicate being written out */
|
||||||
|
hdr->keylen = -1;
|
||||||
|
*a++ = hdr;
|
||||||
|
n--;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
/* Save in temp buffer for distribute */
|
||||||
|
*tai++ = hdr;
|
||||||
|
c = hdr->data[data_index];
|
||||||
|
if (++count[c] == 1) {
|
||||||
|
if (c < bmin)
|
||||||
|
bmin = c;
|
||||||
|
nc++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* We need save the bounds for each 'next byte' that
|
||||||
|
* occurs more so we can sort each block.
|
||||||
|
*/
|
||||||
|
if (sp + nc > s + stack_size) {
|
||||||
|
stack_size *= 2;
|
||||||
|
sp1 = erealloc(s, stack_size * sizeof *s);
|
||||||
|
sp = sp1 + (sp - s);
|
||||||
|
s = sp1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Minor optimisation to do the largest set last */
|
||||||
|
sp0 = sp1 = sp;
|
||||||
|
bigc = 2;
|
||||||
|
/* Convert 'counts' positions, saving bounds for later sorts */
|
||||||
|
ak = a;
|
||||||
|
for (cp = count + bmin; nc > 0; cp++) {
|
||||||
|
while (*cp == 0)
|
||||||
|
cp++;
|
||||||
|
if ((c = *cp) > 1) {
|
||||||
|
if (c > bigc) {
|
||||||
|
bigc = c;
|
||||||
|
sp1 = sp;
|
||||||
|
}
|
||||||
|
push(ak, c, data_index+1);
|
||||||
|
}
|
||||||
|
ak += c;
|
||||||
|
top[cp-count] = ak;
|
||||||
|
*cp = 0; /* Reset count[]. */
|
||||||
|
nc--;
|
||||||
|
}
|
||||||
|
swap(*sp0, *sp1, temp);
|
||||||
|
|
||||||
|
for (ak = ta+n; --ak >= ta;) /* Deal to piles. */
|
||||||
|
*--top[(*ak)->data[data_index]] = *ak;
|
||||||
|
}
|
||||||
|
|
||||||
|
free(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* insertion sort, short records are sorted before long ones */
|
||||||
|
static void
|
||||||
|
simplesort(RECHEADER **a, int n, int data_index)
|
||||||
|
{
|
||||||
|
RECHEADER **ak, **ai;
|
||||||
|
RECHEADER *akh;
|
||||||
|
RECHEADER **lim = a + n;
|
||||||
|
const u_char *s, *t;
|
||||||
|
int s_len, t_len;
|
||||||
|
int i;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
if (n <= 1)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (ak = a+1; ak < lim; ak++) {
|
||||||
|
akh = *ak;
|
||||||
|
s = akh->data;
|
||||||
|
s_len = akh->keylen;
|
||||||
|
for (ai = ak; ;) {
|
||||||
|
ai--;
|
||||||
|
t_len = (*ai)->keylen;
|
||||||
|
if (t_len != -1) {
|
||||||
|
t = (*ai)->data;
|
||||||
|
for (i = data_index; ; i++) {
|
||||||
|
if (i >= s_len || i >= t_len) {
|
||||||
|
r = s_len - t_len;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
r = s[i] - t[i];
|
||||||
|
if (r != 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (r >= 0) {
|
||||||
|
if (r == 0 && UNIQUE) {
|
||||||
|
/* Put record below existing */
|
||||||
|
ai[1] = ai[0];
|
||||||
|
/* Mark as duplicate - ignore */
|
||||||
|
akh->keylen = -1;
|
||||||
|
} else {
|
||||||
|
ai++;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ai[1] = ai[0];
|
||||||
|
if (ai == a)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
ai[0] = akh;
|
||||||
|
}
|
||||||
|
}
|
462
usr.bin/sort/sort.1
Normal file
462
usr.bin/sort/sort.1
Normal file
|
@ -0,0 +1,462 @@
|
||||||
|
.\" $NetBSD: sort.1,v 1.32 2010/12/18 23:36:23 wiz Exp $
|
||||||
|
.\"
|
||||||
|
.\" Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||||
|
.\" All rights reserved.
|
||||||
|
.\"
|
||||||
|
.\" This code is derived from software contributed to The NetBSD Foundation
|
||||||
|
.\" by Ben Harris and Jaromir Dolecek.
|
||||||
|
.\"
|
||||||
|
.\" Redistribution and use in source and binary forms, with or without
|
||||||
|
.\" modification, are permitted provided that the following conditions
|
||||||
|
.\" are met:
|
||||||
|
.\" 1. Redistributions of source code must retain the above copyright
|
||||||
|
.\" notice, this list of conditions and the following disclaimer.
|
||||||
|
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
.\" notice, this list of conditions and the following disclaimer in the
|
||||||
|
.\" documentation and/or other materials provided with the distribution.
|
||||||
|
.\"
|
||||||
|
.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||||
|
.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||||
|
.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
.\" POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
.\"
|
||||||
|
.\" Copyright (c) 1991, 1993
|
||||||
|
.\" The Regents of the University of California. All rights reserved.
|
||||||
|
.\"
|
||||||
|
.\" This code is derived from software contributed to Berkeley by
|
||||||
|
.\" the Institute of Electrical and Electronics Engineers, Inc.
|
||||||
|
.\"
|
||||||
|
.\" Redistribution and use in source and binary forms, with or without
|
||||||
|
.\" modification, are permitted provided that the following conditions
|
||||||
|
.\" are met:
|
||||||
|
.\" 1. Redistributions of source code must retain the above copyright
|
||||||
|
.\" notice, this list of conditions and the following disclaimer.
|
||||||
|
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
.\" notice, this list of conditions and the following disclaimer in the
|
||||||
|
.\" documentation and/or other materials provided with the distribution.
|
||||||
|
.\" 3. Neither the name of the University nor the names of its contributors
|
||||||
|
.\" may be used to endorse or promote products derived from this software
|
||||||
|
.\" without specific prior written permission.
|
||||||
|
.\"
|
||||||
|
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
.\" SUCH DAMAGE.
|
||||||
|
.\"
|
||||||
|
.\" @(#)sort.1 8.1 (Berkeley) 6/6/93
|
||||||
|
.\"
|
||||||
|
.Dd December 18, 2010
|
||||||
|
.Dt SORT 1
|
||||||
|
.Os
|
||||||
|
.Sh NAME
|
||||||
|
.Nm sort
|
||||||
|
.Nd sort or merge text files
|
||||||
|
.Sh SYNOPSIS
|
||||||
|
.Nm sort
|
||||||
|
.Op Fl bcdfHilmnrSsu
|
||||||
|
.Oo
|
||||||
|
.Fl k
|
||||||
|
.Ar field1 Ns Op Li \&, Ns Ar field2
|
||||||
|
.Oc
|
||||||
|
.Op Fl o Ar output
|
||||||
|
.Op Fl R Ar char
|
||||||
|
.Op Fl T Ar dir
|
||||||
|
.Op Fl t Ar char
|
||||||
|
.Op Ar
|
||||||
|
.Sh DESCRIPTION
|
||||||
|
The
|
||||||
|
.Nm
|
||||||
|
utility sorts text files by lines.
|
||||||
|
Comparisons are based on one or more sort keys extracted
|
||||||
|
from each line of input, and are performed lexicographically.
|
||||||
|
By default, if keys are not given,
|
||||||
|
.Nm
|
||||||
|
regards each input line as a single field.
|
||||||
|
.Pp
|
||||||
|
The following options are available:
|
||||||
|
.Bl -tag -width Fl
|
||||||
|
.It Fl c
|
||||||
|
Check that the single input file is sorted.
|
||||||
|
If the file is not sorted,
|
||||||
|
.Nm
|
||||||
|
produces the appropriate error messages and exits with code 1; otherwise,
|
||||||
|
.Nm
|
||||||
|
returns 0.
|
||||||
|
.Nm
|
||||||
|
.Fl c
|
||||||
|
produces no output.
|
||||||
|
.It Fl H
|
||||||
|
Ignored for compatibility with earlier versions of
|
||||||
|
.Nm .
|
||||||
|
.It Fl m
|
||||||
|
Merge only; the input files are assumed to be pre-sorted.
|
||||||
|
.It Fl o Ar output
|
||||||
|
The argument given is the name of an
|
||||||
|
.Ar output
|
||||||
|
file to be used instead of the standard output.
|
||||||
|
This file can be the same as one of the input files.
|
||||||
|
.It Fl S
|
||||||
|
Don't use stable sort.
|
||||||
|
Default is to use stable sort.
|
||||||
|
.It Fl s
|
||||||
|
Use stable sort, which keeps records with equal keys in their original order.
|
||||||
|
This is the default.
|
||||||
|
Provided for compatibility with other
|
||||||
|
.Nm
|
||||||
|
implementations only.
|
||||||
|
.It Fl T Ar dir
|
||||||
|
Use
|
||||||
|
.Ar dir
|
||||||
|
as the directory for temporary files.
|
||||||
|
The default is the value specified in the environment variable
|
||||||
|
.Ev TMPDIR No or
|
||||||
|
.Pa /tmp
|
||||||
|
if
|
||||||
|
.Ev TMPDIR
|
||||||
|
is not defined.
|
||||||
|
.It Fl u
|
||||||
|
Unique: suppress all but one in each set of lines having equal keys.
|
||||||
|
If used with the
|
||||||
|
.Fl c
|
||||||
|
option, check that there are no lines with duplicate keys.
|
||||||
|
.El
|
||||||
|
.Pp
|
||||||
|
The following options override the default ordering rules.
|
||||||
|
When ordering options appear independent of key field
|
||||||
|
specifications, the requested field ordering rules are
|
||||||
|
applied globally to all sort keys.
|
||||||
|
When attached to a specific key (see
|
||||||
|
.Fl k ) ,
|
||||||
|
the ordering options override
|
||||||
|
all global ordering options for that key.
|
||||||
|
.Bl -tag -width Fl
|
||||||
|
.It Fl d
|
||||||
|
Only blank space and alphanumeric characters
|
||||||
|
.\" according
|
||||||
|
.\" to the current setting of LC_CTYPE
|
||||||
|
are used
|
||||||
|
in making comparisons.
|
||||||
|
.It Fl f
|
||||||
|
Considers all lowercase characters that have uppercase
|
||||||
|
equivalents to be the same for purposes of comparison.
|
||||||
|
.It Fl i
|
||||||
|
Ignore all non-printable characters.
|
||||||
|
.It Fl l
|
||||||
|
Sort by the string length of the field, not by the field itself.
|
||||||
|
.It Fl n
|
||||||
|
An initial numeric string, consisting of optional blank space, optional
|
||||||
|
minus sign, and zero or more digits (including decimal point)
|
||||||
|
.\" with
|
||||||
|
.\" optional radix character and thousands
|
||||||
|
.\" separator
|
||||||
|
.\" (as defined in the current locale),
|
||||||
|
is sorted by arithmetic value.
|
||||||
|
(The
|
||||||
|
.Fl n
|
||||||
|
option no longer implies the
|
||||||
|
.Fl b
|
||||||
|
option.)
|
||||||
|
.It Fl r
|
||||||
|
Reverse the sense of comparisons.
|
||||||
|
.El
|
||||||
|
.Pp
|
||||||
|
The treatment of field separators can be altered using these options:
|
||||||
|
.Bl -tag -width Fl
|
||||||
|
.It Fl b
|
||||||
|
Ignores leading blank space when determining the start
|
||||||
|
and end of a restricted sort key.
|
||||||
|
A
|
||||||
|
.Fl b
|
||||||
|
option specified before the first
|
||||||
|
.Fl k
|
||||||
|
option applies globally to all
|
||||||
|
.Fl k
|
||||||
|
options.
|
||||||
|
Otherwise, the
|
||||||
|
.Fl b
|
||||||
|
option can be attached independently to each
|
||||||
|
.Ar field
|
||||||
|
argument of the
|
||||||
|
.Fl k
|
||||||
|
option (see below).
|
||||||
|
Note that the
|
||||||
|
.Fl b
|
||||||
|
option has no effect unless key fields are specified.
|
||||||
|
.It Fl t Ar char
|
||||||
|
.Ar char
|
||||||
|
is used as the field separator character.
|
||||||
|
The initial
|
||||||
|
.Ar char
|
||||||
|
is not considered to be part of a field when determining
|
||||||
|
key offsets (see below).
|
||||||
|
Each occurrence of
|
||||||
|
.Ar char
|
||||||
|
is significant (for example,
|
||||||
|
.Dq Ar charchar
|
||||||
|
delimits an empty field).
|
||||||
|
If
|
||||||
|
.Fl t
|
||||||
|
is not specified, the default field separator is a sequence of
|
||||||
|
blank-space characters, and consecutive blank spaces do
|
||||||
|
.Em not
|
||||||
|
delimit an empty field; further, the initial blank space
|
||||||
|
.Em is
|
||||||
|
considered part of a field when determining key offsets.
|
||||||
|
.It Fl R Ar char
|
||||||
|
.Ar char
|
||||||
|
is used as the record separator character.
|
||||||
|
This should be used with discretion;
|
||||||
|
.Fl R Ar \*[Lt]alphanumeric\*[Gt]
|
||||||
|
usually produces undesirable results.
|
||||||
|
The default record separator is newline.
|
||||||
|
.It Fl k Ar field1 Ns Op Li \&, Ns Ar field2
|
||||||
|
Designates the starting position,
|
||||||
|
.Ar field1 ,
|
||||||
|
and optional ending position,
|
||||||
|
.Ar field2 ,
|
||||||
|
of a key field.
|
||||||
|
The
|
||||||
|
.Fl k
|
||||||
|
option replaces the obsolescent options
|
||||||
|
.Cm \(pl Ns Ar pos1
|
||||||
|
and
|
||||||
|
.Fl Ns Ar pos2 .
|
||||||
|
.El
|
||||||
|
.Pp
|
||||||
|
The following operands are available:
|
||||||
|
.Bl -tag -width Ar
|
||||||
|
.It Ar file
|
||||||
|
The pathname of a file to be sorted, merged, or checked.
|
||||||
|
If no
|
||||||
|
.Ar file
|
||||||
|
operands are specified, or if
|
||||||
|
a
|
||||||
|
.Ar file
|
||||||
|
operand is
|
||||||
|
.Fl ,
|
||||||
|
the standard input is used.
|
||||||
|
.El
|
||||||
|
.Pp
|
||||||
|
A field is defined as a minimal sequence of characters followed by a
|
||||||
|
field separator or a newline character.
|
||||||
|
By default, the first
|
||||||
|
blank space of a sequence of blank spaces acts as the field separator.
|
||||||
|
All blank spaces in a sequence of blank spaces are considered
|
||||||
|
as part of the next field; for example, all blank spaces at
|
||||||
|
the beginning of a line are considered to be part of the
|
||||||
|
first field.
|
||||||
|
.Pp
|
||||||
|
Fields are specified
|
||||||
|
by the
|
||||||
|
.Fl k
|
||||||
|
.Ar field1 Ns Op \&, Ns Ar field2
|
||||||
|
argument.
|
||||||
|
A missing
|
||||||
|
.Ar field2
|
||||||
|
argument defaults to the end of a line.
|
||||||
|
.Pp
|
||||||
|
The arguments
|
||||||
|
.Ar field1
|
||||||
|
and
|
||||||
|
.Ar field2
|
||||||
|
have the form
|
||||||
|
.Ar m Ns Li \&. Ns Ar n
|
||||||
|
and can be followed by one or more of the letters
|
||||||
|
.Cm b , d , f , i ,
|
||||||
|
.Cm l , n ,
|
||||||
|
and
|
||||||
|
.Cm r ,
|
||||||
|
which correspond to the options discussed above.
|
||||||
|
A
|
||||||
|
.Ar field1
|
||||||
|
position specified by
|
||||||
|
.Ar m Ns Li \&. Ns Ar n
|
||||||
|
.Pq Ar m , n No \*[Gt] 0
|
||||||
|
is interpreted as the
|
||||||
|
.Ar n Ns th
|
||||||
|
character in the
|
||||||
|
.Ar m Ns th
|
||||||
|
field.
|
||||||
|
A missing
|
||||||
|
.Li \&. Ns Ar n
|
||||||
|
in
|
||||||
|
.Ar field1
|
||||||
|
means
|
||||||
|
.Ql \&.1 ,
|
||||||
|
indicating the first character of the
|
||||||
|
.Ar m Ns th
|
||||||
|
field; if the
|
||||||
|
.Fl b
|
||||||
|
option is in effect,
|
||||||
|
.Ar n
|
||||||
|
is counted from the first non-blank character in the
|
||||||
|
.Ar m Ns th
|
||||||
|
field;
|
||||||
|
.Ar m Ns Li \&.1b
|
||||||
|
refers to the first non-blank character in the
|
||||||
|
.Ar m Ns th
|
||||||
|
field.
|
||||||
|
.Pp
|
||||||
|
A
|
||||||
|
.Ar field2
|
||||||
|
position specified by
|
||||||
|
.Ar m Ns Li \&. Ns Ar n
|
||||||
|
is interpreted as
|
||||||
|
the
|
||||||
|
.Ar n Ns th
|
||||||
|
character (including separators) of the
|
||||||
|
.Ar m Ns th
|
||||||
|
field.
|
||||||
|
A missing
|
||||||
|
.Li \&. Ns Ar n
|
||||||
|
indicates the last character of the
|
||||||
|
.Ar m Ns th
|
||||||
|
field;
|
||||||
|
.Ar m
|
||||||
|
= \&0
|
||||||
|
designates the end of a line.
|
||||||
|
Thus the option
|
||||||
|
.Fl k
|
||||||
|
.Sm off
|
||||||
|
.Xo
|
||||||
|
.Ar v Li \&. Ar x Li \&,
|
||||||
|
.Ar w Li \&. Ar y
|
||||||
|
.Xc
|
||||||
|
.Sm on
|
||||||
|
is synonymous with the obsolescent option
|
||||||
|
.Sm off
|
||||||
|
.Cm \(pl Ar v-\&1 Li \&. Ar x-\&1
|
||||||
|
.Fl Ar w-\&1 Li \&. Ar y ;
|
||||||
|
.Sm on
|
||||||
|
when
|
||||||
|
.Ar y
|
||||||
|
is omitted,
|
||||||
|
.Fl k
|
||||||
|
.Sm off
|
||||||
|
.Ar v Li \&. Ar x Li \&, Ar w
|
||||||
|
.Sm on
|
||||||
|
is synonymous with
|
||||||
|
.Sm off
|
||||||
|
.Cm \(pl Ar v-\&1 Li \&. Ar x-\&1
|
||||||
|
.Fl Ar w+1 Li \&.0 .
|
||||||
|
.Sm on
|
||||||
|
The obsolescent
|
||||||
|
.Cm \(pl Ns Ar pos1
|
||||||
|
.Fl Ns Ar pos2
|
||||||
|
option is still supported, except for
|
||||||
|
.Fl Ns Ar w Ns Li \&.0b ,
|
||||||
|
which has no
|
||||||
|
.Fl k
|
||||||
|
equivalent.
|
||||||
|
.Sh ENVIRONMENT
|
||||||
|
If the following environment variable exists, it is used by
|
||||||
|
.Nm .
|
||||||
|
.Bl -tag -width Ev
|
||||||
|
.It Ev TMPDIR
|
||||||
|
.Nm
|
||||||
|
uses the contents of the
|
||||||
|
.Ev TMPDIR
|
||||||
|
environment variable as the path in which to store
|
||||||
|
temporary files.
|
||||||
|
.El
|
||||||
|
.Sh FILES
|
||||||
|
.Bl -tag -width outputNUMBER+some -compact
|
||||||
|
.It Pa /tmp/sort.*
|
||||||
|
Default temporary files.
|
||||||
|
.It Ar output Ns NUMBER
|
||||||
|
Temporary file which is used for output if
|
||||||
|
.Ar output
|
||||||
|
already exists.
|
||||||
|
Once sorting is finished, this file replaces
|
||||||
|
.Ar output
|
||||||
|
(via
|
||||||
|
.Xr link 2
|
||||||
|
and
|
||||||
|
.Xr unlink 2 ) .
|
||||||
|
.El
|
||||||
|
.Sh EXIT STATUS
|
||||||
|
Sort exits with one of the following values:
|
||||||
|
.Bl -tag -width flag -compact
|
||||||
|
.It 0
|
||||||
|
Normal behavior.
|
||||||
|
.It 1
|
||||||
|
On disorder (or non-uniqueness) with the
|
||||||
|
.Fl c
|
||||||
|
option
|
||||||
|
.It 2
|
||||||
|
An error occurred.
|
||||||
|
.El
|
||||||
|
.Sh SEE ALSO
|
||||||
|
.Xr comm 1 ,
|
||||||
|
.Xr join 1 ,
|
||||||
|
.Xr uniq 1 ,
|
||||||
|
.Xr qsort 3 ,
|
||||||
|
.Xr radixsort 3
|
||||||
|
.Sh HISTORY
|
||||||
|
A
|
||||||
|
.Nm
|
||||||
|
command appeared in
|
||||||
|
.At v5 .
|
||||||
|
This
|
||||||
|
.Nm
|
||||||
|
implementation appeared in
|
||||||
|
.Bx 4.4
|
||||||
|
and has been used since
|
||||||
|
.Nx 1.6 .
|
||||||
|
.Sh BUGS
|
||||||
|
POSIX requires that the locale's thousands separator be ignored in numbers.
|
||||||
|
It may be faster to sort very large files in pieces and then explicitly
|
||||||
|
merge them.
|
||||||
|
.Sh NOTES
|
||||||
|
This
|
||||||
|
.Nm
|
||||||
|
has no limits on input line length (other than imposed by available
|
||||||
|
memory) or any restrictions on bytes allowed within lines.
|
||||||
|
.Pp
|
||||||
|
To protect data
|
||||||
|
.Nm
|
||||||
|
.Fl o
|
||||||
|
calls
|
||||||
|
.Xr link 2
|
||||||
|
and
|
||||||
|
.Xr unlink 2 ,
|
||||||
|
and thus fails on protected directories.
|
||||||
|
.Pp
|
||||||
|
Input files should be text files.
|
||||||
|
If a file doesn't end with a record separator (which is typically a newline), the
|
||||||
|
.Nm
|
||||||
|
utility silently supplies one.
|
||||||
|
.Pp
|
||||||
|
The current
|
||||||
|
.Nm
|
||||||
|
uses lexicographic radix sorting, which requires
|
||||||
|
that sort keys be kept in memory (as opposed to previous versions which used quick
|
||||||
|
and merge sorts and did not).
|
||||||
|
Thus performance depends highly on efficient choice of sort keys, and the
|
||||||
|
.Fl b
|
||||||
|
option and the
|
||||||
|
.Ar field2
|
||||||
|
argument of the
|
||||||
|
.Fl k
|
||||||
|
option should be used whenever possible.
|
||||||
|
Similarly,
|
||||||
|
.Nm
|
||||||
|
.Fl k1f
|
||||||
|
is equivalent to
|
||||||
|
.Nm
|
||||||
|
.Fl f
|
||||||
|
and may take twice as long.
|
419
usr.bin/sort/sort.c
Normal file
419
usr.bin/sort/sort.c
Normal file
|
@ -0,0 +1,419 @@
|
||||||
|
/* $NetBSD: sort.c,v 1.61 2011/09/16 15:39:29 joerg Exp $ */
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to The NetBSD Foundation
|
||||||
|
* by Ben Harris and Jaromir Dolecek.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||||
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1993
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Peter McIlroy.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Sort sorts a file using an optional user-defined key.
|
||||||
|
* Sort uses radix sort for internal sorting, and allows
|
||||||
|
* a choice of merge sort and radix sort for external sorting.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <util.h>
|
||||||
|
#include "sort.h"
|
||||||
|
#include "fsort.h"
|
||||||
|
#include "pathnames.h"
|
||||||
|
|
||||||
|
#ifndef lint
|
||||||
|
__COPYRIGHT("@(#) Copyright (c) 1993\
|
||||||
|
The Regents of the University of California. All rights reserved.");
|
||||||
|
#endif /* not lint */
|
||||||
|
|
||||||
|
__RCSID("$NetBSD: sort.c,v 1.61 2011/09/16 15:39:29 joerg Exp $");
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <sys/resource.h>
|
||||||
|
|
||||||
|
#include <paths.h>
|
||||||
|
#include <signal.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <locale.h>
|
||||||
|
|
||||||
|
int REC_D = '\n';
|
||||||
|
u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* weight tables. Gweights is one of ascii, Rascii..
|
||||||
|
* modified to weight rec_d = 0 (or 255)
|
||||||
|
*/
|
||||||
|
u_char *const weight_tables[4] = { ascii, Rascii, Ftable, RFtable };
|
||||||
|
u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS];
|
||||||
|
|
||||||
|
int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0;
|
||||||
|
int REVERSE = 0;
|
||||||
|
int posix_sort;
|
||||||
|
|
||||||
|
unsigned int debug_flags = 0;
|
||||||
|
|
||||||
|
static char toutpath[MAXPATHLEN];
|
||||||
|
|
||||||
|
const char *tmpdir; /* where temporary files should be put */
|
||||||
|
|
||||||
|
static void cleanup(void);
|
||||||
|
static void onsignal(int);
|
||||||
|
__dead static void usage(const char *);
|
||||||
|
|
||||||
|
int
|
||||||
|
main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int ch, i, stdinflag = 0;
|
||||||
|
char cflag = 0, mflag = 0;
|
||||||
|
char *outfile, *outpath = 0;
|
||||||
|
struct field *fldtab;
|
||||||
|
size_t fldtab_sz, fld_cnt;
|
||||||
|
struct filelist filelist;
|
||||||
|
int num_input_files;
|
||||||
|
FILE *outfp = NULL;
|
||||||
|
#if !defined(__minix)
|
||||||
|
struct rlimit rl;
|
||||||
|
#endif /* !defined(__minix) */
|
||||||
|
struct stat st;
|
||||||
|
|
||||||
|
setlocale(LC_ALL, "");
|
||||||
|
|
||||||
|
#if !defined(__minix)
|
||||||
|
/* bump RLIMIT_NOFILE to maximum our hard limit allows */
|
||||||
|
if (getrlimit(RLIMIT_NOFILE, &rl) < 0)
|
||||||
|
err(2, "getrlimit");
|
||||||
|
rl.rlim_cur = rl.rlim_max;
|
||||||
|
if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
|
||||||
|
err(2, "setrlimit");
|
||||||
|
#endif /* !defined(__minix) */
|
||||||
|
|
||||||
|
d_mask[REC_D = '\n'] = REC_D_F;
|
||||||
|
d_mask['\t'] = d_mask[' '] = BLANK | FLD_D;
|
||||||
|
|
||||||
|
/* fldtab[0] is the global options. */
|
||||||
|
fldtab_sz = 3;
|
||||||
|
fld_cnt = 0;
|
||||||
|
fldtab = emalloc(fldtab_sz * sizeof(*fldtab));
|
||||||
|
memset(fldtab, 0, fldtab_sz * sizeof(*fldtab));
|
||||||
|
|
||||||
|
#define SORT_OPTS "bcdD:fHik:lmno:rR:sSt:T:ux"
|
||||||
|
|
||||||
|
/* Convert "+field" args to -f format */
|
||||||
|
fixit(&argc, argv, SORT_OPTS);
|
||||||
|
|
||||||
|
if (!(tmpdir = getenv("TMPDIR")))
|
||||||
|
tmpdir = _PATH_TMP;
|
||||||
|
|
||||||
|
while ((ch = getopt(argc, argv, SORT_OPTS)) != -1) {
|
||||||
|
switch (ch) {
|
||||||
|
case 'b':
|
||||||
|
fldtab[0].flags |= BI | BT;
|
||||||
|
break;
|
||||||
|
case 'c':
|
||||||
|
cflag = 1;
|
||||||
|
break;
|
||||||
|
case 'D': /* Debug flags */
|
||||||
|
for (i = 0; optarg[i]; i++)
|
||||||
|
debug_flags |= 1 << (optarg[i] & 31);
|
||||||
|
break;
|
||||||
|
case 'd': case 'f': case 'i': case 'n': case 'l':
|
||||||
|
fldtab[0].flags |= optval(ch, 0);
|
||||||
|
break;
|
||||||
|
case 'H':
|
||||||
|
/* -H was ; use merge sort for blocks of large files' */
|
||||||
|
/* That is now the default. */
|
||||||
|
break;
|
||||||
|
case 'k':
|
||||||
|
fldtab = erealloc(fldtab, (fldtab_sz + 1) * sizeof(*fldtab));
|
||||||
|
memset(&fldtab[fldtab_sz], 0, sizeof(fldtab[0]));
|
||||||
|
fldtab_sz++;
|
||||||
|
|
||||||
|
setfield(optarg, &fldtab[++fld_cnt], fldtab[0].flags);
|
||||||
|
break;
|
||||||
|
case 'm':
|
||||||
|
mflag = 1;
|
||||||
|
break;
|
||||||
|
case 'o':
|
||||||
|
outpath = optarg;
|
||||||
|
break;
|
||||||
|
case 'r':
|
||||||
|
REVERSE = 1;
|
||||||
|
break;
|
||||||
|
case 's':
|
||||||
|
/*
|
||||||
|
* Nominally 'stable sort', keep lines with equal keys
|
||||||
|
* in input file order. (Default for NetBSD)
|
||||||
|
* (-s for GNU sort compatibility.)
|
||||||
|
*/
|
||||||
|
posix_sort = 0;
|
||||||
|
break;
|
||||||
|
case 'S':
|
||||||
|
/*
|
||||||
|
* Reverse of -s!
|
||||||
|
* This needs to enforce a POSIX sort where records
|
||||||
|
* with equal keys are then sorted by the raw data.
|
||||||
|
* Currently not implemented!
|
||||||
|
* (using libc radixsort() v sradixsort() doesn't
|
||||||
|
* have the desired effect.)
|
||||||
|
*/
|
||||||
|
posix_sort = 1;
|
||||||
|
break;
|
||||||
|
case 't':
|
||||||
|
if (SEP_FLAG)
|
||||||
|
usage("multiple field delimiters");
|
||||||
|
SEP_FLAG = 1;
|
||||||
|
d_mask[' '] &= ~FLD_D;
|
||||||
|
d_mask['\t'] &= ~FLD_D;
|
||||||
|
d_mask[(u_char)*optarg] |= FLD_D;
|
||||||
|
if (d_mask[(u_char)*optarg] & REC_D_F)
|
||||||
|
errx(2, "record/field delimiter clash");
|
||||||
|
break;
|
||||||
|
case 'R':
|
||||||
|
if (REC_D != '\n')
|
||||||
|
usage("multiple record delimiters");
|
||||||
|
REC_D = *optarg;
|
||||||
|
if (REC_D == '\n')
|
||||||
|
break;
|
||||||
|
if (optarg[1] != '\0') {
|
||||||
|
char *ep;
|
||||||
|
int t = 0;
|
||||||
|
if (optarg[0] == '\\')
|
||||||
|
optarg++, t = 8;
|
||||||
|
REC_D = (int)strtol(optarg, &ep, t);
|
||||||
|
if (*ep != '\0' || REC_D < 0 ||
|
||||||
|
REC_D >= (int)__arraycount(d_mask))
|
||||||
|
errx(2, "invalid record delimiter %s",
|
||||||
|
optarg);
|
||||||
|
}
|
||||||
|
d_mask['\n'] = d_mask[' '];
|
||||||
|
d_mask[REC_D] = REC_D_F;
|
||||||
|
break;
|
||||||
|
case 'T':
|
||||||
|
/* -T tmpdir */
|
||||||
|
tmpdir = optarg;
|
||||||
|
break;
|
||||||
|
case 'u':
|
||||||
|
UNIQUE = 1;
|
||||||
|
break;
|
||||||
|
case '?':
|
||||||
|
default:
|
||||||
|
usage(NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (UNIQUE)
|
||||||
|
/* Don't sort on raw record if keys match */
|
||||||
|
posix_sort = 0;
|
||||||
|
|
||||||
|
if (cflag && argc > optind+1)
|
||||||
|
errx(2, "too many input files for -c option");
|
||||||
|
if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) {
|
||||||
|
outpath = argv[argc-1];
|
||||||
|
argc -= 2;
|
||||||
|
}
|
||||||
|
if (mflag && argc - optind > (MAXFCT - (16+1))*16)
|
||||||
|
errx(2, "too many input files for -m option");
|
||||||
|
|
||||||
|
for (i = optind; i < argc; i++) {
|
||||||
|
/* allow one occurrence of /dev/stdin */
|
||||||
|
if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) {
|
||||||
|
if (stdinflag)
|
||||||
|
warnx("ignoring extra \"%s\" in file list",
|
||||||
|
argv[i]);
|
||||||
|
else
|
||||||
|
stdinflag = 1;
|
||||||
|
|
||||||
|
/* change to /dev/stdin if '-' */
|
||||||
|
if (argv[i][0] == '-') {
|
||||||
|
static char path_stdin[] = _PATH_STDIN;
|
||||||
|
argv[i] = path_stdin;
|
||||||
|
}
|
||||||
|
|
||||||
|
} else if ((ch = access(argv[i], R_OK)))
|
||||||
|
err(2, "%s", argv[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fldtab[1].icol.num == 0) {
|
||||||
|
/* No sort key specified */
|
||||||
|
if (fldtab[0].flags & (I|D|F|N|L)) {
|
||||||
|
/* Modified - generate a key that covers the line */
|
||||||
|
fldtab[0].flags &= ~(BI|BT);
|
||||||
|
setfield("1", &fldtab[++fld_cnt], fldtab->flags);
|
||||||
|
fldreset(fldtab);
|
||||||
|
} else {
|
||||||
|
/* Unmodified, just compare the line */
|
||||||
|
SINGL_FLD = 1;
|
||||||
|
fldtab[0].icol.num = 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
fldreset(fldtab);
|
||||||
|
}
|
||||||
|
|
||||||
|
settables();
|
||||||
|
|
||||||
|
if (optind == argc) {
|
||||||
|
static const char * const names[] = { _PATH_STDIN, NULL };
|
||||||
|
filelist.names = names;
|
||||||
|
num_input_files = 1;
|
||||||
|
} else {
|
||||||
|
filelist.names = (const char * const *) &argv[optind];
|
||||||
|
num_input_files = argc - optind;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cflag) {
|
||||||
|
order(&filelist, fldtab);
|
||||||
|
/* NOT REACHED */
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!outpath) {
|
||||||
|
toutpath[0] = '\0'; /* path not used in this case */
|
||||||
|
outfile = outpath = toutpath;
|
||||||
|
outfp = stdout;
|
||||||
|
} else if (lstat(outpath, &st) == 0
|
||||||
|
&& !S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
|
||||||
|
/* output file exists and isn't character or block device */
|
||||||
|
struct sigaction act;
|
||||||
|
static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE,
|
||||||
|
#if defined(__minix)
|
||||||
|
SIGVTALRM, SIGPROF, 0};
|
||||||
|
#else
|
||||||
|
SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0};
|
||||||
|
#endif /* defined(__minix) */
|
||||||
|
int outfd;
|
||||||
|
errno = 0;
|
||||||
|
if (access(outpath, W_OK))
|
||||||
|
err(2, "%s", outpath);
|
||||||
|
(void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX",
|
||||||
|
outpath);
|
||||||
|
if ((outfd = mkstemp(toutpath)) == -1)
|
||||||
|
err(2, "Cannot create temporary file `%s'", toutpath);
|
||||||
|
(void)atexit(cleanup);
|
||||||
|
act.sa_handler = onsignal;
|
||||||
|
(void) sigemptyset(&act.sa_mask);
|
||||||
|
act.sa_flags = SA_RESTART | SA_RESETHAND;
|
||||||
|
for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */
|
||||||
|
sigaction(sigtable[i], &act, 0);
|
||||||
|
outfile = toutpath;
|
||||||
|
if ((outfp = fdopen(outfd, "w")) == NULL)
|
||||||
|
err(2, "Cannot open temporary file `%s'", toutpath);
|
||||||
|
} else {
|
||||||
|
outfile = outpath;
|
||||||
|
|
||||||
|
if ((outfp = fopen(outfile, "w")) == NULL)
|
||||||
|
err(2, "output file %s", outfile);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mflag)
|
||||||
|
fmerge(&filelist, num_input_files, outfp, fldtab);
|
||||||
|
else
|
||||||
|
fsort(&filelist, num_input_files, outfp, fldtab);
|
||||||
|
|
||||||
|
if (outfile != outpath) {
|
||||||
|
if (access(outfile, F_OK))
|
||||||
|
err(2, "%s", outfile);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copy file permissions bits of the original file.
|
||||||
|
* st is initialized above, when we create the
|
||||||
|
* temporary spool file.
|
||||||
|
*/
|
||||||
|
if (lchmod(outfile, st.st_mode & ALLPERMS) != 0) {
|
||||||
|
err(2, "cannot chmod %s: output left in %s",
|
||||||
|
outpath, outfile);
|
||||||
|
}
|
||||||
|
|
||||||
|
(void)unlink(outpath);
|
||||||
|
if (link(outfile, outpath))
|
||||||
|
err(2, "cannot link %s: output left in %s",
|
||||||
|
outpath, outfile);
|
||||||
|
(void)unlink(outfile);
|
||||||
|
toutpath[0] = 0;
|
||||||
|
}
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Signal handler installed by main() while a temporary output file
 * exists.  Removes that file, then lets the signal take its default
 * action.
 *
 * Simply returning would resume the sort with its output file already
 * unlinked, so the run could only fail later with an unrelated-looking
 * error.  The disposition is reset explicitly so the handler does not
 * depend on how it was installed.
 */
static void
onsignal(int sig)
{
	cleanup();
	(void)signal(sig, SIG_DFL);
	(void)raise(sig);
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
cleanup(void)
|
||||||
|
{
|
||||||
|
if (toutpath[0])
|
||||||
|
(void)unlink(toutpath);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Print an optional diagnostic followed by the usage synopsis on
 * stderr, then exit with status 2.
 *
 * msg: extra message printed as "progname: msg", or NULL for none.
 */
static void
usage(const char *msg)
{
	const char *progname = getprogname();

	if (msg != NULL)
		(void)fprintf(stderr, "%s: %s\n", progname, msg);
	(void)fprintf(stderr,
	    "usage: %s [-bcdfHilmnrSsu] [-k field1[,field2]] [-o output]"
	    " [-R char] [-T dir]"
	    " [-t char] [file ...]\n", progname);
	exit(2);
}
|
||||||
|
|
||||||
|
/*
 * (Re)allocate a record buffer through erealloc().
 *
 * rec:  buffer to resize, passed straight through to erealloc().
 * size: requested size in bytes; sizeof(long) - 1 extra bytes are added
 *       (presumably alignment slack - confirm against the callers).
 *
 * Returns whatever erealloc() returns.
 */
RECHEADER *
allocrec(RECHEADER *rec, size_t size)
{
	const size_t padded = size + sizeof(long) - 1;

	return erealloc(rec, padded);
}
|
201
usr.bin/sort/sort.h
Normal file
201
usr.bin/sort/sort.h
Normal file
|
@ -0,0 +1,201 @@
|
||||||
|
/* $NetBSD: sort.h,v 1.34 2011/09/16 15:39:29 joerg Exp $ */
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to The NetBSD Foundation
|
||||||
|
* by Ben Harris and Jaromir Dolecek.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||||
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1993
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Peter McIlroy.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)sort.h 8.1 (Berkeley) 6/6/93
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/param.h>
|
||||||
|
|
||||||
|
#include <err.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#define NBINS 256
|
||||||
|
|
||||||
|
/* values for masks, weights, and other flags. */
|
||||||
|
/* R and F get used to index weight_tables[] */
|
||||||
|
#define R 0x01 /* Field is reversed */
|
||||||
|
#define F 0x02 /* weight lower and upper case the same */
|
||||||
|
#define I 0x04 /* mask out non-printable characters */
|
||||||
|
#define D 0x08 /* sort alphanumeric characters only */
|
||||||
|
#define N 0x10 /* Field is a number */
|
||||||
|
#define BI 0x20 /* ignore blanks in icol */
|
||||||
|
#define BT 0x40 /* ignore blanks in tcol */
|
||||||
|
#define L 0x80 /* Sort by field length */
|
||||||
|
|
||||||
|
/* masks for delimiters: blanks, fields, and termination. */
|
||||||
|
#define BLANK 1 /* ' ', '\t'; '\n' if -R is invoked */
|
||||||
|
#define FLD_D 2 /* ' ', '\t' default; from -t otherwise */
|
||||||
|
#define REC_D_F 4 /* '\n' default; from -R otherwise */
|
||||||
|
|
||||||
|
#define min(a, b) ((a) < (b) ? (a) : (b))
|
||||||
|
#define max(a, b) ((a) > (b) ? (a) : (b))
|
||||||
|
|
||||||
|
/*
 * Close the stdio stream `file'.  If fclose() reports EOF, print the
 * stream's address together with the errno text and exit with status 2.
 * The pointer is cast to void * because that is the only argument type
 * "%p" accepts.
 */
#define FCLOSE(file) {						\
	if (EOF == fclose(file))				\
		err(2, "%p", (void *)(file));			\
}
|
||||||
|
|
||||||
|
/*
 * Write `n' items of `size' bytes each from `ptr' to the stream `f' and
 * exit with status 2 unless every item was written.  fwrite() returns a
 * short count when a write error occurs, so the result is compared with
 * the full request rather than only with zero; a request for zero items
 * therefore succeeds.  The item count is evaluated exactly once.
 */
#define EWRITE(ptr, size, n, f) {				\
	size_t ewrite_cnt_ = (n);				\
	if (fwrite(ptr, size, ewrite_cnt_, f) != ewrite_cnt_)	\
		err(2, NULL);					\
}
|
||||||
|
|
||||||
|
/* Records are limited to MAXBUFSIZE (8MB) and less if you want to sort
|
||||||
|
* in a sane way.
|
||||||
|
* Anyone who wants to sort data records longer than 2GB definitely needs a
|
||||||
|
* different program! */
|
||||||
|
typedef unsigned int length_t;
|
||||||
|
|
||||||
|
/* A record is a key/line pair starting at rec.data. It has a total length
|
||||||
|
* and an offset to the start of the line half of the pair.
|
||||||
|
*/
|
||||||
|
typedef struct recheader {
|
||||||
|
length_t length; /* total length of key and line */
|
||||||
|
length_t offset; /* to line */
|
||||||
|
int keylen; /* length of key */
|
||||||
|
u_char data[]; /* key then line */
|
||||||
|
} RECHEADER;
|
||||||
|
|
||||||
|
/* This is the column as seen by struct field. It is used by enterfield.
|
||||||
|
* They are matched with corresponding coldescs during initialization.
|
||||||
|
*/
|
||||||
|
struct column {
|
||||||
|
struct coldesc *p;
|
||||||
|
int num;
|
||||||
|
int indent;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* a coldesc has a number and pointers to the beginning and end of the
|
||||||
|
* corresponding column in the current line. This is determined in enterkey.
|
||||||
|
*/
|
||||||
|
typedef struct coldesc {
|
||||||
|
u_char *start;
|
||||||
|
u_char *end;
|
||||||
|
int num;
|
||||||
|
} COLDESC;
|
||||||
|
|
||||||
|
/* A field has an initial and final column; an omitted final column
|
||||||
|
* implies the end of the line. Flags regulate omission of blanks and
|
||||||
|
* numerical sorts; mask determines which characters are ignored (from -i, -d);
|
||||||
|
* weights determines the sort weights of a character (from -f, -r).
|
||||||
|
*
|
||||||
|
* The first field contains the global flags etc.
|
||||||
|
* The list terminates when icol = 0.
|
||||||
|
*/
|
||||||
|
struct field {
|
||||||
|
struct column icol;
|
||||||
|
struct column tcol;
|
||||||
|
u_int flags;
|
||||||
|
u_char *mask;
|
||||||
|
u_char *weights;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct filelist {
|
||||||
|
const char * const * names;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef int (*get_func_t)(FILE *, RECHEADER *, u_char *, struct field *);
|
||||||
|
typedef void (*put_func_t)(const RECHEADER *, FILE *);
|
||||||
|
|
||||||
|
extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
|
||||||
|
extern u_char *const weight_tables[4]; /* ascii, Rascii, Ftable, RFtable */
|
||||||
|
extern u_char d_mask[NBINS];
|
||||||
|
extern int SINGL_FLD, SEP_FLAG, UNIQUE, REVERSE;
|
||||||
|
extern int posix_sort;
|
||||||
|
extern int REC_D;
|
||||||
|
extern const char *tmpdir;
|
||||||
|
extern struct coldesc *clist;
|
||||||
|
extern int ncols;
|
||||||
|
|
||||||
|
/*
 * Test whether the debug flag selected by the character `ch' is set.
 * The character is reduced to a bit number with (ch & 31) and looked up
 * in debug_flags.  The shifted constant is unsigned so that bit 31 can be
 * selected without overflowing a signed int.
 */
#define DEBUG(ch) (debug_flags & (1U << ((ch) & 31)))

extern unsigned int debug_flags;
|
||||||
|
|
||||||
|
RECHEADER *allocrec(RECHEADER *, size_t);
|
||||||
|
void append(RECHEADER **, int, FILE *, void (*)(const RECHEADER *, FILE *));
|
||||||
|
void concat(FILE *, FILE *);
|
||||||
|
length_t enterkey(RECHEADER *, const u_char *, u_char *, size_t, struct field *);
|
||||||
|
void fixit(int *, char **, const char *);
|
||||||
|
void fldreset(struct field *);
|
||||||
|
FILE *ftmp(void);
|
||||||
|
void fmerge(struct filelist *, int, FILE *, struct field *);
|
||||||
|
void save_for_merge(FILE *, get_func_t, struct field *);
|
||||||
|
void merge_sort(FILE *, put_func_t, struct field *);
|
||||||
|
void fsort(struct filelist *, int, FILE *, struct field *);
|
||||||
|
int geteasy(FILE *, RECHEADER *, u_char *, struct field *);
|
||||||
|
int makekey(FILE *, RECHEADER *, u_char *, struct field *);
|
||||||
|
int makeline(FILE *, RECHEADER *, u_char *, struct field *);
|
||||||
|
void makeline_copydown(RECHEADER *);
|
||||||
|
int optval(int, int);
|
||||||
|
__dead void order(struct filelist *, struct field *);
|
||||||
|
void putline(const RECHEADER *, FILE *);
|
||||||
|
void putrec(const RECHEADER *, FILE *);
|
||||||
|
void putkeydump(const RECHEADER *, FILE *);
|
||||||
|
void rd_append(int, int, int, FILE *, u_char *, u_char *);
|
||||||
|
void radix_sort(RECHEADER **, RECHEADER **, int);
|
||||||
|
int setfield(const char *, struct field *, int);
|
||||||
|
void settables(void);
|
106
usr.bin/sort/tmp.c
Normal file
106
usr.bin/sort/tmp.c
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
/* $NetBSD: tmp.c,v 1.16 2009/11/06 18:34:22 joerg Exp $ */
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to The NetBSD Foundation
|
||||||
|
* by Ben Harris and Jaromir Dolecek.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||||
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1993
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Peter McIlroy.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/cdefs.h>
|
||||||
|
|
||||||
|
__RCSID("$NetBSD: tmp.c,v 1.16 2009/11/06 18:34:22 joerg Exp $");
|
||||||
|
|
||||||
|
#include <sys/param.h>
|
||||||
|
|
||||||
|
#include <err.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <signal.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include "sort.h"
|
||||||
|
#include "pathnames.h"
|
||||||
|
|
||||||
|
#define _NAME_TMP "sort.XXXXXXXX"
|
||||||
|
|
||||||
|
FILE *
|
||||||
|
ftmp(void)
|
||||||
|
{
|
||||||
|
sigset_t set, oset;
|
||||||
|
FILE *fp;
|
||||||
|
int fd;
|
||||||
|
char path[MAXPATHLEN];
|
||||||
|
|
||||||
|
(void)snprintf(path, sizeof(path), "%s%s%s", tmpdir,
|
||||||
|
(tmpdir[strlen(tmpdir)-1] != '/') ? "/" : "", _NAME_TMP);
|
||||||
|
|
||||||
|
sigfillset(&set);
|
||||||
|
(void)sigprocmask(SIG_BLOCK, &set, &oset);
|
||||||
|
if ((fd = mkstemp(path)) < 0)
|
||||||
|
err(2, "ftmp: mkstemp(\"%s\")", path);
|
||||||
|
if (!(fp = fdopen(fd, "w+")))
|
||||||
|
err(2, "ftmp: fdopen(\"%s\")", path);
|
||||||
|
if (!DEBUG('t'))
|
||||||
|
(void)unlink(path);
|
||||||
|
|
||||||
|
(void)sigprocmask(SIG_SETMASK, &oset, NULL);
|
||||||
|
return (fp);
|
||||||
|
}
|
Loading…
Reference in a new issue