Replace uniq with NetBSD version

This commit is contained in:
Vivek Prakash 2011-07-03 21:20:43 +00:00 committed by Arun Thomas
parent e376fd97ac
commit 1ea07af9da
10 changed files with 395 additions and 242 deletions

View file

@ -30,7 +30,7 @@ SUBDIR= aal add_route adduser advent arp ash at autil awk \
syslogd tail talk talkd tar tcpd tcpdp tcpstat tee telnet \ syslogd tail talk talkd tar tcpd tcpdp tcpstat tee telnet \
telnetd term termcap tget time tinyhalt top touch tr \ telnetd term termcap tget time tinyhalt top touch tr \
truncate tsort ttt tty udpstat umount uname unexpand \ truncate tsort ttt tty udpstat umount uname unexpand \
uniq unstack update uud uue version vol wc \ unstack update uud uue version vol wc \
whereis which who write writeisofs fetch \ whereis which who write writeisofs fetch \
xargs yacc yes zdump zic zmodem pkgin_cd \ xargs yacc yes zdump zic zmodem pkgin_cd \
mktemp mktemp

View file

@ -1,4 +0,0 @@
PROG= uniq
MAN=
.include <bsd.prog.mk>

View file

@ -1,199 +0,0 @@
/* uniq - compact repeated lines Author: John Woods */
/* Uniq [-udc] [-n] [+n] [infile [outfile]]
*
* Written 02/08/86 by John Woods, placed into public domain. Enjoy.
*
*/
/* If the symbol WRITE_ERROR is defined, uniq will exit(1) if it gets a
* write error on the output. This is not (of course) how V7 uniq does it,
* so undefine the symbol if you want to lose your output to a full disk
*/
#define WRITE_ERROR 1
#include <ctype.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
/* Buffer that main() installs as the stdout buffer via setbuf(). */
char buffer[BUFSIZ];
int uflag = 1; /* default is union of -d and -u outputs */
int dflag = 1; /* flags are mutually exclusive */
int cflag = 0; /* set by -c: show() prefixes every line with its count */
int fields = 0; /* number of leading fields skip() passes over (set by -n) */
int chars = 0; /* number of characters skip() passes over next (set by +n) */
/* NOTE(review): presumably renames our getline() so it does not collide
 * with a getline() declared by the NetBSD libc headers -- confirm.
 */
#ifdef __NBSD_LIBC
#define getline unix_getline
#endif
/* Prototypes for every function defined in this file. */
_PROTOTYPE(int main, (int argc, char **argv));
_PROTOTYPE(FILE *xfopen, (char *fn, char *mode));
_PROTOTYPE(char *skip, (char *s));
_PROTOTYPE(int equal, (char *s1, char *s2));
_PROTOTYPE(void show, (char *line, int count));
_PROTOTYPE(int uniq, (void));
_PROTOTYPE(void usage, (void));
_PROTOTYPE(int getline, (char *buf, int count));
/* Open file `fn' with stdio mode `mode' and return the stream.
 * If the open fails, report the reason with perror(), flush stdout and
 * terminate the program with exit status 1.
 */
FILE *xfopen(fn, mode)
char *fn, *mode;
{
  FILE *stream = fopen(fn, mode);

  if (stream != NULL) return(stream);

  /* Could not open: complain and give up. */
  perror("uniq");
  fflush(stdout);
  exit(1);
}
/* Attach file `fn' to the already-open standard stream `stream' using
 * stdio mode `mode'.  Exits with status 1 if the file cannot be opened.
 */
static void redirect(fn, mode, stream)
char *fn, *mode;
FILE *stream;
{
  if (freopen(fn, mode, stream) == NULL) {
    perror("uniq");
    exit(1);			/* exit() flushes whatever is still open */
  }
}

/* uniq [-udc] [-n] [+n] [infile [outfile]]
 *
 * Parse the leading option words, connect the optional input and output
 * files to stdin and stdout, then let uniq() do the work.
 *
 *   +n      sets `chars'  (characters to skip before comparing)
 *   -n      sets `fields' (fields to skip before comparing)
 *   -       input is standard input
 *   -d, -u  select one of dflag/uflag and clear the other
 *   -c      sets cflag
 *
 * Returns 0 on success.  Exits with status 1 on an unknown option letter,
 * when a file cannot be opened and, if WRITE_ERROR is defined, when the
 * output could not be written.
 */
int main(argc, argv)
int argc;
char *argv[];
{
  char *p;
  int inf = -1;			/* becomes 0 once input is known to be stdin */

  setbuf(stdout, buffer);
  for (--argc, ++argv; argc > 0 && (**argv == '-' || **argv == '+');
       --argc, ++argv) {
    if (**argv == '+')
      chars = atoi(*argv + 1);
    else if (isdigit((unsigned char) argv[0][1]))
      fields = atoi(*argv + 1);
    else if (argv[0][1] == '\0')
      inf = 0;			/* - is stdin */
    else
      for (p = *argv + 1; *p; p++) {
        switch (*p) {
        case 'd':
          dflag = 1;
          uflag = 0;
          break;
        case 'u':
          uflag = 1;
          dflag = 0;
          break;
        case 'c':
          cflag = 1;
          break;
        default:
          /* Unknown letter: do not silently carry on. */
          usage();
          exit(1);
        }
      }
  }

  /* Input file.  freopen() rebinds stdin itself; closing stdin and hoping
   * that a later fopen() hands back the same stream is not guaranteed.
   */
  if (argc == 0)
    inf = 0;
  else if (inf == -1) {		/* if - was not given */
    redirect(*argv++, "r", stdin);
    argc--;
  }

  /* Output file, if one was named. */
  if (argc > 0) {
    redirect(*argv++, "w", stdout);
    argc--;
  }

  uniq();

#ifdef WRITE_ERROR
  /* Do not report success if the output never made it out. */
  if (fflush(stdout) == EOF || ferror(stdout)) {
    fputs("uniq: write error\n", stderr);
    return(1);
  }
#else
  fflush(stdout);
#endif
  return(0);
}
/* Return the position in line `s' at which comparison should begin:
 * first pass over `fields' fields (each is optional blanks followed by a
 * run of non-blanks), then over `chars' more characters.  The scan never
 * moves past the terminating NUL.
 */
char *skip(s)
char *s;
{
  int left;

  /* Step over the leading fields. */
  left = fields;
  while (left > 0) {
    /* Blanks in front of the field. */
    while (*s == ' ' || *s == '\t') s++;
    if (*s == '\0') return s;
    /* The field itself. */
    while (*s != '\0' && *s != ' ' && *s != '\t') s++;
    if (*s == '\0') return s;
    left--;
  }

  /* Step over the extra characters. */
  for (left = chars; left > 0 && *s != '\0'; left--) s++;
  return s;
}
/* Return non-zero when lines `s1' and `s2' are identical from the point
 * that skip() selects in each of them, zero otherwise.
 */
int equal(s1, s2)
char *s1, *s2;
{
  char *tail1, *tail2;

  tail1 = skip(s1);
  tail2 = skip(s2);
  return strcmp(tail1, tail2) == 0;
}
/* Write `line', which occurred `count' times in a row, to stdout.
 * With cflag set every line is printed, preceded by its count.
 * Otherwise a line seen once is printed only if uflag is set and a
 * repeated line only if dflag is set.
 */
void show(line, count)
char *line;
int count;
{
  int wanted;

  if (cflag) {
    printf("%4d %s", count, line);
    return;
  }

  if (count == 1)
    wanted = uflag;
  else
    wanted = dflag;
  if (wanted) printf("%s", line);
}
/* The meat of the whole affair */
char *nowline, *prevline, buf1[1024], buf2[1024];

/* Read lines with getline() and pass each run of adjacent equal lines to
 * show() once, together with the length of the run.  `prevline' holds the
 * first line of the current run and `nowline' the line just read; the two
 * buffers trade places whenever a new run starts.  Always returns 0.
 */
int uniq()
{
  char *tmp;
  int seen;

  /* Prime the loop with the first line; nothing to do on empty input. */
  prevline = buf1;
  if (getline(prevline, 1024) < 0) return(0);
  nowline = buf2;

  for (seen = 1; getline(nowline, 1024) > 0; ) {
    if (equal(prevline, nowline)) {
      /* Same run: just count it. */
      seen++;
      continue;
    }

    /* A new run starts: emit the old one and swap the buffers. */
    show(prevline, seen);
    seen = 1;
    tmp = nowline;
    nowline = prevline;
    prevline = tmp;
  }

  /* Emit the run that was still pending at end of input. */
  show(prevline, seen);
  return 0;
}
/* Print the one-line synopsis on stderr.  Does not exit. */
void usage()
{
  fputs("Usage: uniq [-udc] [+n] [-n] [input [output]]\n", stderr);
}
/* Read the next line from stdin into `buf', which has room for `count'
 * bytes including the terminating NUL.
 *
 * At most count-1 characters are stored and the result is always
 * NUL-terminated, so a line that fills the buffer can no longer overrun
 * it.  Reading stops after a newline (which is kept), when the buffer is
 * full (the rest of the line is returned by the next call), or at end of
 * file.  A final line that lacks a newline is returned rather than dropped.
 *
 * Returns the number of characters stored, or -1 if end of file was
 * reached before anything was read (or if count < 1).
 */
int getline(buf, count)
char *buf;
int count;
{
  int c;
  int ct = 0;

  if (count < 1) return(-1);

  while (ct < count - 1) {	/* keep one byte free for the NUL */
    c = getc(stdin);
    if (c == EOF) {
      if (ct == 0) return(-1);
      break;			/* hand back the partial last line */
    }
    buf[ct++] = c;
    if (c == '\n') break;
  }
  buf[ct] = '\0';
  return(ct);
}

View file

@ -21,7 +21,7 @@ MAN= acd.1 anm.1 ar.1 ash.1 asize.1 at.1 banner.1 basename.1 \
split.1 strip.1 stty.1 su.1 sum.1 svc.1 \ split.1 strip.1 stty.1 su.1 sum.1 svc.1 \
synctree.1 sysenv.1 sz.1 tail.1 tee.1 telnet.1 template.1 \ synctree.1 sysenv.1 sz.1 tail.1 tee.1 telnet.1 template.1 \
term.1 termcap.1 tget.1 time.1 top.1 tr.1 true.1 \ term.1 termcap.1 tget.1 time.1 top.1 tr.1 true.1 \
truncate.1 tsort.1 tty.1 umount.1 uname.1 unexpand.1 uniq.1 \ truncate.1 tsort.1 tty.1 umount.1 uname.1 unexpand.1 \
uud.1 uue.1 vol.1 wc.1 whereis.1 which.1 \ uud.1 uue.1 vol.1 wc.1 whereis.1 which.1 \
who.1 write.1 xargs.1 yap.1 yes.1 linkfarm.1 pkg_view.1 who.1 write.1 xargs.1 yap.1 yes.1 linkfarm.1 pkg_view.1

View file

@ -1,36 +0,0 @@
.TH UNIQ 1
.SH NAME
uniq \- delete consecutive identical lines in a file
.SH SYNOPSIS
\fBuniq\fR [\fB\-cdu\fR]\fR [\fB\-\fIn\fR] [\fB+\fIn\fR] [\fIinput [\fIoutput\fR]\fR]\fR
.br
.de FL
.TP
\\fB\\$1\\fR
\\$2
..
.de EX
.TP 20
\\fB\\$1\\fR
# \\$2
..
.SH OPTIONS
.FL "\-c" "Give count of identical lines in the input"
.FL "\-d" "Only duplicate lines are written to output"
.FL "\-u" "Only unique lines are written to output"
.FL "\-\fIn\fR" "Skip the first \fIn\fR fields when matching"
.FL "+\fIn\fR" "Skip the first \fIn\fR characters when matching"
.SH EXAMPLES
.EX "uniq +2 file" "Ignore first 2 characters when comparing"
.EX "uniq \-d inf outf" "Write duplicate lines to \fIoutf\fP"
.SH DESCRIPTION
.PP
.I Uniq
examines a file for consecutive lines that are identical.
By default only the first line of each run of identical lines is kept, and the result is written to output.
The \-\fIn\fR option skips the first \fIn\fR fields, where a field is defined
as a run of characters separated by white space.
The +\fIn\fP option skips the first \fIn\fR characters.
Fields are skipped first.
.SH "SEE ALSO"
.BR sort (1).

View file

@ -10,3 +10,4 @@ usr.bin/sed src/usr.bin/sed
usr.bin/stat src/usr.bin/stat usr.bin/stat src/usr.bin/stat
usr.bin/tic src/usr.bin/tic usr.bin/tic src/usr.bin/tic
usr.bin/mkdep src/usr.bin/mkdep usr.bin/mkdep src/usr.bin/mkdep
usr.bin/uniq src/usr.bin/uniq

View file

@ -3,7 +3,7 @@
.include <bsd.own.mk> .include <bsd.own.mk>
# NetBSD imports # NetBSD imports
SUBDIR= indent m4 stat tic sed mkdep SUBDIR= indent m4 stat tic sed mkdep uniq
# Non-NetBSD imports # Non-NetBSD imports
SUBDIR+= ministat mkimage SUBDIR+= ministat mkimage

6
usr.bin/uniq/Makefile Normal file
View file

@ -0,0 +1,6 @@
# $NetBSD: Makefile,v 1.5 2009/04/14 22:15:28 lukem Exp $
# @(#)Makefile 8.1 (Berkeley) 6/6/93
PROG= uniq
.include <bsd.prog.mk>

128
usr.bin/uniq/uniq.1 Normal file
View file

@ -0,0 +1,128 @@
.\" $NetBSD: uniq.1,v 1.12 2010/10/06 06:43:26 dholland Exp $
.\"
.\" Copyright (c) 1991, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" This code is derived from software contributed to Berkeley by
.\" the Institute of Electrical and Electronics Engineers, Inc.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" @(#)uniq.1 8.1 (Berkeley) 6/6/93
.\"
.Dd January 6, 2007
.Dt UNIQ 1
.Os
.Sh NAME
.Nm uniq
.Nd report or filter out repeated lines in a file
.Sh SYNOPSIS
.Nm
.Op Fl cdu
.Op Fl f Ar fields
.Op Fl s Ar chars
.Oo
.Ar input_file
.Op Ar output_file
.Oc
.Sh DESCRIPTION
The
.Nm
utility reads the standard input comparing adjacent lines, and writes
a copy of each unique input line to the standard output.
The second and succeeding copies of identical adjacent input lines are
not written.
Repeated lines in the input will not be detected if they are not adjacent,
so it may be necessary to sort the files first.
.Pp
The following options are available:
.Bl -tag -width Ds
.It Fl c
Precede each output line with the count of the number of times the line
occurred in the input, followed by a single space.
.It Fl d
Don't output lines that are not repeated in the input.
.It Fl f Ar fields
Ignore the first
.Ar fields
in each input line when doing comparisons.
A field is a string of non-blank characters separated from adjacent fields
by blanks.
Field numbers are one based, i.e. the first field is field one.
.It Fl s Ar chars
Ignore the first
.Ar chars
characters in each input line when doing comparisons.
If specified in conjunction with the
.Fl f
option, the first
.Ar chars
characters after the first
.Ar fields
fields will be ignored.
Character numbers are one based, i.e. the first character is character one.
.It Fl u
Don't output lines that are repeated in the input.
.\".It Fl Ns Ar n
.\"(Deprecated; replaced by
.\".Fl f ) .
.\"Ignore the first n
.\"fields on each input line when doing comparisons,
.\"where n is a number.
.\"A field is a string of non-blank
.\"characters separated from adjacent fields
.\"by blanks.
.\".It Cm \&\(pl Ns Ar n
.\"(Deprecated; replaced by
.\".Fl s ) .
.\"Ignore the first
.\".Ar m
.\"characters when doing comparisons, where
.\".Ar m
.\"is a
.\"number.
.El
.Pp
If additional arguments are specified on the command line, the first
such argument is used as the name of an input file, the second is used
as the name of an output file.
.Pp
The
.Nm
utility exits 0 on success, and \*[Gt]0 if an error occurs.
.Sh COMPATIBILITY
The historic
.Cm \&\(pl Ns Ar number
and
.Fl Ns Ar number
options have been deprecated but are still supported in this implementation.
.Sh SEE ALSO
.Xr sort 1
.Sh STANDARDS
The
.Nm
utility is expected to be
.St -p1003.2
compatible.

257
usr.bin/uniq/uniq.c Normal file
View file

@ -0,0 +1,257 @@
/* $NetBSD: uniq.c,v 1.17 2010/10/06 07:59:18 wiz Exp $ */
/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Case Larsen.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#ifndef lint
__COPYRIGHT("@(#) Copyright (c) 1989, 1993\
The Regents of the University of California. All rights reserved.");
#endif /* not lint */
#ifndef lint
#if 0
static char sccsid[] = "@(#)uniq.c 8.3 (Berkeley) 5/4/95";
#endif
__RCSID("$NetBSD: uniq.c,v 1.17 2010/10/06 07:59:18 wiz Exp $");
#endif /* not lint */
#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/* Option flags, set to 1 by -c, -d and -u respectively. */
static int cflag, dflag, uflag;
/*
 * numchars and numfields hold the -s and -f arguments; repeats counts the
 * additional copies seen so far of the line held in prevline.
 */
static int numchars, numfields, repeats;
/* Forward declarations of the helpers defined after main(). */
static FILE *file(const char *, const char *);
static void show(FILE *, const char *);
static const char *skip(const char *);
static void obsolete(char *[]);
static void usage(void) __dead;
/*
 * parse_skip --
 *	Convert the argument of -f or -s to a non-negative int.  Exits with
 *	a diagnostic when the argument is empty, has trailing characters,
 *	is negative, or does not fit in an int.  `what' names the kind of
 *	value ("field" or "character") in the message.
 */
static int
parse_skip(const char *arg, const char *what)
{
	char *end;
	long val;

	errno = 0;
	val = strtol(arg, &end, 10);
	if (end == arg || *end != '\0' || errno == ERANGE ||
	    val < 0 || val != (long)(int)val)
		errx(1, "illegal %s skip value: %s", what, arg);
	return (int)val;
}

/*
 * main --
 *	Parse the options, open the input and output streams, then copy the
 *	input to the output one line per run of adjacent lines that compare
 *	equal (after skip() has been applied when -f or -s was given).
 *	show() decides, from the flags and the repeat count, whether and how
 *	each line is written.
 *
 *	Returns 0 on success; exits with status 1 on a usage error, a file
 *	that cannot be opened, an allocation failure, or a read/write error.
 */
int
main (int argc, char *argv[])
{
	const char *t1, *t2;
	FILE *ifp, *ofp;
	int ch, readfailed;
	char *prevline, *thisline, *p;
	size_t prevlinesize, thislinesize, psize;

	setprogname(argv[0]);
	ifp = ofp = NULL;

	/* Turn historic -N / +N arguments into -fN / -sN for getopt. */
	obsolete(argv);
	while ((ch = getopt(argc, argv, "-cdf:s:u")) != -1)
		switch (ch) {
		case '-':
			/*
			 * getopt returned '-' as an option character: back
			 * up so that argument becomes the first operand and
			 * stop parsing options.
			 */
			--optind;
			goto done;
		case 'c':
			cflag = 1;
			break;
		case 'd':
			dflag = 1;
			break;
		case 'f':
			numfields = parse_skip(optarg, "field");
			break;
		case 's':
			numchars = parse_skip(optarg, "character");
			break;
		case 'u':
			uflag = 1;
			break;
		case '?':
		default:
			usage();
		}

done:	argc -= optind;
	argv += optind;

	/* Zero, one or two operands: [input [output]]. */
	switch(argc) {
	case 0:
		ifp = stdin;
		ofp = stdout;
		break;
	case 1:
		ifp = file(argv[0], "r");
		ofp = stdout;
		break;
	case 2:
		ifp = file(argv[0], "r");
		ofp = file(argv[1], "w");
		break;
	default:
		usage();
	}

	/* Empty input produces no output, but a failed read is an error. */
	if ((p = fgetln(ifp, &psize)) == NULL) {
		if (ferror(ifp))
			err(1, "read error");
		return 0;
	}

	/*
	 * fgetln() data is not NUL-terminated and is only valid until the
	 * next read, so each line is copied into a buffer of our own.
	 */
	prevlinesize = psize;
	if ((prevline = malloc(prevlinesize + 1)) == NULL)
		err(1, "malloc");
	(void)memcpy(prevline, p, prevlinesize);
	prevline[prevlinesize] = '\0';

	thislinesize = psize;
	if ((thisline = malloc(thislinesize + 1)) == NULL)
		err(1, "malloc");

	while ((p = fgetln(ifp, &psize)) != NULL) {
		/* Grow the scratch buffer when the new line does not fit. */
		if (psize > thislinesize) {
			if ((thisline = realloc(thisline, psize + 1)) == NULL)
				err(1, "realloc");
			thislinesize = psize;
		}
		(void)memcpy(thisline, p, psize);
		thisline[psize] = '\0';

		/* If requested get the chosen fields + character offsets. */
		if (numfields || numchars) {
			t1 = skip(thisline);
			t2 = skip(prevline);
		} else {
			t1 = thisline;
			t2 = prevline;
		}

		/* If different, print; set previous to new value. */
		if (strcmp(t1, t2)) {
			char *t;
			size_t ts;

			show(ofp, prevline);
			/* Swap the buffers together with their capacities. */
			t = prevline;
			prevline = thisline;
			thisline = t;
			ts = prevlinesize;
			prevlinesize = thislinesize;
			thislinesize = ts;
			repeats = 0;
		} else
			++repeats;
	}
	/* NULL from fgetln() means end of file or error; remember which. */
	readfailed = ferror(ifp);

	/* Emit the run that was still pending when the input ended. */
	show(ofp, prevline);
	free(prevline);
	free(thisline);

	/* Do not report success when the data could not be read or written. */
	if (fflush(ofp) == EOF || ferror(ofp))
		err(1, "write error");
	if (readfailed)
		errx(1, "read error");
	return 0;
}
/*
 * show --
 *	Write str to ofp unless the flags suppress it: -d drops a line that
 *	was not repeated, -u drops a line that was.  With -c the line is
 *	preceded by the number of times it occurred (repeats + 1).
 */
static void
show(FILE *ofp, const char *str)
{
	if (repeats == 0 && dflag)
		return;
	if (repeats > 0 && uflag)
		return;

	if (!cflag) {
		(void)fprintf(ofp, "%s", str);
		return;
	}
	(void)fprintf(ofp, "%4d %s", repeats + 1, str);
}
static const char *
skip(const char *str)
{
int infield, nchars, nfields;
for (nfields = numfields, infield = 0; nfields && *str; ++str)
if (isspace((unsigned char)*str)) {
if (infield) {
infield = 0;
--nfields;
}
} else if (!infield)
infield = 1;
for (nchars = numchars; nchars-- && *str; ++str)
continue;
return str;
}
/*
 * file --
 *	Open name with the given stdio mode and return the stream.  If the
 *	open fails, exit with status 1 through err(3), which prints the file
 *	name and the reason.
 */
static FILE *
file(const char *name, const char *mode)
{
	FILE *stream = fopen(name, mode);

	if (stream == NULL)
		err(1, "%s", name);
	return stream;
}
/*
 * obsolete --
 *	Rewrite historic "-number" and "+number" arguments in place as
 *	"-fnumber" and "-snumber" so that getopt(3) can parse them.
 *	Scanning starts at argv[1] and stops at the end of the vector, at an
 *	argument beginning with "--", or at the first argument that starts
 *	with neither '-' nor '+'.  Exits through err(3) if the replacement
 *	string cannot be allocated.
 */
static void
obsolete(char *argv[])
{
	char *ap, *p;

	while ((ap = *++argv) != NULL) {
		/* Return if "--" or not an option of any form. */
		if (ap[0] != '-') {
			if (ap[0] != '+')
				return;
		} else if (ap[1] == '-')
			return;
		if (!isdigit((unsigned char)ap[1]))
			continue;

		/*
		 * Digit signifies an old-style option.  Build "-f<n>" or
		 * "-s<n>" in freshly allocated memory.  The result of
		 * asprintf() is what signals failure; the pointer contents
		 * are not guaranteed after an error on every libc.
		 */
		p = NULL;
		if (asprintf(&p, "-%c%s", ap[0] == '+' ? 's' : 'f',
		    ap + 1) == -1 || p == NULL)
			err(1, "malloc");

		/* The new string stays referenced from argv until exit. */
		*argv = p;
	}
}
/*
 * usage --
 *	Print the synopsis, prefixed with the program name, on stderr and
 *	exit with status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "Usage: %s [-cdu] [-f fields] [-s chars] [input [output]]\n",
	    prog);
	exit(1);
}