diff --git a/commands/Makefile b/commands/Makefile index bdcde089f..268baa625 100644 --- a/commands/Makefile +++ b/commands/Makefile @@ -30,7 +30,7 @@ SUBDIR= aal add_route adduser advent arp ash at autil awk \ syslogd tail talk talkd tar tcpd tcpdp tcpstat tee telnet \ telnetd term termcap tget time tinyhalt top touch tr \ truncate tsort ttt tty udpstat umount uname unexpand \ - uniq unstack update uud uue version vol wc \ + unstack update uud uue version vol wc \ whereis which who write writeisofs fetch \ xargs yacc yes zdump zic zmodem pkgin_cd \ mktemp diff --git a/commands/uniq/Makefile b/commands/uniq/Makefile deleted file mode 100644 index 25eeda7e2..000000000 --- a/commands/uniq/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -PROG= uniq -MAN= - -.include diff --git a/commands/uniq/uniq.c b/commands/uniq/uniq.c deleted file mode 100644 index e5785ca8c..000000000 --- a/commands/uniq/uniq.c +++ /dev/null @@ -1,199 +0,0 @@ -/* uniq - compact repeated lines Author: John Woods */ -/* Uniq [-udc] [-n] [+n] [infile [outfile]] - * - * Written 02/08/86 by John Woods, placed into public domain. Enjoy. - * - */ - -/* If the symbol WRITE_ERROR is defined, uniq will exit(1) if it gets a - * write error on the output. This is not (of course) how V7 uniq does it, - * so undefine the symbol if you want to lose your output to a full disk - */ - -#define WRITE_ERROR 1 -#include -#include -#include -#include -#include - -char buffer[BUFSIZ]; -int uflag = 1; /* default is union of -d and -u outputs */ -int dflag = 1; /* flags are mutually exclusive */ -int cflag = 0; -int fields = 0; -int chars = 0; - -#ifdef __NBSD_LIBC -#define getline unix_getline -#endif - -_PROTOTYPE(int main, (int argc, char **argv)); -_PROTOTYPE(FILE *xfopen, (char *fn, char *mode)); -_PROTOTYPE(char *skip, (char *s)); -_PROTOTYPE(int equal, (char *s1, char *s2)); -_PROTOTYPE(void show, (char *line, int count)); -_PROTOTYPE(int uniq, (void)); -_PROTOTYPE(void usage, (void)); -_PROTOTYPE(int getline, (char *buf, int count)); - -FILE *xfopen(fn, mode) -char *fn, *mode; -{ - FILE *p; - - if ((p = fopen(fn, mode)) == NULL) { - perror("uniq"); - fflush(stdout); - exit(1); - } - return(p); -} - -int main(argc, argv) -int argc; -char *argv[]; -{ - char *p; - int inf = -1, outf; - - setbuf(stdout, buffer); - for (--argc, ++argv; argc > 0 && (**argv == '-' || **argv == '+'); - --argc, ++argv) { - if (**argv == '+') - chars = atoi(*argv + 1); - else if (isdigit(argv[0][1])) - fields = atoi(*argv + 1); - else if (argv[0][1] == '\0') - inf = 0; /* - is stdin */ - else - for (p = *argv + 1; *p; p++) { - switch (*p) { - case 'd': - dflag = 1; - uflag = 0; - break; - case 'u': - uflag = 1; - dflag = 0; - break; - case 'c': cflag = 1; break; - default: usage(); - } - } - } - - /* Input file */ - if (argc == 0) - inf = 0; - else if (inf == -1) { /* if - was not given */ - fclose(stdin); - xfopen(*argv++, "r"); - argc--; - } - if (argc == 0) - outf = 1; - else { - fclose(stdout); - xfopen(*argv++, "w"); - argc--; - } - - uniq(); - fflush(stdout); - return(0); -} - -char *skip(s) -char *s; -{ - int n; - - /* Skip fields */ - for (n = fields; n > 0; --n) { - /* Skip blanks */ - while (*s && (*s == ' ' || *s == '\t')) s++; - if (!*s) return s; - while (*s && (*s != ' ' && *s != '\t')) s++; - if (!*s) return s; - } - - /* Skip characters */ - for (n = chars; n > 0; --n) { - if (!*s) return s; - s++; - } - return s; -} - -int equal(s1, s2) -char *s1, *s2; -{ - return !strcmp(skip(s1), skip(s2)); -} - -void show(line, count) -char *line; -int count; -{ - if (cflag) - printf("%4d %s", count, line); - else { - if ((uflag && count == 1) || (dflag && count != 1)) - printf("%s", line); - } -} - -/* The meat of the whole affair */ -char *nowline, *prevline, buf1[1024], buf2[1024]; - -int uniq() -{ - char *p; - int seen; - - /* Setup */ - prevline = buf1; - if (getline(prevline, 1024) < 0) return(0); - seen = 1; - nowline = buf2; - - /* Get nowline and compare if not equal, dump prevline and swap - * pointers else continue, bumping seen count */ - while (getline(nowline, 1024) > 0) { - if (!equal(prevline, nowline)) { - show(prevline, seen); - seen = 1; - p = nowline; - nowline = prevline; - prevline = p; - } else - seen += 1; - } - show(prevline, seen); - return 0; -} - -void usage() -{ - fprintf(stderr, "Usage: uniq [-udc] [+n] [-n] [input [output]]\n"); -} - -int getline(buf, count) -char *buf; -int count; -{ - int c; - int ct = 0; - - while (ct++ < count) { - c = getc(stdin); - if (c < 0) return(-1); - *buf++ = c; - if (c == '\n') { - *buf++ = 0; - return(ct); - } - } - return(ct); -} diff --git a/man/man1/Makefile b/man/man1/Makefile index 702a60c22..a054b52fb 100644 --- a/man/man1/Makefile +++ b/man/man1/Makefile @@ -21,7 +21,7 @@ MAN= acd.1 anm.1 ar.1 ash.1 asize.1 at.1 banner.1 basename.1 \ split.1 strip.1 stty.1 su.1 sum.1 svc.1 \ synctree.1 sysenv.1 sz.1 tail.1 tee.1 telnet.1 template.1 \ term.1 termcap.1 tget.1 time.1 top.1 tr.1 true.1 \ - truncate.1 tsort.1 tty.1 umount.1 uname.1 unexpand.1 uniq.1 \ + truncate.1 tsort.1 tty.1 umount.1 uname.1 unexpand.1 \ uud.1 uue.1 vol.1 wc.1 whereis.1 which.1 \ who.1 write.1 xargs.1 yap.1 yes.1 linkfarm.1 pkg_view.1 diff --git a/man/man1/uniq.1 b/man/man1/uniq.1 deleted file mode 100644 index 6ac015a34..000000000 --- a/man/man1/uniq.1 +++ /dev/null @@ -1,36 +0,0 @@ -.TH UNIQ 1 -.SH NAME -uniq \- delete consecutive identical lines in a file -.SH SYNOPSIS -\fBuniq\fR [\fB\-cdu\fR]\fR [\fB\-\fIn\fR] [\fB+\fIn\fR] [\fIinput [\fIoutput\fR]\fR]\fR -.br -.de FL -.TP -\\fB\\$1\\fR -\\$2 -.. -.de EX -.TP 20 -\\fB\\$1\\fR -# \\$2 -.. -.SH OPTIONS -.FL "\-c" "Give count of identical lines in the input" -.FL "\-d" "Only duplicate lines are written to output" -.FL "\-u" "Only unique lines are written to output" -.FL "\-\fIn\fR" "Skip the first \fIn\fR columns when matching" -.FL "+\fIn\fR" "Skip the first \fIn\fR fields when matching" -.SH EXAMPLES -.EX "uniq +2 file" "Ignore first 2 fields when comparing" -.EX "uniq \-d inf outf" "Write duplicate lines to \fIoutf\fP" -.SH DESCRIPTION -.PP -.I Uniq -examines a file for consecutive lines that are identical. -All but duplicate entries are deleted, and the file is written to output. -The +\fIn\fR option skips the first \fIn\fR fields, where a field is defined -as a run of characters separated by white space. -The \-\fIn\fP option skips the first \fIn\fR spaces. -Fields are skipped first. -.SH "SEE ALSO" -.BR sort (1). diff --git a/tools/nbsd_ports b/tools/nbsd_ports index 79ae50696..c7b04ba79 100644 --- a/tools/nbsd_ports +++ b/tools/nbsd_ports @@ -10,3 +10,4 @@ usr.bin/sed src/usr.bin/sed usr.bin/stat src/usr.bin/stat usr.bin/tic src/usr.bin/tic usr.bin/mkdep src/usr.bin/mkdep +usr.bin/uniq src/usr.bin/uniq diff --git a/usr.bin/Makefile b/usr.bin/Makefile index 896fa5c0d..d64f61319 100644 --- a/usr.bin/Makefile +++ b/usr.bin/Makefile @@ -3,7 +3,7 @@ .include # NetBSD imports -SUBDIR= indent m4 stat tic sed mkdep +SUBDIR= indent m4 stat tic sed mkdep uniq # Non-NetBSD imports SUBDIR+= ministat mkimage diff --git a/usr.bin/uniq/Makefile b/usr.bin/uniq/Makefile new file mode 100644 index 000000000..226c7829c --- /dev/null +++ b/usr.bin/uniq/Makefile @@ -0,0 +1,6 @@ +# $NetBSD: Makefile,v 1.5 2009/04/14 22:15:28 lukem Exp $ +# @(#)Makefile 8.1 (Berkeley) 6/6/93 + +PROG= uniq + +.include diff --git a/usr.bin/uniq/uniq.1 b/usr.bin/uniq/uniq.1 new file mode 100644 index 000000000..9195422e2 --- /dev/null +++ b/usr.bin/uniq/uniq.1 @@ -0,0 +1,128 @@ +.\" $NetBSD: uniq.1,v 1.12 2010/10/06 06:43:26 dholland Exp $ +.\" +.\" Copyright (c) 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)uniq.1 8.1 (Berkeley) 6/6/93 +.\" +.Dd January 6, 2007 +.Dt UNIQ 1 +.Os +.Sh NAME +.Nm uniq +.Nd report or filter out repeated lines in a file +.Sh SYNOPSIS +.Nm +.Op Fl cdu +.Op Fl f Ar fields +.Op Fl s Ar chars +.Oo +.Ar input_file +.Op Ar output_file +.Oc +.Sh DESCRIPTION +The +.Nm +utility reads the standard input comparing adjacent lines, and writes +a copy of each unique input line to the standard output. +The second and succeeding copies of identical adjacent input lines are +not written. +Repeated lines in the input will not be detected if they are not adjacent, +so it may be necessary to sort the files first. +.Pp +The following options are available: +.Bl -tag -width Ds +.It Fl c +Precede each output line with the count of the number of times the line +occurred in the input, followed by a single space. +.It Fl d +Don't output lines that are not repeated in the input. +.It Fl f Ar fields +Ignore the first +.Ar fields +in each input line when doing comparisons. +A field is a string of non-blank characters separated from adjacent fields +by blanks. +Field numbers are one based, i.e. the first field is field one. +.It Fl s Ar chars +Ignore the first +.Ar chars +characters in each input line when doing comparisons. +If specified in conjunction with the +.Fl f +option, the first +.Ar chars +characters after the first +.Ar fields +fields will be ignored. +Character numbers are one based, i.e. the first character is character one. +.It Fl u +Don't output lines that are repeated in the input. +.\".It Fl Ns Ar n +.\"(Deprecated; replaced by +.\".Fl f ) . +.\"Ignore the first n +.\"fields on each input line when doing comparisons, +.\"where n is a number. +.\"A field is a string of non-blank +.\"characters separated from adjacent fields +.\"by blanks. +.\".It Cm \&\(pl Ns Ar n +.\"(Deprecated; replaced by +.\".Fl s ) . +.\"Ignore the first +.\".Ar m +.\"characters when doing comparisons, where +.\".Ar m +.\"is a +.\"number. +.El +.Pp +If additional arguments are specified on the command line, the first +such argument is used as the name of an input file, the second is used +as the name of an output file. +.Pp +The +.Nm +utility exits 0 on success, and \*[Gt]0 if an error occurs. +.Sh COMPATIBILITY +The historic +.Cm \&\(pl Ns Ar number +and +.Fl Ns Ar number +options have been deprecated but are still supported in this implementation. +.Sh SEE ALSO +.Xr sort 1 +.Sh STANDARDS +The +.Nm +utility is expected to be +.St -p1003.2 +compatible. diff --git a/usr.bin/uniq/uniq.c b/usr.bin/uniq/uniq.c new file mode 100644 index 000000000..32589efd5 --- /dev/null +++ b/usr.bin/uniq/uniq.c @@ -0,0 +1,257 @@ +/* $NetBSD: uniq.c,v 1.17 2010/10/06 07:59:18 wiz Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Case Larsen. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#ifndef lint +__COPYRIGHT("@(#) Copyright (c) 1989, 1993\ + The Regents of the University of California. All rights reserved."); +#endif /* not lint */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)uniq.c 8.3 (Berkeley) 5/4/95"; +#endif +__RCSID("$NetBSD: uniq.c,v 1.17 2010/10/06 07:59:18 wiz Exp $"); +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include + +static int cflag, dflag, uflag; +static int numchars, numfields, repeats; + +static FILE *file(const char *, const char *); +static void show(FILE *, const char *); +static const char *skip(const char *); +static void obsolete(char *[]); +static void usage(void) __dead; + +int +main (int argc, char *argv[]) +{ + const char *t1, *t2; + FILE *ifp, *ofp; + int ch; + char *prevline, *thisline, *p; + size_t prevlinesize, thislinesize, psize; + + setprogname(argv[0]); + ifp = ofp = NULL; + obsolete(argv); + while ((ch = getopt(argc, argv, "-cdf:s:u")) != -1) + switch (ch) { + case '-': + --optind; + goto done; + case 'c': + cflag = 1; + break; + case 'd': + dflag = 1; + break; + case 'f': + numfields = strtol(optarg, &p, 10); + if (numfields < 0 || *p) + errx(1, "illegal field skip value: %s", optarg); + break; + case 's': + numchars = strtol(optarg, &p, 10); + if (numchars < 0 || *p) + errx(1, "illegal character skip value: %s", + optarg); + break; + case 'u': + uflag = 1; + break; + case '?': + default: + usage(); + } + +done: argc -= optind; + argv +=optind; + + switch(argc) { + case 0: + ifp = stdin; + ofp = stdout; + break; + case 1: + ifp = file(argv[0], "r"); + ofp = stdout; + break; + case 2: + ifp = file(argv[0], "r"); + ofp = file(argv[1], "w"); + break; + default: + usage(); + } + + if ((p = fgetln(ifp, &psize)) == NULL) + return 0; + prevlinesize = psize; + if ((prevline = malloc(prevlinesize + 1)) == NULL) + err(1, "malloc"); + (void)memcpy(prevline, p, prevlinesize); + prevline[prevlinesize] = '\0'; + + thislinesize = psize; + if ((thisline = malloc(thislinesize + 1)) == NULL) + err(1, "malloc"); + + while ((p = fgetln(ifp, &psize)) != NULL) { + if (psize > thislinesize) { + if ((thisline = realloc(thisline, psize + 1)) == NULL) + err(1, "realloc"); + thislinesize = psize; + } + (void)memcpy(thisline, p, psize); + thisline[psize] = '\0'; + + /* If requested get the chosen fields + character offsets. */ + if (numfields || numchars) { + t1 = skip(thisline); + t2 = skip(prevline); + } else { + t1 = thisline; + t2 = prevline; + } + + /* If different, print; set previous to new value. */ + if (strcmp(t1, t2)) { + char *t; + size_t ts; + + show(ofp, prevline); + t = prevline; + prevline = thisline; + thisline = t; + ts = prevlinesize; + prevlinesize = thislinesize; + thislinesize = ts; + repeats = 0; + } else + ++repeats; + } + show(ofp, prevline); + free(prevline); + free(thisline); + return 0; +} + +/* + * show -- + * Output a line depending on the flags and number of repetitions + * of the line. + */ +static void +show(FILE *ofp, const char *str) +{ + + if ((dflag && repeats == 0) || (uflag && repeats > 0)) + return; + if (cflag) { + (void)fprintf(ofp, "%4d %s", repeats + 1, str); + } else { + (void)fprintf(ofp, "%s", str); + } +} + +static const char * +skip(const char *str) +{ + int infield, nchars, nfields; + + for (nfields = numfields, infield = 0; nfields && *str; ++str) + if (isspace((unsigned char)*str)) { + if (infield) { + infield = 0; + --nfields; + } + } else if (!infield) + infield = 1; + for (nchars = numchars; nchars-- && *str; ++str) + continue; + return str; +} + +static FILE * +file(const char *name, const char *mode) +{ + FILE *fp; + + if ((fp = fopen(name, mode)) == NULL) + err(1, "%s", name); + return(fp); +} + +static void +obsolete(char *argv[]) +{ + char *ap, *p, *start; + + while ((ap = *++argv) != NULL) { + /* Return if "--" or not an option of any form. */ + if (ap[0] != '-') { + if (ap[0] != '+') + return; + } else if (ap[1] == '-') + return; + if (!isdigit((unsigned char)ap[1])) + continue; + /* + * Digit signifies an old-style option. Malloc space for dash, + * new option and argument. + */ + (void)asprintf(&p, "-%c%s", ap[0] == '+' ? 's' : 'f', ap + 1); + if (!p) + err(1, "malloc"); + start = p; + *argv = start; + } +} + +static void +usage(void) +{ + (void)fprintf(stderr, "Usage: %s [-cdu] [-f fields] [-s chars] " + "[input [output]]\n", getprogname()); + exit(1); +}