Importing usr.bin/comm

Replaces commands/comm. No Minix-specific changes needed.

Change-Id: Ifa37c86b09f958ca0d729f3a8030d0162819a080
This commit is contained in:
Thomas Cort 2013-10-26 21:19:33 -04:00
parent 2e7c53b597
commit f55b7dc853
10 changed files with 326 additions and 262 deletions

View file

@ -5,7 +5,7 @@
SUBDIR= add_route arp ash at backup btrace \
cawf cd cdprobe cpp \
chmod chown ci cleantmp cmp co \
comm compress cp crc cron crontab \
compress cp crc cron crontab \
dd decomp16 DESCRIBE devmand devsize df dhcpd \
dhrystone diff diskctl dumpcore \
eject factor fbdctl \

View file

@ -1,4 +0,0 @@
PROG= comm
MAN=
.include <bsd.prog.mk>

View file

@ -1,206 +0,0 @@
/* comm - select lines from two sorted files Author: Martin C. Atkins */
/*
* This program was written by:
* Martin C. Atkins,
* University of York,
* Heslington,
* York. Y01 5DD
* England
* and is released into the public domain, on the condition
* that this comment is always included without alteration.
*/
#include <sys/types.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <minix/minlib.h>
#include <stdio.h>
/* Number of bytes requested from read() per buffer refill */
#define BUFFER_SIZE (512)
/* Size of each per-file line buffer in lines[] */
#define LINMAX (600)
/* Buffered reader state for one of the two input files */
struct file {
char *name; /* the file's name */
int fd; /* the file descriptor */
char buf[BUFFER_SIZE]; /* buffer storage */
char *next; /* the next character to read */
char *endp; /* the first invalid character */
int seeneof; /* an end of file has been seen */
} files[2];
/* Current line of each input file, as filled in by readline() */
char lines[2][LINMAX];
int colflgs[3] = {1, 2, 3}; /* number of tabs + 1: 0 => no column */
/* Usage text printed by usage() */
static char *umsg = "Usage: comm [-[123]] file1 file2\n";
/* Prototypes for the functions defined below */
int main(int argc, char **argv);
void usage(void);
void error(char *s, char *f);
void eopen(char *fn, struct file *file);
int getbuf(struct file *file);
int readline(int fno);
void comm(void);
void putcol(int col, char *buf);
void cpycol(int col);
/* Entry point: parse an optional "-[123]" column-suppression argument, open
 * the two input files and run the comparison.
 *
 * colflgs[] holds "number of leading tabs + 1" for each column, or 0 when
 * the column is suppressed.  Suppressing a column therefore moves every
 * higher-numbered column that is still printed one tab to the left.
 *
 * Returns 0 on success.  usage() is called (and exits with status 1) when
 * the option letters or the argument count are wrong.
 */
int main(int argc, char **argv)
{
  int cnt;

  if (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {
    char *ap;

    for (ap = &argv[1][1]; *ap; ap++) {
      switch (*ap) {
      case '1':
      case '2':
      case '3':
        cnt = *ap - '1';
        if (colflgs[cnt] == 0) break;   /* already suppressed */
        colflgs[cnt] = 0;
        /* Shift only the columns that are still printed.  Decrementing an
         * entry that is already 0 would turn it into -1, which is non-zero
         * and would re-enable a suppressed column (e.g. "-31" or "-32").
         */
        for (cnt++; cnt < 3; cnt++) {
          if (colflgs[cnt] > 0) colflgs[cnt]--;
        }
        break;
      default:
        usage();
      }
    }
    /* Drop the option argument so the file names are argv[1] and argv[2] */
    argc--;
    argv++;
  }
  if (argc != 3) usage();
  eopen(argv[1], &files[0]);
  eopen(argv[2], &files[1]);
  comm();
  return(0);
}
/* Emit the usage message through std_err() and terminate with status 1. */
void usage(void)
{
  std_err(umsg);
  exit(1);
}
/* Report a fatal error as "comm: <s><f>" followed by a newline, then exit
 * with status 1.  The second part `f' is optional and may be a null pointer.
 */
void error(char *s, char *f)
{
  std_err("comm: ");
  std_err(s);
  if (f != NULL) {
    std_err(f);
  }
  std_err("\n");
  exit(1);
}
/* Initialise `file' for reading the input named `fn'.
 * The name "-" selects file descriptor 0; any other name is opened
 * read-only, and a failure to open it is fatal (reported via error()).
 */
void eopen(char *fn, struct file *file)
{
  int is_dash = (fn[0] == '-' && fn[1] == '\0');

  /* Begin with an empty buffer: next and endp both point at its start */
  file->name = fn;
  file->endp = &file->buf[0];
  file->next = file->endp;
  file->seeneof = 0;

  if (is_dash) {
    file->fd = 0;
    return;
  }
  file->fd = open(fn, O_RDONLY);
  if (file->fd < 0) {
    error("can't open ", fn);
  }
}
/* Refill the buffer of `file' with up to BUFFER_SIZE bytes.
 * Returns 1 when nothing was obtained because end of file has been reached
 * (now or on an earlier call), 0 when fresh data is available between
 * file->next and file->endp.  A read error is fatal (reported via error()).
 */
int getbuf(struct file *file)
{
  int got;

  if (file->seeneof) {
    return(1);
  }

  got = read(file->fd, &file->buf[0], BUFFER_SIZE);
  if (got < 0) {
    error("read error on ", file->name);
  }
  if (got == 0) {
    /* Remember the end of file so later calls return without reading */
    file->seeneof++;
    return(1);
  }

  file->next = &file->buf[0];
  file->endp = &file->buf[got];
  return(0);
}
int readline(fno)
int fno;
{
/* Read up to the next '\n' character to buf.
* Return a complete line, even if end of file occurs within a line.
* Return false at end of file/
*/
register struct file *file = &files[fno];
char *buf = lines[fno];
if (file->next == file->endp && getbuf(file)) return(0);
while ((*buf++ = *file->next++) != '\n')
if (file->next == file->endp && getbuf(file)) {
*buf++ = '\n';
*buf = '\0';
return(1);
}
*buf = '\0';
return(1);
}
/* Merge the two inputs line by line.
 * A line found only in file 0 goes to column 0, a line found only in
 * file 1 goes to column 1, and a line present in both goes to column 2.
 * When one input runs out, the remainder of the other is copied to its
 * own column.
 */
void comm(void)
{
  int order;    /* strcmp() result for the two current lines */
  int lesser;   /* index of the file holding the smaller line */
  int other;    /* index of the remaining file */

  /* Load the first line of each file */
  if (!readline(0)) {
    cpycol(1);
    return;
  }
  if (!readline(1)) {
    putcol(0, lines[0]);
    cpycol(0);
    return;
  }

  while (1) {
    order = strcmp(lines[0], lines[1]);

    if (order == 0) {
      /* Same line in both files: print once, advance both */
      putcol(2, lines[0]);
      if (!readline(0)) {
        cpycol(1);
        return;
      }
      if (!readline(1)) {
        putcol(0, lines[0]);
        cpycol(0);
        return;
      }
      continue;
    }

    /* Different lines: print the smaller one and advance only its file */
    lesser = (order > 0) ? 1 : 0;
    other = !lesser;
    putcol(lesser, lines[lesser]);
    if (readline(lesser)) continue;

    /* That file is exhausted: flush the pending line and the rest */
    putcol(other, lines[other]);
    cpycol(other);
    return;
  }
  /* NOTREACHED */
}
/* Print `buf' in output column `col', preceded by colflgs[col] - 1 tabs.
 * Nothing is printed when the column is suppressed (colflgs[col] == 0).
 */
void putcol(int col, char *buf)
{
  int tabs_left;

  if (colflgs[col] == 0) {
    return;
  }
  tabs_left = colflgs[col] - 1;
  while (tabs_left > 0) {
    printf("\t");
    tabs_left--;
  }
  printf("%s", buf);
}
/* Copy every remaining line of input file `col' to output column `col'.
 * Does nothing, and reads nothing, when that column is suppressed.
 */
void cpycol(int col)
{
  if (colflgs[col] == 0) {
    return;
  }
  while (readline(col)) {
    putcol(col, lines[col]);
  }
}

View file

@ -1,6 +1,6 @@
MAN= ash.1 at.1 \
bsfilt.1 cawf.1 chgrp.1 \
chmod.1 cmp.1 comm.1 compress.1 \
chmod.1 cmp.1 compress.1 \
cp.1 crc.1 crontab.1 dd.1 \
df.1 dhrystone.1 dosdir.1 dosread.1 doswrite.1 \
dumpcore.1 eject.1 \

View file

@ -1,49 +0,0 @@
.TH COMM 1
.SH NAME
comm \- print lines common to two sorted files
.SH SYNOPSIS
\fBcomm\fR [\fB\-123\fR] \fIfile1 file2\fR
.br
.de FL
.TP
\\fB\\$1\\fR
\\$2
..
.de EX
.TP 20
\\fB\\$1\\fR
# \\$2
..
.SH OPTIONS
.TP 5
.B \-1
# Suppress column 1 (lines present only in \fIfile1\fP)
.TP 5
.B \-2
# Suppress column 2 (lines present only in \fIfile2\fP)
.TP 5
.B \-3
# Suppress column 3 (lines present in both files)
.SH EXAMPLES
.TP 20
.B comm file1 file2
# Print all three columns
.TP 20
.B comm \-12 file1 file2
# Print only lines common to both files
.SH DESCRIPTION
.PP
Two sorted files are read and compared.
A three column listing is produced.
Lines found only in
.I file1
are in column 1;
lines found only in
.I file2
are in column 2;
lines common to both files are in column 3.
The file name \- means \fIstdin\fR.
.SH "SEE ALSO"
.BR cmp (1),
.BR diff (1),
.BR sort (1).

View file

@ -154,6 +154,7 @@
2012/10/17 12:00:00,usr.bin/cksum
2012/10/17 12:00:00,usr.bin/col
2013/10/16 12:00:00,usr.bin/column
2012/10/17 12:00:00,usr.bin/comm
2012/10/17 12:00:00,usr.bin/ctags
2013/10/14 12:00:00,usr.bin/cut
2012/10/17 12:00:00,usr.bin/dirname

View file

@ -7,7 +7,7 @@ SUBDIR= asa \
banner basename bdes \
bzip2 bzip2recover \
cal chpass cksum \
col column ctags cut \
col column comm ctags cut \
dirname du \
env expand \
finger from \

6
usr.bin/comm/Makefile Normal file
View file

@ -0,0 +1,6 @@
# $NetBSD: Makefile,v 1.3 1995/03/26 09:25:49 glass Exp $
# @(#)Makefile 8.1 (Berkeley) 6/6/93
PROG= comm
.include <bsd.prog.mk>

103
usr.bin/comm/comm.1 Normal file
View file

@ -0,0 +1,103 @@
.\" $NetBSD: comm.1,v 1.11 2009/03/09 19:24:32 joerg Exp $
.\"
.\" Copyright (c) 1989, 1990, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" This code is derived from software contributed to Berkeley by
.\" the Institute of Electrical and Electronics Engineers, Inc.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" @(#)comm.1 8.1 (Berkeley) 6/6/93
.\"
.Dd June 6, 1993
.Dt COMM 1
.Os
.Sh NAME
.Nm comm
.Nd select or reject lines common to two files
.Sh SYNOPSIS
.Nm
.Op Fl 123f
.Ar file1 file2
.Sh DESCRIPTION
The
.Nm
utility reads
.Ar file1
and
.Ar file2 ,
which should be
sorted lexically, and produces three text
columns as output: lines only in
.Ar file1 ;
lines only in
.Ar file2 ;
and lines in both files.
.Pp
The filename ``-'' means the standard input.
.Pp
The following options are available:
.Bl -tag -width Ds
.It Fl 1
Suppress printing of column 1.
.It Fl 2
Suppress printing of column 2.
.It Fl 3
Suppress printing of column 3.
.It Fl f
Fold case in line comparisons.
.El
.Pp
Each column will have a number of tab characters prepended to it
equal to the number of lower numbered columns that are being printed.
For example, if column number two is being suppressed, lines printed
in column number one will not have any tabs preceding them, and lines
printed in column number three will have one.
.Pp
.Nm
assumes that the files are lexically sorted; all characters
participate in line comparisons.
.\" .Sh ENVIRONMENT
.\" .Bl -tag -width indent
.\" .It Ev LANG
.\" .It Ev LC_ALL
.\" .It Ev LC_CTYPE
.\" .It Ev LC_COLLATE
.\" .It Ev LC_MESSAGES
.\" .El
.Sh EXIT STATUS
.Nm
exits 0 on success, \*[Gt]0 if an error occurred.
.Sh SEE ALSO
.Xr cmp 1 ,
.Xr diff 1 ,
.Xr sort 1 ,
.Xr uniq 1
.Sh STANDARDS
The
.Nm
utility conforms to
.St -p1003.2-92 .

213
usr.bin/comm/comm.c Normal file
View file

@ -0,0 +1,213 @@
/* $NetBSD: comm.c,v 1.20 2012/09/05 04:01:23 simonb Exp $ */
/*
* Copyright (c) 1989, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Case Larsen.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#ifndef lint
__COPYRIGHT("@(#) Copyright (c) 1989, 1993, 1994\
The Regents of the University of California. All rights reserved.");
#endif /* not lint */
#ifndef lint
#if 0
static char sccsid[] = "@(#)comm.c 8.4 (Berkeley) 5/4/95";
#endif
__RCSID("$NetBSD: comm.c,v 1.20 2012/09/05 04:01:23 simonb Exp $");
#endif /* not lint */
#include <err.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/* Size of each line buffer: LINE_MAX bytes plus one for the terminating NUL */
#define MAXLINELEN (LINE_MAX + 1)
/* Column prefixes: zero, one or two leading tabs */
static const char *tabs[] = { "", "\t", "\t\t" };
/* Prototypes for the file-local helpers defined after main() */
static FILE *file(const char *);
static void show(FILE *, const char *, char *);
__dead static void usage(void);
static char *getnextln(char *buf, FILE *);
/*
 * comm: compare two files line by line and print up to three columns:
 * lines only in file1, lines only in file2, and lines in both.
 *
 * Options -1, -2 and -3 suppress the matching column; -f compares with
 * strcasecmp() instead of strcoll().  Exits 0 on success; a failure on
 * stdout is reported through err() with status 1.
 */
int
main(int argc, char **argv)
{
	char line1[MAXLINELEN], line2[MAXLINELEN];
	const char *col1 = NULL, *col2 = NULL, *col3 = NULL;
	const char *out_col, *out_line;
	const char **next_tab = tabs;
	int (*compare)(const char *, const char *) = strcoll;
	int want1 = 1, want2 = 1, want3 = 1;
	int eof1 = 0, eof2 = 0;
	int advance1 = 1, advance2 = 1;
	int opt, order;
	FILE *fp1, *fp2;

	(void)setlocale(LC_ALL, "");

	while ((opt = getopt(argc, argv, "123f")) != -1) {
		if (opt == '1')
			want1 = 0;
		else if (opt == '2')
			want2 = 0;
		else if (opt == '3')
			want3 = 0;
		else if (opt == 'f')
			compare = strcasecmp;
		else
			usage();	/* '?' or anything unexpected */
	}
	argv += optind;
	argc -= optind;

	if (argc != 2)
		usage();

	fp1 = file(argv[0]);
	fp2 = file(argv[1]);

	/* every column that is printed pushes the later ones one tab right */
	if (want1)
		col1 = *next_tab++;
	if (want2)
		col2 = *next_tab++;
	if (want3)
		col3 = *next_tab;

	for (;;) {
		/* fetch a new line from whichever file(s) were consumed */
		if (advance1)
			eof1 = (getnextln(line1, fp1) == NULL);
		if (advance2)
			eof2 = (getnextln(line2, fp2) == NULL);

		/* one file is finished: dump what is left of the other */
		if (eof1) {
			if (!eof2 && col2 != NULL)
				show(fp2, col2, line2);
			break;
		}
		if (eof2) {
			if (col1 != NULL)
				show(fp1, col1, line1);
			break;
		}

		order = compare(line1, line2);
		if (order == 0) {
			/* identical lines: column 3, advance both files */
			advance1 = advance2 = 1;
			out_col = col3;
			out_line = line1;
		} else if (order < 0) {
			/* file1's line sorts first: column 1 */
			advance1 = 1;
			advance2 = 0;
			out_col = col1;
			out_line = line1;
		} else {
			/* file2's line sorts first: column 2 */
			advance1 = 0;
			advance2 = 1;
			out_col = col2;
			out_line = line2;
		}

		/* a NULL prefix means the column is suppressed */
		if (out_col != NULL &&
		    printf("%s%s\n", out_col, out_line) < 0)
			break;
	}

	if (ferror(stdout) || fclose(stdout) == EOF)
		err(1, "stdout");
	exit(0);
}
/*
 * Print the line already held in `buf' and then every remaining line of
 * `fp', each prefixed with `offset'.  Stops at end of input or as soon as
 * printf() reports an error.
 */
static void
show(FILE *fp, const char *offset, char *buf)
{
	do {
		if (printf("%s%s\n", offset, buf) < 0)
			break;
	} while (getnextln(buf, fp) != NULL);
}
/*
 * Return a stream for the input called `name'.  The name "-" yields stdin;
 * anything else is opened for reading, and a failure to open it terminates
 * the program through err() with status 1.
 */
static FILE *
file(const char *name)
{
	FILE *stream;

	if (strcmp(name, "-") == 0)
		return stdin;

	stream = fopen(name, "r");
	if (stream == NULL)
		err(1, "%s", name);
	return stream;
}
/* Write the usage synopsis to stderr and exit with status 1. */
static void
usage(void)
{
	(void)fputs("usage: comm [-123f] file1 file2\n", stderr);
	exit(1);
}
/*
 * Read one line from `fp' into `buf', without the trailing newline.
 * `buf' must provide MAXLINELEN bytes.  At most MAXLINELEN - 1 characters
 * are kept; the rest of an overlong line is consumed up to its newline,
 * each extra character overwriting the last slot of the buffer.
 *
 * Returns `buf', or NULL when end of file is hit before any character
 * was read.
 */
static char *
getnextln(char *buf, FILE *fp)
{
	size_t len;
	int ch;

	for (len = 0; (ch = getc(fp)) != '\n' && ch != EOF; ) {
		buf[len] = ch;
		/* advance only while a slot for the NUL remains */
		if (len + 1 < MAXLINELEN)
			len++;
	}

	if (ch == EOF && len == 0)
		return NULL;

	buf[len] = '\0';
	return buf;
}