Importing usr.bin/wc

Change-Id: If174bd2bfc3ef38f21d45b5ba0567ac7d99dc608
This commit is contained in:
Lionel Sambuc 2013-08-23 18:42:14 +02:00
parent 17120a359b
commit 891fbc18d7
11 changed files with 511 additions and 199 deletions

View file

@ -28,7 +28,7 @@ SUBDIR= add_route arp ash at backup basename btrace \
syslogd tail tcpd tcpdp tcpstat tee telnet \
telnetd term termcap tget time touch tr \
truncate tty udpstat umount uname unexpand \
unstack update uud uue version vol wc \
unstack update uud uue version vol \
whereis which write writeisofs fetch \
xargs zdump zmodem pkgin_cd pkgin_all \
worldstone updateboot update_bootcfg

View file

@ -1,4 +0,0 @@
PROG= wc
MAN=
.include <bsd.prog.mk>

View file

@ -1,151 +0,0 @@
/* wc - count lines, words and characters Author: David Messer */
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
/*
*
* Usage: wc [-lwc] [names]
*
* Flags:
* l - count lines.
* w - count words.
* c - count characters.
*
* Flags l, w, and c are default.
* Words are delimited by white space, as determined by isspace().
*
* Released into the PUBLIC-DOMAIN 02/10/86
*
* If you find this program to be of use to you, a donation of
* whatever you think it is worth will be cheerfully accepted.
*
* Written by: David L. Messer
* P.O. Box 19130, Mpls, MN, 55119
* Program (heavily) modified by Andy Tanenbaum
*/
/*
 * Program-wide state.  The *flag variables select which columns are
 * printed; the *count variables hold the result for the file most
 * recently processed by count(); the *total variables accumulate the
 * per-file results and are printed on the "total" line.
 */
int lflag; /* Count lines */
int wflag; /* Count words */
int cflag; /* Count characters */
long lcount; /* Count of lines */
long wcount; /* Count of words */
long ccount; /* Count of characters */
long ltotal; /* Total count of lines */
long wtotal; /* Total count of words */
long ctotal; /* Total count of characters */
/* Forward declarations of the functions defined below. */
int main(int argc, char **argv);
void count(FILE *f);
void usage(void);
/*
 * Entry point: parse an optional leading "-lwc" flag word, then count
 * either standard input (when no file operands are given) or each named
 * file in turn.  When two or more files are named, a final "total" line
 * is printed.
 *
 * Returns 0 on success and 1 if any named file could not be opened, so
 * that callers (shell scripts, make) can detect the failure.
 */
int main(int argc, char **argv)
{
  int k;
  char *cp;
  int tflag, files;
  int status = 0;		/* becomes 1 once any file fails to open */

  /* Get flags. */
  files = argc - 1;
  k = 1;
  cp = argv[1];		/* NULL when argc == 1; guarded by the test below */
  if (argc > 1 && *cp++ == '-') {
	files--;
	k++;			/* points to first file */
	while (*cp != 0) {
		switch (*cp) {
		    case 'l':	lflag++;	break;
		    case 'w':	wflag++;	break;
		    case 'c':	cflag++;	break;
		    default:	usage();	/* does not return */
		}
		cp++;
	}
  }

  /* If no flags are set, treat as wc -lwc. */
  if (!lflag && !wflag && !cflag) {
	lflag = 1;
	wflag = 1;
	cflag = 1;
  }

  /* Process files. */
  tflag = files >= 2;		/* set if # files > 1 */

  /* Check to see if input comes from std input. */
  if (k >= argc) {
	count(stdin);
	if (lflag) printf(" %6ld", lcount);
	if (wflag) printf(" %6ld", wcount);
	if (cflag) printf(" %6ld", ccount);
	printf(" \n");
	fflush(stdout);
	exit(0);
  }

  /* There is an explicit list of files.  Loop on files. */
  while (k < argc) {
	FILE *f;

	if ((f = fopen(argv[k], "r")) == NULL) {
		fprintf(stderr, "wc: cannot open %s\n", argv[k]);
		status = 1;	/* remember the failure for the exit status */
	} else {
		count(f);
		if (lflag) printf(" %6ld", lcount);
		if (wflag) printf(" %6ld", wcount);
		if (cflag) printf(" %6ld", ccount);
		printf(" %s\n", argv[k]);
		fclose(f);
	}
	k++;
  }

  if (tflag) {
	if (lflag) printf(" %6ld", ltotal);
	if (wflag) printf(" %6ld", wtotal);
	if (cflag) printf(" %6ld", ctotal);
	printf(" total\n");
  }
  fflush(stdout);
  return(status);
}
/*
 * Count the lines, words and characters on stream f.
 *
 * The per-file results are left in lcount, wcount and ccount, and are
 * also added to the running totals ltotal, wtotal and ctotal.
 *
 * A word is a maximal run of non-white-space characters (per isspace()).
 * Both newline and form feed advance the line count.
 */
void count(FILE *f)
{
  int c;
  int word = 0;			/* non-zero while inside a word */

  lcount = 0;
  wcount = 0;
  ccount = 0L;

  while ((c = getc(f)) != EOF) {
	ccount++;

	if (isspace(c)) {
		if (word) wcount++;
		word = 0;
	} else {
		word = 1;
	}

	if (c == '\n' || c == '\f') lcount++;
  }

  /* Input ended in the middle of a word (no trailing white space):
   * that word has not been counted yet.
   */
  if (word) wcount++;

  ltotal += lcount;
  wtotal += wcount;
  ctotal += ccount;
}
/* Write the synopsis to standard error and terminate with status 1. */
void usage(void)
{
  fputs("Usage: wc [-lwc] [name ...]\n", stderr);
  exit(1);
}

View file

@ -20,7 +20,7 @@ MAN= ash.1 at.1 basename.1 \
synctree.1 sysenv.1 sz.1 tail.1 tee.1 telnet.1 template.1 \
term.1 termcap.1 tget.1 time.1 tr.1 true.1 \
truncate.1 tty.1 umount.1 uname.1 unexpand.1 \
uud.1 uue.1 vol.1 wc.1 whereis.1 which.1 \
uud.1 uue.1 vol.1 whereis.1 which.1 \
write.1 xargs.1 yap.1 linkfarm.1 pkg_view.1
MLINKS += ash.1 sh.1

View file

@ -1,40 +0,0 @@
.TH WC 1
.SH NAME
wc \- count characters, words, and lines in a file
.SH SYNOPSIS
\fBwc\fR [\fB\-clw\fR] [\fIfile\fR ...]
.br
.de FL
.TP
\\fB\\$1\\fR
\\$2
..
.de EX
.TP 20
\\fB\\$1\\fR
# \\$2
..
.SH OPTIONS
.TP 5
.B \-c
# Print character count
.TP 5
.B \-l
# Print line count
.TP 5
.B \-w
# Print word count
.SH EXAMPLES
.TP 20
.B wc file1 file2
# Print all three counts for both files
.TP 20
.B wc \-l file
# Print line count only
.SH DESCRIPTION
.PP
.I Wc
reads each argument and computes the number of characters, words and lines
it contains.
A word is delimited by white space (space, tab, or line feed).
If no flags are present, all three counts are printed.

View file

@ -188,6 +188,7 @@
2012/10/17 12:00:00,usr.bin/tput
2012/10/17 12:00:00,usr.bin/tsort
2010/10/06 07:59:18,usr.bin/uniq
2012/10/17 12:00:00,usr.bin/wc
2013/03/22 12:00:00,usr.bin/whatis
2013/03/15 12:00:00,usr.bin/who
2012/10/17 12:00:00,usr.bin/xinstall

View file

@ -31,7 +31,7 @@ SUBDIR= \
uniq \
\
\
whatis who \
wc whatis who \
xinstall yes
.if !defined(__MINIX)

6
usr.bin/wc/Makefile Normal file
View file

@ -0,0 +1,6 @@
# $NetBSD: Makefile,v 1.4 1997/10/18 16:48:29 mrg Exp $
# from: @(#)Makefile 8.1 (Berkeley) 6/6/93
PROG= wc
.include <bsd.prog.mk>

146
usr.bin/wc/wc.1 Normal file
View file

@ -0,0 +1,146 @@
.\" $NetBSD: wc.1,v 1.15 2010/02/18 13:14:51 wiz Exp $
.\"
.\" Copyright (c) 1991, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" This code is derived from software contributed to Berkeley by
.\" the Institute of Electrical and Electronics Engineers, Inc.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" from: @(#)wc.1 8.2 (Berkeley) 4/19/94
.\"
.Dd February 18, 2010
.Dt WC 1
.Os
.Sh NAME
.Nm wc
.Nd word, line, and byte count
.Sh SYNOPSIS
.Nm
.Op Fl c | Fl m
.Op Fl Llw
.Op Ar file ...
.Sh DESCRIPTION
The
.Nm
utility displays the number of lines, words, bytes and characters contained in each
input
.Ar file
(or standard input, by default) to the standard output.
A line is defined as a string of characters delimited by a \*[Lt]newline\*[Gt]
character,
and a word is defined as a string of characters delimited by white space
characters.
White space characters are the set of characters for which the
.Xr iswspace 3
function returns true.
If more than one input file is specified, a line of cumulative counts
for all the files is displayed on a separate line after the output for
the last file.
.Pp
The following options are available:
.Bl -tag -width Ds
.It Fl c
The number of bytes in each input file
is written to the standard output.
.It Fl L
The number of characters in the longest line of each input file
is written to the standard output.
.It Fl l
The number of lines in each input file
is written to the standard output.
.It Fl m
The number of characters in each input file
is written to the standard output.
.It Fl w
The number of words in each input file
is written to the standard output.
.El
.Pp
When an option is specified,
.Nm
only
reports the
information requested by that option.
The default action is equivalent to all the flags
.Fl clw
having been specified.
.Pp
The following operands are available:
.Bl -tag -width Ds
.It Ar file
A pathname of an input file.
.El
.Pp
If no file names
are specified, the standard input is used and
no file name is displayed.
.Pp
By default, the standard output contains a line for each
input file of the form:
.Bd -literal -offset indent
lines words bytes file_name
.Ed
.Sh EXIT STATUS
.Ex -std wc
.Sh SEE ALSO
.Xr iswspace 3
.Sh COMPATIBILITY
Historically, the
.Nm
utility was documented to define a word as a ``maximal string of
characters delimited by
.Aq space ,
.Aq tab
or
.Aq newline
characters''.
The implementation, however, didn't handle non-printing characters
correctly so that `` ^D^E '' counted as 6 spaces, while ``foo^D^Ebar''
counted as 8 characters.
.Bx 4
systems after
.Bx 4.3
modified the implementation to be consistent
with the documentation.
This implementation defines a ``word'' in terms of the
.Xr iswspace 3
function, as required by
.St -p1003.2 .
.Pp
The
.Fl L
option is a non-standard extension, compatible with the
.Fl L
option of the GNU and
.Fx
.Nm
utilities.
.Sh STANDARDS
The
.Nm
utility conforms to
.St -p1003.2-92 .

354
usr.bin/wc/wc.c Normal file
View file

@ -0,0 +1,354 @@
/* $NetBSD: wc.c,v 1.35 2011/09/16 15:39:30 joerg Exp $ */
/*
* Copyright (c) 1980, 1987, 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#ifndef lint
__COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\
The Regents of the University of California. All rights reserved.");
#endif /* not lint */
#ifndef lint
#if 0
static char sccsid[] = "@(#)wc.c 8.2 (Berkeley) 5/2/95";
#else
__RCSID("$NetBSD: wc.c,v 1.35 2011/09/16 15:39:30 joerg Exp $");
#endif
#endif /* not lint */
/* wc line, word, char count and optionally longest line. */
#include <sys/param.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <ctype.h>
#include <fcntl.h>
#include <err.h>
#include <errno.h>
#include <locale.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>
/*
 * Counter type used for every tally, together with the printf format
 * (WCFMT) and the cast (WCCAST) that must be used when printing it.
 * Building with NO_QUAD defined selects unsigned long instead of the
 * default u_quad_t.
 */
#ifdef NO_QUAD
typedef u_long wc_count_t;
# define WCFMT " %7lu"
# define WCCAST unsigned long
#else
typedef u_quad_t wc_count_t;
# define WCFMT " %7llu"
# define WCCAST unsigned long long
#endif
/* Running totals over all files; printed on the "total" line. */
static wc_count_t tlinect, twordct, tcharct, tlongest;
/* Which columns to print; set from the command-line options in main(). */
static bool doline, doword, dobyte, dochar, dolongest;
/* Process exit status; set to 1 whenever an error is reported. */
static int rval = 0;
static void cnt(const char *);
static void print_counts(wc_count_t, wc_count_t, wc_count_t, wc_count_t,
const char *);
__dead static void usage(void);
static size_t do_mb(wchar_t *, const char *, size_t, mbstate_t *,
size_t *, const char *);
/*
 * Program entry: select the output columns from the command-line options,
 * then count standard input (no operands) or every named file, followed
 * by a "total" line when more than one file was given.  Exits with rval.
 */
int
main(int argc, char *argv[])
{
	int opt;

	setlocale(LC_ALL, "");

	while ((opt = getopt(argc, argv, "lwcmL")) != -1) {
		switch (opt) {
		case 'L':
			dolongest = true;
			break;
		case 'c':
			/* -c and -m are mutually exclusive; the last one wins. */
			dobyte = true;
			dochar = false;
			break;
		case 'm':
			dochar = true;
			dobyte = false;
			break;
		case 'l':
			doline = true;
			break;
		case 'w':
			doword = true;
			break;
		default:
			/* Includes '?' as returned by getopt() on a bad option. */
			usage();
		}
	}
	argc -= optind;
	argv += optind;

	/* With no option at all, behave like "wc -lwc". */
	if (!doline && !doword && !dobyte && !dochar && !dolongest) {
		doline = true;
		doword = true;
		dobyte = true;
	}

	if (argv[0] == NULL) {
		/* No operands: read standard input. */
		cnt(NULL);
		exit(rval);
	}

	for (int i = 0; argv[i] != NULL; i++)
		cnt(argv[i]);

	if (argc > 1)
		print_counts(tlinect, twordct, tcharct, tlongest, "total");

	exit(rval);
}
/*
 * Decode the len bytes at p as multibyte characters using conversion
 * state st.
 *
 * wc      destination for the decoded wide characters, or NULL when only
 *         the number of characters is wanted.
 * retcnt  receives the number of characters decoded.
 * file    name used in diagnostics.
 *
 * An invalid sequence is reported, rval is set to 1, one byte is skipped
 * and the conversion state is reset.  Decoding stops early when the
 * buffer ends inside a character.
 *
 * Returns the result of the last mbrtowc() call (with 0 mapped to 1), so
 * the caller can test for (size_t)-2, i.e. an incomplete character.
 */
static size_t
do_mb(wchar_t *wc, const char *p, size_t len, mbstate_t *st,
    size_t *retcnt, const char *file)
{
	size_t nchars = 0;
	size_t res;

	for (;;) {
		res = mbrtowc(wc, p, len, st);

		if (res == (size_t)-2) {
			/* Buffer ends in the middle of a character. */
			break;
		}

		if (res == (size_t)-1) {
			warnx("%s: invalid byte sequence", file);
			rval = 1;
			/* XXX skip 1 byte */
			memset(st, 0, sizeof(*st));
			p++;
			len--;
		} else {
			/* A decoded NUL is reported as 0 but occupies a byte. */
			if (res == 0)
				res = 1;
			nchars++;
			if (wc != NULL)
				wc++;
			p += res;
			len -= res;
		}

		if (len == 0)
			break;
	}

	*retcnt = nchars;
	return (res);
}
/*
 * Count one input and print its line of results.
 *
 * file is the pathname to open, or NULL to read standard input.  The
 * requested counts are printed with print_counts() and added to the
 * running totals (tlinect, twordct, tcharct, tlongest).  Any failure
 * (open, read, fstat, close, incomplete multibyte character) is reported
 * with warn()/warnx() and recorded by setting rval to 1.
 */
static void
cnt(const char *file)
{
u_char buf[MAXBSIZE];
wchar_t wbuf[MAXBSIZE];
struct stat sb;
wc_count_t charct, linect, wordct, longest;
mbstate_t st;
u_char *C;
wchar_t *WC;
const char *name; /* filename or <stdin> */
size_t r = 0;
int fd, len = 0;
linect = wordct = charct = longest = 0;
if (file != NULL) {
if ((fd = open(file, O_RDONLY, 0)) < 0) {
warn("%s", file);
rval = 1;
return;
}
name = file;
} else {
fd = STDIN_FILENO;
name = "<stdin>";
}
/* The conversion state is only needed on paths that call do_mb(). */
if (dochar || doword || dolongest)
(void)memset(&st, 0, sizeof(st));
if (!(doword || dolongest)) {
/*
* line counting is split out because it's a lot
* faster to get lines than to get words, since
* the word count requires some logic.
*/
if (doline || dochar) {
while ((len = read(fd, buf, MAXBSIZE)) > 0) {
if (dochar) {
size_t wlen;
/* No output buffer: only the character count is needed. */
r = do_mb(0, (char *)buf, (size_t)len,
&st, &wlen, name);
charct += wlen;
} else if (dobyte)
charct += len;
if (doline) {
/*
* This loop uses len as its counter and leaves it
* at -1; the next read() in the while condition
* assigns it again before it is tested.
*/
for (C = buf; len--; ++C) {
if (*C == '\n')
++linect;
}
}
}
}
/*
* if all we need is the number of characters and
* it's a directory or a regular or linked file, just
* stat the puppy. We avoid testing for it not being
* a special device in case someone adds a new type
* of inode.
*/
else if (dobyte) {
if (fstat(fd, &sb)) {
warn("%s", name);
rval = 1;
} else {
if (S_ISREG(sb.st_mode) ||
S_ISLNK(sb.st_mode) ||
S_ISDIR(sb.st_mode)) {
charct = sb.st_size;
} else {
while ((len =
read(fd, buf, MAXBSIZE)) > 0)
charct += len;
}
}
}
} else {
/* do it the hard way... */
wc_count_t linelen;
bool gotsp;
linelen = 0;
/* Start as if preceded by white space so the first word is counted. */
gotsp = true;
while ((len = read(fd, buf, MAXBSIZE)) > 0) {
size_t wlen;
r = do_mb(wbuf, (char *)buf, (size_t)len, &st, &wlen,
name);
if (dochar) {
charct += wlen;
} else if (dobyte) {
charct += len;
}
for (WC = wbuf; wlen--; ++WC) {
if (iswspace(*WC)) {
gotsp = true;
if (*WC == L'\n') {
++linect;
/* longest is only updated when a newline ends the line. */
if (linelen > longest)
longest = linelen;
linelen = 0;
} else {
linelen++;
}
} else {
/*
* This line implements the POSIX
* spec, i.e. a word is a "maximal
* string of characters delimited by
* whitespace." Notice nothing was
* said about a character being
* printing or non-printing.
*/
if (gotsp) {
gotsp = false;
++wordct;
}
linelen++;
}
}
}
}
/* len holds the result of the last read(); -1 means it failed. */
if (len == -1) {
warn("%s", name);
rval = 1;
}
/* r is the result of the last do_mb() call; -2 means the input ended inside a character. */
if (dochar && r == (size_t)-2) {
warnx("%s: incomplete multibyte character", name);
rval = 1;
}
/* Pass file, not name: NULL makes print_counts() omit the name for stdin. */
print_counts(linect, wordct, charct, longest, file);
/*
* don't bother checking doline, doword, or dobyte --- speeds
* up the common case
*/
tlinect += linect;
twordct += wordct;
tcharct += charct;
if (dolongest && longest > tlongest)
tlongest = longest;
if (close(fd)) {
warn("%s", name);
rval = 1;
}
}
/*
 * Print one line of results.  Only the columns enabled by the option
 * flags are written, always in the order lines, words, bytes/characters,
 * longest line.  The line ends with " name", or with nothing when name
 * is NULL.
 */
static void
print_counts(wc_count_t lines, wc_count_t words, wc_count_t chars,
    wc_count_t longest, const char *name)
{
	const struct {
		bool		wanted;
		wc_count_t	value;
	} column[] = {
		{ doline,		lines },
		{ doword,		words },
		{ dobyte || dochar,	chars },
		{ dolongest,		longest },
	};
	size_t i;

	for (i = 0; i < sizeof(column) / sizeof(column[0]); i++) {
		if (column[i].wanted)
			(void)printf(WCFMT, (WCCAST)column[i].value);
	}

	if (name == NULL) {
		(void)putchar('\n');
		return;
	}
	(void)printf(" %s\n", name);
}
/* Write the command synopsis to standard error and exit with status 1. */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: wc [-c | -m] [-Llw] [file ...]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}