From 891fbc18d7304e58ad6307401f297fba2f1b1f8f Mon Sep 17 00:00:00 2001 From: Lionel Sambuc Date: Fri, 23 Aug 2013 18:42:14 +0200 Subject: [PATCH] Importing usr.bin/wc Change-Id: If174bd2bfc3ef38f21d45b5ba0567ac7d99dc608 --- commands/Makefile | 2 +- commands/wc/Makefile | 4 - commands/wc/wc.c | 151 ----------------- man/man1/Makefile | 2 +- man/man1/wc.1 | 40 ----- releasetools/nbsd_ports | 1 + test/testsh1.sh | 2 +- usr.bin/Makefile | 2 +- usr.bin/wc/Makefile | 6 + usr.bin/wc/wc.1 | 146 +++++++++++++++++ usr.bin/wc/wc.c | 354 ++++++++++++++++++++++++++++++++++++++++ 11 files changed, 511 insertions(+), 199 deletions(-) delete mode 100644 commands/wc/Makefile delete mode 100644 commands/wc/wc.c delete mode 100644 man/man1/wc.1 create mode 100644 usr.bin/wc/Makefile create mode 100644 usr.bin/wc/wc.1 create mode 100644 usr.bin/wc/wc.c diff --git a/commands/Makefile b/commands/Makefile index f67b01405..b539b7f2c 100644 --- a/commands/Makefile +++ b/commands/Makefile @@ -28,7 +28,7 @@ SUBDIR= add_route arp ash at backup basename btrace \ syslogd tail tcpd tcpdp tcpstat tee telnet \ telnetd term termcap tget time touch tr \ truncate tty udpstat umount uname unexpand \ - unstack update uud uue version vol wc \ + unstack update uud uue version vol \ whereis which write writeisofs fetch \ xargs zdump zmodem pkgin_cd pkgin_all \ worldstone updateboot update_bootcfg diff --git a/commands/wc/Makefile b/commands/wc/Makefile deleted file mode 100644 index 9fba7b91f..000000000 --- a/commands/wc/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -PROG= wc -MAN= - -.include diff --git a/commands/wc/wc.c b/commands/wc/wc.c deleted file mode 100644 index b978d21c4..000000000 --- a/commands/wc/wc.c +++ /dev/null @@ -1,151 +0,0 @@ -/* wc - count lines, words and characters Author: David Messer */ - -#include -#include -#include - -/* - * - * Usage: wc [-lwc] [names] - * - * Flags: - * l - count lines. - * w - count words. - * c - count characters. - * - * Flags l, w, and c are default. 
- * Words are delimited by any non-alphabetic character. - * - * Released into the PUBLIC-DOMAIN 02/10/86 - * - * If you find this program to be of use to you, a donation of - * whatever you think it is worth will be cheerfully accepted. - * - * Written by: David L. Messer - * P.O. Box 19130, Mpls, MN, 55119 - * Program (heavily) modified by Andy Tanenbaum - */ - - -int lflag; /* Count lines */ -int wflag; /* Count words */ -int cflag; /* Count characters */ - -long lcount; /* Count of lines */ -long wcount; /* Count of words */ -long ccount; /* Count of characters */ - -long ltotal; /* Total count of lines */ -long wtotal; /* Total count of words */ -long ctotal; /* Total count of characters */ - -int main(int argc, char **argv); -void count(FILE *f); -void usage(void); - -int main(argc, argv) -int argc; -char *argv[]; -{ - int k; - char *cp; - int tflag, files; - - /* Get flags. */ - files = argc - 1; - k = 1; - cp = argv[1]; - if (argc > 1 && *cp++ == '-') { - files--; - k++; /* points to first file */ - while (*cp != 0) { - switch (*cp) { - case 'l': lflag++; break; - case 'w': wflag++; break; - case 'c': cflag++; break; - default: usage(); - } - cp++; - } - } - - /* If no flags are set, treat as wc -lwc. */ - if (!lflag && !wflag && !cflag) { - lflag = 1; - wflag = 1; - cflag = 1; - } - - /* Process files. */ - tflag = files >= 2; /* set if # files > 1 */ - - /* Check to see if input comes from std input. */ - if (k >= argc) { - count(stdin); - if (lflag) printf(" %6ld", lcount); - if (wflag) printf(" %6ld", wcount); - if (cflag) printf(" %6ld", ccount); - printf(" \n"); - fflush(stdout); - exit(0); - } - - /* There is an explicit list of files. Loop on files. 
*/ - while (k < argc) { - FILE *f; - - if ((f = fopen(argv[k], "r")) == NULL) { - fprintf(stderr, "wc: cannot open %s\n", argv[k]); - } else { - count(f); - if (lflag) printf(" %6ld", lcount); - if (wflag) printf(" %6ld", wcount); - if (cflag) printf(" %6ld", ccount); - printf(" %s\n", argv[k]); - fclose(f); - } - k++; - } - - if (tflag) { - if (lflag) printf(" %6ld", ltotal); - if (wflag) printf(" %6ld", wtotal); - if (cflag) printf(" %6ld", ctotal); - printf(" total\n"); - } - fflush(stdout); - return(0); -} - -void count(f) -FILE *f; -{ - register int c; - register int word = 0; - - lcount = 0; - wcount = 0; - ccount = 0L; - - while ((c = getc(f)) != EOF) { - ccount++; - - if (isspace(c)) { - if (word) wcount++; - word = 0; - } else { - word = 1; - } - - if (c == '\n' || c == '\f') lcount++; - } - ltotal += lcount; - wtotal += wcount; - ctotal += ccount; -} - -void usage() -{ - fprintf(stderr, "Usage: wc [-lwc] [name ...]\n"); - exit(1); -} diff --git a/man/man1/Makefile b/man/man1/Makefile index 62acaf331..fd4cc970e 100644 --- a/man/man1/Makefile +++ b/man/man1/Makefile @@ -20,7 +20,7 @@ MAN= ash.1 at.1 basename.1 \ synctree.1 sysenv.1 sz.1 tail.1 tee.1 telnet.1 template.1 \ term.1 termcap.1 tget.1 time.1 tr.1 true.1 \ truncate.1 tty.1 umount.1 uname.1 unexpand.1 \ - uud.1 uue.1 vol.1 wc.1 whereis.1 which.1 \ + uud.1 uue.1 vol.1 whereis.1 which.1 \ write.1 xargs.1 yap.1 linkfarm.1 pkg_view.1 MLINKS += ash.1 sh.1 diff --git a/man/man1/wc.1 b/man/man1/wc.1 deleted file mode 100644 index 65184fed3..000000000 --- a/man/man1/wc.1 +++ /dev/null @@ -1,40 +0,0 @@ -.TH WC 1 -.SH NAME -wc \- count characters, words, and lines in a file -.SH SYNOPSIS -\fBwc\fR [\fB\-clw\fR] \fIfile\fR ...\fR -.br -.de FL -.TP -\\fB\\$1\\fR -\\$2 -.. -.de EX -.TP 20 -\\fB\\$1\\fR -# \\$2 -.. 
-.SH OPTIONS -.TP 5 -.B \-c -# Print character count -.TP 5 -.B \-l -# Print line count -.TP 5 -.B \-w -# Print word count -.SH EXAMPLES -.TP 20 -.B wc file1 file2 -# Print all three counts for both files -.TP 20 -.B wc \-l file -# Print line count only -.SH DESCRIPTION -.PP -.I Wc -reads each argument and computes the number of characters, words and lines -it contains. -A word is delimited by white space (space, tab, or line feed). -If no flags are present, all three counts are printed. diff --git a/releasetools/nbsd_ports b/releasetools/nbsd_ports index d4cf29b20..765e228fc 100644 --- a/releasetools/nbsd_ports +++ b/releasetools/nbsd_ports @@ -188,6 +188,7 @@ 2012/10/17 12:00:00,usr.bin/tput 2012/10/17 12:00:00,usr.bin/tsort 2010/10/06 07:59:18,usr.bin/uniq +2012/10/17 12:00:00,usr.bin/wc 2013/03/22 12:00:00,usr.bin/whatis 2013/03/15 12:00:00,usr.bin/who 2012/10/17 12:00:00,usr.bin/xinstall diff --git a/test/testsh1.sh b/test/testsh1.sh index 5e43f8954..c094d8434 100755 --- a/test/testsh1.sh +++ b/test/testsh1.sh @@ -49,7 +49,7 @@ rm -rf foo #Test cat cat num num num num num >y wc -c y >x1 -echo "     55 y" >x2 +echo "      55 y" >x2 if cmp -s x1 x2; then : ; else bomb "Error on cat test 1"; fi cat <y >z if cmp -s y z; then : ; else bomb "Error on cat test 2"; fi diff --git a/usr.bin/Makefile b/usr.bin/Makefile index 502f64d50..4e99dbd71 100644 --- a/usr.bin/Makefile +++ b/usr.bin/Makefile @@ -31,7 +31,7 @@ SUBDIR= \ uniq \ \ \ - whatis who \ + wc whatis who \ xinstall yes .if !defined(__MINIX) diff --git a/usr.bin/wc/Makefile b/usr.bin/wc/Makefile new file mode 100644 index 000000000..55ac91815 --- /dev/null +++ b/usr.bin/wc/Makefile @@ -0,0 +1,6 @@ +# $NetBSD: Makefile,v 1.4 1997/10/18 16:48:29 mrg Exp $ +# from: @(#)Makefile 8.1 (Berkeley) 6/6/93 + +PROG= wc + +.include <bsd.prog.mk> diff --git a/usr.bin/wc/wc.1 b/usr.bin/wc/wc.1 new file mode 100644 index 000000000..81702f99f --- /dev/null +++ b/usr.bin/wc/wc.1 @@ -0,0 +1,146 @@ +.\" $NetBSD: wc.1,v 1.15 2010/02/18 13:14:51 wiz Exp $ 
+.\" +.\" Copyright (c) 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" from: @(#)wc.1 8.2 (Berkeley) 4/19/94 +.\" +.Dd February 18, 2010 +.Dt WC 1 +.Os +.Sh NAME +.Nm wc +.Nd word, line, and byte count +.Sh SYNOPSIS +.Nm +.Op Fl c | Fl m +.Op Fl Llw +.Op Ar file ... 
+.Sh DESCRIPTION +The +.Nm +utility displays the number of lines, words, bytes and characters contained in each +input +.Ar file +(or standard input, by default) to the standard output. +A line is defined as a string of characters delimited by a \*[Lt]newline\*[Gt] +character, +and a word is defined as a string of characters delimited by white space +characters. +White space characters are the set of characters for which the +.Xr iswspace 3 +function returns true. +If more than one input file is specified, a line of cumulative counts +for all the files is displayed on a separate line after the output for +the last file. +.Pp +The following options are available: +.Bl -tag -width Ds +.It Fl c +The number of bytes in each input file +is written to the standard output. +.It Fl L +The number of characters in the longest line of each input file +is written to the standard output. +.It Fl l +The number of lines in each input file +is written to the standard output. +.It Fl m +The number of characters in each input file +is written to the standard output. +.It Fl w +The number of words in each input file +is written to the standard output. +.El +.Pp +When an option is specified, +.Nm +only +reports the +information requested by that option. +The default action is equivalent to all the flags +.Fl clw +having been specified. +.Pp +The following operands are available: +.Bl -tag -width Ds +.It Ar file +A pathname of an input file. +.El +.Pp +If no file names +are specified, the standard input is used and +no file name is displayed. +.Pp +By default, the standard output contains a line for each +input file of the form: +.Bd -literal -offset indent +lines words bytes file_name +.Ed +.Sh EXIT STATUS +.Ex -std wc +.Sh SEE ALSO +.Xr iswspace 3 +.Sh COMPATIBILITY +Historically, the +.Nm +utility was documented to define a word as a ``maximal string of +characters delimited by +.Aq space , +.Aq tab +or +.Aq newline +characters''. 
+The implementation, however, didn't handle non-printing characters +correctly so that `` ^D^E '' counted as 6 spaces, while ``foo^D^Ebar'' +counted as 8 characters. +.Bx 4 +systems after +.Bx 4.3 +modified the implementation to be consistent +with the documentation. +This implementation defines a ``word'' in terms of the +.Xr iswspace 3 +function, as required by +.St -p1003.2 . +.Pp +The +.Fl L +option is a non-standard extension, compatible with the +.Fl L +option of the GNU and +.Fx +.Nm +utilities. +.Sh STANDARDS +The +.Nm +utility conforms to +.St -p1003.2-92 . diff --git a/usr.bin/wc/wc.c b/usr.bin/wc/wc.c new file mode 100644 index 000000000..602a4508d --- /dev/null +++ b/usr.bin/wc/wc.c @@ -0,0 +1,354 @@ +/* $NetBSD: wc.c,v 1.35 2011/09/16 15:39:30 joerg Exp $ */ + +/* + * Copyright (c) 1980, 1987, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#ifndef lint
+__COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\
+ The Regents of the University of California. All rights reserved.");
+#endif /* not lint */
+
+#ifndef lint
+#if 0
+static char sccsid[] = "@(#)wc.c 8.2 (Berkeley) 5/2/95";
+#else
+__RCSID("$NetBSD: wc.c,v 1.35 2011/09/16 15:39:30 joerg Exp $");
+#endif
+#endif /* not lint */
+
+/* wc line, word, char count and optionally longest line. */
+
+#include <sys/param.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+
+#include <ctype.h>
+#include <fcntl.h>
+#include <err.h>
+#include <errno.h>
+#include <locale.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
+
+/*
+ * wc_count_t is the counter type; WCFMT/WCCAST are the matching printf
+ * conversion and cast used by print_counts().
+ */
+#ifdef NO_QUAD
+typedef u_long wc_count_t;
+# define WCFMT	" %7lu"
+# define WCCAST	unsigned long
+#else
+typedef u_quad_t wc_count_t;
+# define WCFMT	" %7llu"
+# define WCCAST	unsigned long long
+#endif
+
+/* Running totals over all files; printed as the "total" line by main(). */
+static wc_count_t	tlinect, twordct, tcharct, tlongest;
+/* Which counts were requested (-l, -w, -c, -m, -L). */
+static bool		doline, doword, dobyte, dochar, dolongest;
+/* Process exit status; set to 1 by any reported error. */
+static int		rval = 0;
+
+static void	cnt(const char *);
+static void	print_counts(wc_count_t, wc_count_t, wc_count_t, wc_count_t,
+		    const char *);
+__dead static void	usage(void);
+static size_t	do_mb(wchar_t *, const char *, size_t, mbstate_t *,
+		    size_t *, const char *);
+
+/*
+ * Parse the option flags, count every file operand (or standard input
+ * when there is none), print a "total" line when more than one operand
+ * was given, and exit with rval.
+ */
+int
+main(int argc, char *argv[])
+{
+	int ch;
+
+	setlocale(LC_ALL, "");
+
+	while ((ch = getopt(argc, argv, "lwcmL")) != -1)
+		switch (ch) {
+		case 'l':
+			doline = true;
+			break;
+		case 'w':
+			doword = true;
+			break;
+		case 'm':
+			/* -m and -c are mutually exclusive; the last one wins. */
+			dochar = true;
+			dobyte = false;
+			break;
+		case 'c':
+			dochar = false;
+			dobyte = true;
+			break;
+		case 'L':
+			dolongest = true;
+			break;
+		case '?':
+		default:
+			usage();
+		}
+	argv += optind;
+	argc -= optind;
+
+	/* Wc's flags are on by default. */
+	if (!(doline || doword || dobyte || dochar || dolongest))
+		doline = doword = dobyte = true;
+
+	if (*argv == NULL) {
+		cnt(NULL);
+	} else {
+		bool dototal = (argc > 1);
+
+		do {
+			cnt(*argv);
+		} while(*++argv);
+
+		if (dototal) {
+			print_counts(tlinect, twordct, tcharct, tlongest,
+			    "total");
+		}
+	}
+
+	exit(rval);
+}
+
+/*
+ * Convert the len bytes at p to wide characters with mbrtowc(), keeping
+ * the conversion state in *st so a character may straddle two calls.
+ *
+ * wc     - output array for the converted characters, or NULL to only
+ *          count them.
+ * retcnt - receives the number of characters converted.
+ * file   - name used in the "invalid byte sequence" diagnostic.
+ *
+ * An invalid byte is reported, sets rval, is skipped without being
+ * counted, and resets *st.  Returns the last mbrtowc() result;
+ * (size_t)-2 means the input ended inside a multibyte character.
+ */
+static size_t
+do_mb(wchar_t *wc, const char *p, size_t len, mbstate_t *st,
+    size_t *retcnt, const char *file)
+{
+	size_t r;
+	size_t c = 0;
+
+	do {
+		r = mbrtowc(wc, p, len, st);
+		if (r == (size_t)-1) {
+			warnx("%s: invalid byte sequence", file);
+			rval = 1;
+
+			/* XXX skip 1 byte */
+			len--;
+			p++;
+			memset(st, 0, sizeof(*st));
+			continue;
+		} else if (r == (size_t)-2)
+			break;
+		else if (r == 0)
+			r = 1;	/* a NUL character still consumes one byte */
+		c++;
+		if (wc)
+			wc++;
+		len -= r;
+		p += r;
+	} while (len > 0);
+
+	*retcnt = c;
+
+	return (r);
+}
+
+/*
+ * Count one input: `file', or standard input when file is NULL.
+ * Prints the requested counts via print_counts() and adds them to the
+ * running totals.  Errors are reported with warn()/warnx() and recorded
+ * in rval; a file that cannot be opened is skipped without output.
+ */
+static void
+cnt(const char *file)
+{
+	u_char buf[MAXBSIZE];
+	wchar_t wbuf[MAXBSIZE];
+	struct stat sb;
+	wc_count_t charct, linect, wordct, longest;
+	mbstate_t st;
+	u_char *C;
+	wchar_t *WC;
+	const char *name;		/* filename or <stdin> */
+	size_t r = 0;
+	int fd, len = 0;
+
+	linect = wordct = charct = longest = 0;
+	if (file != NULL) {
+		if ((fd = open(file, O_RDONLY, 0)) < 0) {
+			warn("%s", file);
+			rval = 1;
+			return;
+		}
+		name = file;
+	} else {
+		fd = STDIN_FILENO;
+		name = "<stdin>";
+	}
+
+	if (dochar || doword || dolongest)
+		(void)memset(&st, 0, sizeof(st));
+
+	if (!(doword || dolongest)) {
+		/*
+		 * line counting is split out because it's a lot
+		 * faster to get lines than to get words, since
+		 * the word count requires some logic.
+		 */
+		if (doline || dochar) {
+			while ((len = read(fd, buf, MAXBSIZE)) > 0) {
+				if (dochar) {
+					size_t wlen;
+
+					r = do_mb(NULL, (char *)buf, (size_t)len,
+					    &st, &wlen, name);
+					charct += wlen;
+				} else if (dobyte)
+					charct += len;
+				if (doline) {
+					for (C = buf; len--; ++C) {
+						if (*C == '\n')
+							++linect;
+					}
+				}
+			}
+		}
+
+		/*
+		 * if all we need is the number of characters and
+		 * it's a directory or a regular or linked file, just
+		 * stat the puppy.  We avoid testing for it not being
+		 * a special device in case someone adds a new type
+		 * of inode.
+		 */
+		else if (dobyte) {
+			if (fstat(fd, &sb)) {
+				warn("%s", name);
+				rval = 1;
+			} else {
+				if (S_ISREG(sb.st_mode) ||
+				    S_ISLNK(sb.st_mode) ||
+				    S_ISDIR(sb.st_mode)) {
+					charct = sb.st_size;
+				} else {
+					while ((len =
+					    read(fd, buf, MAXBSIZE)) > 0)
+						charct += len;
+				}
+			}
+		}
+	} else {
+		/* do it the hard way... */
+		wc_count_t linelen;
+		bool gotsp;
+
+		linelen = 0;
+		gotsp = true;
+		while ((len = read(fd, buf, MAXBSIZE)) > 0) {
+			size_t wlen;
+
+			r = do_mb(wbuf, (char *)buf, (size_t)len, &st, &wlen,
+			    name);
+			if (dochar) {
+				charct += wlen;
+			} else if (dobyte) {
+				charct += len;
+			}
+			for (WC = wbuf; wlen--; ++WC) {
+				if (iswspace(*WC)) {
+					gotsp = true;
+					if (*WC == L'\n') {
+						++linect;
+						if (linelen > longest)
+							longest = linelen;
+						linelen = 0;
+					} else {
+						linelen++;
+					}
+				} else {
+					/*
+					 * This line implements the POSIX
+					 * spec, i.e. a word is a "maximal
+					 * string of characters delimited by
+					 * whitespace."  Notice nothing was
+					 * said about a character being
+					 * printing or non-printing.
+					 */
+					if (gotsp) {
+						gotsp = false;
+						++wordct;
+					}
+
+					linelen++;
+				}
+			}
+		}
+
+		/*
+		 * Input that does not end in a newline leaves its last
+		 * line in linelen; it must still compete for -L.
+		 */
+		if (linelen > longest)
+			longest = linelen;
+	}
+
+	if (len == -1) {
+		warn("%s", name);
+		rval = 1;
+	}
+	if (dochar && r == (size_t)-2) {
+		warnx("%s: incomplete multibyte character", name);
+		rval = 1;
+	}
+
+	/* Pass `file', not `name': no name is printed for standard input. */
+	print_counts(linect, wordct, charct, longest, file);
+
+	/*
+	 * don't bother checking doline, doword, or dobyte --- speeds
+	 * up the common case
+	 */
+	tlinect += linect;
+	twordct += wordct;
+	tcharct += charct;
+	if (dolongest && longest > tlongest)
+		tlongest = longest;
+
+	if (close(fd)) {
+		warn("%s", name);
+		rval = 1;
+	}
+}
+
+/*
+ * Print one output line: the requested counts, each formatted with
+ * WCFMT, followed by " name", or by a bare newline when name is NULL.
+ */
+static void
+print_counts(wc_count_t lines, wc_count_t words, wc_count_t chars,
+    wc_count_t longest, const char *name)
+{
+
+	if (doline)
+		(void)printf(WCFMT, (WCCAST)lines);
+	if (doword)
+		(void)printf(WCFMT, (WCCAST)words);
+	if (dobyte || dochar)
+		(void)printf(WCFMT, (WCCAST)chars);
+	if (dolongest)
+		(void)printf(WCFMT, (WCCAST)longest);
+
+	if (name != NULL)
+		(void)printf(" %s\n", name);
+	else
+		(void)putchar('\n');
+}
+
+/* Print the usage message on stderr and exit with status 1. */
+static void
+usage(void)
+{
+
+	(void)fprintf(stderr, "usage: wc [-c | -m] [-Llw] [file ...]\n");
+	exit(1);
+}