Importing usr.bin/tr

Change-Id: I4563198f111f2ec3687f6a5084edd05f243c2263
2013-08-24 22:11:19 +02:00 · 2013-08-24 22:11:19 +02:00 · d5c6c6a51b
commit d5c6c6a51b
parent 00785f561f
11 changed files with 494 additions and 231 deletions
--- a/commands/Makefile
+++ b/commands/Makefile
@ -26,7 +26,7 @@ SUBDIR=	add_route arp ash at backup btrace \
 	sleep slip spell split sprofalyze sprofdiff srccrc \
 	stty svclog svrctl swifi synctree sysenv \
 	syslogd tail tcpd tcpdp tcpstat tee telnet \
-	telnetd term termcap tget time touch tr \
+	telnetd term termcap tget time touch \
 	truncate tty udpstat umount uname \
 	unstack update uud uue version vol \
 	whereis which write writeisofs fetch \
--- a/commands/tr/Makefile
+++ b/commands/tr/Makefile
@ -1,5 +0,0 @@
-PROG=	tr
-SRCS=	tr.c str.c
-MAN=
-
-.include <bsd.prog.mk>
--- a/man/man1/Makefile
+++ b/man/man1/Makefile
@ -18,7 +18,7 @@ MAN=	ash.1 at.1 \
 	shar.1 sleep.1 spell.1 \
 	split.1 stty.1 svc.1 svrctl.1 \
 	synctree.1 sysenv.1 sz.1 tail.1 tee.1 telnet.1 template.1 \
-	term.1 termcap.1 tget.1 time.1 tr.1 true.1 \
+	term.1 termcap.1 tget.1 time.1 true.1 \
 	truncate.1 tty.1 umount.1 uname.1 \
 	uud.1 uue.1 vol.1 whereis.1 which.1 \
 	write.1 yap.1 linkfarm.1 pkg_view.1
--- a/man/man1/tr.1
+++ b/man/man1/tr.1
@ -1,70 +0,0 @@
-.TH TR 1
-.SH NAME
-tr \- translate character codes
-.SH SYNOPSIS
-\fBtr\fR [\fB\-cds\fR]\fR [\fIstring1\fR] [\fIstring2\fR]\fR
-.br
-.de FL
-.TP
-\\fB\\$1\\fR
-\\$2
-..
-.de EX
-.TP 20
-\\fB\\$1\\fR
-# \\$2
-..
-.SH OPTIONS
-.TP 5
-.B \-c
-# Complement the set of characters in \fIstring1\fR
-.TP 5
-.B \-d
-# Delete all characters specified in \fIstring1\fR
-.TP 5
-.B \-s
-# Squeeze all runs of characters in \fIstring1\fR to one character
-.SH EXAMPLES
-.TP 20
-.B tr \(fmA\-Z\(fm \(fma\-z\(fm <x >y
-# Convert upper case to lower case
-.TP 20
-.B tr \-d \(fm0123456789\(fm <f1 >f2
-# Delete all digits from \fIf1\fR
-.SH DESCRIPTION
-.PP
-.I Tr
-performs simple character translation.
-When no flag is specified, each character in 
-.I string1
-is mapped onto the corresponding character in
-.I string2 .
-.PP
-There are two types of
-.I tr
-out there, one that requires [ and ] for character classes, and one that does
-not.  Here is what the example above would look like for a
-.I tr
-that needs the brackets:
-.PP
-.RS
-.B "tr \(fm[A\-Z]\(fm \(fm[a\-z]\(fm <x >y"
-.RE
-.PP
-Use [ and ] if you want to be portable, because a
-.I tr
-that doesn't need them will still accept the syntax and mindlessly
-translate [ into [ and ] into ].
-.PP
-MINIX tr supports the following character classes: alnum, alpha, digit, lower,
-upper and xdigit. If any of these keywords is encountered between backets and 
-colons, it is replaced by respectively alphanumeric characters, alphabetic
-characters, decimal digits, lowercase letters, uppercase letters and 
-hexadecimal digits. The following are equivalent with the given examples:
-.TP 20
-.B tr \(fm[:upper:]\(fm \(fm[:lower:]\(fm <x >y
-# Convert upper case to lower case
-.TP 20
-.B tr \-d \(fm[:digit:]\(fm <f1 >f2
-# Delete all digits from \fIf1\fR
-
--- a/releasetools/nbsd_ports
+++ b/releasetools/nbsd_ports
@ -189,6 +189,7 @@
 2012/02/10 16:16:12,usr.bin/su
 2012/06/01 12:08:40,usr.bin/tic
 2012/10/17 12:00:00,usr.bin/tput
+2012/10/17 12:00:00,usr.bin/tr
 2012/10/17 12:00:00,usr.bin/tsort
 2010/10/06 07:59:18,usr.bin/uniq
 2012/10/17 12:00:00,usr.bin/wc
--- a/usr.bin/Makefile
+++ b/usr.bin/Makefile
@ -26,7 +26,7 @@ SUBDIR= \
 	shuffle sed seq \
 	sort stat su \
 	tic tput \
-	tsort unexpand \
+	tr tsort unexpand \
 	toproto \
 	uniq \
 	\
--- a/usr.bin/tr/Makefile
+++ b/usr.bin/tr/Makefile
@ -0,0 +1,11 @@
+#	$NetBSD: Makefile,v 1.8 2012/08/10 12:10:28 joerg Exp $
+#	@(#)Makefile	8.1 (Berkeley) 6/6/93
+
+PROG=	tr
+SRCS=	str.c tr.c
+
+.for f in str tr
+COPTS.${f}.c+=  -Wno-pointer-sign
+.endfor
+
+.include <bsd.prog.mk>
--- a/usr.bin/tr/extern.h
+++ b/usr.bin/tr/extern.h
@ -1,4 +1,4 @@
-/*	$NetBSD: extern.h,v 1.6 2003/08/07 11:16:46 agc Exp $	*/
+/*	$NetBSD: extern.h,v 1.7 2011/09/06 18:33:46 joerg Exp $	*/

 /*-
 * Copyright (c) 1991, 1993
@ -45,4 +45,4 @@ typedef struct {
 #define	NCHARS	(UCHAR_MAX + 1)		/* Number of possible characters. */
 #define	OOBCH	(UCHAR_MAX + 1)		/* Out of band character value. */

-int	 next (STR *);
+int	 next(STR *);
--- a/commands/tr/str.c
+++ b/commands/tr/str.c
@ -1,4 +1,4 @@
-/*	$NetBSD: str.c,v 1.12 2009/04/13 23:50:49 lukem Exp $	*/
+/*	$NetBSD: str.c,v 1.19 2011/09/08 12:00:26 christos Exp $	*/

 /*-
 * Copyright (c) 1991, 1993
@ -29,6 +29,14 @@
 * SUCH DAMAGE.
 */

+#include <sys/cdefs.h>
+#ifndef lint
+#if 0
+static char sccsid[] = "@(#)str.c	8.2 (Berkeley) 4/28/95";
+#endif
+__RCSID("$NetBSD: str.c,v 1.19 2011/09/08 12:00:26 christos Exp $");
+#endif /* not lint */
+
 #include <sys/types.h>

 #include <err.h>
@ -39,38 +47,37 @@
 #include <string.h>
 #include <ctype.h>

-#include "tr.h"
+#include "extern.h"

-static int	backslash (STR *);
-static int	bracket (STR *);
-static int	c_class (const void *, const void *);
-static void	genclass (STR *);
-static void	genequiv (STR *);
-static int	genrange (STR *);
-static void	genseq (STR *);
+static int	backslash(STR *);
+static int	bracket(STR *);
+static int	c_class(const void *, const void *);
+static void	genclass(STR *);
+static void	genequiv(STR *);
+static int	genrange(STR *);
+static void	genseq(STR *);

 int
-next(s)
-	STR *s;
+next(STR *s)
 {
 	int ch;

 	switch (s->state) {
 	case EOS:
-		return (0);
+		return 0;
 	case INFINITE:
-		return (1);
+		return 1;
 	case NORMAL:
 		switch (ch = *s->str) {
 		case '\0':
 			s->state = EOS;
-			return (0);
+			return 0;
 		case '\\':
 			s->lastch = backslash(s);
 			break;
 		case '[':
 			if (bracket(s))
-				return (next(s));
+				return next(s);
 			/* FALLTHROUGH */
 		default:
 			++s->str;
@ -80,121 +87,114 @@ next(s)

 		/* We can start a range at any time. */
 		if (s->str[0] == '-' && genrange(s))
-			return (next(s));
-		return (1);
+			return next(s);
+		return 1;
 	case RANGE:
 		if (s->cnt-- == 0) {
 			s->state = NORMAL;
-			return (next(s));
+			return next(s);
 		}
 		++s->lastch;
-		return (1);
+		return 1;
 	case SEQUENCE:
 		if (s->cnt-- == 0) {
 			s->state = NORMAL;
-			return (next(s));
+			return next(s);
 		}
-		return (1);
+		return 1;
 	case SET:
 		if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
 			s->state = NORMAL;
-			return (next(s));
+			return next(s);
 		}
-		return (1);
+		return 1;
 	}
 	/* NOTREACHED */
-	return (0);
+	return 0;
 }

 static int
-bracket(s)
-	STR *s;
+bracket(STR *s)
 {
 	char *p;

 	switch (s->str[1]) {
 	case ':':				/* "[:class:]" */
-		if ((p = strstr((char *) s->str + 2, ":]")) == NULL)
-			return (0);
+		if ((p = strstr(s->str + 2, ":]")) == NULL)
+			return 0;
 		*p = '\0';
 		s->str += 2;
 		genclass(s);
-		s->str = (unsigned char *) p + 2;
-		return (1);
+		s->str = p + 2;
+		return 1;
 	case '=':				/* "[=equiv=]" */
-		if ((p = strstr((char *) s->str + 2, "=]")) == NULL)
-			return (0);
+		if ((p = strstr(s->str + 2, "=]")) == NULL)
+			return 0;
 		s->str += 2;
 		genequiv(s);
-		return (1);
+		return 1;
 	default:				/* "[\###*n]" or "[#*n]" */
-		if ((p = strpbrk((char *) s->str + 2, "*]")) == NULL)
-			return (0);
+		if ((p = strpbrk(s->str + 2, "*]")) == NULL)
+			return 0;
 		if (p[0] != '*' || strchr(p, ']') == NULL)
-			return (0);
+			return 0;
 		s->str += 1;
 		genseq(s);
-		return (1);
+		return 1;
 	}
 	/* NOTREACHED */
 }

 typedef struct {
 	const char *name;
-	int (*func) (int);
-	int *set;
+	int (*func)(int);
 } CLASS;

-static CLASS classes[] = {
-	{ "alnum",  isalnum,  NULL, },
-	{ "alpha",  isalpha,  NULL, },
-	{ "blank",  isblank,  NULL, },
-	{ "cntrl",  iscntrl,  NULL, },
-	{ "digit",  isdigit,  NULL, },
-	{ "graph",  isgraph,  NULL, },
-	{ "lower",  islower,  NULL, },
-	{ "print",  isprint,  NULL, },
-	{ "punct",  ispunct,  NULL, },
-	{ "space",  isspace,  NULL, },
-	{ "upper",  isupper,  NULL, },
-	{ "xdigit", isxdigit, NULL, },
+static const CLASS classes[] = {
+	{ "alnum",  isalnum  },
+	{ "alpha",  isalpha  },
+	{ "blank",  isblank  },
+	{ "cntrl",  iscntrl  },
+	{ "digit",  isdigit  },
+	{ "graph",  isgraph  },
+	{ "lower",  islower  },
+	{ "print",  isprint  },
+	{ "punct",  ispunct  },
+	{ "space",  isspace  },
+	{ "upper",  isupper  },
+	{ "xdigit", isxdigit },
 };

 static void
-genclass(s)
-	STR *s;
+genclass(STR *s)
 {
-	int cnt, (*func) (int);
-	CLASS *cp, tmp;
+	int cnt;
+	const CLASS *cp;
+	CLASS tmp;
 	int *p;

-	tmp.name = (char *) s->str;
-	if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
-	    sizeof(CLASS), sizeof(CLASS), c_class)) == NULL) {
-		fprintf(stderr, "tr: unknown class %s\n", s->str);
-		exit(1);
-	}
+	tmp.name = s->str;
+	if ((cp = bsearch(&tmp, classes, sizeof(classes) /
+	    sizeof(*cp), sizeof(*cp), c_class)) == NULL)
+		errx(1, "unknown class %s", s->str);

-	if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL) {
-		perror("malloc");
-		exit(1);
-	}
-	memset(p, 0, (NCHARS + 1) * sizeof(int));
-	for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
-		if ((func)(cnt))
+	if ((s->set = p = malloc((NCHARS + 1) * sizeof(*p))) == NULL)
+		err(1, "malloc");
+
+	for (cnt = 0; cnt < NCHARS; ++cnt)
+		if ((*cp->func)(cnt))
 			*p++ = cnt;
-	*p = OOBCH;
+	*p++ = OOBCH;
+	memset(p, 0, NCHARS + 1 - (p - s->set));

 	s->cnt = 0;
 	s->state = SET;
-	s->set = cp->set;
 }

 static int
-c_class(a, b)
-	const void *a, *b;
+c_class(const void *a, const void *b)
 {
-	return (strcmp(((const CLASS *)a)->name, ((const CLASS *)b)->name));
+	return strcmp(((const CLASS *)a)->name, ((const CLASS *)b)->name);
 }

 /*
@ -202,21 +202,16 @@ c_class(a, b)
 * we just syntax check and grab the character.
 */
 static void
-genequiv(s)
-	STR *s;
+genequiv(STR *s)
 {
 	if (*s->str == '\\') {
 		s->equiv[0] = backslash(s);
-		if (*s->str != '=') {
-			fprintf(stderr, "tr: misplaced equivalence equals sign\n");
-			exit(1);
-		}
+		if (*s->str != '=')
+			errx(1, "misplaced equivalence equals sign");
 	} else {
 		s->equiv[0] = s->str[0];
-		if (s->str[1] != '=') {
-			fprintf(stderr, "tr: misplaced equivalence equals sign\n");
-			exit(1);
-		}
+		if (s->str[1] != '=')
+			errx(1, "misplaced equivalence equals sign");
 	}
 	s->str += 2;
 	s->cnt = 0;
@ -225,43 +220,37 @@ genequiv(s)
 }

 static int
-genrange(s)
-	STR *s;
+genrange(STR *s)
 {
 	int stopval;
-	unsigned char *savestart;
+	char *savestart;

 	savestart = s->str;
 	stopval = *++s->str == '\\' ? backslash(s) : *s->str++;
 	if (stopval < (u_char)s->lastch) {
 		s->str = savestart;
-		return (0);
+		return 0;
 	}
 	s->cnt = stopval - s->lastch + 1;
 	s->state = RANGE;
 	--s->lastch;
-	return (1);
+	return 1;
 }

 static void
-genseq(s)
-	STR *s;
+genseq(STR *s)
 {
 	char *ep;

-	if (s->which == STRING1) {
-		fprintf(stderr, "tr: sequences only valid in string2\n");
-		exit(1);
-	}
+	if (s->which == STRING1)
+		errx(1, "sequences only valid in string2");

 	if (*s->str == '\\')
 		s->lastch = backslash(s);
 	else
 		s->lastch = *s->str++;
-	if (*s->str != '*') {
-		fprintf(stderr, "tr: misplaced sequence asterisk\n");
-		exit(1);
-	}
+	if (*s->str != '*')
+		errx(1, "misplaced sequence asterisk");

 	switch (*++s->str) {
 	case '\\':
@ -273,14 +262,13 @@ genseq(s)
 		break;
 	default:
 		if (isdigit(*s->str)) {
-			s->cnt = strtol((char *) s->str, &ep, 0);
+			s->cnt = strtol(s->str, &ep, 0);
 			if (*ep == ']') {
-				s->str = (unsigned char *) ep + 1;
+				s->str = ep + 1;
 				break;
 			}
 		}
-		fprintf(stderr, "tr: illegal sequence count\n");
-		exit(1);
+		errx(1, "illegal sequence count");
 		/* NOTREACHED */
 	}

@ -292,8 +280,7 @@ genseq(s)
 * an escape code or a literal character.
 */
 static int
-backslash(s)
-	STR *s;
+backslash(STR *s)
 {
 	int ch, cnt, val;

@ -308,28 +295,30 @@ backslash(s)
 		}
 	}
 	if (cnt)
-		return (val);
+		return val;
 	if (ch != '\0')
 		++s->str;
 	switch (ch) {
-		case 'a':			/* escape characters */
-			return ('\7');
-		case 'b':
-			return ('\b');
-		case 'f':
-			return ('\f');
-		case 'n':
-			return ('\n');
-		case 'r':
-			return ('\r');
-		case 't':
-			return ('\t');
-		case 'v':
-			return ('\13');
-		case '\0':			/*  \" -> \ */
-			s->state = EOS;
-			return ('\\');
-		default:			/* \x" -> x */
-			return (ch);
+	case 'a':			/* escape characters */
+		return '\7';
+	case 'b':
+		return '\b';
+	case 'e':
+		return '\033';
+	case 'f':
+		return '\f';
+	case 'n':
+		return '\n';
+	case 'r':
+		return '\r';
+	case 't':
+		return '\t';
+	case 'v':
+		return '\13';
+	case '\0':			/*  \" -> \ */
+		s->state = EOS;
+		return '\\';
+	default:			/* \x" -> x */
+		return ch;
 	}
 }
--- a/usr.bin/tr/tr.1
+++ b/usr.bin/tr/tr.1
@ -0,0 +1,350 @@
+.\"	$NetBSD: tr.1,v 1.18 2009/11/12 00:43:53 joerg Exp $
+.\"
+.\" Copyright (c) 1991, 1993
+.\"	The Regents of the University of California.  All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" the Institute of Electrical and Electronics Engineers, Inc.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\" 3. Neither the name of the University nor the names of its contributors
+.\"    may be used to endorse or promote products derived from this software
+.\"    without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\"     @(#)tr.1	8.1 (Berkeley) 6/6/93
+.\"
+.Dd March 23, 2004
+.Dt TR 1
+.Os
+.Sh NAME
+.Nm tr
+.Nd translate characters
+.Sh SYNOPSIS
+.Nm
+.Op Fl cs
+.Ar string1 string2
+.Nm
+.Op Fl c
+.Fl d
+.Ar string1
+.Nm
+.Op Fl c
+.Fl s
+.Ar string1
+.Nm
+.Op Fl c
+.Fl ds
+.Ar string1 string2
+.Sh DESCRIPTION
+The
+.Nm
+utility copies the standard input to the standard output with substitution
+or deletion of selected characters.
+.Pp
+The following options are available:
+.Bl -tag -width Ds
+.It Fl c
+Complements the set of characters in
+.Ar string1 ,
+that is
+.Fl c Ar \&ab
+includes every character except for
+.Sq a
+and
+.Sq b .
+.It Fl d
+The
+.Fl d
+option causes characters to be deleted from the input.
+.It Fl s
+The
+.Fl s
+option squeezes multiple occurrences of the characters listed in the last
+operand (either
+.Ar string1
+or
+.Ar string2 )
+in the input into a single instance of the character.
+This occurs after all deletion and translation is completed.
+.El
+.Pp
+In the first synopsis form, the characters in
+.Ar string1
+are translated into the characters in
+.Ar string2
+where the first character in
+.Ar string1
+is translated into the first character in
+.Ar string2
+and so on.
+If
+.Ar string1
+is longer than
+.Ar string2 ,
+the last character found in
+.Ar string2
+is duplicated until
+.Ar string1
+is exhausted.
+.Pp
+In the second synopsis form, the characters in
+.Ar string1
+are deleted from the input.
+.Pp
+In the third synopsis form, the characters in
+.Ar string1
+are compressed as described for the
+.Fl s
+option.
+.Pp
+In the fourth synopsis form, the characters in
+.Ar string1
+are deleted from the input, and the characters in
+.Ar string2
+are compressed as described for the
+.Fl s
+option.
+.Pp
+The following conventions can be used in
+.Ar string1
+and
+.Ar string2
+to specify sets of characters:
+.Bl -tag -width [:equiv:]
+.It character
+Any character not described by one of the following conventions
+represents itself.
+.It \eoctal
+A backslash followed by 1, 2 or 3 octal digits represents a character
+with that encoded value.
+To follow an octal sequence with a digit as a character, left zero-pad
+the octal sequence to the full 3 octal digits.
+.It \echaracter
+A backslash followed by certain special characters maps to special
+values.
+.sp
+.Bl -column cc
+.It \ea	\*[Lt]alert character\*[Gt]
+.It \eb	\*[Lt]backspace\*[Gt]
+.It \ef	\*[Lt]form-feed\*[Gt]
+.It \en	\*[Lt]newline\*[Gt]
+.It \er	\*[Lt]carriage return\*[Gt]
+.It \et	\*[Lt]tab\*[Gt]
+.It \ev	\*[Lt]vertical tab\*[Gt]
+.El
+.sp
+A backslash followed by any other character maps to that character.
+.It c-c
+Represents the range of characters between the range endpoints, inclusively.
+.It [:class:]
+Represents all characters belonging to the defined character class.
+Class names are:
+.sp
+.Bl -column xdigit
+.It alnum	\*[Lt]alphanumeric characters\*[Gt]
+.It alpha	\*[Lt]alphabetic characters\*[Gt]
+.It blank	\*[Lt]blank characters\*[Gt]
+.It cntrl	\*[Lt]control characters\*[Gt]
+.It digit	\*[Lt]numeric characters\*[Gt]
+.It graph	\*[Lt]graphic characters\*[Gt]
+.It lower	\*[Lt]lower-case alphabetic characters\*[Gt]
+.It print	\*[Lt]printable characters\*[Gt]
+.It punct	\*[Lt]punctuation characters\*[Gt]
+.It space	\*[Lt]space characters\*[Gt]
+.It upper	\*[Lt]upper-case characters\*[Gt]
+.It xdigit	\*[Lt]hexadecimal characters\*[Gt]
+.El
+.Pp
+.\" All classes may be used in
+.\" .Ar string1 ,
+.\" and in
+.\" .Ar string2
+.\" when both the
+.\" .Fl d
+.\" and
+.\" .Fl s
+.\" options are specified.
+.\" Otherwise, only the classes ``upper'' and ``lower'' may be used in
+.\" .Ar string2
+.\" and then only when the corresponding class (``upper'' for ``lower''
+.\" and vice-versa) is specified in the same relative position in
+.\" .Ar string1 .
+.\" .Pp
+With the exception of the
+.Dq upper
+and
+.Dq lower
+classes, characters in the classes are in unspecified order.
+In the
+.Dq upper
+and
+.Dq lower
+classes, characters are entered in ascending order.
+.Pp
+For specific information as to which ASCII characters are included
+in these classes, see
+.Xr ctype 3
+and related manual pages.
+.It [=equiv=]
+Represents all characters or collating (sorting) elements belonging to
+the same equivalence class as
+.Ar equiv .
+If there is a secondary ordering within the equivalence class, the
+characters are ordered in ascending sequence.
+Otherwise, they are ordered after their encoded values.
+An example of an equivalence class might be
+.Dq \&c
+and
+.Dq \&ch
+in Spanish;
+English has no equivalence classes.
+.It [#*n]
+Represents
+.Ar n
+repeated occurrences of the character represented by
+.Ar # .
+This
+expression is only valid when it occurs in
+.Ar string2 .
+If
+.Ar n
+is omitted or is zero, it is interpreted as large enough to extend the
+.Ar string2
+sequence to the length of
+.Ar string1 .
+If
+.Ar n
+has a leading zero, it is interpreted as an octal value; otherwise,
+it is interpreted as a decimal value.
+.El
+.Sh EXIT STATUS
+.Nm
+exits 0 on success, and \*[Gt]0 if an error occurs.
+.Sh EXAMPLES
+The following examples are shown as given to the shell:
+.Pp
+Create a list of the words in
+.Ar file1 ,
+one per line, where a word is taken to be a maximal string of letters:
+.sp
+.D1 Li "tr -cs \*q[:alpha:]\*q \*q\en\*q \*[Lt] file1"
+.sp
+Translate the contents of
+.Ar file1
+to upper-case:
+.sp
+.D1 Li "tr \*q[:lower:]\*q \*q[:upper:]\*q \*[Lt] file1"
+.sp
+Strip out non-printable characters from
+.Ar file1 :
+.sp
+.D1 Li "tr -cd \*q[:print:]\*q \*[Lt] file1"
+.Sh COMPATIBILITY
+.At V
+has historically implemented character ranges using the syntax
+.Dq [c-c]
+instead of the
+.Dq c-c
+used by historic
+.Bx
+implementations and standardized by POSIX.
+.At V
+shell scripts should work under this implementation as long as
+the range is intended to map in another range, i.e. the command
+.Pp
+.Ic "tr [a-z] [A-Z]"
+.Pp
+will work as it will map the
+.Sq \&[
+character in
+.Ar string1
+to the
+.Sq \&[
+character in
+.Ar string2 .
+However, if the shell script is deleting or squeezing characters as in
+the command
+.Pp
+.Ic "tr -d [a-z]"
+.Pp
+the characters
+.Sq \&[
+and
+.Sq \&]
+will be included in the deletion or compression list which would
+not have happened under an historic
+.At V
+implementation.
+Additionally, any scripts that depended on the sequence
+.Dq a-z
+to represent the three characters
+.Sq \&a ,
+.Sq \&- ,
+and
+.Sq \&z
+will have to be rewritten as
+.Dq a\e-z .
+.Pp
+The
+.Nm
+utility has historically not permitted the manipulation of NUL bytes in
+its input and, additionally, stripped NULs from its input stream.
+This implementation has removed this behavior as a bug.
+.Pp
+The
+.Nm
+utility has historically been extremely forgiving of syntax errors,
+for example, the
+.Fl c
+and
+.Fl s
+options were ignored unless two strings were specified.
+This implementation will not permit illegal syntax.
+.Sh STANDARDS
+The
+.Nm
+utility is expected to be
+.St -p1003.2
+compatible.
+It should be noted that the feature wherein the last character of
+.Ar string2
+is duplicated if
+.Ar string2
+has fewer characters than
+.Ar string1
+is permitted by POSIX but is not required.
+Shell scripts attempting to be portable to other POSIX systems should use
+the
+.Dq [#*]
+convention instead of relying on this behavior.
+.Sh BUGS
+.Nm
+was originally designed to work with
+.Tn US-ASCII .
+Its use with character sets that do not share all the properties of
+.Tn US-ASCII ,
+e.g., a symmetric set of upper and lower case characters
+that can be algorithmically converted one to the other,
+may yield unpredictable results.
+.Pp
+.Nm
+should be internationalized.
--- a/commands/tr/tr.c
+++ b/commands/tr/tr.c
@ -1,4 +1,4 @@
-/*	$NetBSD: tr.c,v 1.8 2008/07/21 14:19:27 lukem Exp $	*/
+/*	$NetBSD: tr.c,v 1.9 2011/09/06 18:33:46 joerg Exp $	*/

 /*
 * Copyright (c) 1988, 1993
@ -30,21 +30,17 @@
 */

 #include <sys/cdefs.h>
-#if 0
 #ifndef lint
 __COPYRIGHT("@(#) Copyright (c) 1988, 1993\
 The Regents of the University of California.  All rights reserved.");
 #endif /* not lint */
-#endif

-#if 0
 #ifndef lint
 #if 0
 static char sccsid[] = "@(#)tr.c	8.2 (Berkeley) 5/4/95";
 #endif
-__RCSID("$NetBSD: tr.c,v 1.8 2008/07/21 14:19:27 lukem Exp $");
+__RCSID("$NetBSD: tr.c,v 1.9 2011/09/06 18:33:46 joerg Exp $");
 #endif /* not lint */
-#endif

 #include <sys/types.h>

@ -54,7 +50,7 @@ __RCSID("$NetBSD: tr.c,v 1.8 2008/07/21 14:19:27 lukem Exp $");
 #include <string.h>
 #include <unistd.h>

-#include "tr.h"
+#include "extern.h"

 static int string1[NCHARS] = {
 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,		/* ASCII */
@ -94,14 +90,11 @@ static int string1[NCHARS] = {
 STR s1 = { STRING1, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL };
 STR s2 = { STRING2, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL };

-int	main (int, char **);
-static void setup (int *, char *, STR *, int);
-static void usage (void);
+static void setup(int *, char *, STR *, int);
+__dead static void usage(void);

 int
-main(argc, argv)
-	int argc;
-	char **argv;
+main(int argc, char **argv)
 {
 	int ch, cnt, lastch, *p;
 	int cflag, dflag, sflag, isstring2;
@ -198,17 +191,15 @@ main(argc, argv)
 	if (!isstring2)
 		usage();

-	s1.str = (unsigned char *) argv[0];
-	s2.str = (unsigned char *) argv[1];
+	s1.str = argv[0];
+	s2.str = argv[1];

 	if (cflag)
 		for (cnt = NCHARS, p = string1; cnt--;)
 			*p++ = OOBCH;

-	if (!next(&s2)) {
-		fprintf(stderr, "empty string2\n");
-		exit(1);
-	}
+	if (!next(&s2))
+		errx(1, "empty string2");

 	/* If string2 runs out of characters, use the last one specified. */
 	if (sflag)
@ -242,15 +233,11 @@ main(argc, argv)
 }

 static void
-setup(string, arg, str, cflag)
-	int *string;
-	char *arg;
-	STR *str;
-	int cflag;
+setup(int *string, char *arg, STR *str, int cflag)
 {
 	int cnt, *p;

-	str->str = (unsigned char *) arg;
+	str->str = arg;
 	memset(string, 0, NCHARS * sizeof(int));
 	while (next(str))
 		string[str->lastch] = 1;
@ -260,7 +247,7 @@ setup(string, arg, str, cflag)
 }

 static void
-usage()
+usage(void)
 {
 	(void)fprintf(stderr, "usage: tr [-cs] string1 string2\n");
 	(void)fprintf(stderr, "       tr [-c] -d string1\n");