original netbsd printf, cut

This commit is contained in:
Ben Gras 2010-06-22 21:20:54 +00:00
parent 2ac57865cb
commit ff26d9a4ff
7 changed files with 1649 additions and 0 deletions

10
commands/cut/Makefile Normal file
View file

@ -0,0 +1,10 @@
# $NetBSD: Makefile,v 1.5 2007/07/02 18:41:03 christos Exp $
# @(#)Makefile 8.1 (Berkeley) 6/6/93
#
# Build description for the cut(1) utility.
# Warning level; "?=" only assigns it when it is not already set.
WARNS?= 4
# Name of the program to build.
PROG= cut
# Link against libutil (presumably for the ecalloc()/erealloc() calls in
# cut.c -- confirm against <util.h>).
LDADD+= -lutil
DPADD+= ${LIBUTIL}
.include <bsd.prog.mk>

130
commands/cut/cut.1 Normal file
View file

@ -0,0 +1,130 @@
.\" $NetBSD: cut.1,v 1.14.26.1 2008/12/23 03:47:20 snj Exp $
.\"
.\" Copyright (c) 1989, 1990, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" This code is derived from software contributed to Berkeley by
.\" the Institute of Electrical and Electronics Engineers, Inc.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" @(#)cut.1 8.1 (Berkeley) 6/6/93
.\"
.Dd December 21, 2008
.Dt CUT 1
.Os
.Sh NAME
.Nm cut
.Nd select portions of each line of a file
.Sh SYNOPSIS
.Nm
.Fl b Ar list
.Op Fl n
.Op Ar
.Nm
.Fl c Ar list
.Op Ar
.Nm
.Fl f Ar list
.Op Fl d Ar delim
.Op Fl s
.Op Ar
.Sh DESCRIPTION
The
.Nm
utility selects portions of each line (as specified by
.Ar list )
from each
.Ar file
and writes them to the
standard output.
If the
.Ar file
argument is a single dash
.Pq Sq -
or no
.Ar file
arguments were specified, lines are read from the standard input.
The items specified by
.Ar list
can be in terms of column position or in terms of fields delimited
by a special character.
Column numbering starts from 1.
.Pp
.Ar List
is a comma or whitespace separated set of increasing numbers and/or
number ranges.
Number ranges consist of a number, a dash
.Pq Li \- ,
and a second number
and select the fields or columns from the first number to the second,
inclusive.
Numbers or number ranges may be preceded by a dash, which selects all
fields or columns from 1 to the first number.
Numbers or number ranges may be followed by a dash, which selects all
fields or columns from the last number to the end of the line.
Numbers and number ranges may be repeated, overlapping, and in any order.
It is not an error to select fields or columns not present in the
input line.
.Pp
The options are as follows:
.Bl -tag -width Fl
.It Fl b Ar list
The
.Ar list
specifies byte positions.
.It Fl c Ar list
The
.Ar list
specifies character positions.
.It Fl d Ar string
Use the first character of
.Ar string
as the field delimiter character.
The default is the
.Aq TAB
character.
.It Fl f Ar list
The
.Ar list
specifies fields, separated by the field delimiter character.
The selected fields are output,
separated by the field delimiter character.
.It Fl n
Do not split multi-byte characters.
.It Fl s
Suppresses lines with no field delimiter characters.
Unless specified, lines with no delimiters are passed through unmodified.
.El
.Sh EXIT STATUS
.Nm
exits 0 on success, 1 if an error occurred.
.Sh SEE ALSO
.Xr paste 1
.Sh STANDARDS
The
.Nm
utility conforms to
.St -p1003.2-92 .

302
commands/cut/cut.c Normal file
View file

@ -0,0 +1,302 @@
/* $NetBSD: cut.c,v 1.25 2008/07/21 14:19:22 lukem Exp $ */
/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#ifndef lint
__COPYRIGHT("@(#) Copyright (c) 1989, 1993\
The Regents of the University of California. All rights reserved.");
#endif /* not lint */
#ifndef lint
#if 0
static char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95";
#endif
__RCSID("$NetBSD: cut.c,v 1.25 2008/07/21 14:19:22 lukem Exp $");
#endif /* not lint */
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <util.h>
#include <wchar.h>
#include <sys/param.h>
/*
 * Command-line state, filled in by main() while it parses the options.
 */
static int bflag;	/* -b given: cut by byte position */
static int cflag;	/* -c given: cut by character position */
static char dchar;	/* field delimiter for -f; main() defaults it to '\t' */
static int dflag;	/* -d given: delimiter was set explicitly */
static int fflag;	/* -f given: cut by field */
static int sflag;	/* -s given: suppress lines without a delimiter */
/*
 * The three cutters share one signature so main() can call them through a
 * single function pointer.  b_cut() and c_cut() are generated by including
 * x_cut.c twice at the bottom of this file.
 */
static void b_cut(FILE *, const char *);
static void c_cut(FILE *, const char *);
static void f_cut(FILE *, const char *);
/* Parses a -b/-c/-f list argument into the positions map. */
static void get_list(char *);
/* Prints the synopsis and exits; never returns. */
static void usage(void) __dead;
/*
 * cut --
 *	Parse the options, reject invalid option combinations, then run the
 *	selected cutter over every file operand.  An operand of "-" or the
 *	absence of operands means standard input.
 *
 *	Exactly one of -b, -c and -f must be given; -d and -s are only
 *	accepted together with -f.  -n is accepted and ignored.
 *
 *	Returns 0 on success.  A file that cannot be opened terminates the
 *	program through err(3) with status 1.
 */
int
main(int argc, char *argv[])
{
	void (*cutter)(FILE *, const char *) = NULL;
	FILE *in;
	int opt, badcombo;

	(void)setlocale(LC_ALL, "");

	/* Fields are TAB separated unless -d overrides it. */
	dchar = '\t';

	while ((opt = getopt(argc, argv, "b:c:d:f:sn")) != -1) {
		switch (opt) {
		case 'b':
			cutter = b_cut;
			get_list(optarg);
			bflag = 1;
			break;
		case 'c':
			cutter = c_cut;
			get_list(optarg);
			cflag = 1;
			break;
		case 'd':
			/* Only the first character of the argument is used. */
			dchar = *optarg;
			dflag = 1;
			break;
		case 'f':
			get_list(optarg);
			cutter = f_cut;
			fflag = 1;
			break;
		case 'n':
			/* Accepted, but has no effect. */
			break;
		case 's':
			sflag = 1;
			break;
		default:
			/* Unknown option, including getopt's '?'. */
			usage();
		}
	}
	argc -= optind;
	argv += optind;

	/*
	 * -f excludes -b/-c.  Without -f, exactly one of -b/-c is required
	 * and the field-only options -d/-s are not allowed.
	 */
	if (fflag)
		badcombo = cflag || bflag;
	else
		badcombo = (!cflag && !bflag) || dflag || sflag ||
		    (bflag && cflag);
	if (badcombo)
		usage();

	/* No operands: filter standard input. */
	if (*argv == NULL) {
		cutter(stdin, "stdin");
		return 0;
	}

	for (; *argv != NULL; ++argv) {
		if (strcmp(*argv, "-") == 0) {
			cutter(stdin, "stdin");
			continue;
		}
		in = fopen(*argv, "r");
		if (in == NULL)
			err(1, "%s", *argv);
		cutter(in, *argv);
		(void)fclose(in);
	}
	return 0;
}
/*
 * Selection state built by get_list() and read by the cutters.
 *
 * autostart	largest N seen in an item with a leading dash ("-N");
 *		get_list() marks positions 1..autostart as selected.
 * autostop	smallest number seen in an item with a trailing dash ("N-");
 *		when non-zero the cutters also output the rest of the line.
 * maxval	highest position the cutters examine one by one (clamped to
 *		autostop when ranges overlap).
 */
static size_t autostart, autostop, maxval;
/* One byte per position, non-zero when selected; indexed from 1. */
static char *positions = NULL;
/* Number of bytes currently allocated for positions. */
static size_t numpositions = 0;
#define ALLOC_CHUNK _POSIX2_LINE_MAX /* malloc granularity */
/*
 * get_list --
 *	Parse a -b/-c/-f list argument and record the selection in the
 *	file-scope state (positions, numpositions, maxval, autostart,
 *	autostop).
 *
 *	list is split in place with strtok(3) on commas, blanks and tabs.
 *	Each item is one of "N", "N-M", "-N" (from 1 to N), "N-" (from N to
 *	the end of the line), or a combination such as "-N-M".
 *
 *	Exits through errx(3) on a malformed item or on a zero value, and
 *	through ecalloc()/erealloc() if memory cannot be obtained.
 */
static void
get_list(char *list)
{
	size_t setautostart, start, stop, need;
	char *pos;
	char *p;

	if (positions == NULL) {
		numpositions = ALLOC_CHUNK;
		positions = ecalloc(numpositions, sizeof(*positions));
	}

	/*
	 * set a byte in the positions array to indicate if a field or
	 * column is to be selected; use +1, it's 1-based, not 0-based.
	 * This parser is less restrictive than the Draft 9 POSIX spec.
	 * POSIX doesn't allow lists that aren't in increasing order or
	 * overlapping lists.  We also handle "-3-5" although there's no
	 * real reason to.
	 */
	for (; (p = strtok(list, ", \t")) != NULL; list = NULL) {
		setautostart = start = stop = 0;
		if (*p == '-') {
			++p;
			setautostart = 1;
		}
		if (isdigit((unsigned char)*p)) {
			start = stop = strtol(p, &p, 10);
			if (setautostart && start > autostart)
				autostart = start;
		}
		if (*p == '-') {
			if (isdigit((unsigned char)p[1]))
				stop = strtol(p + 1, &p, 10);
			if (*p == '-') {
				/* trailing dash: select through end of line */
				++p;
				if (!autostop || autostop > stop)
					autostop = stop;
			}
		}
		if (*p)
			errx(1, "[-cf] list: illegal list value");
		if (!stop || !start)
			errx(1, "[-cf] list: values may not include zero");

		/*
		 * The map must cover both ends of the item.  For a reversed
		 * item with a leading dash (e.g. "-5000-3") start exceeds
		 * stop and becomes autostart, which the memset() at the end
		 * of this function uses as a length; sizing the map from
		 * stop alone would let that memset() run past the buffer.
		 */
		need = (start > stop ? start : stop) + 1;
		if (need > numpositions) {
			size_t newsize;

			newsize = roundup(need, ALLOC_CHUNK);
			positions = erealloc(positions, newsize);
			/* the newly added tail starts out unselected */
			(void)memset(positions + numpositions, 0,
			    newsize - numpositions);
			numpositions = newsize;
		}
		if (maxval < stop)
			maxval = stop;
		for (pos = positions + start; start++ <= stop; pos++)
			*pos = 1;
	}

	/* overlapping ranges */
	if (autostop && maxval > autostop)
		maxval = autostop;

	/* set autostart: everything from 1 through autostart is selected */
	if (autostart)
		(void)memset(positions + 1, 1, autostart);
}
/*
 * f_cut --
 *	Field mode (-f).  Reads fp line by line and writes the selected
 *	fields of each line to standard output, joined by the delimiter
 *	character (dchar).
 *
 *	A line that contains no delimiter is copied through unchanged,
 *	unless -s (sflag) is in effect, in which case it is dropped.  A
 *	final line without a terminating newline is processed as if it had
 *	one.  fname is unused.
 */
static void
/*ARGSUSED*/
f_cut(FILE *fp, const char *fname __unused)
{
	const char delim = dchar;
	char *line, *scratch, *cur, *sel;
	size_t linelen;
	int c, remaining, printed, hasdelim, wanted;

	scratch = NULL;
	while ((line = fgetln(fp, &linelen)) != NULL) {
		printed = 0;

		if (line[linelen - 1] != '\n') {
			/* no newline at the end of the last line so add one */
			scratch = (char *)malloc(linelen + 1);
			if (scratch == NULL)
				err(1, NULL);
			(void)memcpy(scratch, line, linelen);
			scratch[linelen] = '\n';
			linelen++;
			line = scratch;
		}

		/*
		 * Look for a delimiter anywhere up to and including the
		 * newline.  The delimiter test comes first so that this
		 * still works if newline itself is the delimiter.
		 */
		hasdelim = 0;
		cur = line;
		do {
			c = *cur++;
			if (c == delim)
				hasdelim = 1;
		} while (c != '\n');

		if (!hasdelim) {
			/* pass the line through untouched unless -s */
			if (!sflag)
				(void)fwrite(line, linelen, 1, stdout);
			continue;
		}

		/* Walk fields 1..maxval, printing the selected ones. */
		cur = line;
		sel = positions + 1;
		for (remaining = maxval; remaining != 0; --remaining, ++sel) {
			wanted = (*sel != 0);
			/* separator goes before every printed field but the first */
			if (wanted && printed++)
				(void)putchar(delim);
			for (;;) {
				c = *cur++;
				if (c == '\n' || c == delim)
					break;
				if (wanted)
					(void)putchar(c);
			}
			if (c == '\n')
				break;
		}

		if (c != '\n') {
			/* fields remain beyond maxval */
			if (autostop) {
				/* open-ended range: emit the rest of the line */
				if (printed)
					(void)putchar(delim);
				while ((c = *cur) != '\n') {
					(void)putchar(c);
					++cur;
				}
			} else {
				/* nothing more is selected: skip to the newline */
				while ((c = *cur) != '\n')
					++cur;
			}
		}
		(void)putchar('\n');

		free(scratch);
		scratch = NULL;
	}
	free(scratch);
}
/*
 * usage --
 *	Write the three synopsis forms to standard error and exit with
 *	status 1.  Does not return.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "Usage:\tcut -b list [-n] [file ...]\n"
	    "\tcut -c list [file1 ...]\n"
	    "\tcut -f list [-d delim] [-s] [file ...]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
/*
 * x_cut.c is a template: it is included once per value of CUT_BYTE and
 * defines a different function each time.
 */
/* make b_cut(): */
#define CUT_BYTE 1
#include "x_cut.c"
#undef CUT_BYTE
/* make c_cut(): */
#define CUT_BYTE 0
#include "x_cut.c"
#undef CUT_BYTE

95
commands/cut/x_cut.c Normal file
View file

@ -0,0 +1,95 @@
/* $NetBSD: x_cut.c,v 1.2 2007/07/02 18:41:04 christos Exp $ */
/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* This file is #include'd twice from cut.c, to generate both
* single- and multibyte versions of the same code.
*
* In cut.c #define:
* CUT_BYTE=1 to define b_cut (singlebyte), and
* CUT_BYTE=0 to define c_cut (multibyte).
*
* The macros below pick the function name, the character type and the
* stdio calls used by the function body for the current instantiation.
*/
#if (CUT_BYTE == 1)
/* byte version: int characters through getc()/putchar() */
# define CUT_FN b_cut
# define CUT_CH_T int
# define CUT_GETC getc
# define CUT_EOF EOF
# define CUT_PUTCHAR putchar
#else
/* wide-character version: wint_t characters through getwc()/putwchar() */
# define CUT_FN c_cut
# define CUT_CH_T wint_t
# define CUT_GETC getwc
# define CUT_EOF WEOF
# define CUT_PUTCHAR putwchar
#endif
/*
 * CUT_FN (b_cut or c_cut, depending on CUT_BYTE) --
 *	Column mode.  Reads fp one character at a time and writes the
 *	characters whose 1-based column is marked in the positions map.
 *	Only the first maxval columns are checked against the map; when
 *	autostop is set the remainder of the line is copied as well,
 *	otherwise it is discarded.  Every line that reaches its newline (or
 *	runs past maxval columns) is terminated with a newline on output.
 *
 *	Returns when end of input is seen while still within the first
 *	maxval columns of a line.  fname is unused.
 */
/* ARGSUSED */
void
CUT_FN(FILE *fp, const char *fname __unused)
{
	CUT_CH_T ch;
	int col;
	char *pos;

	ch = 0;
	for (;;) {
		pos = positions + 1;
		for (col = maxval; col; --col) {
			/*
			 * Compare against CUT_EOF, not EOF: the wide
			 * instantiation reads with getwc(), whose
			 * end-of-input value is WEOF.
			 */
			if ((ch = CUT_GETC(fp)) == CUT_EOF)
				return;
			if (ch == '\n')
				break;
			if (*pos++)
				(void)CUT_PUTCHAR(ch);
		}
		if (ch != '\n') {
			/* line is longer than maxval columns */
			if (autostop) {
				/* open-ended range: copy the rest of the line */
				while ((ch = CUT_GETC(fp)) != CUT_EOF &&
				    ch != '\n')
					(void)CUT_PUTCHAR(ch);
			} else {
				/* nothing more selected: skip the rest */
				while ((ch = CUT_GETC(fp)) != CUT_EOF &&
				    ch != '\n')
					continue;
			}
		}
		(void)CUT_PUTCHAR('\n');
	}
}
/* Drop the per-instantiation names so the file can be included again. */
#undef CUT_FN
#undef CUT_CH_T
#undef CUT_GETC
#undef CUT_EOF
#undef CUT_PUTCHAR

7
commands/printf/Makefile Normal file
View file

@ -0,0 +1,7 @@
# $NetBSD: Makefile,v 1.9 2004/10/30 19:28:35 christos Exp $
# from: @(#)Makefile 8.1 (Berkeley) 6/6/93
#
# Build description for the printf(1) utility.
# Name of the program to build.
PROG= printf
# Warning level used for this program.
WARNS= 3
.include <bsd.prog.mk>

412
commands/printf/printf.1 Normal file
View file

@ -0,0 +1,412 @@
.\" $NetBSD: printf.1,v 1.22 2008/09/01 09:20:41 dholland Exp $
.\"
.\" Copyright (c) 1989, 1990, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" This code is derived from software contributed to Berkeley by
.\" the Institute of Electrical and Electronics Engineers, Inc.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" from: @(#)printf.1 8.1 (Berkeley) 6/6/93
.\"
.Dd May 6, 2008
.Dt PRINTF 1
.Os
.Sh NAME
.Nm printf
.Nd formatted output
.Sh SYNOPSIS
.Nm
.Ar format
.Op Ar arguments ...
.Sh DESCRIPTION
.Nm
formats and prints its arguments, after the first, under control
of the
.Ar format .
The
.Ar format
is a character string which contains three types of objects: plain characters,
which are simply copied to standard output, character escape sequences which
are converted and copied to the standard output, and format specifications,
each of which causes printing of the next successive
.Ar argument .
.Pp
The
.Ar arguments
after the first are treated as strings if the corresponding format is
either
.Cm b ,
.Cm B ,
.Cm c ,
or
.Cm s ;
otherwise it is evaluated as a C constant, with the following extensions:
.Pp
.Bl -bullet -offset indent -compact
.It
A leading plus or minus sign is allowed.
.It
If the leading character is a single or double quote, the value is the
.Tn ASCII
code of the next character.
.El
.Pp
The format string is reused as often as necessary to satisfy the
.Ar arguments .
Any extra format specifications are evaluated with zero or the null
string.
.Pp
Character escape sequences are in backslash notation as defined in
.St -ansiC .
The characters and their meanings are as follows:
.Bl -tag -width Ds -offset indent
.It Cm \ee
Write an
.Aq escape
character.
.It Cm \ea
Write a
.Aq bell
character.
.It Cm \eb
Write a
.Aq backspace
character.
.It Cm \ef
Write a
.Aq form-feed
character.
.It Cm \en
Write a
.Aq new-line
character.
.It Cm \er
Write a
.Aq carriage return
character.
.It Cm \et
Write a
.Aq tab
character.
.It Cm \ev
Write a
.Aq vertical tab
character.
.It Cm \e\'
Write a
.Aq single quote
character.
.It Cm \e"
Write a
.Aq double quote
character.
.It Cm \e\e
Write a backslash character.
.It Cm \e Ns Ar num
Write an 8\-bit character whose
.Tn ASCII
value is the 1\-, 2\-, or 3\-digit octal number
.Ar num .
.It Cm \ex Ns Ar xx
Write an 8\-bit character whose
.Tn ASCII
value is the 1\- or 2\-digit hexadecimal number
.Ar xx .
.El
.Pp
Each format specification is introduced by the percent character
.Pq Dq \&% .
The remainder of the format specification includes,
in the following order:
.Bl -tag -width Ds
.It Zero or more of the following flags :
.Bl -tag -width Ds
.It Cm #
A
.Sq #
character specifying that the value should be printed in an
.Dq alternative form .
For
.Cm b ,
.Cm c ,
.Cm d ,
and
.Cm s
formats, this option has no effect.
For the
.Cm o
format the precision of the number is increased to force the first
character of the output string to a zero.
For the
.Cm x
.Pq Cm X
format, a non-zero result has the string
.Li 0x
.Pq Li 0X
prepended to it.
For
.Cm e ,
.Cm E ,
.Cm f ,
.Cm g ,
and
.Cm G
formats, the result will always contain a decimal point, even if no
digits follow the point (normally, a decimal point only appears in the
results of those formats if a digit follows the decimal point).
For
.Cm g
and
.Cm G
formats, trailing zeros are not removed from the result as they
would otherwise be.
.\" I turned this off - decided it isn't a valid use of '#'
.\" For the
.\" .Cm B
.\" format, backslash-escape sequences are expanded first;
.It Cm \&\-
A minus sign
.Sq \-
which specifies
.Em left adjustment
of the output in the indicated field;
.It Cm \&+
A
.Sq \&+
character specifying that there should always be
a sign placed before the number when using signed formats.
.It Sq \&\ \&
A space specifying that a blank should be left before a positive number
for a signed format.
A
.Sq \&+
overrides a space if both are used;
.It Cm \&0
A zero `0' character indicating that zero-padding should be used
rather than blank-padding.
A
.Sq \-
overrides a
.Sq \&0
if both are used;
.El
.It Field Width :
An optional digit string specifying a
.Em field width ;
if the output string has fewer characters than the field width it will
be blank-padded on the left (or right, if the left-adjustment indicator
has been given) to make up the field width (note that a leading zero
is a flag, but an embedded zero is part of a field width);
.It Precision :
An optional period,
.Sq Cm \&. ,
followed by an optional digit string giving a
.Em precision
which specifies the number of digits to appear after the decimal point,
for
.Cm e
and
.Cm f
formats, or the maximum number of characters to be printed
from a string
.Sm off
.Pf ( Cm b ,
.Sm on
.Cm B ,
and
.Cm s
formats); if the digit string is missing, the precision is treated
as zero;
.It Format :
A character which indicates the type of format to use (one of
.Cm diouxXfeEgGbBcs ) .
.El
.Pp
A field width or precision may be
.Sq Cm \&*
instead of a digit string.
In this case an
.Ar argument
supplies the field width or precision.
.Pp
The format characters and their meanings are:
.Bl -tag -width Fl
.It Cm diouXx
The
.Ar argument
is printed as a signed decimal (d or i), unsigned octal, unsigned decimal,
or unsigned hexadecimal (X or x), respectively.
.It Cm f
The
.Ar argument
is printed in the style
.Sm off
.Pf [\-]ddd Cm \&. No ddd
.Sm on
where the number of d's
after the decimal point is equal to the precision specification for
the argument.
If the precision is missing, 6 digits are given; if the precision
is explicitly 0, no digits and no decimal point are printed.
.It Cm eE
The
.Ar argument
is printed in the style
.Sm off
.Pf [\-]d Cm \&. No ddd Cm e No \\*(Pmdd
.Sm on
where there
is one digit before the decimal point and the number after is equal to
the precision specification for the argument; when the precision is
missing, 6 digits are produced.
An upper-case E is used for an
.Sq E
format.
.It Cm gG
The
.Ar argument
is printed in style
.Cm f
or in style
.Cm e
.Pq Cm E
whichever gives full precision in minimum space.
.It Cm b
Characters from the string
.Ar argument
are printed with backslash-escape sequences expanded.
.br
The following additional backslash-escape sequences are supported:
.Bl -tag -width Ds
.It Cm \ec
Causes
.Nm
to ignore any remaining characters in the string operand containing it,
any remaining string operands, and any additional characters in
the format operand.
.It Cm \e0 Ns Ar num
Write an 8\-bit character whose
.Tn ASCII
value is the 1\-, 2\-, or 3\-digit
octal number
.Ar num .
.It Cm \e^ Ns Ar c
Write the control character
.Ar c .
Generates characters `\e000' through `\e037', and `\e177' (from `\e^?').
.It Cm \eM\- Ns Ar c
Write the character
.Ar c
with the 8th bit set.
Generates characters `\e241' through `\e376'.
.It Cm \eM^ Ns Ar c
Write the control character
.Ar c
with the 8th bit set.
Generates characters `\e200' through `\e237', and `\e377' (from `\eM^?').
.El
.It Cm B
Characters from the string
.Ar argument
are printed with unprintable characters backslash-escaped using the
.Sm off
.Pf ` Cm \e Ar c No ',
.Pf ` Cm \e^ Ar c No ',
.Pf ` Cm \eM\- Ar c No '
or
.Pf ` Cm \eM^ Ar c No ',
.Sm on
formats described above.
.It Cm c
The first character of
.Ar argument
is printed.
.It Cm s
Characters from the string
.Ar argument
are printed until the end is reached or until the number of characters
indicated by the precision specification is reached; if the
precision is omitted, all characters in the string are printed.
.It Cm \&%
Print a `%'; no argument is used.
.El
.Pp
In no case does a non-existent or small field width cause truncation of
a field; padding takes place only if the specified field width exceeds
the actual width.
.Sh EXIT STATUS
.Nm
exits 0 on success, 1 on failure.
.Sh SEE ALSO
.Xr echo 1 ,
.Xr printf 3 ,
.Xr vis 3 ,
.Xr printf 9
.Sh STANDARDS
The
.Nm
utility conforms to
.St -p1003.1-2001 .
.Pp
Support for the floating point formats and `*' as a field width and precision
are optional in POSIX.
.Pp
The behaviour of the %B format and the \e', \e", \exxx, \ee and
\e[M][\-|^]c escape sequences are undefined in POSIX.
.Sh BUGS
Since the floating point numbers are translated from
.Tn ASCII
to floating-point and
then back again, floating-point precision may be lost.
.Pp
Hexadecimal character constants are restricted to, and should be specified
as, two character constants.
This is contrary to the ISO C standard but
does guarantee detection of the end of the constant.
.Sh NOTES
All formats which treat the
.Ar argument
as a number first convert the
.Ar argument
from its external representation as a character string
to an internal numeric representation, and then apply the
format to the internal numeric representation, producing
another external character string representation.
One might expect the
.Cm \&%c
format to do likewise, but in fact it does not.
.Pp
To convert a string representation of a decimal, octal, or hexadecimal
number into the corresponding character, two nested
.Nm
invocations may be used, in which the inner invocation
converts the input to an octal string, and the outer
invocation uses the octal string as part of a format.
For example, the following command outputs the character whose code
is 0x0A, which is a newline in ASCII:
.Pp
.Dl printf \&"$(printf \&"\e\e%o" \&"0x0A")"

693
commands/printf/printf.c Normal file
View file

@ -0,0 +1,693 @@
/* $NetBSD: printf.c,v 1.33.8.1 2009/10/14 18:37:30 sborrill Exp $ */
/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#ifndef lint
#if !defined(BUILTIN) && !defined(SHELL)
__COPYRIGHT("@(#) Copyright (c) 1989, 1993\
The Regents of the University of California. All rights reserved.");
#endif
#endif
#ifndef lint
#if 0
static char sccsid[] = "@(#)printf.c 8.2 (Berkeley) 3/22/95";
#else
__RCSID("$NetBSD: printf.c,v 1.33.8.1 2009/10/14 18:37:30 sborrill Exp $");
#endif
#endif /* not lint */
#include <sys/types.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <locale.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/*
 * Value of the escape character; '\e' is spelled numerically for compilers
 * that do not define __GNUC__.
 */
#ifdef __GNUC__
#define ESCAPE '\e'
#else
#define ESCAPE 033
#endif
/* Escape-sequence processing. */
static void conv_escape_str(char *, void (*)(int));
static char *conv_escape(char *, char *);
static char *conv_expand(const char *);
/* Argument fetchers used by main() for the individual conversions. */
static int getchr(void);
static double getdouble(void);
static int getwidth(void);
static intmax_t getintmax(void);
static uintmax_t getuintmax(void);
static char *getstr(void);
static char *mklong(const char *, int);
static void check_conversion(const char *, const char *);
static void usage(void);
/* Callbacks handed to conv_escape_str() by the %b conversion. */
static void b_count(int);
static void b_output(int);
/* Number of output bytes counted by b_count(). */
static size_t b_length;
/* Cursor into the padded template string consumed by b_output(). */
static char *b_fmt;
/*
 * Exit status.  Bit 0x100 is set by conv_escape_str() when a \c escape is
 * seen; main() masks it off before returning.
 */
static int rval;
/*
 * Set by main() to the first argument after the format string.
 * NOTE(review): presumably advanced by the get*() helpers, which are not
 * visible in this part of the file -- confirm.
 */
static char **gargv;
/* When built as a shell builtin the entry point gets a different name. */
#ifdef BUILTIN /* csh builtin */
#define main progprintf
#endif
#ifdef SHELL /* sh (aka ash) builtin */
#define main printfcmd
#include "../../bin/sh/bltin/bltin.h"
#endif /* SHELL */
/*
 * PF(f, func) --
 *	Call printf() with format f and value func, passing along the
 *	variables fieldwidth and/or precision of the expansion site when
 *	they are not -1 (i.e. when the directive used '*' for them).  The
 *	result of printf() is stored in the variable error of the expansion
 *	site.  func is evaluated exactly once.
 */
#define PF(f, func) { \
	if (fieldwidth == -1 && precision == -1) \
		error = printf(f, func); \
	else if (fieldwidth == -1) \
		error = printf(f, precision, func); \
	else if (precision == -1) \
		error = printf(f, fieldwidth, func); \
	else \
		error = printf(f, fieldwidth, precision, func); \
}
/*
 * APF(cpp, f, func) --
 *	Same argument selection as PF, but formats into a newly allocated
 *	string with asprintf(); the string is stored through cpp and the
 *	result of asprintf() in the variable error of the expansion site.
 *	fieldwidth and precision are passed only when they are not -1.
 */
#define APF(cpp, f, func) { \
	if (fieldwidth == -1 && precision == -1) \
		error = asprintf(cpp, f, func); \
	else if (fieldwidth == -1) \
		error = asprintf(cpp, f, precision, func); \
	else if (precision == -1) \
		error = asprintf(cpp, f, fieldwidth, func); \
	else \
		error = asprintf(cpp, f, fieldwidth, precision, func); \
}
/* When main has been renamed for a builtin build, give it a prototype. */
#ifdef main
int main(int, char *[]);
#endif
/*
 * printf --
 *	Interpret argv[1] as a format string and print the remaining
 *	arguments under its control.  Plain characters are copied, backslash
 *	escapes are converted through conv_escape(), and each conversion
 *	specification is handed to printf()/asprintf() together with the
 *	next argument.  The format is rescanned for as long as arguments
 *	remain.
 *
 *	Returns rval with the internal 0x100 "\c seen" bit masked off, or 1
 *	on a usage error, an invalid directive or an output failure.
 */
int main(int argc, char *argv[])
{
char *fmt, *start;
int fieldwidth, precision;
char nextch;
char *format;
int ch;
int error;
#if !defined(SHELL) && !defined(BUILTIN)
(void)setlocale (LC_ALL, "");
#endif
/* No options are defined; getopt() is used to reject any and to skip "--". */
while ((ch = getopt(argc, argv, "")) != -1) {
switch (ch) {
case '?':
default:
usage();
return 1;
}
}
argc -= optind;
argv += optind;
/* A format operand is mandatory. */
if (argc < 1) {
usage();
return 1;
}
format = *argv;
/* gargv and argv both start at the first argument after the format. */
gargv = ++argv;
/* Flag characters, and the digits of a field width or precision. */
#define SKIP1 "#-+ 0"
#define SKIP2 "0123456789"
do {
/*
* Basic algorithm is to scan the format string for conversion
* specifications -- once one is found, find out if the field
* width or precision is a '*'; if it is, gather up value.
* Note, format strings are reused as necessary to use up the
* provided arguments, arguments of zero/null string are
* provided to use up the format string.
*/
/* find next format specification */
for (fmt = format; (ch = *fmt++) != '\0';) {
if (ch == '\\') {
char c_ch;
fmt = conv_escape(fmt, &c_ch);
putchar(c_ch);
continue;
}
/* Ordinary character, or "%%" which prints a single '%'. */
if (ch != '%' || (*fmt == '%' && ++fmt)) {
(void)putchar(ch);
continue;
}
/* Ok - we've found a format specification,
Save its address for a later printf(). */
start = fmt - 1;
/* skip to field width */
fmt += strspn(fmt, SKIP1);
if (*fmt == '*') {
fmt++;
fieldwidth = getwidth();
} else
fieldwidth = -1;
/* skip to possible '.', get following precision */
fmt += strspn(fmt, SKIP2);
if (*fmt == '.') {
fmt++;
if (*fmt == '*') {
fmt++;
precision = getwidth();
} else
precision = -1;
} else
precision = -1;
fmt += strspn(fmt, SKIP2);
ch = *fmt;
if (!ch) {
warnx("missing format character");
return (1);
}
/* null terminate format string so we can use it
as an argument to printf. */
nextch = fmt[1];
fmt[1] = 0;
switch (ch) {
case 'B': {
const char *p = conv_expand(getstr());
if (p == NULL)
goto out;
/* print the expanded string with an ordinary %s */
*fmt = 's';
PF(start, p);
if (error < 0)
goto out;
break;
}
case 'b': {
/* There has to be a better way to do this,
* but the string we generate might have
* embedded nulls. */
static char *a, *t;
char *cp = getstr();
/* Free on entry in case shell longjumped out */
if (a != NULL)
free(a);
a = NULL;
if (t != NULL)
free(t);
t = NULL;
/* Count number of bytes we want to output */
b_length = 0;
conv_escape_str(cp, b_count);
/* build a placeholder string of b_length 'x' characters */
t = malloc(b_length + 1);
if (t == NULL)
goto out;
(void)memset(t, 'x', b_length);
t[b_length] = 0;
/* Get printf to calculate the lengths */
*fmt = 's';
APF(&a, start, t);
if (error == -1)
goto out;
b_fmt = a;
/* Output leading spaces and data bytes */
conv_escape_str(cp, b_output);
/* Add any trailing spaces */
printf("%s", b_fmt);
break;
}
case 'c': {
char p = getchr();
PF(start, p);
if (error < 0)
goto out;
break;
}
case 's': {
char *p = getstr();
PF(start, p);
if (error < 0)
goto out;
break;
}
case 'd':
case 'i': {
intmax_t p = getintmax();
/* NOTE(review): mklong() presumably rewrites the directive for an
 * intmax_t-sized argument; its body is not visible here. */
char *f = mklong(start, ch);
PF(f, p);
if (error < 0)
goto out;
break;
}
case 'o':
case 'u':
case 'x':
case 'X': {
uintmax_t p = getuintmax();
char *f = mklong(start, ch);
PF(f, p);
if (error < 0)
goto out;
break;
}
case 'e':
case 'E':
case 'f':
case 'g':
case 'G': {
double p = getdouble();
PF(start, p);
if (error < 0)
goto out;
break;
}
default:
warnx("%s: invalid directive", start);
return 1;
}
/* restore the conversion character and the byte overwritten above */
*fmt++ = ch;
*fmt = nextch;
/* escape if a \c was encountered */
if (rval & 0x100)
return rval & ~0x100;
}
/* Rescan the format only if gargv has moved away from its starting point
 * (argv) and there is still an argument left. */
} while (gargv != argv && *gargv);
return rval & ~0x100;
out:
warn("print failed");
return 1;
}
/* helper functions for conv_escape_str */

/*
 * Counting callback: records that one more byte would be produced by
 * bumping b_length.  The byte itself is ignored.
 */
static void
/*ARGSUSED*/
b_count(int ch)
{
	b_length += 1;
}
/*
 * Output callback: emit one converted character for every 'x' in the
 * 'format' that b_fmt walks over.
 *
 * Blanks found in front of the next placeholder are copied out verbatim,
 * then the placeholder itself is consumed and ch is written in its place.
 * Once b_fmt has reached the terminating NUL nothing is written and b_fmt
 * stays on the terminator.
 */
static void
b_output(int ch)
{
	char f;

	while ((f = *b_fmt) != '\0') {
		b_fmt++;
		if (f != ' ') {
			/* Placeholder found: swap in the real byte. */
			putchar(ch);
			return;
		}
		putchar(' ');
	}
}
/*
 * Print SysV echo(1) style escape string
 * Halts processing string if a \c escape is encountered.
 *
 * str is scanned up to its terminating NUL and every resulting byte is
 * handed to do_putchar.  Besides the sequences understood by
 * conv_escape(), this accepts \c (sets the 0x100 flag in rval and stops),
 * \0 followed by up to three octal digits, and the vis(3) forms \M-C,
 * \^C and \M^C.
 *
 * An escape that is cut short by the end of the string (a trailing
 * backslash, "\M-", "\^" or "\M^") is reported with a warning, sets rval
 * to 1 and ends processing, so the scan never moves past the terminator.
 * A lone trailing backslash is passed through literally first.
 */
static void
conv_escape_str(char *str, void (*do_putchar)(int))
{
	int value;
	int ch;
	char c;

	while ((ch = *str++) != '\0') {
		if (ch != '\\') {
			do_putchar(ch);
			continue;
		}

		ch = *str++;
		if (ch == '\0') {
			/* Backslash was the last character of the string. */
			do_putchar('\\');
			goto incomplete;
		}
		if (ch == 'c') {
			/* \c as in SYSV echo - abort all processing.... */
			rval |= 0x100;
			break;
		}

		/*
		 * %b string octal constants are not like those in C.
		 * They start with a \0, and are followed by 0, 1, 2,
		 * or 3 octal digits.
		 */
		if (ch == '0') {
			int octnum = 0, i;

			for (i = 0; i < 3; i++) {
				if (!isdigit((unsigned char)*str) || *str > '7')
					break;
				octnum = (octnum << 3) | (*str++ - '0');
			}
			do_putchar(octnum);
			continue;
		}

		/* \[M][^|-]C as defined by vis(3) */
		if (ch == 'M' && *str == '-') {
			/* "\M-" needs one more character after the dash. */
			if (str[1] == '\0')
				goto incomplete;
			do_putchar(0200 | str[1]);
			str += 2;
			continue;
		}
		if (ch == 'M' && *str == '^') {
			str++;
			value = 0200;
			ch = '^';
		} else
			value = 0;
		if (ch == '^') {
			ch = *str++;
			/* "\^" and "\M^" need a character to convert. */
			if (ch == '\0')
				goto incomplete;
			if (ch == '?')
				value |= 0177;
			else
				value |= ch & 037;
			do_putchar(value);
			continue;
		}

		/* Finally test for sequences valid in the format string */
		str = conv_escape(str - 1, &c);
		do_putchar(c);
	}
	return;

incomplete:
	warnx("incomplete escape sequence");
	rval = 1;
}
/*
 * Print "standard" escape characters
 *
 * str points at the character following a backslash.  The decoded byte
 * is stored in *conv_ch and the return value points at the first
 * character after the escape sequence.
 *
 * Recognised: one to three octal digits, \x with up to two hex digits,
 * and the single-letter escapes listed below.  An unknown letter is
 * reported, sets rval to 1 and is passed through unchanged.
 *
 * If the backslash was the last character of the string, the escape is
 * reported as incomplete, rval is set to 1, a literal backslash is
 * produced and the returned pointer stays on the terminating NUL so that
 * callers never scan beyond the end of the string.
 */
static char *
conv_escape(char *str, char *conv_ch)
{
	int value;
	int ch;
	char num_buf[4], *num_end;

	ch = *str++;

	switch (ch) {
	case '\0':
		/* Nothing follows the backslash: do not consume the NUL. */
		warnx("incomplete escape sequence");
		rval = 1;
		value = '\\';
		str--;
		break;

	case '0': case '1': case '2': case '3':
	case '4': case '5': case '6': case '7':
		/* Copy at most three digits aside and let strtoul stop
		   at the first one that is not octal. */
		num_buf[0] = ch;
		ch = str[0];
		num_buf[1] = ch;
		num_buf[2] = ch ? str[1] : 0;
		num_buf[3] = 0;
		value = strtoul(num_buf, &num_end, 8);
		/* num_buf[0] was already consumed from str above. */
		str += num_end - (num_buf + 1);
		break;

	case 'x':
		/* Hexadecimal character constants are not required to be
		   supported (by SuS v1) because there is no consistent
		   way to detect the end of the constant.
		   Supporting 2 byte constants is a compromise. */
		ch = str[0];
		num_buf[0] = ch;
		num_buf[1] = ch ? str[1] : 0;
		num_buf[2] = 0;
		value = strtoul(num_buf, &num_end, 16);
		str += num_end - num_buf;
		break;

	case '\\':	value = '\\';	break;	/* backslash */
	case '\'':	value = '\'';	break;	/* single quote */
	case '"':	value = '"';	break;	/* double quote */
	case 'a':	value = '\a';	break;	/* alert */
	case 'b':	value = '\b';	break;	/* backspace */
	case 'e':	value = ESCAPE;	break;	/* escape */
	case 'f':	value = '\f';	break;	/* form-feed */
	case 'n':	value = '\n';	break;	/* newline */
	case 'r':	value = '\r';	break;	/* carriage-return */
	case 't':	value = '\t';	break;	/* tab */
	case 'v':	value = '\v';	break;	/* vertical-tab */

	default:
		warnx("unknown escape sequence `\\%c'", ch);
		rval = 1;
		value = ch;
		break;
	}

	*conv_ch = value;
	return str;
}
/*
 * Expand a string so that everything in it is printable.
 *
 * Backslash, both quote characters and the usual control characters are
 * written as two-character C escapes, other printable bytes are copied
 * unchanged, and anything else uses a vis(3) style encoding (\^C, \M-C,
 * \M^C).
 *
 * The result lives in a buffer owned by this function; it is released
 * and replaced on the next call.  Returns NULL if the buffer cannot be
 * allocated.
 */
static char *
conv_expand(const char *str)
{
	static char *conv_str;
	const unsigned char *src = (const unsigned char *)str;
	char *out;
	int ch;

	free(conv_str);

	/* No input byte expands to more than four output bytes. */
	conv_str = malloc(4 * strlen(str) + 1);
	if (conv_str == NULL)
		return NULL;
	out = conv_str;

	for (; (ch = *src) != '\0'; src++) {
		int esc;	/* character to print after '\', 0 if none */

		/* Use C escapes for expected control characters */
		switch (ch) {
		case '\\':			/* backslash */
		case '\'':			/* single quote */
		case '"':			/* double quote */
			esc = ch;
			break;
		case '\a':	esc = 'a';	break;	/* alert */
		case '\b':	esc = 'b';	break;	/* backspace */
		case ESCAPE:	esc = 'e';	break;	/* escape */
		case '\f':	esc = 'f';	break;	/* form-feed */
		case '\n':	esc = 'n';	break;	/* newline */
		case '\r':	esc = 'r';	break;	/* carriage-return */
		case '\t':	esc = 't';	break;	/* tab */
		case '\v':	esc = 'v';	break;	/* vertical-tab */
		default:	esc = 0;	break;
		}

		if (esc != 0) {
			*out++ = '\\';
			*out++ = esc;
			continue;
		}

		/* Copy anything printable */
		if (isprint(ch)) {
			*out++ = ch;
			continue;
		}

		/* Use vis(3) encodings for the rest */
		*out++ = '\\';
		if (ch & 0200) {
			*out++ = 'M';
			ch &= ~0200;
		}
		if (ch == 0177) {
			*out++ = '^';
			*out++ = '?';
		} else if (ch < 040) {
			*out++ = '^';
			*out++ = ch | 0100;
		} else {
			*out++ = '-';
			*out++ = ch;
		}
	}

	*out = 0;
	return conv_str;
}
/*
 * Build a copy of an integer conversion specification with the 'j'
 * (intmax_t) length modifier inserted in front of the conversion
 * character, e.g. "%5d" becomes "%5jd".
 *
 * str is the specification, ending in its conversion character; ch is
 * the conversion character to place after the 'j'.  The result is kept
 * in a static buffer that is overwritten by the next call.
 *
 * A specification too long for the buffer is reported and reduced to
 * its first character followed by 'j' and ch.
 */
static char *
mklong(const char *str, int ch)
{
	static char copy[64];
	size_t len;

	/* Room for the original text plus the 'j' and the NUL. */
	len = strlen(str) + 2;
	if (len > sizeof copy) {
		/* warnx() terminates the message with a newline itself. */
		warnx("format %s too complex", str);
		len = 4;
	}
	/* Everything up to, but not including, the conversion character. */
	(void)memmove(copy, str, len - 3);
	copy[len - 3] = 'j';
	copy[len - 2] = ch;
	copy[len - 1] = '\0';
	return copy;
}
static int
getchr(void)
{
if (!*gargv)
return 0;
return (int)**gargv++;
}
static char *
getstr(void)
{
static char empty[] = "";
if (!*gargv)
return empty;
return *gargv++;
}
static int
getwidth(void)
{
long val;
char *s, *ep;
s = *gargv;
if (!*gargv)
return (0);
gargv++;
errno = 0;
val = strtoul(s, &ep, 0);
check_conversion(s, ep);
/* Arbitrarily 'restrict' field widths to 1Mbyte */
if (val < 0 || val > 1 << 20) {
warnx("%s: invalid field width", s);
return 0;
}
return val;
}
/*
 * Consume the next argument as a signed integer.
 *
 * Returns 0 when no arguments remain.  An argument starting with a
 * single or double quote yields the value of the character following
 * the quote; it is read as an unsigned char so that bytes above 0x7f do
 * not come out negative on platforms where plain char is signed.
 * Anything else is converted with strtoimax() (base auto-detected) and
 * problems are reported through check_conversion().
 */
static intmax_t
getintmax(void)
{
	intmax_t val;
	char *cp, *ep;

	cp = *gargv;
	if (cp == NULL)
		return 0;
	gargv++;

	if (*cp == '\"' || *cp == '\'')
		return (unsigned char)cp[1];

	errno = 0;
	val = strtoimax(cp, &ep, 0);
	check_conversion(cp, ep);
	return val;
}
/*
 * Consume the next argument as an unsigned integer.
 *
 * Returns 0 when no arguments remain.  An argument starting with a
 * single or double quote yields the value of the character following
 * the quote; it is read as an unsigned char so that bytes above 0x7f are
 * not sign-extended into enormous values on platforms where plain char
 * is signed.  A value with a leading minus sign is rejected with a
 * warning and rval set to 1.  Anything else is converted with
 * strtoumax() (base auto-detected) and problems are reported through
 * check_conversion().
 */
static uintmax_t
getuintmax(void)
{
	uintmax_t val;
	char *cp, *ep;

	cp = *gargv;
	if (cp == NULL)
		return 0;
	gargv++;

	if (*cp == '\"' || *cp == '\'')
		return (unsigned char)cp[1];

	/* strtoumax won't error -ve values */
	while (isspace(*(unsigned char *)cp))
		cp++;
	if (*cp == '-') {
		warnx("%s: expected positive numeric value", cp);
		rval = 1;
		return 0;
	}

	errno = 0;
	val = strtoumax(cp, &ep, 0);
	check_conversion(cp, ep);
	return val;
}
static double
getdouble(void)
{
double val;
char *ep;
if (!*gargv)
return (0.0);
if (**gargv == '\"' || **gargv == '\'')
return (double) *((*gargv++)+1);
errno = 0;
val = strtod(*gargv, &ep);
check_conversion(*gargv++, ep);
return val;
}
/*
 * Report problems with a numeric conversion that has just been done.
 *
 * s is the text that was converted and ep the end pointer the
 * conversion routine handed back; errno must still hold the value the
 * routine left behind.  Unconverted trailing text is reported first; if
 * the whole string was consumed, a range error is reported instead.
 * Either problem sets rval to 1.
 */
static void
check_conversion(const char *s, const char *ep)
{
	if (*ep == '\0') {
		/* Everything was consumed; only overflow can be wrong. */
		if (errno != ERANGE)
			return;
		warnx("%s: %s", s, strerror(ERANGE));
	} else if (ep == s) {
		warnx("%s: expected numeric value", s);
	} else {
		warnx("%s: not completely converted", s);
	}
	rval = 1;
}
/*
 * Write the one-line usage summary, prefixed with the program name,
 * to standard error.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr, "Usage: %s format [arg ...]\n", prog);
}