Importing usr.bin/split

Replaces commands/split. No Minix-specific changes needed.

Change-Id: I3c6551cf52d4c43eea0e8005b9cae83e17271f2d
This commit is contained in:
Thomas Cort 2013-10-27 13:28:00 -04:00
parent 2f85279772
commit 30eeed2c01
10 changed files with 503 additions and 160 deletions

View file

@ -23,7 +23,7 @@ SUBDIR= add_route arp ash at backup btrace \
ramdisk rarpd rawspeed rcp readclock \
reboot remsync rget rlogin \
rotate rsh rshd service setup shar \
sleep slip spell split sprofalyze sprofdiff srccrc \
sleep slip spell sprofalyze sprofdiff srccrc \
stty svclog svrctl swifi synctree sysenv \
syslogd tail tcpd tcpdp tcpstat telnet \
telnetd term termcap tget time touch \

View file

@ -1,4 +0,0 @@
PROG= split
MAN=
.include <bsd.prog.mk>

View file

@ -1,117 +0,0 @@
/* split - split a file Author: Michiel Huisjes */
#include <sys/types.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <minix/minlib.h>
/* Size in bytes of the buffer that split() fills with each read(). */
#define CHUNK_SIZE 1024
/* Number of lines per output piece; stays 1000 unless main() parses a -n argument. */
int cut_line = 1000;
/* Input file descriptor; main() keeps it at -1 until an input has been chosen. */
int infile;
/* Name of the output file being created: the prefix followed by a two-letter suffix. */
char out_file[100];
/* Set by main() to the last character of out_file; newfile() increments it. */
char *suffix;
int main(int argc, char **argv);
void split(void);
int newfile(void);
void usage(void);
void quit(void);
/*
 * main - parse "split [-n] [file [name]]", set up the output name and split.
 *
 * A "-<digits>" argument sets the number of lines per piece, a lone "-"
 * selects standard input, the first other argument is the input file and
 * the next one replaces the default output prefix "x".
 *
 * The line count must be a positive decimal number with nothing after it;
 * a count of zero would make split() divide by zero.  The prefix must leave
 * room in out_file for the two suffix letters and the terminating NUL.
 */
int main(int argc, char **argv)
{
	int i;

	out_file[0] = 'x';
	infile = -1;

	if (argc > 4) usage();

	for (i = 1; i < argc; i++) {
		if (argv[i][0] == '-') {
			/* cut_line == 1000 doubles as "no count given yet". */
			if (argv[i][1] >= '0' && argv[i][1] <= '9'
			    && cut_line == 1000) {
				char *end;
				long count = strtol(argv[i] + 1, &end, 10);

				/* Reject trailing junk, zero, and values that
				 * do not survive the conversion to int.
				 */
				if (*end != '\0' || count <= 0
				    || (long) (int) count != count)
					usage();
				cut_line = (int) count;
			} else if (argv[i][1] == '\0' && infile == -1)
				infile = 0;
			else
				usage();
		} else if (infile == -1) {
			if ((infile = open(argv[i], O_RDONLY)) < 0) {
				std_err("Cannot open input file.\n");
				exit(1);
			}
		} else {
			/* Two suffix letters plus the NUL must still fit. */
			if (strlen(argv[i]) > sizeof(out_file) - 3) {
				std_err("split: output file name too long.\n");
				exit(1);
			}
			strcpy(out_file, argv[i]);
		}
	}
	if (infile == -1) infile = 0;

	strcat(out_file, "aa");

	/* Point suffix at the last letter and step it back by one, so that
	 * the first increment in newfile() yields "...aa".
	 */
	suffix = out_file + strlen(out_file) - 1;
	*suffix = 'a' - 1;

	split();
	return(0);
}
void split()
{
char buf[CHUNK_SIZE];
register char *index, *base;
register int n;
int fd;
long lines = 0L;
fd = -1;
while ((n = read(infile, buf, CHUNK_SIZE)) > 0) {
base = index = buf;
while (--n >= 0) {
if (*index++ == '\n')
if (++lines % cut_line == 0) {
if (fd == -1) fd = newfile();
if (write(fd, base, (int) (index - base)) != (int) (index - base))
quit();
base = index;
close(fd);
fd = -1;
}
}
if (index == base) continue;
if (fd == -1) fd = newfile();
if (write(fd, base, (int) (index - base)) != (int) (index - base))
quit();
}
}
/*
 * newfile - advance the two-letter suffix in out_file and create that file.
 *
 * The last letter is incremented; when it passes 'z' it wraps to 'a' and
 * the letter before it is incremented instead (e.g. "xaz" is followed by
 * "xba").  Once "zz" has been used there is no further name, so the program
 * stops rather than continuing with non-letter characters in the name.
 *
 * Returns the descriptor of the newly created file; exits with status 2 if
 * the names are exhausted or the file cannot be created.
 */
int newfile(void)
{
	int fd;

	if (++*suffix > 'z') {		/* Last letter ran out */
		*suffix = 'a';		/* Reset last letter */
		if (++*(suffix - 1) > 'z') {	/* Carry into previous letter */
			std_err("split: too many files\n");
			exit(2);
		}
	}
	if ((fd = creat(out_file, 0644)) < 0) {
		std_err("Cannot create new file.\n");
		exit(2);
	}
	return fd;
}
/* usage - emit the command synopsis through std_err() and exit with status 1. */
void usage(void)
{
	std_err("Usage: split [-n] [file [name]].\n");

	exit(1);
}
/* quit - report a write error through std_err() and exit with status 1. */
void quit(void)
{
	std_err("split: write error\n");

	exit(1);
}

View file

@ -16,7 +16,7 @@ MAN= ash.1 at.1 \
profile.1 ps.1 rcp.1 recwave.1 \
remsync.1 rget.1 rlogin.1 rsh.1 rz.1 \
shar.1 sleep.1 spell.1 \
split.1 stty.1 svc.1 svrctl.1 \
stty.1 svc.1 svrctl.1 \
synctree.1 sysenv.1 sz.1 tail.1 telnet.1 template.1 \
term.1 termcap.1 tget.1 time.1 true.1 \
truncate.1 tty.1 umount.1 uname.1 \

View file

@ -1,36 +0,0 @@
.TH SPLIT 1
.SH NAME
split \- split a large file into several smaller files
.SH SYNOPSIS
\fBsplit\fR [\fB\-\fIn\fR]\fR [\fIfile \fR[\fIprefix\fR]\fR]\fR
.br
.de FL
.TP
\\fB\\$1\\fR
\\$2
..
.de EX
.TP 20
\\fB\\$1\\fR
# \\$2
..
.SH OPTIONS
.TP 5
.B \-\fIn\fP
# Number of lines per piece (default: 1000)
.SH EXAMPLES
.TP 20
.B split \-200 file
# Split \fIfile\fP into pieces of 200 lines each
.TP 20
.B split file z
# Split \fIfile\fP into \fIzaa\fP, \fIzab\fP, etc.
.SH DESCRIPTION
.PP
.I Split
reads \fIfile\fP and writes it out in \fIn\fP-line pieces.
By default, the pieces are called \fIxaa\fP, \fIxab\fP, etc.
The optional second argument can be used to provide an alternative
prefix for the output file names.
.SH "SEE ALSO"
.BR cat (1).

View file

@ -202,6 +202,7 @@
2010/05/27 08:40:19,usr.bin/seq
2013/06/02 12:00:00,usr.bin/shuffle
2012/10/17 12:00:00,usr.bin/sort
2012/10/17 12:00:00,usr.bin/split
2011/01/15 22:54:10,usr.bin/stat
2012/02/10 16:16:12,usr.bin/su
2013/10/06 12:00:00,usr.bin/tee

View file

@ -24,7 +24,7 @@ SUBDIR= asa \
renice rev \
\
shuffle sed seq \
sort stat su \
sort split stat su \
tee tic tput \
tr tsort unexpand \
toproto \

6
usr.bin/split/Makefile Normal file
View file

@ -0,0 +1,6 @@
# $NetBSD: Makefile,v 1.6 2009/04/14 22:15:26 lukem Exp $
# @(#)Makefile 8.1 (Berkeley) 6/6/93
PROG= split
.include <bsd.prog.mk>

132
usr.bin/split/split.1 Normal file
View file

@ -0,0 +1,132 @@
.\" $NetBSD: split.1,v 1.15 2007/05/31 01:35:35 jschauma Exp $
.\"
.\" Copyright (c) 1990, 1991, 1993, 1994
.\" The Regents of the University of California. All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" @(#)split.1 8.3 (Berkeley) 4/16/94
.\"
.Dd May 28, 2007
.Dt SPLIT 1
.Os
.Sh NAME
.Nm split
.Nd split a file into pieces
.Sh SYNOPSIS
.Nm
.Op Fl a Ar suffix_length
.Oo
.Fl b Ar byte_count Ns Oo Li k|m Oc |
.Fl l Ar line_count |
.Fl n Ar chunk_count
.Oc
.Op Ar file Op Ar name
.Sh DESCRIPTION
The
.Nm
utility reads the given
.Ar file
and breaks it up into files of 1000 lines each by default.
If
.Ar file
is a single dash or absent,
.Nm
reads from the standard input.
.Ar file
itself is not altered.
.Pp
The options are as follows:
.Bl -tag -width Ds
.It Fl a
Use
.Ar suffix_length
letters to form the suffix of the file name.
.It Fl b
Create smaller files
.Ar byte_count
bytes in length.
If
.Ql k
is appended to the number, the file is split into
.Ar byte_count
kilobyte pieces.
If
.Ql m
is appended to the number, the file is split into
.Ar byte_count
megabyte pieces.
.It Fl l
Create smaller files
.Ar line_count
lines in length.
.It Fl n
Split file into
.Ar chunk_count
smaller files.
.El
.Pp
If additional arguments are specified, the first is used as the name
of the input file which is to be split.
If a second additional argument is specified, it is used as a prefix
for the names of the files into which the file is split.
In this case, each file into which the file is split is named by the
prefix followed by a lexically ordered suffix using
.Ar suffix_length
characters in the range
.Dq Li a-z .
If
.Fl a
is not specified, two letters are used as the suffix.
.Pp
If the
.Ar name
argument is not specified,
.Ql x
is used.
.Sh STANDARDS
The
.Nm
utility conforms to
.St -p1003.1-2001 .
.Sh HISTORY
A
.Nm
command appeared in
.At v6 .
.Pp
The
.Fl a
option was introduced in
.Nx 2.0 .
Before that, if
.Ar name
was not specified,
.Nm
would vary the first letter of the filename
to increase the number of possible output files.
The
.Fl a
option makes this unnecessary.

361
usr.bin/split/split.c Normal file
View file

@ -0,0 +1,361 @@
/* $NetBSD: split.c,v 1.26 2011/09/16 15:39:29 joerg Exp $ */
/*
* Copyright (c) 1987, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#ifndef lint
__COPYRIGHT("@(#) Copyright (c) 1987, 1993, 1994\
The Regents of the University of California. All rights reserved.");
#endif /* not lint */
#ifndef lint
#if 0
static char sccsid[] = "@(#)split.c 8.3 (Berkeley) 4/25/94";
#endif
__RCSID("$NetBSD: split.c,v 1.26 2011/09/16 15:39:29 joerg Exp $");
#endif /* not lint */
#include <sys/param.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#define DEFLINE 1000 /* Default number of lines per output file. */
/*
* Nonzero while an output file is open for the piece being written.
* split1() and split2() clear it when a piece ends exactly at the end of
* the data just read, so the next file is only created if more input arrives.
*/
static int file_open; /* If a file open. */
/* ofd stays -1 until newfile() opens the first output file. */
static int ifd = STDIN_FILENO, ofd = -1; /* Input/output file descriptors. */
/* Allocated in main() with room for the prefix, sfxlen suffix characters and a NUL. */
static char *fname; /* File name prefix. */
static size_t sfxlen = 2; /* Suffix length in characters; set by -a. */
static void newfile(void);
static void split1(off_t, int) __dead;
static void split2(off_t) __dead;
static void split3(off_t) __dead;
static void usage(void) __dead;
/* Writes the whole buffer, looping over short writes; returns the bytes written. */
static size_t bigwrite(int, void const *, size_t);
/*
* main --
* Parse the options and operands, then hand over to one of the split
* routines: split1() for -b, split3() for -n, split2() otherwise.
*
* Operands: an optional input file ("-" keeps standard input) followed by
* an optional output name prefix (default "x"). Any further operand, or a
* line count combined with -b or -n, or -b combined with -n, ends in usage().
*/
int
main(int argc, char *argv[])
{
int ch;
char *ep, *p;
char const *base;
off_t bytecnt = 0; /* Byte count to split on. */
off_t numlines = 0; /* Line count to split on. */
off_t chunks = 0; /* Number of chunks to split into. */
while ((ch = getopt(argc, argv, "0123456789b:l:a:n:")) != -1)
switch (ch) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
/*
* Undocumented kludge: split was originally designed
* to take a number after a dash.
*/
/* Only the first digit seen does any work; later digits find numlines set. */
if (numlines == 0) {
/*
* If the previous argument is exactly "-<digit>", parse that
* digit; otherwise parse argv[optind] from its second character on.
*/
p = argv[optind - 1];
if (p[0] == '-' && p[1] == ch && !p[2])
p++;
else
p = argv[optind] + 1;
numlines = strtoull(p, &ep, 10);
if (numlines == 0 || *ep != '\0')
errx(1, "%s: illegal line count.", p);
}
break;
case 'b': /* Byte count. */
/*
* NOTE(review): only the first character after the digits is
* examined, so text following a k or m suffix is not rejected,
* and the multiplication below is not checked for overflow.
*/
if (!isdigit((unsigned char)optarg[0]) ||
(bytecnt = strtoull(optarg, &ep, 10)) == 0 ||
(*ep != '\0' && *ep != 'k' && *ep != 'm'))
errx(1, "%s: illegal byte count.", optarg);
if (*ep == 'k')
bytecnt *= 1024;
else if (*ep == 'm')
bytecnt *= 1024 * 1024;
break;
case 'l': /* Line count. */
/* A line count may be given only once, in either form. */
if (numlines != 0)
usage();
if (!isdigit((unsigned char)optarg[0]) ||
(numlines = strtoull(optarg, &ep, 10)) == 0 ||
*ep != '\0')
errx(1, "%s: illegal line count.", optarg);
break;
case 'a': /* Suffix length. */
if (!isdigit((unsigned char)optarg[0]) ||
(sfxlen = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
*ep != '\0')
errx(1, "%s: illegal suffix length.", optarg);
break;
case 'n': /* Chunks. */
if (!isdigit((unsigned char)optarg[0]) ||
(chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
*ep != '\0')
errx(1, "%s: illegal number of chunks.", optarg);
break;
default:
usage();
}
argv += optind;
argc -= optind;
/* First operand: the input file; "-" leaves ifd on standard input. */
if (*argv != NULL) {
if (strcmp(*argv, "-") != 0 &&
(ifd = open(*argv, O_RDONLY, 0)) < 0)
err(1, "%s", *argv);
++argv;
}
/* Second operand: the output name prefix. */
base = (*argv != NULL) ? *argv++ : "x";
/* Room for the prefix, the suffix that newfile() fills in, and the NUL. */
if ((fname = malloc(strlen(base) + sfxlen + 1)) == NULL)
err(EXIT_FAILURE, NULL);
(void)strcpy(fname, base); /* File name prefix. */
if (*argv != NULL)
usage();
/* The three ways of splitting are mutually exclusive. */
if (numlines == 0)
numlines = DEFLINE;
else if (bytecnt || chunks)
usage();
if (bytecnt && chunks)
usage();
/* The split routines are declared __dead: they exit instead of returning. */
if (bytecnt)
split1(bytecnt, 0);
else if (chunks)
split3(chunks);
else
split2(numlines);
return 0;
}
/*
 * split1 --
 *	Split the input into pieces of bytecnt bytes each.
 *
 *	When maxcnt is nonzero, no more than maxcnt output files are created;
 *	once that many exist, all remaining input is written to the last one.
 *	The function does not return: it exits with status 0 at end of input
 *	and through err() on a read or write failure.
 */
static void
split1(off_t bytecnt, int maxcnt)
{
	char bfr[MAXBSIZE];
	char *C;
	ssize_t len, dist;
	off_t bcnt = 0;		/* bytes already in the current output file */
	int nfiles = 0;		/* output files created so far */

	for (;;) {
		len = read(ifd, bfr, MAXBSIZE);
		if (len == 0)
			exit(0);
		if (len == -1)
			err(1, "read");

		if (!file_open && (!maxcnt || nfiles < maxcnt)) {
			newfile();
			nfiles++;
			file_open = 1;
		}

		if (bcnt + len < bytecnt) {
			/* The whole buffer still fits in the current file. */
			bcnt += len;
			if (bigwrite(ofd, bfr, len) != (size_t)len)
				err(1, "write");
			continue;
		}

		/* Top up the current file to exactly bytecnt bytes. */
		dist = bytecnt - bcnt;
		if (bigwrite(ofd, bfr, dist) != (size_t)dist)
			err(1, "write");
		len -= dist;

		/* Emit every complete piece still held in the buffer. */
		C = bfr + dist;
		while (len >= bytecnt) {
			if (!maxcnt || nfiles < maxcnt) {
				newfile();
				nfiles++;
			}
			if (bigwrite(ofd, C, bytecnt) != (size_t)bytecnt)
				err(1, "write");
			len -= bytecnt;
			C += bytecnt;
		}

		if (len) {
			/* A partial piece remains; it starts the next file. */
			if (!maxcnt || nfiles < maxcnt) {
				newfile();
				nfiles++;
			}
			if (bigwrite(ofd, C, len) != (size_t)len)
				err(1, "write");
		} else {
			/* Ended on a piece boundary: open the next file lazily. */
			file_open = 0;
		}
		bcnt = len;
	}
}
/*
 * split2 --
 *	Split the input into pieces of numlines lines each.
 *
 *	A line is counted when its newline character is seen.  The function
 *	does not return: it exits with status 0 at end of input and through
 *	err() on a read or write failure.
 */
static void
split2(off_t numlines)
{
	char bfr[MAXBSIZE];
	char *start, *cur, *end;
	off_t lcnt = 0;		/* lines seen in the current output file */
	size_t bcnt;
	ssize_t len;

	for (;;) {
		len = read(ifd, bfr, MAXBSIZE);
		if (len == 0)
			exit(0);
		if (len == -1)
			err(1, "read");

		if (!file_open) {
			newfile();
			file_open = 1;
		}

		end = bfr + len;
		start = bfr;
		for (cur = bfr; cur != end; cur++) {
			if (*cur != '\n')
				continue;
			if (++lcnt != numlines)
				continue;

			/* Flush up to and including this newline. */
			bcnt = cur - start + 1;
			if (bigwrite(ofd, start, bcnt) != (size_t)bcnt)
				err(1, "write");
			lcnt = 0;
			start = cur + 1;

			/*
			 * Open the next file right away only if this buffer
			 * still holds data for it.
			 */
			if (start != end)
				newfile();
			else
				file_open = 0;
		}

		/* Whatever follows the last cut goes to the current file. */
		if (start < end) {
			bcnt = end - start;
			if (bigwrite(ofd, start, bcnt) != (size_t)bcnt)
				err(1, "write");
		}
	}
}
/*
 * split3 --
 *	Split the input into the specified number of chunks.
 *
 *	The chunk size is the input size reported by fstat() divided by
 *	chunks; the work is then done by split1(), which is told to create at
 *	most `chunks' files.  Asking for more chunks than the reported size
 *	in bytes is an error.  Does not return.
 */
static void
split3(off_t chunks)
{
	struct stat sb;

	if (fstat(ifd, &sb) == -1)
		err(1, "stat");

	/* Print the size at full width; an int cast would truncate it. */
	if (chunks > sb.st_size)
		errx(1, "can't split into more than %lld files",
		    (long long)sb.st_size);

	/* split1() takes its file limit as an int. */
	split1(sb.st_size / chunks, (int)chunks);
}
/*
* newfile --
* Open a new output file.
*/
static void
newfile(void)
{
static int fnum;
static char *fpnt;
int quot, i;
if (ofd == -1) {
fpnt = fname + strlen(fname);
fpnt[sfxlen] = '\0';
} else if (close(ofd) != 0)
err(1, "%s", fname);
quot = fnum;
for (i = sfxlen - 1; i >= 0; i--) {
fpnt[i] = quot % 26 + 'a';
quot = quot / 26;
}
if (quot > 0)
errx(1, "too many files.");
++fnum;
if ((ofd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, DEFFILEMODE)) < 0)
err(1, "%s", fname);
}
/*
 * bigwrite --
 *	Write len bytes from buf to fd, calling write() again after each
 *	short write until everything has been written.
 *
 *	A write() interrupted by a signal (EINTR) is retried.  A write() that
 *	reports zero bytes for a non-empty request is treated as a failure,
 *	with errno set to ENOSPC, so the loop cannot spin without progress.
 *
 *	Returns the number of bytes written; a value smaller than len means
 *	that write() failed and errno describes the failure.
 */
static size_t
bigwrite(int fd, const void *buf, size_t len)
{
	const char *ptr = buf;
	size_t sofar = 0;
	ssize_t w;

	while (len != 0) {
		w = write(fd, ptr, len);
		if (w == -1) {
			if (errno == EINTR)
				continue;
			return sofar;
		}
		if (w == 0) {
			errno = ENOSPC;
			return sofar;
		}
		len -= (size_t)w;
		ptr += w;
		sofar += (size_t)w;
	}
	return sofar;
}
/*
 * usage --
 *	Print the command synopsis on standard error and exit with status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "usage: %s [-b byte_count] [-l line_count] [-n chunk_count] "
	    "[-a suffix_length] [file [prefix]]\n", prog);
	exit(1);
}