Importing usr.bin/split

Replaces commands/split. No Minix-specific changes needed. Change-Id: I3c6551cf52d4c43eea0e8005b9cae83e17271f2d
2013-10-27 13:28:00 -04:00 · 2013-10-27 13:28:00 -04:00 · 30eeed2c01
parent 2f85279772
commit 30eeed2c01
10 changed files with 503 additions and 160 deletions
--- a/commands/Makefile
+++ b/commands/Makefile
@ -23,7 +23,7 @@ SUBDIR=	add_route arp ash at backup btrace \
 	ramdisk rarpd rawspeed rcp readclock \
 	reboot remsync rget rlogin \
 	rotate rsh rshd service setup shar \
-	sleep slip spell split sprofalyze sprofdiff srccrc \
+	sleep slip spell sprofalyze sprofdiff srccrc \
 	stty svclog svrctl swifi synctree sysenv \
 	syslogd tail tcpd tcpdp tcpstat telnet \
 	telnetd term termcap tget time touch \
--- a/commands/split/Makefile
+++ b/commands/split/Makefile
@ -1,4 +0,0 @@
-PROG=	split
-MAN=
-
-.include <bsd.prog.mk>
--- a/commands/split/split.c
+++ b/commands/split/split.c
@ -1,117 +0,0 @@
-/* split - split a file			Author: Michiel Huisjes */
-
-#include <sys/types.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <minix/minlib.h>
-
-#define CHUNK_SIZE	1024
-
-int cut_line = 1000;
-int infile;
-char out_file[100];
-char *suffix;
-
-int main(int argc, char **argv);
-void split(void);
-int newfile(void);
-void usage(void);
-void quit(void);
-
-int main(argc, argv)
-int argc;
-char **argv;
-{
-  unsigned short i;
-
-  out_file[0] = 'x';
-  infile = -1;
-
-  if (argc > 4) usage();
-  for (i = 1; i < argc; i++) {
-	if (argv[i][0] == '-') {
-		if (argv[i][1] >= '0' && argv[i][1] <= '9'
-		    && cut_line == 1000)
-			cut_line = atoi(argv[i]);
-		else if (argv[i][1] == '\0' && infile == -1)
-			infile = 0;
-		else
-			usage();
-	} else if (infile == -1) {
-		if ((infile = open(argv[i], O_RDONLY)) < 0) {
-			std_err("Cannot open input file.\n");
-			exit(1);
-		}
-	} else
-		strcpy(out_file, argv[i]);
-  }
-  if (infile == -1) infile = 0;
-  strcat(out_file, "aa");
-  for (suffix = out_file; *suffix; suffix++);
-  suffix--;
-
-/* Appendix now points to last `a' of "aa". We have to decrement it by one */
-  *suffix = 'a' - 1;
-  split();
-  return(0);
-}
-
-void split()
-{
-  char buf[CHUNK_SIZE];
-  register char *index, *base;
-  register int n;
-  int fd;
-  long lines = 0L;
-
-  fd = -1;
-  while ((n = read(infile, buf, CHUNK_SIZE)) > 0) {
-	base = index = buf;
-	while (--n >= 0) {
-		if (*index++ == '\n')
-			if (++lines % cut_line == 0) {
-				if (fd == -1) fd = newfile();
-				if (write(fd, base, (int) (index - base)) != (int) (index - base))
-					quit();
-				base = index;
-				close(fd);
-				fd = -1;
-			}
-	}
-	if (index == base) continue;
-	if (fd == -1) fd = newfile();
-	if (write(fd, base, (int) (index - base)) != (int) (index - base))
-		quit();
-  }
-}
-
-int newfile()
-{
-  int fd;
-
-  if (++*suffix > 'z') {	/* Increment letter */
-	*suffix = 'a';		/* Reset last letter */
-	++*(suffix - 1);	/* Previous letter must be incremented */
-	/* E.g. was `filename.az' */
-	/* Now `filename.ba' */
-  }
-  if ((fd = creat(out_file, 0644)) < 0) {
-	std_err("Cannot create new file.\n");
-	exit(2);
-  }
-  return fd;
-}
-
-void usage()
-{
-  std_err("Usage: split [-n] [file [name]].\n");
-  exit(1);
-}
-
-void quit()
-{
-  std_err("split: write error\n");
-  exit(1);
-}
--- a/man/man1/Makefile
+++ b/man/man1/Makefile
@ -16,7 +16,7 @@ MAN=	ash.1 at.1 \
 	profile.1 ps.1 rcp.1 recwave.1 \
 	remsync.1 rget.1 rlogin.1 rsh.1 rz.1 \
 	shar.1 sleep.1 spell.1 \
-	split.1 stty.1 svc.1 svrctl.1 \
+	stty.1 svc.1 svrctl.1 \
 	synctree.1 sysenv.1 sz.1 tail.1 telnet.1 template.1 \
 	term.1 termcap.1 tget.1 time.1 true.1 \
 	truncate.1 tty.1 umount.1 uname.1 \
--- a/man/man1/split.1
+++ b/man/man1/split.1
@ -1,36 +0,0 @@
-.TH SPLIT 1
-.SH NAME
-split \- split a large file into several smaller files
-.SH SYNOPSIS
-\fBsplit\fR [\fB\-\fIn\fR]\fR [\fIfile \fR[\fIprefix\fR]\fR]\fR
-.br
-.de FL
-.TP
-\\fB\\$1\\fR
-\\$2
-..
-.de EX
-.TP 20
-\\fB\\$1\\fR
-# \\$2
-..
-.SH OPTIONS
-.TP 5
-.B \-\fIn\fP
-# Number of lines per piece (default: 1000)
-.SH EXAMPLES
-.TP 20
-.B split \-200 file
-# Split \fIfile\fP into pieces of 200 lines each
-.TP 20
-.B split file z
-# Split \fIfile\fP into \fIzaa\fP, \fIzab\fP, etc.
-.SH DESCRIPTION
-.PP
-.I Split 
-reads \fIfile\fP and writes it out in \fIn\fP-line pieces.
-By default, the pieces are called \fIxaa\fP, \fIxab\fP, etc.
-The optional second argument can be used to provide an alternative
-prefix for the output file names.
-.SH "SEE ALSO"
-.BR cat (1).
--- a/releasetools/nbsd_ports
+++ b/releasetools/nbsd_ports
@ -202,6 +202,7 @@
 2010/05/27 08:40:19,usr.bin/seq
 2013/06/02 12:00:00,usr.bin/shuffle
 2012/10/17 12:00:00,usr.bin/sort
+2012/10/17 12:00:00,usr.bin/split
 2011/01/15 22:54:10,usr.bin/stat
 2012/02/10 16:16:12,usr.bin/su
 2013/10/06 12:00:00,usr.bin/tee
--- a/usr.bin/Makefile
+++ b/usr.bin/Makefile
@ -24,7 +24,7 @@ SUBDIR= asa \
 	renice rev \
 	\
 	shuffle sed seq \
-	sort stat su \
+	sort split stat su \
 	tee tic tput \
 	tr tsort unexpand \
 	toproto \
--- a/usr.bin/split/Makefile
+++ b/usr.bin/split/Makefile
@ -0,0 +1,6 @@
+#	$NetBSD: Makefile,v 1.6 2009/04/14 22:15:26 lukem Exp $
+#	@(#)Makefile	8.1 (Berkeley) 6/6/93
+
+PROG=	split
+
+.include <bsd.prog.mk>
--- a/usr.bin/split/split.1
+++ b/usr.bin/split/split.1
@ -0,0 +1,132 @@
+.\"	$NetBSD: split.1,v 1.15 2007/05/31 01:35:35 jschauma Exp $
+.\"
+.\" Copyright (c) 1990, 1991, 1993, 1994
+.\"	The Regents of the University of California.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\" 3. Neither the name of the University nor the names of its contributors
+.\"    may be used to endorse or promote products derived from this software
+.\"    without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\"	@(#)split.1	8.3 (Berkeley) 4/16/94
+.\"
+.Dd May 28, 2007
+.Dt SPLIT 1
+.Os
+.Sh NAME
+.Nm split
+.Nd split a file into pieces
+.Sh SYNOPSIS
+.Nm
+.Op Fl a Ar suffix_length
+.Oo
+.Fl b Ar byte_count Ns Oo Li k|m Oc |
+.Fl l Ar line_count |
+.Fl n Ar chunk_count
+.Oc
+.Op Ar file Op Ar name
+.Sh DESCRIPTION
+The
+.Nm
+utility reads the given
+.Ar file
+and by default breaks it up into files of 1000 lines each.
+If
+.Ar file
+is a single dash or absent,
+.Nm
+reads from the standard input.
+.Ar file
+itself is not altered.
+.Pp
+The options are as follows:
+.Bl -tag -width Ds
+.It Fl a
+Use
+.Ar suffix_length
+letters to form the suffix of the file name.
+.It Fl b
+Create smaller files
+.Ar byte_count
+bytes in length.
+If
+.Ql k
+is appended to the number, the file is split into
+.Ar byte_count
+kilobyte pieces.
+If
+.Ql m
+is appended to the number, the file is split into
+.Ar byte_count
+megabyte pieces.
+.It Fl l
+Create smaller files
+.Ar line_count
+lines in length.
+.It Fl n
+Split file into
+.Ar chunk_count
+smaller files.
+.El
+.Pp
+If additional arguments are specified, the first is used as the name
+of the input file which is to be split.
+If a second additional argument is specified, it is used as a prefix
+for the names of the files into which the file is split.
+In this case, each file into which the file is split is named by the
+prefix followed by a lexically ordered suffix using
+.Ar suffix_length
+characters in the range
+.Dq Li a-z .
+If
+.Fl a
+is not specified, two letters are used as the suffix.
+.Pp
+If the
+.Ar name
+argument is not specified,
+.Ql x
+is used.
+.Sh STANDARDS
+The
+.Nm
+utility conforms to
+.St -p1003.1-2001 .
+.Sh HISTORY
+A
+.Nm
+command appeared in
+.At v6 .
+.Pp
+The
+.Fl a
+option was introduced in
+.Nx 2.0 .
+Before that, if
+.Ar name
+was not specified,
+.Nm
+would vary the first letter of the filename
+to increase the number of possible output files.
+The
+.Fl a
+option makes this unnecessary.
--- a/usr.bin/split/split.c
+++ b/usr.bin/split/split.c
@ -0,0 +1,361 @@
+/*	$NetBSD: split.c,v 1.26 2011/09/16 15:39:29 joerg Exp $	*/
+
+/*
+ * Copyright (c) 1987, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#ifndef lint
+__COPYRIGHT("@(#) Copyright (c) 1987, 1993, 1994\
+ The Regents of the University of California.  All rights reserved.");
+#endif /* not lint */
+
+#ifndef lint
+#if 0
+static char sccsid[] = "@(#)split.c	8.3 (Berkeley) 4/25/94";
+#endif
+__RCSID("$NetBSD: split.c,v 1.26 2011/09/16 15:39:29 joerg Exp $");
+#endif /* not lint */
+
+#include <sys/param.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#define DEFLINE	1000		/* Default num lines per file. */
+
+/*
+ * State shared by the splitters and newfile().
+ */
+static int file_open;		/* Zero when the next data read should start a new output file. */
+static int ifd = STDIN_FILENO, ofd = -1; /* Input/output file descriptors; ofd is -1 until the first output file is created. */
+static char *fname;		/* Output file name: the prefix, followed by the suffix newfile() writes. */
+static size_t sfxlen = 2;		/* Number of letters in the suffix (-a). */
+
+static void newfile(void);	/* Close the current output file and create the next one. */
+static void split1(off_t, int) __dead;	/* Split by byte count; the int caps the file count (0: no cap). */
+static void split2(off_t) __dead;	/* Split by line count. */
+static void split3(off_t) __dead;	/* Split into a given number of files. */
+static void usage(void) __dead;	/* Print the synopsis and exit. */
+static size_t bigwrite(int, void const *, size_t);	/* write(2) loop; returns bytes written. */
+
+/*
+ * to_off_t --
+ *	Store v in *out as an off_t.  Returns non-zero only if the stored
+ *	value is positive and equal to v, so zero and counts that do not
+ *	fit in off_t are both reported as invalid instead of silently
+ *	turning into a negative count.
+ */
+static int
+to_off_t(unsigned long long v, off_t *out)
+{
+	*out = (off_t)v;
+	return *out > 0 && (unsigned long long)*out == v;
+}
+
+/*
+ * main --
+ *	Parse the options, open the input file, build the output file name
+ *	prefix and run the splitter the options select: split1() for -b,
+ *	split3() for -n, split2() (by lines) otherwise.
+ */
+int
+main(int argc, char *argv[])
+{
+	int ch;
+	char *ep, *p;
+	char const *base;
+	unsigned long long val, mult;
+	off_t bytecnt = 0;	/* Byte count to split on. */
+	off_t numlines = 0;	/* Line count to split on. */
+	off_t chunks = 0;	/* Number of chunks to split into. */
+
+	while ((ch = getopt(argc, argv, "0123456789b:l:a:n:")) != -1)
+		switch (ch) {
+		case '0': case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9':
+			/*
+			 * Undocumented kludge: split was originally designed
+			 * to take a number after a dash.
+			 */
+			if (numlines == 0) {
+				/*
+				 * Use the previous argument when it is
+				 * exactly "-<digit>"; otherwise take the
+				 * number from the argument at optind,
+				 * skipping its dash.  Later digits of the
+				 * same argument are ignored because numlines
+				 * is already set.
+				 */
+				p = argv[optind - 1];
+				if (p[0] == '-' && p[1] == ch && !p[2])
+					p++;
+				else
+					p = argv[optind] + 1;
+				if (!to_off_t(strtoull(p, &ep, 10), &numlines) ||
+				    *ep != '\0')
+					errx(1, "%s: illegal line count.", p);
+			}
+			break;
+		case 'b':		/* Byte count. */
+			if (!isdigit((unsigned char)optarg[0]))
+				errx(1, "%s: illegal byte count.", optarg);
+			val = strtoull(optarg, &ep, 10);
+			mult = 1;
+			if (*ep == 'k') {
+				mult = 1024;
+				ep++;
+			} else if (*ep == 'm') {
+				mult = 1024 * 1024;
+				ep++;
+			}
+			/*
+			 * Nothing may follow the optional k/m suffix, and the
+			 * scaled count must neither wrap nor overflow off_t:
+			 * a negative count would make split1() write past
+			 * the end of its buffer.
+			 */
+			if (*ep != '\0' || val > ~0ULL / mult ||
+			    !to_off_t(val * mult, &bytecnt))
+				errx(1, "%s: illegal byte count.", optarg);
+			break;
+		case 'l':		/* Line count. */
+			if (numlines != 0)
+				usage();
+			if (!isdigit((unsigned char)optarg[0]) ||
+			    !to_off_t(strtoull(optarg, &ep, 10), &numlines) ||
+			    *ep != '\0')
+				errx(1, "%s: illegal line count.", optarg);
+			break;
+		case 'a':		/* Suffix length. */
+			if (!isdigit((unsigned char)optarg[0]) ||
+			    (sfxlen = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
+			    *ep != '\0')
+				errx(1, "%s: illegal suffix length.", optarg);
+			break;
+		case 'n':		/* Chunks. */
+			if (!isdigit((unsigned char)optarg[0]) ||
+			    !to_off_t(strtoul(optarg, &ep, 10), &chunks) ||
+			    *ep != '\0')
+				errx(1, "%s: illegal number of chunks.", optarg);
+			break;
+		default:
+			usage();
+		}
+	argv += optind;
+	argc -= optind;
+
+	/* First operand: input file; "-" keeps the default standard input. */
+	if (*argv != NULL) {
+		if (strcmp(*argv, "-") != 0 &&
+		    (ifd = open(*argv, O_RDONLY, 0)) < 0)
+			err(1, "%s", *argv);
+		++argv;
+	}
+
+
+	/* Second operand: output prefix; room is reserved for the suffix. */
+	base = (*argv != NULL) ? *argv++ : "x";
+	if ((fname = malloc(strlen(base) + sfxlen + 1)) == NULL)
+		err(EXIT_FAILURE, NULL);
+	(void)strcpy(fname, base);		/* File name prefix. */
+
+	if (*argv != NULL)
+		usage();
+
+	/* A line count, a byte count and a chunk count exclude each other. */
+	if (numlines == 0)
+		numlines = DEFLINE;
+	else if (bytecnt || chunks)
+		usage();
+
+	if (bytecnt && chunks)
+		usage();
+
+	if (bytecnt)
+		split1(bytecnt, 0);
+	else if (chunks)
+		split3(chunks);
+	else 
+		split2(numlines);
+
+	return 0;
+}
+
+/*
+ * split1 --
+ *	Split the input by bytes.
+ *
+ *	Copies ifd to successive output files of bytecnt bytes each.  When
+ *	maxcnt is non-zero, no more than maxcnt files are created; once that
+ *	many exist newfile() is no longer called, so the remaining data is
+ *	written to ofd as it stands.  Does not return: exits with status 0
+ *	at end of input and through err() on a read or write failure.
+ */
+static void
+split1(off_t bytecnt, int maxcnt)
+{
+	off_t bcnt;		/* Bytes counted toward the current piece. */
+	ssize_t dist, len;
+	char *C;		/* Next unwritten byte in bfr. */
+	char bfr[MAXBSIZE];
+	int nfiles;		/* Output files created so far. */
+
+	nfiles = 0;
+
+	for (bcnt = 0;;)
+		switch (len = read(ifd, bfr, MAXBSIZE)) {
+		case 0:
+			exit(0);
+			/* NOTREACHED */
+		case -1:
+			err(1, "read");
+			/* NOTREACHED */
+		default:
+			/* The previous piece ended exactly at the end of a read. */
+			if (!file_open) {
+				if (!maxcnt || (nfiles < maxcnt)) {
+					newfile();
+					nfiles++;
+					file_open = 1;
+				}
+			}
+			if (bcnt + len >= bytecnt) {
+				/* This buffer completes the current piece. */
+				/* LINTED: bytecnt - bcnt <= len */
+				dist = bytecnt - bcnt;
+				if (bigwrite(ofd, bfr, dist) != (size_t)dist)
+					err(1, "write");
+				len -= dist;
+				/* Emit every further whole piece held in the buffer. */
+				for (C = bfr + dist; len >= bytecnt;
+				    /* LINTED: bytecnt <= len */
+				    len -= bytecnt, C += bytecnt) {
+					if (!maxcnt || (nfiles < maxcnt)) {
+						newfile();
+						nfiles++;
+					}
+					/* LINTED: as above */
+					if (bigwrite(ofd,
+					    C, bytecnt) != (size_t)bytecnt)
+						err(1, "write");
+				}
+				/* A shorter tail starts the next piece. */
+				if (len) {
+					if (!maxcnt || (nfiles < maxcnt)) {
+						newfile();
+						nfiles++;
+					}
+					/* LINTED: len >= 0 */
+					if (bigwrite(ofd, C, len) != (size_t)len)
+						err(1, "write");
+				} else
+					file_open = 0;
+				bcnt = len;
+			} else {
+				/* The whole buffer belongs to the current piece. */
+				bcnt += len;
+				/* LINTED: len >= 0 */
+				if (bigwrite(ofd, bfr, len) != (size_t)len)
+					err(1, "write");
+			}
+		}
+}
+
+/*
+ * split2 --
+ *	Split the input by lines: every numlines newline-terminated lines
+ *	read from ifd go to a separate output file.  Does not return: exits
+ *	with status 0 at end of input and through err() on a read or write
+ *	failure.
+ */
+static void
+split2(off_t numlines)
+{
+	off_t nlines = 0;	/* Lines counted toward the current file. */
+	size_t piece;
+	ssize_t nread;
+	char *cur, *start, *end;
+	char bfr[MAXBSIZE];
+
+	for (;;) {
+		nread = read(ifd, bfr, MAXBSIZE);
+		if (nread == -1)
+			err(1, "read");
+		if (nread == 0)
+			exit(0);
+
+		/* The previous file was completed by the end of the last read. */
+		if (!file_open) {
+			newfile();
+			file_open = 1;
+		}
+
+		start = bfr;
+		end = bfr + nread;
+		for (cur = bfr; cur < end; cur++) {
+			if (*cur != '\n')
+				continue;
+			if (++nlines != numlines)
+				continue;
+
+			/* Flush everything up to and including this newline. */
+			piece = cur - start + 1;
+			if (bigwrite(ofd, start, piece) != (size_t)piece)
+				err(1, "write");
+			nlines = 0;
+			start = cur + 1;
+
+			/* Open the next file only when more data is in hand. */
+			if (start < end)
+				newfile();
+			else
+				file_open = 0;
+		}
+
+		/* Whatever is left belongs to a file that is not full yet. */
+		if (start < end) {
+			piece = end - start;
+			if (bigwrite(ofd, start, piece) != (size_t)piece)
+				err(1, "write");
+		}
+	}
+}
+
+/*
+ * split3 --
+ *	Split the input into specified number of chunks
+ *
+ *	The size reported by fstat() for ifd is divided by chunks to get the
+ *	piece size, and split1() is told to create at most chunks files.
+ *	Does not return.
+ */
+static void
+split3(off_t chunks)
+{
+	struct stat sb;
+	int maxcnt;
+
+	if (fstat(ifd, &sb) == -1) {
+		err(1, "stat");
+		/* NOTREACHED */
+	}
+
+	/*
+	 * split1() takes the file limit as an int.  Refuse counts that are
+	 * not positive or that change when narrowed: a truncated value of 0
+	 * would lift the limit and a negative one would stop any file from
+	 * being created.  This also keeps the division below well defined.
+	 */
+	maxcnt = (int)chunks;
+	if (maxcnt <= 0 || (off_t)maxcnt != chunks) {
+		errx(1, "%lld: illegal number of chunks.", (long long)chunks);
+		/* NOTREACHED */
+	}
+
+	if (chunks > sb.st_size) {
+		/* Print the size at full width rather than through an int. */
+		errx(1, "can't split into more than %lld files",
+				(long long)sb.st_size);
+		/* NOTREACHED */
+	}
+
+	split1(sb.st_size/chunks, maxcnt);
+}
+
+/*
+ * newfile --
+ *	Close the current output file, if there is one, and create the next
+ *	one.  Its name is the prefix held in fname followed by sfxlen letters
+ *	that spell the running file number in base 26, 'a' standing for zero.
+ */
+static void
+newfile(void)
+{
+	static int seq;			/* Number of files created so far. */
+	static char *suffix;		/* Start of the suffix inside fname. */
+	int rest, pos;
+
+	if (ofd != -1) {
+		if (close(ofd) != 0)
+			err(1, "%s", fname);
+	} else {
+		/* First call: locate the end of the prefix, terminate the name. */
+		suffix = fname + strlen(fname);
+		suffix[sfxlen] = '\0';
+	}
+
+	/* Fill in the suffix from its last letter backwards. */
+	rest = seq;
+	pos = sfxlen - 1;
+	while (pos >= 0) {
+		suffix[pos] = 'a' + rest % 26;
+		rest /= 26;
+		pos--;
+	}
+	/* Anything left over does not fit in sfxlen letters. */
+	if (rest > 0)
+		errx(1, "too many files.");
+	seq++;
+
+	ofd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, DEFFILEMODE);
+	if (ofd < 0)
+		err(1, "%s", fname);
+}
+
+/*
+ * bigwrite --
+ *	Write all len bytes of buf to fd, continuing after short writes.
+ *	Returns the number of bytes written; the result is smaller than len
+ *	only when write(2) failed or made no progress.
+ */
+static size_t
+bigwrite(int fd, const void *buf, size_t len)
+{
+	const char *ptr = buf;
+	size_t sofar = 0;
+	ssize_t w;
+
+	while (len != 0) {
+		w = write(fd, ptr, len);
+		if (w == -1 && errno == EINTR)
+			continue;	/* Interrupted before any byte went out: retry. */
+		if (w <= 0)
+			return sofar;	/* Error, or no progress: stop instead of spinning. */
+		len -= w;
+		ptr += w;
+		sofar += w;
+	}
+	return sofar;
+}
+
+
+/*
+ * usage --
+ *	Print the command synopsis on standard error and exit with status 1.
+ */
+static void
+usage(void)
+{
+	const char *prog = getprogname();
+
+	(void)fprintf(stderr,
+	    "usage: %s [-b byte_count] [-l line_count] [-n chunk_count] "
+	    "[-a suffix_length] [file [prefix]]\n", prog);
+	exit(1);
+}