From 40b23ce476b0e85e5ffcf4697b70823258d11d2c Mon Sep 17 00:00:00 2001 From: Thomas Cort Date: Mon, 14 Oct 2013 23:53:26 -0400 Subject: [PATCH] Importing usr.bin/unzip No Minix-specific changes needed. Change-Id: Ib0b2bf2254acf3d8704bd6acda83af997240ebcf --- distrib/sets/lists/minix/mi | 2 + releasetools/nbsd_ports | 1 + usr.bin/Makefile | 2 +- usr.bin/unzip/Makefile | 10 + usr.bin/unzip/unzip.1 | 190 +++++++ usr.bin/unzip/unzip.c | 1074 +++++++++++++++++++++++++++++++++++ 6 files changed, 1278 insertions(+), 1 deletion(-) create mode 100644 usr.bin/unzip/Makefile create mode 100644 usr.bin/unzip/unzip.1 create mode 100644 usr.bin/unzip/unzip.c diff --git a/distrib/sets/lists/minix/mi b/distrib/sets/lists/minix/mi index dbe8f36cb..a2e87c282 100644 --- a/distrib/sets/lists/minix/mi +++ b/distrib/sets/lists/minix/mi @@ -517,6 +517,7 @@ ./usr/bin/unlzma minix-sys ./usr/bin/unstack minix-sys ./usr/bin/unxz minix-sys +./usr/bin/unzip minix-sys ./usr/bin/update minix-sys ./usr/bin/uptime minix-sys ./usr/bin/uud minix-sys @@ -2071,6 +2072,7 @@ ./usr/man/man1/unlzma.1 minix-sys ./usr/man/man1/unset.1 minix-sys ./usr/man/man1/unxz.1 minix-sys +./usr/man/man1/unzip.1 minix-sys ./usr/man/man1/uud.1 minix-sys ./usr/man/man1/uue.1 minix-sys ./usr/man/man1/vi.1 minix-sys diff --git a/releasetools/nbsd_ports b/releasetools/nbsd_ports index 5149a83d5..d87d62073 100644 --- a/releasetools/nbsd_ports +++ b/releasetools/nbsd_ports @@ -198,6 +198,7 @@ 2012/10/17 12:00:00,usr.bin/tr 2012/10/17 12:00:00,usr.bin/tsort 2010/10/06 07:59:18,usr.bin/uniq +2013/10/14 12:00:00,usr.bin/unzip 2012/10/17 12:00:00,usr.bin/wc 2013/03/22 12:00:00,usr.bin/whatis 2013/03/15 12:00:00,usr.bin/who diff --git a/usr.bin/Makefile b/usr.bin/Makefile index aa7aa88c7..01a36ae73 100644 --- a/usr.bin/Makefile +++ b/usr.bin/Makefile @@ -28,7 +28,7 @@ SUBDIR= \ tee tic tput \ tr tsort unexpand \ toproto \ - uniq \ + uniq unzip \ \ wc whatis who \ xargs xinstall yes diff --git a/usr.bin/unzip/Makefile 
b/usr.bin/unzip/Makefile new file mode 100644 index 000000000..8fc742046 --- /dev/null +++ b/usr.bin/unzip/Makefile @@ -0,0 +1,10 @@ +# $NetBSD: Makefile,v 1.2 2011/08/18 11:29:27 christos Exp $ + +PROG= unzip + +DPADD+= ${LIBARCHIVE} ${LIBZ} ${LIBBZ2} +LDADD+= -larchive -lz -lbz2 + +COPTS.unzip.c += -Wno-format-y2k + +.include <bsd.prog.mk> diff --git a/usr.bin/unzip/unzip.1 b/usr.bin/unzip/unzip.1 new file mode 100644 index 000000000..202abd72f --- /dev/null +++ b/usr.bin/unzip/unzip.1 @@ -0,0 +1,190 @@ +.\"- +.\" Copyright (c) 2009 Joerg Sonnenberger +.\" Copyright (c) 2007-2008 Dag-Erling Coïdan Smørgrav +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD: revision 180125$ +.\" $NetBSD: unzip.1,v 1.9 2013/07/20 21:40:00 wiz Exp $ +.\" +.Dd August 18, 2011 +.Dt UNZIP 1 +.Os +.Sh NAME +.Nm unzip +.Nd extract files from a ZIP archive +.Sh SYNOPSIS +.Nm +.Op Fl aCcfjLlnopqtuvy +.Op Fl d Ar dir +.Op Fl x Ar pattern +.Ar zipfile +.Sh DESCRIPTION +The following options are available: +.Bl -tag -width Fl +.It Fl a +When extracting a text file, convert DOS-style line endings to +Unix-style line endings. +.It Fl C +Match file names case-insensitively. +.It Fl c +Extract to stdout/screen. +When extracting files from the zipfile, they are written to stdout. +This is similar to +.Fl p , +but doesn't suppress normal output. +.It Fl d Ar dir +Extract files into the specified directory rather than the current +directory. +.It Fl f +Update existing. +Extract only files from the zipfile if a file with the same name +already exists on disk and is older than the former. +Otherwise, the file is silently skipped. +.It Fl j +Ignore directories stored in the zipfile; instead, extract all files +directly into the extraction directory. +.It Fl L +Convert the names of the extracted files and directories to lowercase. +.It Fl l +List, rather than extract, the contents of the zipfile. +.It Fl n +No overwrite. +When extracting a file from the zipfile, if a file with the same name +already exists on disk, the file is silently skipped. +.It Fl o +Overwrite. +When extracting a file from the zipfile, if a file with the same name +already exists on disk, the existing file is replaced with the file +from the zipfile. +.It Fl p +Extract to stdout. +When extracting files from the zipfile, they are written to stdout. +The normal output is suppressed as if +.Fl q +was specified. +.It Fl q +Quiet: print less information while extracting. +.It Fl t +Test: do not extract anything, but verify the checksum of every file +in the archive. +.It Fl u +Update. 
+When extracting a file from the zipfile, if a file with the same name +already exists on disk, the existing file is replaced with the file +from the zipfile if and only if the latter is newer than the former. +Otherwise, the file is silently skipped. +.It Fl v +List verbosely, rather than extract, the contents of the zipfile. +This differs from +.Fl l +by using the long listing. +Note that most of the data is currently fake and does not reflect the +content of the archive. +.It Fl x Ar pattern +Exclude files matching the pattern +.Ar pattern . +.It Fl y +Print four digit years in listings instead of two. +.El +.Pp +Note that only one of +.Fl n , +.Fl o , +and +.Fl u +may be specified. +.Sh ENVIRONMENT +If the +.Ev UNZIP_DEBUG +environment variable is defined, the +.Fl q +command-line option has no effect, and additional debugging +information will be printed to +.Va stderr . +.Sh COMPATIBILITY +The +.Nm +utility aims to be sufficiently compatible with other implementations +to serve as a drop-in replacement in the context of the +.Xr pkgsrc 7 +system. +No attempt has been made to replicate functionality which is not +required for that purpose. +.Pp +For compatibility reasons, command-line options will be recognized if +they are listed not only before but also after the name of the +zipfile. +.Pp +Normally, the +.Fl a +option should only affect files which are marked as text files in the +zipfile's central directory. +Since the +.Xr archive 3 +library reads zipfiles sequentially, and does not use the central +directory, that information is not available to the +.Nm +utility. +Instead, the +.Nm +utility will assume that a file is a text file if no non-ASCII +characters are present within the first block of data decompressed for +that file. +If non-ASCII characters appear in subsequent blocks of data, a warning +will be issued. +.Pp +The +.Nm +utility is only able to process ZIP archives handled by +.Xr libarchive 3 . 
+Depending on the installed version of +.Xr libarchive 3 , +this may or may not include self-extracting archives. +.Sh SEE ALSO +.Xr libarchive 3 +.Sh HISTORY +The +.Nm +utility appeared in +.Nx 6.0 . +.Sh AUTHORS +The +.Nm +utility and this manual page were written by +.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org . +It uses the +.Xr archive 3 +library developed by +.An Tim Kientzle Aq Mt kientzle@FreeBSD.org . +.Sh BUGS +The +.Nm +utility currently does not support asking the user whether to +overwrite or skip a file that already exists on disk. +To be on the safe side, it will fail if it encounters a file that +already exists and neither the +.Fl n +nor the +.Fl o +command line option was specified. diff --git a/usr.bin/unzip/unzip.c b/usr.bin/unzip/unzip.c new file mode 100644 index 000000000..df008b5a5 --- /dev/null +++ b/usr.bin/unzip/unzip.c @@ -0,0 +1,1074 @@ +/* $NetBSD: unzip.c,v 1.19 2011/09/06 18:43:41 joerg Exp $ */ + +/*- + * Copyright (c) 2009, 2010 Joerg Sonnenberger + * Copyright (c) 2007-2008 Dag-Erling Coïdan Smørgrav + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: revision 180124$
+ *
+ * This file would be much shorter if we didn't care about command-line
+ * compatibility with Info-ZIP's UnZip, which requires us to duplicate
+ * parts of libarchive in order to gain more detailed control of its
+ * behaviour for the purpose of implementing the -n, -o, -L and -a
+ * options.
+ */
+
+/*
+ * NOTE(review): the header names below were missing from this copy of the
+ * patch and have been reconstructed from the APIs the file uses; confirm
+ * against the upstream NetBSD source.
+ */
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: unzip.c,v 1.19 2011/09/06 18:43:41 joerg Exp $");
+
+#include <sys/queue.h>
+#include <sys/stat.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <archive.h>
+#include <archive_entry.h>
+
+/*
+ * Command-line options.  Each flag is 0 until getopts() sees the matching
+ * option letter.
+ */
+static int a_opt;		/* convert EOL */
+static int C_opt;		/* match case-insensitively */
+static int c_opt;		/* extract to stdout */
+static const char *d_arg;	/* directory */
+static int f_opt;		/* update existing files only */
+static int j_opt;		/* junk directories */
+static int L_opt;		/* lowercase names */
+static int n_opt;		/* never overwrite */
+static int o_opt;		/* always overwrite */
+static int p_opt;		/* extract to stdout, quiet */
+static int q_opt;		/* quiet */
+static int t_opt;		/* test */
+static int u_opt;		/* update */
+static int v_opt;		/* verbose/list: 1 for -l, 2 for -v */
+static const char * y_str = "";	/* 4 digit year: extra padding for listing headers */
+
+/* time when unzip started; used as the access time of extracted files */
+static time_t now;
+
+/* debug flag, set when UNZIP_DEBUG is present in the environment */
+static int unzip_debug;
+
+/* running on tty? 
*/
+static int tty;
+
+/*
+ * Convenience macro: evaluate a libarchive call and terminate through
+ * errorx() with libarchive's error string unless the call returned
+ * ARCHIVE_OK.  The expansion refers to a variable named `a', so a
+ * struct archive pointer of that name must be in scope where it is used.
+ */
+/* XXX should differentiate between ARCHIVE_{WARN,FAIL,RETRY} */
+#define ac(call)						\
+	do {							\
+		int acret = (call);				\
+		if (acret != ARCHIVE_OK)			\
+			errorx("%s", archive_error_string(a));	\
+	} while (0)
+
+/*
+ * Indicates that last info() did not end with EOL.  This helps error() et
+ * al. avoid printing an error message on the same line as an incomplete
+ * informational message.
+ */
+static int noeol;
+
+/*
+ * Fatal error message + errno.
+ *
+ * Prints "unzip: <formatted message>: <strerror(errno)>" to stderr and
+ * exits with status 1.  errno is captured on entry because the stdio
+ * calls made before the message is complete may overwrite it.
+ */
+__dead __printflike(1, 2) static void
+error(const char *fmt, ...)
+{
+	va_list ap;
+	int saved_errno = errno;
+
+	if (noeol)
+		fprintf(stdout, "\n");
+	fflush(stdout);
+	fprintf(stderr, "unzip: ");
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	fprintf(stderr, ": %s\n", strerror(saved_errno));
+	exit(1);
+}
+
+/*
+ * Fatal error message, no errno: prints "unzip: <formatted message>" to
+ * stderr and exits with status 1.
+ */
+__dead __printflike(1, 2) static void
+errorx(const char *fmt, ...)
+{
+	va_list ap;
+
+	if (noeol)
+		fprintf(stdout, "\n");
+	fflush(stdout);
+	fprintf(stderr, "unzip: ");
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	fprintf(stderr, "\n");
+	exit(1);
+}
+
+#if 0
+/* non-fatal error message + errno (currently compiled out) */
+__printflike(1, 2) static void
+warning(const char *fmt, ...)
+{
+	va_list ap;
+	int saved_errno = errno;
+
+	if (noeol)
+		fprintf(stdout, "\n");
+	fflush(stdout);
+	fprintf(stderr, "unzip: ");
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	fprintf(stderr, ": %s\n", strerror(saved_errno));
+}
+#endif
+/* non-fatal error message, no errno: like errorx() but returns */
+__printflike(1, 2) static void
+warningx(const char *fmt, ...)
+{
+	va_list ap;
+
+	if (noeol)
+		fprintf(stdout, "\n");
+	fflush(stdout);
+	fprintf(stderr, "unzip: ");
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	fprintf(stderr, "\n");
+}
+
+/*
+ * Informational message on stdout.  Suppressed when -q is in effect unless
+ * debugging is enabled.  Records in noeol whether the format string ended
+ * without a newline.
+ */
+__printflike(1, 2) static void
+info(const char *fmt, ...)
+{
+	va_list ap;
+
+	if (q_opt && !unzip_debug)
+		return;
+	va_start(ap, fmt);
+	vfprintf(stdout, fmt, ap);
+	va_end(ap);
+	fflush(stdout);
+
+	if (*fmt == '\0')
+		noeol = 1;
+	else
+		noeol = fmt[strlen(fmt) - 1] != '\n';
+}
+
+/*
+ * Debug message on stderr, printed only when unzip_debug is set.  Updates
+ * noeol the same way info() does.
+ */
+__printflike(1, 2) static void
+debug(const char *fmt, ...)
+{
+	va_list ap;
+
+	if (!unzip_debug)
+		return;
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	fflush(stderr);
+
+	if (*fmt == '\0')
+		noeol = 1;
+	else
+		noeol = fmt[strlen(fmt) - 1] != '\n';
+}
+
+/*
+ * Duplicate a path name, possibly converting to lower case (-L).
+ * Trailing slashes are dropped from the copy.  The result is allocated
+ * with malloc() and must be freed by the caller; allocation failure is
+ * fatal.
+ */
+static char *
+pathdup(const char *path)
+{
+	char *str;
+	size_t i, len;
+
+	len = strlen(path);
+	while (len && path[len - 1] == '/')
+		len--;
+	if ((str = malloc(len + 1)) == NULL) {
+		errno = ENOMEM;
+		error("malloc()");
+	}
+	if (L_opt) {
+		for (i = 0; i < len; ++i)
+			str[i] = tolower((unsigned char)path[i]);
+	} else {
+		memcpy(str, path, len);
+	}
+	str[len] = '\0';
+
+	return (str);
+}
+
+/*
+ * Concatenate two path names as "prefix/path".  A NULL prefix yields a
+ * plain copy of path.  The result is allocated with malloc() and must be
+ * freed by the caller; allocation failure is fatal.
+ */
+static char *
+pathcat(const char *prefix, const char *path)
+{
+	char *str;
+	size_t prelen, len;
+
+	/* both lengths include the terminating zero */
+	prelen = prefix ? strlen(prefix) + 1 : 0;
+	len = strlen(path) + 1;
+	if ((str = malloc(prelen + len)) == NULL) {
+		errno = ENOMEM;
+		error("malloc()");
+	}
+	if (prefix) {
+		memcpy(str, prefix, prelen);	/* includes zero */
+		str[prelen - 1] = '/';		/* splat zero */
+	}
+	memcpy(str + prelen, path, len);	/* includes zero */
+
+	return (str);
+}
+
+/*
+ * Pattern lists for include / exclude processing
+ */
+struct pattern {
+	STAILQ_ENTRY(pattern) link;
+	char pattern[];		/* NUL-terminated fnmatch() pattern */
+};
+
+STAILQ_HEAD(pattern_list, pattern);
+static struct pattern_list include = STAILQ_HEAD_INITIALIZER(include);
+static struct pattern_list exclude = STAILQ_HEAD_INITIALIZER(exclude);
+
+/*
+ * Add an entry to a pattern list.  The pattern text is copied, so the
+ * caller keeps ownership of its argument.  Allocation failure is fatal.
+ */
+static void
+add_pattern(struct pattern_list *list, const char *pattern)
+{
+	struct pattern *entry;
+	size_t len;
+
+	debug("adding pattern '%s'\n", pattern);
+	len = strlen(pattern);
+	if ((entry = malloc(sizeof *entry + len + 1)) == NULL) {
+		errno = ENOMEM;
+		error("malloc()");
+	}
+	memcpy(entry->pattern, pattern, len + 1);
+	STAILQ_INSERT_TAIL(list, entry, link);
+}
+
+/*
+ * Match a string against a list of patterns.  Returns 1 if any pattern
+ * matches (case-insensitively with -C), 0 otherwise.
+ */
+static int
+match_pattern(struct pattern_list *list, const char *str)
+{
+	struct pattern *entry;
+
+	STAILQ_FOREACH(entry, list, link) {
+		if (fnmatch(entry->pattern, str, C_opt ? FNM_CASEFOLD : 0) == 0)
+			return (1);
+	}
+	return (0);
+}
+
+/*
+ * Verify that a given pathname is in the include list and not in the
+ * exclude list.  An empty include list accepts everything.  Returns 1 if
+ * the pathname is accepted, 0 if not.
+ */
+static int
+accept_pathname(const char *pathname)
+{
+
+	if (!STAILQ_EMPTY(&include) && !match_pattern(&include, pathname))
+		return (0);
+	if (!STAILQ_EMPTY(&exclude) && match_pattern(&exclude, pathname))
+		return (0);
+	return (1);
+}
+
+/*
+ * Create the specified directory with the specified mode, taking certain
+ * precautions on the way.
+ */
+static void
+make_dir(const char *path, int mode)
+{
+	struct stat sb;
+
+	if (lstat(path, &sb) == 0) {
+		if (S_ISDIR(sb.st_mode))
+			return;
+		/*
+		 * Normally, we should either ask the user about removing
+		 * the non-directory of the same name as a directory we
+		 * wish to create, or respect the -n or -o command-line
+		 * options.  However, this may lead to a later failure or
+		 * even compromise (if this non-directory happens to be a
+		 * symlink to somewhere unsafe), so we don't.
+		 */
+
+		/*
+		 * Don't check unlink() result; failure will cause mkdir()
+		 * to fail later, which we will catch.
+		 */
+		(void)unlink(path);
+	}
+	if (mkdir(path, mode) != 0 && errno != EEXIST)
+		error("mkdir('%s')", path);
+}
+
+/*
+ * Ensure that all directories leading up to (but not including) the
+ * specified path exist.
+ *
+ * The path string is cut at its last '/' while the parent is processed and
+ * restored before returning.  An existing non-directory in the way is
+ * unlinked.  The results of unlink() and mkdir() are not checked here.
+ *
+ * XXX inefficient + modifies the path in-place
+ */
+static void
+make_parent(char *path)
+{
+	struct stat sb;
+	char *sep;
+
+	sep = strrchr(path, '/');
+	if (sep == NULL || sep == path)
+		return;
+	*sep = '\0';
+	if (lstat(path, &sb) == 0) {
+		if (S_ISDIR(sb.st_mode)) {
+			*sep = '/';
+			return;
+		}
+		unlink(path);
+	}
+	make_parent(path);
+	mkdir(path, 0755);
+	*sep = '/';
+
+#if 0
+	for (sep = path; (sep = strchr(sep, '/')) != NULL; sep++) {
+		/* root in case of absolute d_arg */
+		if (sep == path)
+			continue;
+		*sep = '\0';
+		make_dir(path, 0755);
+		*sep = '/';
+	}
+#endif
+}
+
+/*
+ * Extract a directory: create it at `path' with a sanitised mode and skip
+ * whatever data the archive entry carries.
+ */
+static void
+extract_dir(struct archive *a, struct archive_entry *e, const char *path)
+{
+	int mode;
+
+	mode = archive_entry_mode(e) & 0777;
+	if (mode == 0)
+		mode = 0755;
+
+	/*
+	 * Some zipfiles contain directories with weird permissions such
+	 * as 0644 or 0444.  This can cause strange issues such as being
+	 * unable to extract files into the directory we just created, or
+	 * the user being unable to remove the directory later without
+	 * first manually changing its permissions.  Therefore, we whack
+	 * the permissions into shape, assuming that the user wants full
+	 * access and that anyone who gets read access also gets execute
+	 * access.
+	 */
+	mode |= 0700;
+	if (mode & 0040)
+		mode |= 0010;
+	if (mode & 0004)
+		mode |= 0001;
+
+	info(" creating: %s/\n", path);
+	make_dir(path, mode);
+	ac(archive_read_data_skip(a));
+}
+
+/* scratch buffer shared by the extraction and test loops */
+static unsigned char buffer[8192];
+/* progress indicator frames */
+static char spinner[] = { '|', '/', '-', '\\' };
+
+/*
+ * Ask the user what to do about a file that already exists.
+ *
+ * Returns 1 if the existing file was unlinked and extraction should go
+ * ahead, 0 if *path was replaced by a user-supplied name (the caller must
+ * check the new name again), and -1 if the entry is to be skipped.
+ * Answering [A]ll sets o_opt and [N]one sets n_opt.
+ *
+ * When *path is replaced, the old string is freed and the new one comes
+ * from getline(), so it is still the caller's to free().  If stdin hits
+ * EOF or a read error there is nobody left to ask: the function then
+ * behaves as for [N]one, and *path may be left NULL.
+ */
+static int
+handle_existing_file(char **path)
+{
+	size_t alen;
+	ssize_t len;
+	char buf[4];
+	int c;
+
+	for (;;) {
+		fprintf(stderr,
+		    "replace %s? [y]es, [n]o, [A]ll, [N]one, [r]ename: ",
+		    *path);
+		if (fgets(buf, sizeof buf, stdin) == NULL) {
+			/* without this the loop would spin on stale data */
+			n_opt = 1;
+			return -1;
+		}
+		/*
+		 * Discard the rest of an over-long answer so that it is
+		 * not taken for the next reply or for the new file name.
+		 */
+		if (strchr(buf, '\n') == NULL) {
+			while ((c = getchar()) != EOF && c != '\n')
+				continue;
+		}
+		switch (*buf) {
+		case 'A':
+			o_opt = 1;
+			/* FALL THROUGH */
+		case 'y':
+		case 'Y':
+			(void)unlink(*path);
+			return 1;
+		case 'N':
+			n_opt = 1;
+			/* FALL THROUGH */
+		case 'n':
+			return -1;
+		case 'r':
+		case 'R':
+			printf("New name: ");
+			fflush(stdout);
+			free(*path);
+			*path = NULL;
+			alen = 0;
+			len = getline(path, &alen, stdin);
+			if (len < 0) {
+				/* getline() may allocate even on failure */
+				free(*path);
+				*path = NULL;
+				n_opt = 1;
+				return -1;
+			}
+			if (len > 0 && (*path)[len - 1] == '\n')
+				(*path)[len - 1] = '\0';
+			return 0;
+		default:
+			break;
+		}
+	}
+}
+
+/*
+ * Detect binary files by a combination of character white list and
+ * black list.  NUL bytes and other control codes without use in text files
+ * result directly in switching the file to binary mode.  Otherwise, at least
+ * one white-listed byte has to be found.
+ *
+ * Black-listed: 0..6, 14..25, 28..31
+ * White-listed: 9..10, 13, >= 32
+ *
+ * See the proginfo/txtvsbin.txt in the zip sources for a detailed discussion.
+ */
+#define BYTE_IS_BINARY(x)	((x) < 32 && (0xf3ffc07fU & (1U << (x))))
+#define BYTE_IS_TEXT(x)		((x) >= 32 || (0x00002600U & (1U << (x))))
+
+/*
+ * Classify a buffer.  Returns 1 (binary) if any byte is black-listed or if
+ * no byte is white-listed, which includes the empty buffer; returns 0
+ * (text) otherwise.
+ */
+static int
+check_binary(const unsigned char *buf, size_t len)
+{
+	int rv;
+	for (rv = 1; len--; ++buf) {
+		if (BYTE_IS_BINARY(*buf))
+			return 1;
+		if (BYTE_IS_TEXT(*buf))
+			rv = 0;
+	}
+
+	return rv;
+}
+
+/*
+ * Extract a regular file.
+ *
+ * Writes the data of entry `e' to *path, honouring the overwrite options
+ * (-f, -u, -o, -n) or asking the user when none of them applies, and
+ * optionally converting CR LF to LF (-a).  *path may be replaced if the
+ * user chooses to rename the file.  The file's modification time is set to
+ * the entry's time and its access time to the time unzip started.  I/O
+ * failures are fatal.
+ */
+static void
+extract_file(struct archive *a, struct archive_entry *e, char **path)
+{
+	int mode;
+	time_t mtime;
+	struct stat sb;
+	struct timeval tv[2];
+	int cr, fd, text, warn, check;
+	ssize_t len;
+	unsigned char *p, *q, *end;
+
+	mode = archive_entry_mode(e) & 0777;
+	if (mode == 0)
+		mode = 0644;
+	mtime = archive_entry_mtime(e);
+
+	/* look for existing file of same name */
+recheck:
+	if (lstat(*path, &sb) == 0) {
+		if (u_opt || f_opt) {
+			/* check if up-to-date */
+			if (S_ISREG(sb.st_mode) && sb.st_mtime >= mtime)
+				return;
+			(void)unlink(*path);
+		} else if (o_opt) {
+			/* overwrite */
+			(void)unlink(*path);
+		} else if (n_opt) {
+			/* do not overwrite */
+			return;
+		} else {
+			check = handle_existing_file(path);
+			if (check == 0)
+				goto recheck;
+			if (check == -1)
+				return; /* do not overwrite */
+		}
+	} else {
+		/* -f only refreshes files that already exist */
+		if (f_opt)
+			return;
+	}
+
+	if ((fd = open(*path, O_RDWR|O_CREAT|O_TRUNC, mode)) < 0)
+		error("open('%s')", *path);
+
+	/* loop over file contents and write to disk */
+	info(" extracting: %s", *path);
+	text = a_opt;
+	warn = 0;
+	cr = 0;
+	for (int n = 0; ; n++) {
+		if (tty && (n % 4) == 0)
+			info(" %c\b\b", spinner[(n / 4) % sizeof spinner]);
+
+		len = archive_read_data(a, buffer, sizeof buffer);
+
+		if (len < 0)
+			ac(len);
+
+		/* left over CR from previous buffer */
+		if (a_opt && cr) {
+			if (len == 0 || buffer[0] != '\n')
+				if (write(fd, "\r", 1) != 1)
+					error("write('%s')", *path);
+			cr = 0;
+		}
+
+		/* EOF */
+		if (len == 0)
+			break;
+		end = buffer + len;
+
+		/*
+		 * Detect whether this is a text file.  The correct way to
+		 * do this is to check the least significant bit of the
+		 * "internal file attributes" field of the corresponding
+		 * file header in the central directory, but libarchive
+		 * does not read the central directory, so we have to
+		 * guess by looking for non-ASCII characters in the
+		 * buffer.  Hopefully we won't guess wrong.  If we do
+		 * guess wrong, we print a warning message later.
+		 */
+		if (a_opt && n == 0) {
+			if (check_binary(buffer, len))
+				text = 0;
+		}
+
+		/* simple case */
+		if (!a_opt || !text) {
+			if (write(fd, buffer, len) != len)
+				error("write('%s')", *path);
+			continue;
+		}
+
+		/* hard case: convert \r\n to \n (sigh...) */
+		p = buffer;
+		while (p < end) {
+			for (q = p; q < end; q++) {
+				if (!warn && BYTE_IS_BINARY(*q)) {
+					warningx("%s may be corrupted due"
+					    " to weak text file detection"
+					    " heuristic", *path);
+					warn = 1;
+				}
+				if (q[0] != '\r')
+					continue;
+				if (&q[1] == end) {
+					/* decide once the next buffer is in */
+					cr = 1;
+					break;
+				}
+				if (q[1] == '\n')
+					break;
+			}
+			if (write(fd, p, q - p) != q - p)
+				error("write('%s')", *path);
+			/*
+			 * Step over the CR we stopped on.  When the scan
+			 * ran off the end there is nothing to skip, and
+			 * q + 1 would point outside the buffer.
+			 */
+			p = (q < end) ? q + 1 : end;
+		}
+	}
+	if (tty)
+		info(" \b\b");
+	if (text)
+		info(" (text)");
+	info("\n");
+
+	/* set access and modification time */
+	tv[0].tv_sec = now;
+	tv[0].tv_usec = 0;
+	tv[1].tv_sec = mtime;
+	tv[1].tv_usec = 0;
+	if (futimes(fd, tv) != 0)
+		error("utimes('%s')", *path);
+	if (close(fd) != 0)
+		error("close('%s')", *path);
+}
+
+/*
+ * Extract a zipfile entry: first perform some sanity checks to ensure
+ * that it is either a directory or a regular file and that the path is
+ * not absolute and does not try to break out of the current directory;
+ * then call either extract_dir() or extract_file() as appropriate.
+ *
+ * This is complicated a bit by the various ways in which we need to
+ * manipulate the path name.  Case conversion (if requested by the -L
+ * option) happens first, but the include / exclude patterns are applied
+ * to the full converted path name, before the directory part of the path
+ * is removed in accordance with the -j option.  Sanity checks are
+ * intentionally done earlier than they need to be, so the user will get a
+ * warning about insecure paths even for files or directories which
+ * wouldn't be extracted anyway.
+ */
+static void
+extract(struct archive *a, struct archive_entry *e)
+{
+	char *pathname, *realpathname;
+	mode_t filetype;
+	char *p, *q;
+	size_t plen;
+
+	pathname = pathdup(archive_entry_pathname(e));
+	filetype = archive_entry_filetype(e);
+
+	/*
+	 * Sanity checks.  pathdup() has removed trailing slashes, so a ".."
+	 * component can also show up as the whole name or as its tail.
+	 */
+	plen = strlen(pathname);
+	if (pathname[0] == '/' ||
+	    strcmp(pathname, "..") == 0 ||
+	    strncmp(pathname, "../", 3) == 0 ||
+	    strstr(pathname, "/../") != NULL ||
+	    (plen >= 3 && strcmp(pathname + plen - 3, "/..") == 0)) {
+		warningx("skipping insecure entry '%s'", pathname);
+		ac(archive_read_data_skip(a));
+		free(pathname);
+		return;
+	}
+
+	/* I don't think this can happen in a zipfile.. */
+	if (!S_ISDIR(filetype) && !S_ISREG(filetype)) {
+		warningx("skipping non-regular entry '%s'", pathname);
+		ac(archive_read_data_skip(a));
+		free(pathname);
+		return;
+	}
+
+	/* skip directories in -j case */
+	if (S_ISDIR(filetype) && j_opt) {
+		ac(archive_read_data_skip(a));
+		free(pathname);
+		return;
+	}
+
+	/* apply include / exclude patterns */
+	if (!accept_pathname(pathname)) {
+		ac(archive_read_data_skip(a));
+		free(pathname);
+		return;
+	}
+
+	/* apply -j and -d */
+	if (j_opt) {
+		/* q ends up just past the last '/', i.e. at the base name */
+		for (p = q = pathname; *p; ++p)
+			if (*p == '/')
+				q = p + 1;
+		realpathname = pathcat(d_arg, q);
+	} else {
+		realpathname = pathcat(d_arg, pathname);
+	}
+
+	/* ensure that parent directory exists */
+	make_parent(realpathname);
+
+	if (S_ISDIR(filetype))
+		extract_dir(a, e, realpathname);
+	else
+		extract_file(a, e, &realpathname);
+
+	free(realpathname);
+	free(pathname);
+}
+
+/*
+ * Write the contents of a regular-file entry to stdout (-c / -p),
+ * optionally converting CR LF to LF (-a).  Directories and entries
+ * rejected by the include / exclude patterns are skipped.
+ */
+static void
+extract_stdout(struct archive *a, struct archive_entry *e)
+{
+	char *pathname;
+	mode_t filetype;
+	int cr, text, warn;
+	ssize_t len;
+	unsigned char *p, *q, *end;
+
+	pathname = pathdup(archive_entry_pathname(e));
+	filetype = archive_entry_filetype(e);
+
+	/* I don't think this can happen in a zipfile.. */
+	if (!S_ISDIR(filetype) && !S_ISREG(filetype)) {
+		warningx("skipping non-regular entry '%s'", pathname);
+		ac(archive_read_data_skip(a));
+		free(pathname);
+		return;
+	}
+
+	/* skip directories: they have no contents to print */
+	if (S_ISDIR(filetype)) {
+		ac(archive_read_data_skip(a));
+		free(pathname);
+		return;
+	}
+
+	/* apply include / exclude patterns */
+	if (!accept_pathname(pathname)) {
+		ac(archive_read_data_skip(a));
+		free(pathname);
+		return;
+	}
+
+	if (c_opt)
+		info("x %s\n", pathname);
+
+	text = a_opt;
+	warn = 0;
+	cr = 0;
+	for (int n = 0; ; n++) {
+		len = archive_read_data(a, buffer, sizeof buffer);
+
+		if (len < 0)
+			ac(len);
+
+		/*
+		 * Left over CR from previous buffer.  It is file data, so
+		 * it belongs on stdout with the rest of the contents.
+		 */
+		if (a_opt && cr) {
+			if (len == 0 || buffer[0] != '\n') {
+				if (fwrite("\r", 1, 1, stdout) != 1)
+					error("write('%s')", pathname);
+			}
+			cr = 0;
+		}
+
+		/* EOF */
+		if (len == 0)
+			break;
+		end = buffer + len;
+
+		/*
+		 * Detect whether this is a text file.  The correct way to
+		 * do this is to check the least significant bit of the
+		 * "internal file attributes" field of the corresponding
+		 * file header in the central directory, but libarchive
+		 * does not read the central directory, so we have to
+		 * guess by looking for non-ASCII characters in the
+		 * buffer.  Hopefully we won't guess wrong.  If we do
+		 * guess wrong, we print a warning message later.
+		 */
+		if (a_opt && n == 0) {
+			for (p = buffer; p < end; ++p) {
+				if (!isascii((unsigned char)*p)) {
+					text = 0;
+					break;
+				}
+			}
+		}
+
+		/* simple case */
+		if (!a_opt || !text) {
+			if (fwrite(buffer, 1, len, stdout) != (size_t)len)
+				error("write('%s')", pathname);
+			continue;
+		}
+
+		/* hard case: convert \r\n to \n (sigh...) */
+		p = buffer;
+		while (p < end) {
+			for (q = p; q < end; q++) {
+				if (!warn && !isascii(*q)) {
+					warningx("%s may be corrupted due"
+					    " to weak text file detection"
+					    " heuristic", pathname);
+					warn = 1;
+				}
+				if (q[0] != '\r')
+					continue;
+				if (&q[1] == end) {
+					/* decide once the next buffer is in */
+					cr = 1;
+					break;
+				}
+				if (q[1] == '\n')
+					break;
+			}
+			if (fwrite(p, 1, q - p, stdout) != (size_t)(q - p))
+				error("write('%s')", pathname);
+			/*
+			 * Step over the CR we stopped on.  When the scan
+			 * ran off the end there is nothing to skip, and
+			 * q + 1 would point outside the buffer.
+			 */
+			p = (q < end) ? q + 1 : end;
+		}
+	}
+
+	free(pathname);
+}
+
+/*
+ * Print one listing line for an entry to stdout (short form when v_opt is
+ * 1, long form when it is 2), then skip the entry's data.
+ */
+static void
+list(struct archive *a, struct archive_entry *e)
+{
+	char buf[20];
+	time_t mtime;
+	struct tm *tm;
+	size_t n;
+
+	mtime = archive_entry_mtime(e);
+	tm = localtime(&mtime);
+	/*
+	 * %Y / %y give the calendar year.  %G / %g are the ISO 8601
+	 * week-based year, which differs from it for some days around
+	 * New Year.
+	 */
+	if (tm == NULL)
+		n = 0;
+	else if (*y_str)
+		n = strftime(buf, sizeof(buf), "%m-%d-%Y %R", tm);
+	else
+		n = strftime(buf, sizeof(buf), "%m-%d-%y %R", tm);
+	if (n == 0)
+		snprintf(buf, sizeof(buf), "%s", "(bad time)");
+
+	if (v_opt == 1) {
+		printf(" %8ju %s %s\n",
+		    (uintmax_t)archive_entry_size(e),
+		    buf, archive_entry_pathname(e));
+	} else if (v_opt == 2) {
+		/* method, compressed size, ratio and CRC are placeholders */
+		printf("%8ju Stored %7ju 0%% %s %08x %s\n",
+		    (uintmax_t)archive_entry_size(e),
+		    (uintmax_t)archive_entry_size(e),
+		    buf,
+		    0U,
+		    archive_entry_pathname(e));
+	}
+	ac(archive_read_data_skip(a));
+}
+
+/*
+ * Extract to memory to check CRC.  Returns the number of errors found for
+ * this entry (0 or 1); directories always yield 0.
+ */
+static int
+test(struct archive *a, struct archive_entry *e)
+{
+	ssize_t len;
+	int error_count;
+
+	error_count = 0;
+	if (S_ISDIR(archive_entry_filetype(e)))
+		return 0;
+
+	info(" testing: %s\t", archive_entry_pathname(e));
+	while ((len = archive_read_data(a, buffer, sizeof buffer)) > 0)
+		/* nothing */;
+	if (len < 0) {
+		info(" %s\n", archive_error_string(a));
+		++error_count;
+	} else {
+		info(" OK\n");
+	}
+
+	/* shouldn't be necessary, but it doesn't hurt */
+	ac(archive_read_data_skip(a));
+
+	return error_count;
+}
+
+
+/*
+ * Main loop: open the zipfile, iterate over its contents and decide what
+ * to do with each entry.
+ *
+ * Depending on the options each entry is tested (-t), listed (-l / -v),
+ * written to stdout (-p / -c) or extracted to disk.  With -t the process
+ * exits through errorx() if any entry failed.
+ */
+static void
+unzip(const char *fn)
+{
+	struct archive *a;	/* must be called `a': ac() refers to it */
+	struct archive_entry *entry;
+	int zipfd, rc;
+	uintmax_t bytes = 0, entries = 0, failures = 0;
+
+	zipfd = open(fn, O_RDONLY);
+	if (zipfd < 0)
+		error("%s", fn);
+
+	a = archive_read_new();
+	ac(archive_read_support_format_zip(a));
+	ac(archive_read_open_fd(a, zipfd, 8192));
+
+	if (!(q_opt || p_opt))
+		printf("Archive: %s\n", fn);
+
+	/* listing header */
+	switch (v_opt) {
+	case 1:
+		printf(" Length %sDate Time Name\n", y_str);
+		printf(" -------- %s---- ---- ----\n", y_str);
+		break;
+	case 2:
+		printf(" Length Method Size Ratio %sDate Time CRC-32 Name\n", y_str);
+		printf("-------- ------ ------- ----- %s---- ---- ------ ----\n", y_str);
+		break;
+	default:
+		break;
+	}
+
+	while ((rc = archive_read_next_header(a, &entry)) != ARCHIVE_EOF) {
+		ac(rc);
+
+		if (t_opt) {
+			failures += test(a, entry);
+		} else if (v_opt) {
+			list(a, entry);
+		} else if (p_opt || c_opt) {
+			extract_stdout(a, entry);
+		} else {
+			extract(a, entry);
+		}
+
+		bytes += archive_entry_size(entry);
+		++entries;
+	}
+
+	/* listing footer */
+	switch (v_opt) {
+	case 1:
+		printf(" -------- %s-------\n", y_str);
+		printf(" %8ju %s%ju file%s\n",
+		    bytes, y_str, entries, entries != 1 ? "s" : "");
+		break;
+	case 2:
+		printf("-------- ------- --- %s-------\n", y_str);
+		printf("%8ju %7ju 0%% %s%ju file%s\n",
+		    bytes, bytes, y_str, entries,
+		    entries != 1 ? "s" : "");
+		break;
+	default:
+		break;
+	}
+
+	ac(archive_read_close(a));
+	(void)archive_read_finish(a);
+
+	if (close(zipfd) != 0)
+		error("%s", fn);
+
+	if (!t_opt)
+		return;
+	if (failures > 0)
+		errorx("%ju checksum error(s) found.", failures);
+	printf("No errors detected in compressed data of %s.\n", fn);
+}
+
+/* Print the usage summary to stderr and exit with status 1. */
+static void __dead
+usage(void)
+{
+
+	fprintf(stderr, "Usage: %s [-aCcfjLlnopqtuvy] [-d dir] [-x pattern] "
+	    "zipfile\n", getprogname());
+	exit(1);
+}
+
+/*
+ * Parse one run of options, starting at argv[1], into the option
+ * variables.  Returns the index of the first argument that was not
+ * consumed.  An unknown option ends the program through usage().
+ */
+static int
+getopts(int argc, char *argv[])
+{
+	int ch;
+
+	/* restart getopt(3) so the function can be called more than once */
+	optreset = optind = 1;
+	for (;;) {
+		ch = getopt(argc, argv, "aCcd:fjLlnopqtuvyx:");
+		if (ch == -1)
+			break;
+
+		switch (ch) {
+		case 'a':
+			a_opt = 1;
+			break;
+		case 'C':
+			C_opt = 1;
+			break;
+		case 'c':
+			c_opt = 1;
+			break;
+		case 'd':
+			d_arg = optarg;
+			break;
+		case 'f':
+			f_opt = 1;
+			break;
+		case 'j':
+			j_opt = 1;
+			break;
+		case 'L':
+			L_opt = 1;
+			break;
+		case 'l':
+			/* never downgrade an earlier -v */
+			if (!v_opt)
+				v_opt = 1;
+			break;
+		case 'n':
+			n_opt = 1;
+			break;
+		case 'o':
+			/* -o also turns on quiet mode */
+			o_opt = 1;
+			q_opt = 1;
+			break;
+		case 'p':
+			p_opt = 1;
+			break;
+		case 'q':
+			q_opt = 1;
+			break;
+		case 't':
+			t_opt = 1;
+			break;
+		case 'u':
+			u_opt = 1;
+			break;
+		case 'v':
+			v_opt = 2;
+			break;
+		case 'x':
+			add_pattern(&exclude, optarg);
+			break;
+		case 'y':
+			y_str = " ";
+			break;
+		default:
+			usage();
+		}
+	}
+
+	return (optind);
+}
+
+/*
+ * Entry point.  The command line has the shape
+ * "[options] zipfile [include patterns] [options]".
+ */
+int
+main(int argc, char *argv[])
+{
+	const char *zipfile;
+	int idx;
+
+	if (isatty(STDOUT_FILENO))
+		tty = 1;
+
+	if (getenv("UNZIP_DEBUG") != NULL)
+		unzip_debug = 1;
+
+	/* echo the command line when debugging */
+	for (int i = 0; i < argc; ++i) {
+		int sep = (i < argc - 1) ? ' ' : '\n';
+
+		debug("%s%c", argv[i], sep);
+	}
+
+	/*
+	 * Info-ZIP's unzip(1) expects certain options to come before the
+	 * zipfile name, and others to come after - though it does not
+	 * enforce this.  For simplicity, we accept *all* options both
+	 * before and after the zipfile name.
+	 */
+	idx = getopts(argc, argv);
+
+	if (idx >= argc)
+		usage();
+	zipfile = argv[idx];
+	idx++;
+
+	/* everything up to the next option is an include pattern */
+	while (idx < argc && argv[idx][0] != '-') {
+		add_pattern(&include, argv[idx]);
+		idx++;
+	}
+
+	/* second option run: the preceding word stands in for argv[0] */
+	idx--;
+	idx += getopts(argc - idx, argv + idx);
+
+	if (n_opt + o_opt + u_opt > 1)
+		errorx("-n, -o and -u are contradictory");
+
+	time(&now);
+
+	unzip(zipfile);
+
+	exit(0);
+}