VFS: fix error behavior for partial pipe writes

This patch fixes two related issues:

- If a large (>PIPE_BUF) pipe write is processed partially, only to be
  followed by a write error condition, then the process is left in an
  incorrect state, possibly causing VFS to crash on a subsequent call.

- If such a partially processed large pipe write ends up resulting in
  an EPIPE error, no corresponding SIGPIPE signal is generated.

The corrected behavior is tested in test68.

Change-Id: I5540e61ab6bcc60a31201485eda04bc49ece2ca8
This commit is contained in:
David van Moolenbroek 2015-06-05 18:28:20 +00:00
parent d9494baa34
commit 179bddcf5d
4 changed files with 97 additions and 7 deletions

View file

@ -27,7 +27,7 @@ EXTERN struct fproc {
int fp_blocked_on; /* what is it blocked on */ int fp_blocked_on; /* what is it blocked on */
int fp_block_callnr; /* blocked call if rd/wr can't finish */ int fp_block_callnr; /* blocked call if rd/wr can't finish */
int fp_cum_io_partial; /* partial byte count if rd/wr can't finish */ size_t fp_cum_io_partial; /* partial byte count if write can't finish */
endpoint_t fp_task; /* which task is proc suspended on */ endpoint_t fp_task; /* which task is proc suspended on */
cp_grant_id_t fp_grant; /* revoke this grant on unsuspend if > -1 */ cp_grant_id_t fp_grant; /* revoke this grant on unsuspend if > -1 */

View file

@ -217,8 +217,17 @@ static void do_pending_pipe(void)
r = rw_pipe(op, who_e, f, scratch(fp).io.io_buffer, scratch(fp).io.io_nbytes); r = rw_pipe(op, who_e, f, scratch(fp).io.io_buffer, scratch(fp).io.io_nbytes);
if (r != SUSPEND) /* Do we have results to report? */ if (r != SUSPEND) { /* Do we have results to report? */
/* Process is writing, but there is no reader. Send a SIGPIPE signal.
* This should match the corresponding code in read_write().
*/
if (r == EPIPE && op == WRITING) {
if (!(f->filp_flags & O_NOSIGPIPE))
sys_kill(fp->fp_endpoint, SIGPIPE);
}
replycode(fp->fp_endpoint, r); replycode(fp->fp_endpoint, r);
}
unlock_filp(f); unlock_filp(f);
} }

View file

@ -251,7 +251,7 @@ int read_write(struct fproc *rfp, int rw_flag, struct filp *f,
if (r == EPIPE && rw_flag == WRITING) { if (r == EPIPE && rw_flag == WRITING) {
/* Process is writing, but there is no reader. Tell the kernel to /* Process is writing, but there is no reader. Tell the kernel to
* generate s SIGPIPE signal. * generate a SIGPIPE signal.
*/ */
if (!(f->filp_flags & O_NOSIGPIPE)) { if (!(f->filp_flags & O_NOSIGPIPE)) {
sys_kill(rfp->fp_endpoint, SIGPIPE); sys_kill(rfp->fp_endpoint, SIGPIPE);
@ -326,12 +326,23 @@ size_t req_size;
assert(rw_flag == READING || rw_flag == WRITING); assert(rw_flag == READING || rw_flag == WRITING);
/* fp->fp_cum_io_partial is only nonzero when doing partial writes */ /* fp->fp_cum_io_partial is only nonzero when doing partial writes.
* We clear the field immediately here because we expect completion or error;
* its value must be (re)assigned if we end up suspending the write (again).
*/
cum_io = fp->fp_cum_io_partial; cum_io = fp->fp_cum_io_partial;
fp->fp_cum_io_partial = 0;
r = pipe_check(f, rw_flag, oflags, req_size, 0); r = pipe_check(f, rw_flag, oflags, req_size, 0);
if (r <= 0) { if (r <= 0) {
if (r == SUSPEND) pipe_suspend(f, buf, req_size); if (r == SUSPEND) {
fp->fp_cum_io_partial = cum_io;
pipe_suspend(f, buf, req_size);
}
/* If pipe_check returns an error instead of suspending the call, we
* return that error, even if we are resuming a partially completed
* operation (ie, a large blocking write), to match NetBSD's behavior.
*/
return(r); return(r);
} }
@ -350,6 +361,7 @@ size_t req_size;
buf, size, &new_pos, &cum_io_incr); buf, size, &new_pos, &cum_io_incr);
if (r != OK) { if (r != OK) {
assert(r != SUSPEND);
return(r); return(r);
} }
@ -366,7 +378,7 @@ size_t req_size;
/* partial write on pipe with */ /* partial write on pipe with */
/* O_NONBLOCK, return write count */ /* O_NONBLOCK, return write count */
if (!(oflags & O_NONBLOCK)) { if (!(oflags & O_NONBLOCK)) {
/* partial write on pipe with req_size > PIPE_SIZE, /* partial write on pipe with req_size > PIPE_BUF,
* non-atomic * non-atomic
*/ */
fp->fp_cum_io_partial = cum_io; fp->fp_cum_io_partial = cum_io;
@ -375,7 +387,7 @@ size_t req_size;
} }
} }
fp->fp_cum_io_partial = 0; assert(fp->fp_cum_io_partial == 0);
return(cum_io); return(cum_io);
} }

View file

@ -288,6 +288,74 @@ test_pipe_flag_setting()
if (close(pipes[1]) != 0) e(22); if (close(pipes[1]) != 0) e(22);
} }
/*
* Test the behavior of a large pipe write that achieves partial progress
* before the reader end is closed. The write call is expected to return EPIPE
* and generate a SIGPIPE signal, and otherwise leave the system in good order.
*/
static void
test_pipe_partial_write(void)
{
char buf[PIPE_BUF + 2];
int pfd[2], status;
signal(SIGPIPE, pipe_handler);
if (pipe(pfd) < 0) e(1);
switch (fork()) {
case 0:
close(pfd[1]);
sleep(1); /* let the parent block on the write(2) */
/*
* This one-byte read raises the question whether the write
* should return partial progress or not, since consumption of
* part of its data is now clearly visible. NetBSD chooses
* *not* to return partial progress, and MINIX3 follows suit.
*/
if (read(pfd[0], buf, 1) != 1) e(2);
sleep(1); /* let VFS retry satisfying the write(2) */
exit(errct); /* close the reader side of the pipe */
case -1:
e(3);
default:
break;
}
close(pfd[0]);
seen_pipe_signal = 0;
/*
* The following large write should block until the child exits, as
* that is when the read end closes, thus making eventual completion of
* the write impossible.
*/
if (write(pfd[1], buf, sizeof(buf)) != -1) e(4);
if (errno != EPIPE) e(5);
if (seen_pipe_signal != 1) e(6);
seen_pipe_signal = 0;
/* A subsequent write used to cause a system crash. */
if (write(pfd[1], buf, 1) != -1) e(7);
if (errno != EPIPE) e(8);
if (seen_pipe_signal != 1) e(9);
/* Clean up. */
close(pfd[1]);
if (wait(&status) <= 0) e(10);
if (!WIFEXITED(status)) e(11);
errct += WEXITSTATUS(status);
}
int int
main(int argc, char *argv[]) main(int argc, char *argv[])
{ {
@ -298,6 +366,7 @@ main(int argc, char *argv[])
test_pipe_cloexec(); test_pipe_cloexec();
test_pipe_nonblock(); test_pipe_nonblock();
test_pipe_nosigpipe(); test_pipe_nosigpipe();
test_pipe_partial_write();
quit(); quit();
return(-1); /* Unreachable */ return(-1); /* Unreachable */
} }