minix/minix/drivers/storage/virtio_blk/virtio_blk.c

/*
* virtio block driver for MINIX 3
*
* Copyright (c) 2013, A. Welzel, <arne.welzel@gmail.com>
*
* This software is released under the BSD license. See the LICENSE file
* included in the main directory of this source distribution for the
* license terms and conditions.
*/
#include <assert.h>
#include <minix/drivers.h>
#include <minix/blockdriver_mt.h>
#include <minix/drvlib.h>
#include <minix/virtio.h>
#include <minix/sysutil.h>
#include <sys/ioc_disk.h>
#include "virtio_blk.h"
#define mystatus(tid) (status_vir[(tid)] & 0xFF)
#define dprintf(s) do { \
printf("%s: ", name); \
printf s; \
printf("\n"); \
} while (0)
/* Number of threads to use */
#define VIRTIO_BLK_NUM_THREADS 4
/* virtio-blk blocksize is always 512 bytes */
#define VIRTIO_BLK_BLOCK_SIZE 512
static const char *const name = "virtio-blk";
/* static device handle */
static struct virtio_device *blk_dev;
static struct virtio_blk_config blk_config;
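/* Feature bits to negotiate with the host; the two trailing zeroes are
* the host and guest support flags, filled in by the virtio library
* during feature negotiation.
*/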
struct virtio_feature blkf[] = {
{ "barrier", VIRTIO_BLK_F_BARRIER, 0, 0 },
{ "sizemax", VIRTIO_BLK_F_SIZE_MAX, 0, 0 },
{ "segmax", VIRTIO_BLK_F_SEG_MAX, 0, 0 },
{ "geometry", VIRTIO_BLK_F_GEOMETRY, 0, 0 },
{ "read-only", VIRTIO_BLK_F_RO, 0, 0 },
{ "blocksize", VIRTIO_BLK_F_BLK_SIZE, 0, 0 },
{ "scsi", VIRTIO_BLK_F_SCSI, 0, 0 },
{ "flush", VIRTIO_BLK_F_FLUSH, 0, 0 },
{ "topology", VIRTIO_BLK_F_TOPOLOGY, 0, 0 },
{ "idbytes", VIRTIO_BLK_ID_BYTES, 0, 0 }
};
/* State information */
static int spurious_interrupt = 0;
static int terminating = 0;
static int open_count = 0;
/* Partition magic */
struct device part[DEV_PER_DRIVE];
struct device subpart[SUB_PER_DRIVE];
/* Headers for requests */
static struct virtio_blk_outhdr *hdrs_vir;
static phys_bytes hdrs_phys;
/* Status bytes for requests.
*
* A status is just a single byte, but bit 0 of each buffer's physical
* address doubles as the "writable" flag. Using a u16_t per status keeps
* every entry 2-byte aligned so that bit 0 stays free; the actual status
* is masked out of the low byte later (see mystatus()).
*/
static u16_t *status_vir;
static phys_bytes status_phys;
/* Prototypes */
static int virtio_blk_open(devminor_t minor, int access);
static int virtio_blk_close(devminor_t minor);
static ssize_t virtio_blk_transfer(devminor_t minor, int write, u64_t position,
endpoint_t endpt, iovec_t *iovec,
unsigned int cnt, int flags);
static int virtio_blk_ioctl(devminor_t minor, unsigned long req,
endpoint_t endpt, cp_grant_id_t grant, endpoint_t user_endpt);
static struct device * virtio_blk_part(devminor_t minor);
static void virtio_blk_geometry(devminor_t minor, struct part_geom *entry);
static void virtio_blk_device_intr(void);
static void virtio_blk_spurious_intr(void);
static void virtio_blk_intr(unsigned int irqs);
static int virtio_blk_device(devminor_t minor, device_id_t *id);
static int virtio_blk_flush(void);
static void virtio_blk_terminate(void);
static void virtio_blk_cleanup(void);
static int virtio_blk_status2error(u8_t status);
static int virtio_blk_alloc_requests(void);
static void virtio_blk_free_requests(void);
static int virtio_blk_feature_setup(void);
static int virtio_blk_config(void);
static int virtio_blk_probe(int skip);
/* libblockdriver driver tab */
static struct blockdriver virtio_blk_dtab = {
.bdr_type = BLOCKDRIVER_TYPE_DISK,
.bdr_open = virtio_blk_open,
.bdr_close = virtio_blk_close,
.bdr_transfer = virtio_blk_transfer,
.bdr_ioctl = virtio_blk_ioctl,
.bdr_part = virtio_blk_part,
.bdr_geometry = virtio_blk_geometry,
.bdr_intr = virtio_blk_intr,
.bdr_device = virtio_blk_device
};
static int
virtio_blk_open(devminor_t minor, int access)
{
struct device *dev = virtio_blk_part(minor);
/* Check if this device exists */
if (!dev)
return ENXIO;
/* Read-only devices should only be mounted... read-only */
if ((access & BDEV_W_BIT) &&
virtio_host_supports(blk_dev, VIRTIO_BLK_F_RO))
return EACCES;
/* Partition magic when opened the first time or re-opened after
* being fully closed
*/
if (open_count == 0) {
memset(part, 0, sizeof(part));
memset(subpart, 0, sizeof(subpart));
part[0].dv_size = blk_config.capacity * VIRTIO_BLK_BLOCK_SIZE;
partition(&virtio_blk_dtab, 0, P_PRIMARY, 0 /* ATAPI */);
blockdriver_mt_set_workers(0, VIRTIO_BLK_NUM_THREADS);
}
open_count++;
return OK;
}
static int
virtio_blk_close(devminor_t minor)
{
struct device *dev = virtio_blk_part(minor);
/* Check if this device exists */
if (!dev)
return ENXIO;
if (open_count == 0) {
dprintf(("Closing one too many times?"));
return EINVAL;
}
open_count--;
/* If fully closed, flush the device and set workers to 1 */
if (open_count == 0) {
virtio_blk_flush();
blockdriver_mt_set_workers(0, 1);
}
/* If supposed to terminate and fully closed, do it! */
if (terminating && open_count == 0)
virtio_blk_terminate();
return OK;
}
static int
prepare_bufs(struct vumap_vir *vir, struct vumap_phys *phys, int cnt, int w)
{
for (int i = 0; i < cnt; i++) {
/* Odd physical addresses can't be used: bit 0 of vp_addr
 * doubles as the "writable" flag below
 */
if (phys[i].vp_addr & 1) {
dprintf(("byte aligned %08lx", phys[i].vp_addr));
return EINVAL;
}
/* Check if the buffer is good */
if (phys[i].vp_size != vir[i].vv_size) {
dprintf(("Non-contig buf %08lx", phys[i].vp_addr));
return EINVAL;
}
/* For a write request the device only reads the buffers;
 * for a read, set bit 0 to mark them device-writable
 */
phys[i].vp_addr |= !w;
}
return OK;
}
static int
prepare_vir_vec(endpoint_t endpt, struct vumap_vir *vir, iovec_s_t *iv,
int cnt, vir_bytes *size)
{
/* This is pretty much the same as sum_iovec from AHCI,
* except that we don't support any iovecs where the size
* is not a multiple of 512
*/
vir_bytes s, total = 0;
for (int i = 0; i < cnt; i++) {
s = iv[i].iov_size;
if (s == 0 || (s % VIRTIO_BLK_BLOCK_SIZE) || s > LONG_MAX) {
dprintf(("bad iv[%d].iov_size (%lu) from %d", i, s,
endpt));
return EINVAL;
}
total += s;
if (total > LONG_MAX) {
dprintf(("total overflow from %d", endpt));
return EINVAL;
}
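/* Local (SELF) requests carry plain virtual addresses in
 * iov_grant instead of grant IDs.
 */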
if (endpt == SELF)
vir[i].vv_addr = (vir_bytes)iv[i].iov_grant;
else
vir[i].vv_grant = iv[i].iov_grant;
vir[i].vv_size = iv[i].iov_size;
}
*size = total;
return OK;
}
static ssize_t
virtio_blk_transfer(devminor_t minor, int write, u64_t position,
endpoint_t endpt, iovec_t *iovec, unsigned int cnt,
int flags)
{
/* Need to translate vir to phys */
struct vumap_vir vir[NR_IOREQS];
/* Physical addresses of buffers, including header and trailer */
struct vumap_phys phys[NR_IOREQS + 2];
/* Which thread is doing the transfer? */
thread_id_t tid = blockdriver_mt_get_tid();
vir_bytes size = 0;
vir_bytes size_tmp = 0;
struct device *dv;
u64_t sector;
u64_t end_part;
int r, pcnt = sizeof(phys) / sizeof(phys[0]);
iovec_s_t *iv = (iovec_s_t *)iovec;
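/* From the device's point of view a write request reads our memory,
* and a read request writes it; map with matching access.
*/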
int access = write ? VUA_READ : VUA_WRITE;
/* Make sure we don't touch this one anymore */
iovec = NULL;
if (cnt > NR_IOREQS)
return EINVAL;
/* Starting position at or beyond capacity? */
if (position >= blk_config.capacity * VIRTIO_BLK_BLOCK_SIZE)
return 0;
dv = virtio_blk_part(minor);
/* Does device exist? */
if (!dv)
return ENXIO;
position += dv->dv_base;
end_part = dv->dv_base + dv->dv_size;
/* Hmmm, AHCI tries to fix this up, but let's just say everything
* needs to be sector (512 byte) aligned...
*/
if (position % VIRTIO_BLK_BLOCK_SIZE) {
dprintf(("Non sector-aligned access %016llx", position));
return EINVAL;
}
sector = position / VIRTIO_BLK_BLOCK_SIZE;
r = prepare_vir_vec(endpt, vir, iv, cnt, &size);
if (r != OK)
return r;
if (position >= end_part)
return 0;
/* Truncate if the partition is smaller than that */
if (position + size > end_part - 1) {
size = end_part - position;
/* Fix up later */
size_tmp = 0;
cnt = 0;
} else {
/* Use all buffers */
size_tmp = size;
}
/* Fix up the number of vectors if size was truncated */
while (size_tmp < size)
size_tmp += vir[cnt++].vv_size;
/* If the last vector was too big, just truncate it */
if (size_tmp > size) {
vir[cnt - 1].vv_size = vir[cnt - 1].vv_size - (size_tmp - size);
size_tmp -= (size_tmp - size);
}
if (size % VIRTIO_BLK_BLOCK_SIZE) {
dprintf(("non-sector sized read (%lu) from %d", size, endpt));
return EINVAL;
}
/* Map vir to phys */
if ((r = sys_vumap(endpt, vir, cnt, 0, access,
&phys[1], &pcnt)) != OK) {
dprintf(("Unable to map memory from %d (%d)", endpt, r));
return r;
}
/* Prepare the header */
memset(&hdrs_vir[tid], 0, sizeof(hdrs_vir[0]));
if (write)
hdrs_vir[tid].type = VIRTIO_BLK_T_OUT;
else
hdrs_vir[tid].type = VIRTIO_BLK_T_IN;
hdrs_vir[tid].ioprio = 0;
hdrs_vir[tid].sector = sector;
/* First the header */
phys[0].vp_addr = hdrs_phys + tid * sizeof(hdrs_vir[0]);
phys[0].vp_size = sizeof(hdrs_vir[0]);
/* Put the physical buffers into phys */
if ((r = prepare_bufs(vir, &phys[1], pcnt, write)) != OK)
return r;
/* Put the status at the end */
phys[pcnt + 1].vp_addr = status_phys + tid * sizeof(status_vir[0]);
phys[pcnt + 1].vp_size = sizeof(u8_t);
/* Status always needs write access */
phys[pcnt + 1].vp_addr |= 1;
/* Send addresses to queue */
virtio_to_queue(blk_dev, 0, phys, 2 + pcnt, &tid);
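/* The address of 'tid' travels along as the request cookie;
* virtio_blk_device_intr() uses it to wake this thread up again.
*/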
/* Wait for completion */
blockdriver_mt_sleep();
/* All was good */
if (mystatus(tid) == VIRTIO_BLK_S_OK)
return size;
/* Error path */
dprintf(("ERROR status=%02x sector=%llu len=%lx cnt=%d op=%s t=%d",
mystatus(tid), sector, size, pcnt,
write ? "write" : "read", tid));
return virtio_blk_status2error(mystatus(tid));
}
static int
virtio_blk_ioctl(devminor_t minor, unsigned long req, endpoint_t endpt,
cp_grant_id_t grant, endpoint_t UNUSED(user_endpt))
{
switch (req) {
case DIOCOPENCT:
return sys_safecopyto(endpt, grant, 0,
(vir_bytes) &open_count, sizeof(open_count));
case DIOCFLUSH:
return virtio_blk_flush();
}
return ENOTTY;
}
static struct device *
virtio_blk_part(devminor_t minor)
{
/* There's only a single drive attached to this device, always.
* Let's take some shortcuts...
*/
/* Take care of d0 d0p0 ... */
if (minor >= 0 && minor < DEV_PER_DRIVE)
return &part[minor];
/* subparts start at MINOR_d0p0s0 */
if (minor >= MINOR_d0p0s0) {
minor -= MINOR_d0p0s0;
/* Only for the first disk */
if (minor >= SUB_PER_DRIVE)
return NULL;
return &subpart[minor];
}
return NULL;
}
static void
virtio_blk_geometry(devminor_t minor, struct part_geom *entry)
{
/* Only for the drive */
if (minor != 0)
return;
/* Only if the host supports it */
if (!virtio_host_supports(blk_dev, VIRTIO_BLK_F_GEOMETRY))
return;
entry->cylinders = blk_config.geometry.cylinders;
entry->heads = blk_config.geometry.heads;
entry->sectors = blk_config.geometry.sectors;
}
static void
virtio_blk_device_intr(void)
{
thread_id_t *tid;
/* Multiple requests might have finished */
while (!virtio_from_queue(blk_dev, 0, (void**)&tid, NULL))
blockdriver_mt_wakeup(*tid);
}
static void
virtio_blk_spurious_intr(void)
{
/* Output a single message about spurious interrupts */
if (spurious_interrupt)
return;
dprintf(("Got spurious interrupt"));
spurious_interrupt = 1;
}
static void
virtio_blk_intr(unsigned int irqs)
{
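/* The interrupt line may be shared with other devices; check
 * whether this device actually raised it.
 */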
if (virtio_had_irq(blk_dev))
virtio_blk_device_intr();
else
virtio_blk_spurious_intr();
virtio_irq_enable(blk_dev);
}
static int
virtio_blk_device(devminor_t minor, device_id_t *id)
{
struct device *dev = virtio_blk_part(minor);
/* Check if this device exists */
if (!dev)
return ENXIO;
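/* There is just one disk, so every valid minor maps to device 0 */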
*id = 0;
return OK;
}
static int
virtio_blk_flush(void)
{
struct vumap_phys phys[2];
size_t phys_cnt = sizeof(phys) / sizeof(phys[0]);
/* Which thread is doing this request? */
thread_id_t tid = blockdriver_mt_get_tid();
/* Host may not support flushing */
if (!virtio_host_supports(blk_dev, VIRTIO_BLK_F_FLUSH))
return EOPNOTSUPP;
/* Prepare the header */
memset(&hdrs_vir[tid], 0, sizeof(hdrs_vir[0]));
hdrs_vir[tid].type = VIRTIO_BLK_T_FLUSH;
/* Let this be a barrier if the host supports it */
if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_BARRIER))
hdrs_vir[tid].type |= VIRTIO_BLK_T_BARRIER;
/* Header and status for the queue */
phys[0].vp_addr = hdrs_phys + tid * sizeof(hdrs_vir[0]);
phys[0].vp_size = sizeof(hdrs_vir[0]);
phys[1].vp_addr = status_phys + tid * sizeof(status_vir[0]);
phys[1].vp_size = 1;
/* Status always needs write access */
phys[1].vp_addr |= 1;
/* Send flush request to queue */
virtio_to_queue(blk_dev, 0, phys, phys_cnt, &tid);
blockdriver_mt_sleep();
/* All was good */
if (mystatus(tid) == VIRTIO_BLK_S_OK)
return OK;
/* Error path */
dprintf(("ERROR status=%02x op=flush t=%d", mystatus(tid), tid));
return virtio_blk_status2error(mystatus(tid));
}
static void
virtio_blk_terminate(void)
{
/* Don't terminate if still opened */
if (open_count > 0)
return;
blockdriver_mt_terminate();
}
static void
virtio_blk_cleanup(void)
{
/* Just free the memory we allocated */
virtio_blk_free_requests();
virtio_reset_device(blk_dev);
virtio_free_queues(blk_dev);
virtio_free_device(blk_dev);
blk_dev = NULL;
}
static int
virtio_blk_status2error(u8_t status)
{
/* Convert a status from the host to an error */
switch (status) {
case VIRTIO_BLK_S_IOERR:
return EIO;
case VIRTIO_BLK_S_UNSUPP:
return ENOTSUP;
default:
panic("%s: unknown status: %02x", name, status);
}
/* Never reached */
return OK;
}
static int
virtio_blk_alloc_requests(void)
{
/* Allocate memory for request headers and status field */
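/* The device addresses both by physical address, so the memory has to
* be physically contiguous.
*/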
hdrs_vir = alloc_contig(VIRTIO_BLK_NUM_THREADS * sizeof(hdrs_vir[0]),
AC_ALIGN4K, &hdrs_phys);
if (!hdrs_vir)
return ENOMEM;
status_vir = alloc_contig(VIRTIO_BLK_NUM_THREADS * sizeof(status_vir[0]),
AC_ALIGN4K, &status_phys);
if (!status_vir) {
free_contig(hdrs_vir, VIRTIO_BLK_NUM_THREADS * sizeof(hdrs_vir[0]));
return ENOMEM;
}
return OK;
}
static void
virtio_blk_free_requests(void)
{
free_contig(hdrs_vir, VIRTIO_BLK_NUM_THREADS * sizeof(hdrs_vir[0]));
free_contig(status_vir, VIRTIO_BLK_NUM_THREADS * sizeof(status_vir[0]));
}
static int
virtio_blk_feature_setup(void)
{
/* Feature setup for virtio-blk
*
* FIXME: Besides the geometry, everything is just debug output
* FIXME2: magic numbers
*/
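/* Layout of the virtio-blk config space, for the offsets below:
* capacity at 0 (64 bits), size_max at 8, seg_max at 12, geometry
* at 16 (cylinders u16, heads u8, sectors u8), blk_size at 20.
*/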
if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_SEG_MAX)) {
blk_config.seg_max = virtio_sread32(blk_dev, 12);
dprintf(("Seg Max: %d", blk_config.seg_max));
}
if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_GEOMETRY)) {
blk_config.geometry.cylinders = virtio_sread16(blk_dev, 16);
blk_config.geometry.heads = virtio_sread8(blk_dev, 18);
blk_config.geometry.sectors = virtio_sread8(blk_dev, 19);
dprintf(("Geometry: cyl=%d heads=%d sectors=%d",
blk_config.geometry.cylinders,
blk_config.geometry.heads,
blk_config.geometry.sectors));
}
if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_SIZE_MAX))
dprintf(("Has size max"));
if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_FLUSH))
dprintf(("Supports flushing"));
if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_BLK_SIZE)) {
blk_config.blk_size = virtio_sread32(blk_dev, 20);
dprintf(("Block Size: %d", blk_config.blk_size));
}
if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_BARRIER))
dprintf(("Supports barrier"));
return 0;
}
static int
virtio_blk_config(void)
{
u32_t sectors_low, sectors_high, size_mbs;
/* capacity is always there */
sectors_low = virtio_sread32(blk_dev, 0);
sectors_high = virtio_sread32(blk_dev, 4);
blk_config.capacity = ((u64_t)sectors_high << 32) | sectors_low;
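/* Note that capacity is always given in 512-byte sectors, even if the
* host also advertises a blocksize.
*/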
/* If this gets truncated, you have a big disk... */
size_mbs = (u32_t)(blk_config.capacity * 512 / 1024 / 1024);
dprintf(("Capacity: %d MB", size_mbs));
/* do feature setup */
virtio_blk_feature_setup();
return 0;
}
static int
virtio_blk_probe(int skip)
{
int r;
/* The PCI subsystem device ID for virtio-blk is 0x0002 */
blk_dev = virtio_setup_device(0x0002, name, blkf,
sizeof(blkf) / sizeof(blkf[0]),
VIRTIO_BLK_NUM_THREADS, skip);
if (!blk_dev)
return ENXIO;
/* virtio-blk has one queue only */
if ((r = virtio_alloc_queues(blk_dev, 1)) != OK) {
virtio_free_device(blk_dev);
return r;
}
/* Allocate memory for headers and status */
if ((r = virtio_blk_alloc_requests()) != OK) {
virtio_free_queues(blk_dev);
virtio_free_device(blk_dev);
return r;
}
virtio_blk_config();
/* Let the host know that we are ready */
virtio_device_ready(blk_dev);
virtio_irq_enable(blk_dev);
return OK;
}
static int
sef_cb_init_fresh(int type, sef_init_info_t *info)
{
long instance = 0;
int r;
env_parse("instance", "d", 0, &instance, 0, 255);
if ((r = virtio_blk_probe((int)instance)) == OK) {
blockdriver_announce(type);
return OK;
}
/* Error path */
if (r == ENXIO)
panic("%s: No device found", name);
if (r == ENOMEM)
panic("%s: Not enough memory", name);
panic("%s: Unexpected failure (%d)", name, r);
}
static void
sef_cb_signal_handler(int signo)
{
/* Ignore all signals but SIGTERM */
if (signo != SIGTERM)
return;
terminating = 1;
virtio_blk_terminate();
/* We only leave the blockdriver_mt_task() loop after completing
* a request, which never happens for a mere signal. So if we are
* already fully closed, call exit() right away.
*/
if (open_count == 0)
exit(0);
}
static void
sef_local_startup(void)
{
sef_setcb_init_fresh(sef_cb_init_fresh);
sef_setcb_signal_handler(sef_cb_signal_handler);
/* Enable support for live update. */
blockdriver_mt_support_lu();
sef_startup();
}
int
main(int argc, char **argv)
{
env_setargs(argc, argv);
sef_local_startup();
blockdriver_mt_task(&virtio_blk_dtab);
dprintf(("Terminating"));
virtio_blk_cleanup();
return OK;
}