minix/drivers/virtio_blk/virtio_blk.c
Commit 6700874deb by David van Moolenbroek (2014-02-19 11:22:15 +01:00):
Block drivers: make IOCTL request unsigned long

The block driver protocol and libblockdriver's bdr_ioctl hook are
changed, as well as the users of this hook. Other parts of the system
are expected to change accordingly eventually, since the ioctl(2)
prototype has been aligned with NetBSD's.

Change-Id: Ide46245b22cfa89ed267a38088fb0ab7696eba92


/*
* virtio block driver for MINIX 3
*
* Copyright (c) 2013, A. Welzel, <arne.welzel@gmail.com>
*
* This software is released under the BSD license. See the LICENSE file
* included in the main directory of this source distribution for the
* license terms and conditions.
*/
#include <assert.h>
#include <minix/drivers.h>
#include <minix/blockdriver_mt.h>
#include <minix/drvlib.h>
#include <minix/virtio.h>
#include <minix/sysutil.h>
#include <sys/ioc_disk.h>
#include "virtio_blk.h"
#define mystatus(tid) (status_vir[(tid)] & 0xFF)
#define dprintf(s) do { \
printf("%s: ", name); \
printf s; \
printf("\n"); \
} while (0)
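/* Call sites need double parentheses so that the format string and its
 * arguments form a single macro argument, e.g. dprintf(("r=%d", r)).
 */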
/* Number of threads to use */
#define VIRTIO_BLK_NUM_THREADS 4
/* virtio-blk blocksize is always 512 bytes */
#define VIRTIO_BLK_BLOCK_SIZE 512
static const char *const name = "virtio-blk";
/* static device handle */
static struct virtio_device *blk_dev;
static struct virtio_blk_config blk_config;
struct virtio_feature blkf[] = {
{ "barrier", VIRTIO_BLK_F_BARRIER, 0, 0 },
{ "sizemax", VIRTIO_BLK_F_SIZE_MAX, 0, 0 },
{ "segmax", VIRTIO_BLK_F_SEG_MAX, 0, 0 },
{ "geometry", VIRTIO_BLK_F_GEOMETRY, 0, 0 },
{ "read-only", VIRTIO_BLK_F_RO, 0, 0 },
{ "blocksize", VIRTIO_BLK_F_BLK_SIZE, 0, 0 },
{ "scsi", VIRTIO_BLK_F_SCSI, 0, 0 },
{ "flush", VIRTIO_BLK_F_FLUSH, 0, 0 },
{ "topology", VIRTIO_BLK_F_TOPOLOGY, 0, 0 },
{ "idbytes", VIRTIO_BLK_ID_BYTES, 0, 0 }
};
/* State information */
static int spurious_interrupt = 0;
static int terminating = 0;
static int open_count = 0;
/* Partition state for the single drive */
struct device part[DEV_PER_DRIVE];
struct device subpart[SUB_PER_DRIVE];
/* Headers for requests */
static struct virtio_blk_outhdr *hdrs_vir;
static phys_bytes hdrs_phys;
/* Status bytes for requests.
*
 * Usually a status is only one byte in length, but bit 0 of each
 * entry's physical address doubles as the write flag, so entries must
 * sit at even addresses. For this reason we use u16_t entries and mask
 * out the lower byte later.
*/
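/* Sketch of the convention (illustrative only):
 *
 *	phys.vp_addr = status_phys | 1;		-- device may write here
 *	st = status_vir[tid] & 0xFF;		-- what mystatus(tid) does
 */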
static u16_t *status_vir;
static phys_bytes status_phys;
/* Prototypes */
static int virtio_blk_open(devminor_t minor, int access);
static int virtio_blk_close(devminor_t minor);
static ssize_t virtio_blk_transfer(devminor_t minor, int write, u64_t position,
endpoint_t endpt, iovec_t *iovec,
unsigned int cnt, int flags);
static int virtio_blk_ioctl(devminor_t minor, unsigned long req,
endpoint_t endpt, cp_grant_id_t grant, endpoint_t user_endpt);
static struct device * virtio_blk_part(devminor_t minor);
static void virtio_blk_geometry(devminor_t minor, struct part_geom *entry);
static void virtio_blk_device_intr(void);
static void virtio_blk_spurious_intr(void);
static void virtio_blk_intr(unsigned int irqs);
static int virtio_blk_device(devminor_t minor, device_id_t *id);
static int virtio_blk_flush(void);
static void virtio_blk_terminate(void);
static void virtio_blk_cleanup(void);
static int virtio_blk_status2error(u8_t status);
static int virtio_blk_alloc_requests(void);
static void virtio_blk_free_requests(void);
static int virtio_blk_feature_setup(void);
static int virtio_blk_config(void);
static int virtio_blk_probe(int skip);
/* libblockdriver driver tab */
static struct blockdriver virtio_blk_dtab = {
.bdr_type = BLOCKDRIVER_TYPE_DISK,
.bdr_open = virtio_blk_open,
.bdr_close = virtio_blk_close,
.bdr_transfer = virtio_blk_transfer,
.bdr_ioctl = virtio_blk_ioctl,
.bdr_part = virtio_blk_part,
.bdr_geometry = virtio_blk_geometry,
.bdr_intr = virtio_blk_intr,
.bdr_device = virtio_blk_device
};
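/* blockdriver_mt_task() (called from main() below) runs the message loop
 * and dispatches incoming block requests to the hooks above from a pool
 * of worker threads.
 */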
static int
virtio_blk_open(devminor_t minor, int access)
{
struct device *dev = virtio_blk_part(minor);
/* Check if this device exists */
if (!dev)
return ENXIO;
	/* A read-only device must not be opened for writing */
if ((access & BDEV_W_BIT) &&
virtio_host_supports(blk_dev, VIRTIO_BLK_F_RO))
return EACCES;
	/* Read the partition table on the first open, or on a re-open
	 * after the device was fully closed
	 */
if (open_count == 0) {
memset(part, 0, sizeof(part));
memset(subpart, 0, sizeof(subpart));
part[0].dv_size = blk_config.capacity * VIRTIO_BLK_BLOCK_SIZE;
partition(&virtio_blk_dtab, 0, P_PRIMARY, 0 /* ATAPI */);
blockdriver_mt_set_workers(0, VIRTIO_BLK_NUM_THREADS);
}
open_count++;
return OK;
}
static int
virtio_blk_close(devminor_t minor)
{
struct device *dev = virtio_blk_part(minor);
/* Check if this device exists */
if (!dev)
return ENXIO;
if (open_count == 0) {
dprintf(("Closing one too many times?"));
return EINVAL;
}
open_count--;
	/* If fully closed, flush the device and set workers to 1 */
if (open_count == 0) {
virtio_blk_flush();
blockdriver_mt_set_workers(0, 1);
}
/* If supposed to terminate and fully closed, do it! */
if (terminating && open_count == 0)
virtio_blk_terminate();
return OK;
}
static int
prepare_bufs(struct vumap_vir *vir, struct vumap_phys *phys, int cnt, int w)
{
	for (int i = 0; i < cnt; i++) {
		/* Reject byte (odd) aligned buffers; the lowest address
		 * bit is reserved for the write flag set below.
		 */
		if (phys[i].vp_addr & 1) {
			dprintf(("byte aligned %08lx", phys[i].vp_addr));
			return EINVAL;
		}
/* Check if the buffer is good */
if (phys[i].vp_size != vir[i].vv_size) {
dprintf(("Non-contig buf %08lx", phys[i].vp_addr));
return EINVAL;
}
		/* For a write request the device only needs to read the
		 * buffers; for a read request it must write them, which
		 * is signaled via bit 0 of the address.
		 */
phys[i].vp_addr |= !w;
}
return OK;
}
static int
prepare_vir_vec(endpoint_t endpt, struct vumap_vir *vir, iovec_s_t *iv,
int cnt, vir_bytes *size)
{
/* This is pretty much the same as sum_iovec from AHCI,
* except that we don't support any iovecs where the size
* is not a multiple of 512
*/
vir_bytes s, total = 0;
for (int i = 0; i < cnt; i++) {
s = iv[i].iov_size;
if (s == 0 || (s % VIRTIO_BLK_BLOCK_SIZE) || s > LONG_MAX) {
dprintf(("bad iv[%d].iov_size (%lu) from %d", i, s,
endpt));
return EINVAL;
}
total += s;
if (total > LONG_MAX) {
dprintf(("total overflow from %d", endpt));
return EINVAL;
}
if (endpt == SELF)
vir[i].vv_addr = (vir_bytes)iv[i].iov_grant;
else
vir[i].vv_grant = iv[i].iov_grant;
vir[i].vv_size = iv[i].iov_size;
}
*size = total;
return OK;
}
static ssize_t
virtio_blk_transfer(devminor_t minor, int write, u64_t position,
endpoint_t endpt, iovec_t *iovec, unsigned int cnt,
int flags)
{
/* Need to translate vir to phys */
struct vumap_vir vir[NR_IOREQS];
/* Physical addresses of buffers, including header and trailer */
struct vumap_phys phys[NR_IOREQS + 2];
/* Which thread is doing the transfer? */
thread_id_t tid = blockdriver_mt_get_tid();
vir_bytes size = 0;
vir_bytes size_tmp = 0;
struct device *dv;
u64_t sector;
u64_t end_part;
int r, pcnt = sizeof(phys) / sizeof(phys[0]);
iovec_s_t *iv = (iovec_s_t *)iovec;
int access = write ? VUA_READ : VUA_WRITE;
/* Make sure we don't touch this one anymore */
iovec = NULL;
if (cnt > NR_IOREQS)
return EINVAL;
/* position greater than capacity? */
if (position >= blk_config.capacity * VIRTIO_BLK_BLOCK_SIZE)
return 0;
dv = virtio_blk_part(minor);
/* Does device exist? */
if (!dv)
return ENXIO;
position += dv->dv_base;
end_part = dv->dv_base + dv->dv_size;
	/* AHCI goes to some lengths to fix up unaligned accesses; we
	 * simply require everything to be sector (512-byte) aligned.
	 */
if (position % VIRTIO_BLK_BLOCK_SIZE) {
dprintf(("Non sector-aligned access %016llx", position));
return EINVAL;
}
sector = position / VIRTIO_BLK_BLOCK_SIZE;
r = prepare_vir_vec(endpt, vir, iv, cnt, &size);
if (r != OK)
return r;
if (position >= end_part)
return 0;
	/* Truncate the transfer if it extends beyond the partition */
if (position + size > end_part - 1) {
size = end_part - position;
/* Fix up later */
size_tmp = 0;
cnt = 0;
} else {
/* Use all buffers */
size_tmp = size;
}
/* Fix up the number of vectors if size was truncated */
while (size_tmp < size)
size_tmp += vir[cnt++].vv_size;
/* If the last vector was too big, just truncate it */
if (size_tmp > size) {
		vir[cnt - 1].vv_size -= size_tmp - size;
size_tmp -= (size_tmp - size);
}
if (size % VIRTIO_BLK_BLOCK_SIZE) {
dprintf(("non-sector sized read (%lu) from %d", size, endpt));
return EINVAL;
}
/* Map vir to phys */
if ((r = sys_vumap(endpt, vir, cnt, 0, access,
&phys[1], &pcnt)) != OK) {
dprintf(("Unable to map memory from %d (%d)", endpt, r));
return r;
}
/* Prepare the header */
memset(&hdrs_vir[tid], 0, sizeof(hdrs_vir[0]));
if (write)
hdrs_vir[tid].type = VIRTIO_BLK_T_OUT;
else
hdrs_vir[tid].type = VIRTIO_BLK_T_IN;
hdrs_vir[tid].ioprio = 0;
hdrs_vir[tid].sector = sector;
/* First the header */
phys[0].vp_addr = hdrs_phys + tid * sizeof(hdrs_vir[0]);
phys[0].vp_size = sizeof(hdrs_vir[0]);
/* Put the physical buffers into phys */
if ((r = prepare_bufs(vir, &phys[1], pcnt, write)) != OK)
return r;
/* Put the status at the end */
phys[pcnt + 1].vp_addr = status_phys + tid * sizeof(status_vir[0]);
phys[pcnt + 1].vp_size = sizeof(u8_t);
/* Status always needs write access */
	phys[pcnt + 1].vp_addr |= 1;
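	/* The chain handed to the device now looks like this:
	 *   phys[0]          request header (read by the device)
	 *   phys[1..pcnt]    data buffers (read on write, written on read)
	 *   phys[pcnt + 1]   status byte (written by the device)
	 */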
/* Send addresses to queue */
virtio_to_queue(blk_dev, 0, phys, 2 + pcnt, &tid);
/* Wait for completion */
blockdriver_mt_sleep();
/* All was good */
if (mystatus(tid) == VIRTIO_BLK_S_OK)
return size;
/* Error path */
dprintf(("ERROR status=%02x sector=%llu len=%lx cnt=%d op=%s t=%d",
mystatus(tid), sector, size, pcnt,
write ? "write" : "read", tid));
return virtio_blk_status2error(mystatus(tid));
}
static int
virtio_blk_ioctl(devminor_t minor, unsigned long req, endpoint_t endpt,
cp_grant_id_t grant, endpoint_t UNUSED(user_endpt))
{
switch (req) {
case DIOCOPENCT:
return sys_safecopyto(endpt, grant, 0,
(vir_bytes) &open_count, sizeof(open_count));
case DIOCFLUSH:
return virtio_blk_flush();
}
return ENOTTY;
}
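/* For illustration only (hypothetical caller, not part of this driver):
 * a user process reaches this hook through ioctl(2) on the device node,
 * along the lines of
 *
 *	int fd = open("/dev/c0d0", O_RDWR);	-- device name may vary
 *	if (ioctl(fd, DIOCFLUSH, NULL) != 0)
 *		perror("DIOCFLUSH");
 */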
static struct device *
virtio_blk_part(devminor_t minor)
{
	/* There's only a single drive attached to this device, always.
	 * Let's take some shortcuts...
*/
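	/* Illustrative minor layout, assuming the standard MINIX disk
	 * numbering (DEV_PER_DRIVE = 5, SUB_PER_DRIVE = 16):
	 *   0                    whole drive (d0)
	 *   1..4                 primary partitions (d0p0..d0p3)
	 *   MINOR_d0p0s0..+15    subpartitions (d0p0s0..d0p3s3)
	 */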
/* Take care of d0 d0p0 ... */
if (minor >= 0 && minor < DEV_PER_DRIVE)
return &part[minor];
/* subparts start at MINOR_d0p0s0 */
if (minor >= MINOR_d0p0s0) {
minor -= MINOR_d0p0s0;
/* Only for the first disk */
if (minor >= SUB_PER_DRIVE)
return NULL;
return &subpart[minor];
}
return NULL;
}
static void
virtio_blk_geometry(devminor_t minor, struct part_geom *entry)
{
/* Only for the drive */
if (minor != 0)
return;
/* Only if the host supports it */
	if (!virtio_host_supports(blk_dev, VIRTIO_BLK_F_GEOMETRY))
return;
entry->cylinders = blk_config.geometry.cylinders;
entry->heads = blk_config.geometry.heads;
entry->sectors = blk_config.geometry.sectors;
}
static void
virtio_blk_device_intr(void)
{
thread_id_t *tid;
/* Multiple requests might have finished */
while (!virtio_from_queue(blk_dev, 0, (void**)&tid))
blockdriver_mt_wakeup(*tid);
}
static void
virtio_blk_spurious_intr(void)
{
/* Output a single message about spurious interrupts */
if (spurious_interrupt)
return;
dprintf(("Got spurious interrupt"));
spurious_interrupt = 1;
}
static void
virtio_blk_intr(unsigned int irqs)
{
if (virtio_had_irq(blk_dev))
virtio_blk_device_intr();
else
virtio_blk_spurious_intr();
virtio_irq_enable(blk_dev);
}
static int
virtio_blk_device(devminor_t minor, device_id_t *id)
{
struct device *dev = virtio_blk_part(minor);
/* Check if this device exists */
if (!dev)
return ENXIO;
*id = 0;
return OK;
}
static int
virtio_blk_flush(void)
{
struct vumap_phys phys[2];
size_t phys_cnt = sizeof(phys) / sizeof(phys[0]);
/* Which thread is doing this request? */
thread_id_t tid = blockdriver_mt_get_tid();
/* Host may not support flushing */
if (!virtio_host_supports(blk_dev, VIRTIO_BLK_F_FLUSH))
return EOPNOTSUPP;
/* Prepare the header */
memset(&hdrs_vir[tid], 0, sizeof(hdrs_vir[0]));
hdrs_vir[tid].type = VIRTIO_BLK_T_FLUSH;
/* Let this be a barrier if the host supports it */
if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_BARRIER))
hdrs_vir[tid].type |= VIRTIO_BLK_T_BARRIER;
/* Header and status for the queue */
phys[0].vp_addr = hdrs_phys + tid * sizeof(hdrs_vir[0]);
phys[0].vp_size = sizeof(hdrs_vir[0]);
phys[1].vp_addr = status_phys + tid * sizeof(status_vir[0]);
phys[1].vp_size = 1;
/* Status always needs write access */
phys[1].vp_addr |= 1;
/* Send flush request to queue */
virtio_to_queue(blk_dev, 0, phys, phys_cnt, &tid);
blockdriver_mt_sleep();
/* All was good */
if (mystatus(tid) == VIRTIO_BLK_S_OK)
return OK;
/* Error path */
dprintf(("ERROR status=%02x op=flush t=%d", mystatus(tid), tid));
return virtio_blk_status2error(mystatus(tid));
}
static void
virtio_blk_terminate(void)
{
/* Don't terminate if still opened */
if (open_count > 0)
return;
blockdriver_mt_terminate();
}
static void
virtio_blk_cleanup(void)
{
	/* Free the memory we allocated and release the device */
virtio_blk_free_requests();
virtio_reset_device(blk_dev);
virtio_free_queues(blk_dev);
virtio_free_device(blk_dev);
blk_dev = NULL;
}
static int
virtio_blk_status2error(u8_t status)
{
/* Convert a status from the host to an error */
switch (status) {
case VIRTIO_BLK_S_IOERR:
return EIO;
case VIRTIO_BLK_S_UNSUPP:
return ENOTSUP;
default:
panic("%s: unknown status: %02x", name, status);
}
/* Never reached */
return OK;
}
static int
virtio_blk_alloc_requests(void)
{
/* Allocate memory for request headers and status field */
hdrs_vir = alloc_contig(VIRTIO_BLK_NUM_THREADS * sizeof(hdrs_vir[0]),
AC_ALIGN4K, &hdrs_phys);
if (!hdrs_vir)
return ENOMEM;
status_vir = alloc_contig(VIRTIO_BLK_NUM_THREADS * sizeof(status_vir[0]),
AC_ALIGN4K, &status_phys);
if (!status_vir) {
free_contig(hdrs_vir, VIRTIO_BLK_NUM_THREADS * sizeof(hdrs_vir[0]));
return ENOMEM;
}
return OK;
}
static void
virtio_blk_free_requests(void)
{
free_contig(hdrs_vir, VIRTIO_BLK_NUM_THREADS * sizeof(hdrs_vir[0]));
free_contig(status_vir, VIRTIO_BLK_NUM_THREADS * sizeof(status_vir[0]));
}
static int
virtio_blk_feature_setup(void)
{
/* Feature setup for virtio-blk
*
* FIXME: Besides the geometry, everything is just debug output
* FIXME2: magic numbers
*/
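	/* For reference, the offsets below follow the layout of struct
	 * virtio_blk_config in the virtio specification (little-endian):
	 *   0  capacity (u64)    8  size_max (u32)   12 seg_max (u32)
	 *   16 cylinders (u16)   18 heads (u8)       19 sectors (u8)
	 *   20 blk_size (u32)
	 */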
if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_SEG_MAX)) {
blk_config.seg_max = virtio_sread32(blk_dev, 12);
dprintf(("Seg Max: %d", blk_config.seg_max));
}
if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_GEOMETRY)) {
blk_config.geometry.cylinders = virtio_sread16(blk_dev, 16);
blk_config.geometry.heads = virtio_sread8(blk_dev, 18);
blk_config.geometry.sectors = virtio_sread8(blk_dev, 19);
dprintf(("Geometry: cyl=%d heads=%d sectors=%d",
blk_config.geometry.cylinders,
blk_config.geometry.heads,
blk_config.geometry.sectors));
}
if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_SIZE_MAX))
dprintf(("Has size max"));
if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_FLUSH))
dprintf(("Supports flushing"));
if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_BLK_SIZE)) {
blk_config.blk_size = virtio_sread32(blk_dev, 20);
dprintf(("Block Size: %d", blk_config.blk_size));
}
if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_BARRIER))
dprintf(("Supports barrier"));
return 0;
}
static int
virtio_blk_config(void)
{
u32_t sectors_low, sectors_high, size_mbs;
/* capacity is always there */
sectors_low = virtio_sread32(blk_dev, 0);
sectors_high = virtio_sread32(blk_dev, 4);
blk_config.capacity = ((u64_t)sectors_high << 32) | sectors_low;
/* If this gets truncated, you have a big disk... */
size_mbs = (u32_t)(blk_config.capacity * 512 / 1024 / 1024);
dprintf(("Capacity: %d MB", size_mbs));
/* do feature setup */
virtio_blk_feature_setup();
return 0;
}
static int
virtio_blk_probe(int skip)
{
int r;
/* sub device id for virtio-blk is 0x0002 */
blk_dev = virtio_setup_device(0x0002, name, blkf,
sizeof(blkf) / sizeof(blkf[0]),
VIRTIO_BLK_NUM_THREADS, skip);
if (!blk_dev)
return ENXIO;
/* virtio-blk has one queue only */
if ((r = virtio_alloc_queues(blk_dev, 1)) != OK) {
virtio_free_device(blk_dev);
return r;
}
/* Allocate memory for headers and status */
	if ((r = virtio_blk_alloc_requests()) != OK) {
virtio_free_queues(blk_dev);
virtio_free_device(blk_dev);
return r;
}
virtio_blk_config();
	/* Let the host know that we are ready */
virtio_device_ready(blk_dev);
virtio_irq_enable(blk_dev);
return OK;
}
static int
sef_cb_init_fresh(int type, sef_init_info_t *info)
{
long instance = 0;
int r;
env_parse("instance", "d", 0, &instance, 0, 255);
if ((r = virtio_blk_probe((int)instance)) == OK) {
blockdriver_announce(type);
return OK;
}
/* Error path */
if (r == ENXIO)
panic("%s: No device found", name);
if (r == ENOMEM)
panic("%s: Not enough memory", name);
panic("%s: Unexpected failure (%d)", name, r);
}
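/* Illustrative only; the exact invocation depends on the system setup:
 * a second driver instance would be started with the instance argument
 * set, along the lines of
 *	service up /service/virtio_blk -args instance=1
 */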
static void
sef_cb_signal_handler(int signo)
{
/* Ignore all signals but SIGTERM */
if (signo != SIGTERM)
return;
terminating = 1;
virtio_blk_terminate();
	/* The blockdriver_mt_task() loop is only left after a request
	 * has been completed, which never happens when a signal arrives.
	 * Therefore, if the device is already fully closed, call exit()
	 * directly.
	 */
if (open_count == 0)
exit(0);
}
static void
sef_local_startup(void)
{
sef_setcb_init_fresh(sef_cb_init_fresh);
sef_setcb_init_lu(sef_cb_init_fresh);
sef_setcb_signal_handler(sef_cb_signal_handler);
sef_startup();
}
int
main(int argc, char **argv)
{
env_setargs(argc, argv);
sef_local_startup();
blockdriver_mt_task(&virtio_blk_dtab);
dprintf(("Terminating"));
virtio_blk_cleanup();
return OK;
}