virtio: generic virtio library

This commit is contained in:
Arne Welzel 2012-12-15 13:47:37 +01:00 committed by Ben Gras
parent dcff984d6a
commit a58c735738
6 changed files with 1235 additions and 2 deletions

View file

@ -20,6 +20,7 @@ INCS+= acpi.h audio_fw.h bitmap.h \
syslib.h sysutil.h termios.h timers.h type.h \
tty.h u64.h usb.h usb_ch9.h vbox.h \
vboxfs.h vboxif.h vboxtype.h vm.h \
vfsif.h vtreefs.h libminixfs.h netsock.h
vfsif.h vtreefs.h libminixfs.h netsock.h \
virtio.h
.include <bsd.kinc.mk>

139
include/minix/virtio.h Normal file
View file

@ -0,0 +1,139 @@
/*
* Generic virtio library for MINIX 3
*
* Copyright (c) 2013, A. Welzel, <arne.welzel@gmail.com>
*
* This software is released under the BSD license. See the LICENSE file
* included in the main directory of this source distribution for the
* license terms and conditions.
*/
#ifndef _MINIX_VIRTIO_H
#define _MINIX_VIRTIO_H 1
#include <sys/types.h>
#define VIRTIO_VENDOR_ID 0x1AF4
#define VIRTIO_HOST_F_OFF 0x0000
#define VIRTIO_GUEST_F_OFF 0x0004
#define VIRTIO_QADDR_OFF 0x0008
#define VIRTIO_QSIZE_OFF 0x000C
#define VIRTIO_QSEL_OFF 0x000E
#define VIRTIO_QNOTFIY_OFF 0x0010
#define VIRTIO_DEV_STATUS_OFF 0x0012
#define VIRTIO_ISR_STATUS_OFF 0x0013
#define VIRTIO_DEV_SPECIFIC_OFF 0x0014
/* if msi is enabled, device specific headers shift by 4 */
#define VIRTIO_MSI_ADD_OFF 0x0004
#define VIRTIO_STATUS_ACK 0x01
#define VIRTIO_STATUS_DRV 0x02
#define VIRTIO_STATUS_DRV_OK 0x04
#define VIRTIO_STATUS_FAIL 0x80
/*
 * Feature description.
 *
 * A driver hands an array of these to virtio_setup_device(). The library
 * reads guest_support to build the guest feature word and fills in
 * host_support from the host feature word.
 */
struct virtio_feature {
/* label for the feature; presumably only used for diagnostics */
const char *name;
/* bit position of the feature in the 32 bit feature words */
u8_t bit;
/* set by the library: 1 if the host offers the feature, 0 otherwise */
u8_t host_support;
/* set by the driver: non-zero if the driver wants to use the feature */
u8_t guest_support;
};
/* Forward declaration of struct virtio_device.
*
* This structure is opaque to the caller.
*/
struct virtio_device;
/*
 * Find a virtio device with PCI subsystem device id subdevid and prepare
 * it for use (reset, acknowledge, feature exchange).
 *
 * name          - label used in diagnostics; the pointer is stored, not copied
 * features      - table of feature_count feature descriptions; host_support
 *                 of every entry is filled in by this call
 * feature_count - number of entries in features
 * threads       - maximum number of threads using the device; must be > 0
 * skip          - number of matching devices to skip before picking one
 *
 * Returns a pointer to an opaque virtio_device instance, or NULL if the
 * arguments are invalid, no such device exists or initialization failed.
 */
struct virtio_device *virtio_setup_device(u16_t subdevid,
const char *name,
struct virtio_feature *features,
int feature_count,
int threads, int skip);
/*
 * Attempt to allocate and initialize the memory for num_queues queues and
 * tell the host where each queue lives.
 *
 * Returns OK on success. EINVAL is returned for a num_queues outside
 * 0..256; other non-OK values indicate an allocation or setup failure.
 */
int virtio_alloc_queues(struct virtio_device *dev, int num_queues);
/* Register the IRQ policy and indicate to the host we are ready to go */
void virtio_device_ready(struct virtio_device *dev);
/* Unregister the IRQ and reset the device */
void virtio_reset_device(struct virtio_device *dev);
/* Free the memory used by all queues */
void virtio_free_queues(struct virtio_device *dev);
/* Free all memory allocated for the device (except the queue memory,
* which has to be freed before with virtio_free_queues()).
*
* Don't touch the device afterwards! This is like free(dev).
*/
void virtio_free_device(struct virtio_device *dev);
/* Feature helpers */
int virtio_guest_supports(struct virtio_device *dev, int bit);
int virtio_host_supports(struct virtio_device *dev, int bit);
/*
* Use num vumap_phys elements and chain these as vring_desc elements
* into the vring.
*
* Kick the queue if needed.
*
* data is opaque and returned by virtio_from_queue() when the host
* processed the descriptor chain.
*
* Note: The last bit of vp_addr is used to flag whether an iovec is
* writable. This implies that only word aligned buffers can be
* used.
*/
int virtio_to_queue(struct virtio_device *dev, int qidx,
struct vumap_phys *bufs, size_t num, void *data);
/*
* If the host used a chain of descriptors, return 0 and set data
* as was given to virtio_to_queue(). If the host has not processed
* any element returns -1.
*/
int virtio_from_queue(struct virtio_device *dev, int qidx, void **data);
/* IRQ related functions */
void virtio_irq_enable(struct virtio_device *dev);
void virtio_irq_disable(struct virtio_device *dev);
/* Checks the ISR field of the device and returns true if
* the interrupt was for this device.
*/
int virtio_had_irq(struct virtio_device *dev);
u32_t virtio_read32(struct virtio_device *dev, off_t offset);
u16_t virtio_read16(struct virtio_device *dev, off_t offset);
u8_t virtio_read8(struct virtio_device *dev, off_t offset);
void virtio_write32(struct virtio_device *dev, off_t offset, u32_t val);
void virtio_write16(struct virtio_device *dev, off_t offset, u16_t val);
void virtio_write8(struct virtio_device *dev, off_t offset, u8_t val);
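/*
* Device specific accessors address the device specific configuration
* area. That area starts at offset 20 (VIRTIO_DEV_SPECIFIC_OFF) and is
* shifted by another 4 bytes (VIRTIO_MSI_ADD_OFF) if MSI is enabled.
*
* Something like:
* read(off) --> readX(20 + (msi ? 4 : 0) + off)
*/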
u32_t virtio_sread32(struct virtio_device *dev, off_t offset);
u16_t virtio_sread16(struct virtio_device *dev, off_t offset);
u8_t virtio_sread8(struct virtio_device *dev, off_t offset);
void virtio_swrite32(struct virtio_device *dev, off_t offset, u32_t val);
void virtio_swrite16(struct virtio_device *dev, off_t offset, u16_t val);
void virtio_swrite8(struct virtio_device *dev, off_t offset, u8_t val);
#endif /* _MINIX_VIRTIO_H */

View file

@ -46,7 +46,7 @@ SUBDIR+= libcompat_minix libblockdriver libchardriver \
libnetsock libpuffs libsffs
.if (${MACHINE} == "i386")
SUBDIR += libvassert libhgfs libvboxfs
SUBDIR += libvassert libhgfs libvboxfs libvirtio
.endif
.if (${MKRUMP} != "no")

8
lib/libvirtio/Makefile Normal file
View file

@ -0,0 +1,8 @@
# Makefile for libvirtio
.include <bsd.own.mk>
LIB= virtio
SRCS= virtio.c
.include <bsd.lib.mk>

902
lib/libvirtio/virtio.c Normal file
View file

@ -0,0 +1,902 @@
/*
* Generic virtio library for MINIX 3
*
* Copyright (c) 2013, A. Welzel, <arne.welzel@gmail.com>
*
* This software is released under the BSD license. See the LICENSE file
* included in the main directory of this source distribution for the
* license terms and conditions.
*/
#define _SYSTEM 1
#include <assert.h>
#include <errno.h> /* for OK... */
#include <string.h> /* memset() */
#include <stdlib.h> /* malloc() */
#include <machine/pci.h> /* PCI_ILR, PCI_BAR... */
#include <machine/vmparam.h> /* PAGE_SIZE */
#include <minix/syslib.h> /* umap, vumap, alloc_..*/
#include <minix/sysutil.h> /* panic(), at least */
#include <minix/virtio.h> /* virtio system include */
#include "virtio_ring.h" /* virtio types / helper */
/*
* About indirect descriptors:
*
* For each possible thread, a single indirect descriptor table is allocated.
* If using direct descriptors would lead to the situation that another thread
* might not be able to add another descriptor to the ring, indirect descriptors
* are used.
*
* Indirect descriptors are pre-allocated. Each alloc_contig() call involves a
* kernel call which is critical for performance.
*
* The size of indirect descriptor tables is chosen based on MAPVEC_NR. A driver
* using this library should never add more than
*
* MAPVEC_NR + MAPVEC_NR / 2
*
* descriptors to a queue, as this represents the maximum size of an indirect
* descriptor table.
*/
/* Book-keeping for one pre-allocated indirect descriptor table */
struct indirect_desc_table {
int in_use; /* non-zero while a ring descriptor refers to this table */
struct vring_desc *descs; /* virtual address of the table */
phys_bytes paddr; /* physical address, stored in the ring descriptor */
size_t len; /* size of the table in bytes */
};
/*
 * State of a single virtqueue.
 *
 * Unused descriptors are kept in a list chained through their next fields,
 * running from free_head to free_tail.
 */
struct virtio_queue {
void *vaddr; /* virtual addr of ring */
phys_bytes paddr; /* physical addr of ring */
u32_t page; /* physical guest page (paddr / PAGE_SIZE) */
u16_t num; /* number of descriptors, as reported by the host */
u32_t ring_size; /* size of ring in bytes */
struct vring vring; /* pointers into the ring memory at vaddr */
u16_t free_num; /* number of free descriptors */
u16_t free_head; /* next free descriptor */
u16_t free_tail; /* last free descriptor */
u16_t last_used; /* next slot of the used ring to look at */
void **data; /* caller cookies, indexed by head descriptor */
};
/* Per-device state; opaque to users of the library */
struct virtio_device {
const char *name; /* for debugging; points to the caller's string */
u16_t port; /* io port base of the device registers */
struct virtio_feature *features; /* host / guest features (caller's table) */
u8_t num_features; /* entries in features, max 32 */
struct virtio_queue *queues; /* our queues, num_queues entries */
u16_t num_queues;
int irq; /* interrupt line */
int irq_hook; /* hook id */
int msi; /* is MSI enabled? */
int threads; /* max number of threads */
struct indirect_desc_table *indirect; /* indirect descriptor tables */
int num_indirect; /* entries in indirect, one per thread */
};
/* Forward declarations of the helpers that are private to this file */
static int is_matching_device(u16_t expected_sdid, u16_t vid, u16_t sdid);
static int init_device(int devind, struct virtio_device *dev);
static int init_phys_queues(struct virtio_device *dev);
static int exchange_features(struct virtio_device *dev);
static int alloc_phys_queue(struct virtio_queue *q);
static void free_phys_queue(struct virtio_queue *q);
static void init_phys_queue(struct virtio_queue *q);
static int init_indirect_desc_table(struct indirect_desc_table *desc);
static int init_indirect_desc_tables(struct virtio_device *dev);
static void virtio_irq_register(struct virtio_device *dev);
static void virtio_irq_unregister(struct virtio_device *dev);
static int wants_kick(struct virtio_queue *q);
static void kick_queue(struct virtio_device *dev, int qidx);
/*
 * Find the (skip + 1)-th virtio PCI device whose subsystem device id is
 * subdevid, reset and acknowledge it, exchange features and allocate the
 * indirect descriptor tables.
 *
 * name is stored (not copied) and used in diagnostics. features points to
 * num_features entries whose host_support fields are filled in. threads is
 * the maximum number of threads that will use the device and determines the
 * number of indirect descriptor tables.
 *
 * Returns the new device, or NULL if the arguments are bogus, no matching
 * device exists or any initialization step fails.
 */
struct virtio_device *
virtio_setup_device(u16_t subdevid, const char *name,
	struct virtio_feature *features, int num_features,
	int threads, int skip)
{
	int r, devind;
	u16_t vid, did, sdid;
	struct virtio_device *ret;

	/* bogus values? */
	if (skip < 0 || name == NULL || num_features < 0 || threads <= 0)
		return NULL;

	/* The count is kept in a u8_t (documented maximum: 32) and the
	 * table is walked during the feature exchange, so refuse counts
	 * that would be truncated and tables that do not exist.
	 */
	if (num_features > 32 || (features == NULL && num_features > 0))
		return NULL;

	pci_init();

	r = pci_first_dev(&devind, &vid, &did);

	while (r > 0) {
		sdid = pci_attr_r16(devind, PCI_SUBDID);
		if (is_matching_device(subdevid, vid, sdid)) {
			/* this is the device we are looking for */
			if (skip == 0)
				break;

			skip--;
		}

		r = pci_next_dev(&devind, &vid, &did);
	}

	/* pci_[first|next_dev()] return 0 if no device was found */
	if (r == 0 || skip > 0)
		return NULL;

	/* allocate and set known info about the device */
	ret = malloc(sizeof(*ret));

	if (ret == NULL)
		return NULL;

	/* Prepare virtio_device instance */
	memset(ret, 0, sizeof(*ret));
	ret->name = name;
	ret->features = features;
	ret->num_features = num_features;
	ret->threads = threads;
	/* see comment in the beginning of this file */
	ret->num_indirect = threads;

	if (init_device(devind, ret) != OK) {
		printf("%s: Could not initialize device\n", ret->name);
		goto err;
	}

	/* Ack the device */
	virtio_write8(ret, VIRTIO_DEV_STATUS_OFF, VIRTIO_STATUS_ACK);

	if (exchange_features(ret) != OK) {
		printf("%s: Could not exchange features\n", ret->name);
		goto err;
	}

	if (init_indirect_desc_tables(ret) != OK) {
		printf("%s: Could not initialize indirect tables\n", ret->name);
		goto err;
	}

	/* We know how to drive the device... */
	virtio_write8(ret, VIRTIO_DEV_STATUS_OFF, VIRTIO_STATUS_DRV);

	return ret;

/* Error path */
err:
	free(ret);
	return NULL;
}
/*
 * Reserve the PCI device devind, look up its I/O port base and interrupt
 * line and reset the device.
 *
 * Returns OK on success, the pci_get_bar() error code if the BAR could not
 * be read, or EINVAL if the BAR is not an I/O port that fits in 16 bits.
 */
static int
init_device(int devind, struct virtio_device *dev)
{
	u32_t base, size;
	int iof, r;

	pci_reserve(devind);

	/* Every diagnostic ends in a newline so consecutive messages do not
	 * run into each other.
	 */
	if ((r = pci_get_bar(devind, PCI_BAR, &base, &size, &iof)) != OK) {
		printf("%s: Could not get BAR (%d)\n", dev->name, r);
		return r;
	}

	if (!iof) {
		printf("%s: PCI not IO space?\n", dev->name);
		return EINVAL;
	}

	/* dev->port is only 16 bits wide */
	if (base & 0xFFFF0000) {
		printf("%s: IO port weird (%08x)\n", dev->name, base);
		return EINVAL;
	}

	/* store the I/O port */
	dev->port = base;

	/* Reset the device */
	virtio_write8(dev, VIRTIO_DEV_STATUS_OFF, 0);

	/* Read IRQ line */
	dev->irq = pci_attr_r8(devind, PCI_ILR);

	return OK;
}
static int
exchange_features(struct virtio_device *dev)
{
u32_t guest_features = 0, host_features = 0;
struct virtio_feature *f;
host_features = virtio_read32(dev, VIRTIO_HOST_F_OFF);
for (int i = 0; i < dev->num_features; i++) {
f = &dev->features[i];
/* prepare the features the driver supports */
guest_features |= (f->guest_support << f->bit);
/* just load the host feature int the struct */
f->host_support = ((host_features >> f->bit) & 1);
}
/* let the device know about our features */
virtio_write32(dev, VIRTIO_GUEST_F_OFF, guest_features);
return OK;
}
/*
 * Allocate the book-keeping array for num_queues queues and set up the
 * ring memory of every queue.
 *
 * Returns OK on success, EINVAL if num_queues is outside 0..256, ENOMEM if
 * the array cannot be allocated, or the error reported by
 * init_phys_queues(). On failure the device has no queues.
 */
int
virtio_alloc_queues(struct virtio_device *dev, int num_queues)
{
	int r = OK;

	assert(dev != NULL);

	/* Assume there's no device with more than 256 queues */
	if (num_queues < 0 || num_queues > 256)
		return EINVAL;

	/* allocate queue memory */
	dev->queues = malloc(num_queues * sizeof(dev->queues[0]));

	if (dev->queues == NULL)
		return ENOMEM;

	memset(dev->queues, 0, num_queues * sizeof(dev->queues[0]));
	dev->num_queues = num_queues;

	/* The assignment needs its own parentheses: without them r would
	 * receive the result of the comparison instead of the error code.
	 */
	if ((r = init_phys_queues(dev)) != OK) {
		printf("%s: Could not initialize queues (%d)\n", dev->name, r);
		free(dev->queues);
		dev->queues = NULL;
		dev->num_queues = 0;
	}

	return r;
}
/*
 * Set up all dev->num_queues queues: query the size of each queue from the
 * host, allocate and initialize its memory and tell the host the guest
 * physical page of the ring.
 *
 * Returns OK on success. If a queue size is not a power of two EINVAL is
 * returned, if an allocation fails the error of alloc_phys_queue() is
 * returned; in both cases the memory of all queues that had been set up
 * so far is released again.
 */
static int
init_phys_queues(struct virtio_device *dev)
{
	/* Initialize all queues */
	int i, j, r;
	struct virtio_queue *q;

	for (i = 0; i < dev->num_queues; i++) {
		q = &dev->queues[i];
		/* select the queue */
		virtio_write16(dev, VIRTIO_QSEL_OFF, i);
		q->num = virtio_read16(dev, VIRTIO_QSIZE_OFF);

		if (q->num & (q->num - 1)) {
			printf("%s: Queue %d num=%d not ^2\n", dev->name, i,
			    q->num);
			r = EINVAL;
			goto free_phys_queues;
		}

		if ((r = alloc_phys_queue(q)) != OK)
			goto free_phys_queues;

		init_phys_queue(q);

		/* Let the host know about the guest physical page */
		virtio_write32(dev, VIRTIO_QADDR_OFF, q->page);
	}

	return OK;

/* Error path */
free_phys_queues:
	/* Queue i itself holds no memory at this point; release the queues
	 * before it, i.e. index with j and not with i.
	 */
	for (j = 0; j < i; j++)
		free_phys_queue(&dev->queues[j]);

	return r;
}
/*
 * Allocate the ring memory and the cookie array of queue q; q->num has to
 * be set already.
 *
 * Returns OK, or ENOMEM if either allocation fails, in which case nothing
 * stays allocated.
 */
static int
alloc_phys_queue(struct virtio_queue *q)
{
	assert(q != NULL);

	/* The ring itself; its size follows from the number of descriptors */
	q->ring_size = vring_size(q->num, PAGE_SIZE);
	q->vaddr = alloc_contig(q->ring_size, AC_ALIGN4K, &q->paddr);

	if (q->vaddr == NULL)
		return ENOMEM;

	/* One caller cookie per descriptor */
	q->data = alloc_contig(sizeof(q->data[0]) * q->num, AC_ALIGN4K, NULL);

	if (q->data != NULL)
		return OK;

	/* No cookie array, so give the ring back as well */
	free_contig(q->vaddr, q->ring_size);
	q->vaddr = NULL;
	q->paddr = 0;

	return ENOMEM;
}
void
virtio_device_ready(struct virtio_device *dev)
{
assert(dev != NULL);
/* Register IRQ line */
virtio_irq_register(dev);
/* Driver is ready to go! */
virtio_write8(dev, VIRTIO_DEV_STATUS_OFF, VIRTIO_STATUS_DRV_OK);
}
/*
 * Release the memory of all queues of dev, including the array that
 * describes them. The device must have queues (see virtio_alloc_queues()).
 */
void
virtio_free_queues(struct virtio_device *dev)
{
	int i;

	assert(dev != NULL);
	assert(dev->queues != NULL);
	assert(dev->num_queues > 0);

	for (i = 0; i < dev->num_queues; i++)
		free_phys_queue(&dev->queues[i]);

	/* The array itself was obtained with malloc() when the queues were
	 * allocated; dropping the pointer without freeing it would leak it.
	 */
	free(dev->queues);

	dev->num_queues = 0;
	dev->queues = NULL;
}
/*
 * Release the ring memory and the cookie array of queue q and mark the
 * queue as empty.
 */
static void
free_phys_queue(struct virtio_queue *q)
{
	assert(q != NULL);
	assert(q->vaddr != NULL);

	free_contig(q->vaddr, q->ring_size);
	q->vaddr = NULL;
	q->paddr = 0;

	/* The cookie array holds one pointer per descriptor, so the size
	 * passed here has to be computed while q->num is still intact.
	 */
	free_contig(q->data, sizeof(q->data[0]) * q->num);
	q->data = NULL;
	q->num = 0;
}
static void
init_phys_queue(struct virtio_queue *q)
{
memset(q->vaddr, 0, q->ring_size);
memset(q->data, 0, sizeof(q->data[0]) * q->num);
/* physical page in guest */
q->page = q->paddr / PAGE_SIZE;
/* Set pointers in q->vring according to size */
vring_init(&q->vring, q->num, q->vaddr, PAGE_SIZE);
/* Everything's free at this point */
for (int i = 0; i < q->num; i++) {
q->vring.desc[i].flags = VRING_DESC_F_NEXT;
q->vring.desc[i].next = (i + 1) & (q->num - 1);
}
q->free_num = q->num;
q->free_head = 0;
q->free_tail = q->num - 1;
q->last_used = 0;
return;
}
/*
 * Release the indirect descriptor tables of dev and the device structure
 * itself. Queue memory is not touched. dev must not be used afterwards.
 */
void
virtio_free_device(struct virtio_device *dev)
{
	int i;
	struct indirect_desc_table *desc;

	/* All sanity checks come first, before dev->indirect is used */
	assert(dev != NULL);
	assert(dev->indirect != NULL);
	assert(dev->num_indirect > 0);

	for (i = 0; i < dev->num_indirect; i++) {
		desc = &dev->indirect[i];
		free_contig(desc->descs, desc->len);
	}

	dev->num_indirect = 0;

	free(dev->indirect);
	dev->indirect = NULL;

	free(dev);
}
/*
 * Allocate and zero one indirect descriptor table that has room for
 * MAPVEC_NR + MAPVEC_NR / 2 descriptors.
 *
 * Returns OK, or ENOMEM if the table cannot be allocated.
 */
static int
init_indirect_desc_table(struct indirect_desc_table *desc)
{
	desc->in_use = 0;
	desc->len = (MAPVEC_NR + MAPVEC_NR / 2) * sizeof(struct vring_desc);

	desc->descs = alloc_contig(desc->len, AC_ALIGN4K, &desc->paddr);

	/* Check the allocation before the memory is touched */
	if (desc->descs == NULL)
		return ENOMEM;

	memset(desc->descs, 0, desc->len);

	return OK;
}
/*
 * Allocate dev->num_indirect indirect descriptor tables.
 *
 * Returns OK on success. On failure ENOMEM (or the error of
 * init_indirect_desc_table()) is returned, everything allocated here is
 * released again and dev->indirect is NULL.
 */
static int
init_indirect_desc_tables(struct virtio_device *dev)
{
	int i, j, r;
	struct indirect_desc_table *desc;

	dev->indirect = malloc(dev->num_indirect * sizeof(dev->indirect[0]));

	if (dev->indirect == NULL) {
		printf("%s: Could not allocate indirect tables\n", dev->name);
		return ENOMEM;
	}

	memset(dev->indirect, 0, dev->num_indirect * sizeof(dev->indirect[0]));

	for (i = 0; i < dev->num_indirect; i++) {
		desc = &dev->indirect[i];
		if ((r = init_indirect_desc_table(desc)) != OK) {

			/* error path: release the tables set up so far */
			for (j = 0; j < i; j++) {
				desc = &dev->indirect[j];
				free_contig(desc->descs, desc->len);
			}

			free(dev->indirect);
			/* do not leave a dangling pointer behind */
			dev->indirect = NULL;

			return r;
		}
	}

	return OK;
}
/*
 * Turn the ring descriptor vd back into a plain descriptor and mark the
 * indirect descriptor table it referred to as unused.
 *
 * Panics if none of the tables of dev has the address stored in vd.
 */
static void
clear_indirect_table(struct virtio_device *dev, struct vring_desc *vd)
{
	int i;
	struct indirect_desc_table *desc;

	assert(vd->len > 0);
	assert(vd->flags & VRING_DESC_F_INDIRECT);
	vd->flags = vd->flags & ~VRING_DESC_F_INDIRECT;
	vd->len = 0;

	/* The table is identified by its physical address */
	for (i = 0; i < dev->num_indirect; i++) {
		desc = &dev->indirect[i];

		if (desc->paddr == vd->addr) {
			assert(desc->in_use);
			desc->in_use = 0;
			break;
		}
	}

	/* The format string expects the device name as its argument */
	if (i >= dev->num_indirect)
		panic("%s: Could not clear indirect descriptor table ",
		    dev->name);
}
static void inline
use_vring_desc(struct vring_desc *vd, struct vumap_phys *vp)
{
vd->addr = vp->vp_addr & ~1UL;
vd->len = vp->vp_size;
vd->flags = VRING_DESC_F_NEXT;
if (vp->vp_addr & 1)
vd->flags |= VRING_DESC_F_WRITE;
}
/*
 * Describe the num buffers in bufs through an indirect descriptor table.
 * Only the descriptor at q->free_head of the ring itself is consumed; it
 * points to the table.
 *
 * num must be at least 1 and the descriptors must fit into a table.
 * Panics if every table of dev is in use.
 */
static void
set_indirect_descriptors(struct virtio_device *dev, struct virtio_queue *q,
	struct vumap_phys *bufs, size_t num)
{
	/* Indirect descriptor tables are simply filled from left to right */
	int i;
	size_t n;
	struct indirect_desc_table *desc = NULL;
	struct vring *vring = &q->vring;
	struct vring_desc *vd, *ivd = NULL;

	/* An empty chain would leave no last descriptor to terminate */
	assert(num > 0);

	/* Find the first unused indirect descriptor table */
	for (i = 0; i < dev->num_indirect; i++) {
		desc = &dev->indirect[i];

		/* If an unused indirect descriptor table was found,
		 * mark it as being used and exit the loop.
		 */
		if (!desc->in_use) {
			desc->in_use = 1;
			break;
		}
	}

	/* Sanity check; the format string expects the device name */
	if (i >= dev->num_indirect)
		panic("%s: No indirect descriptor tables left", dev->name);

	/* The chain must not be longer than the table */
	assert(num * sizeof(desc->descs[0]) <= desc->len);

	/* For indirect descriptor tables, only a single descriptor from
	 * the main ring is used.
	 */
	vd = &vring->desc[q->free_head];
	vd->flags = VRING_DESC_F_INDIRECT;
	vd->addr = desc->paddr;
	vd->len = num * sizeof(desc->descs[0]);

	/* Initialize the descriptors in the indirect descriptor table */
	for (n = 0; n < num; n++) {
		ivd = &desc->descs[n];

		use_vring_desc(ivd, &bufs[n]);
		ivd->next = n + 1;
	}

	/* Unset the next bit of the last descriptor */
	ivd->flags = ivd->flags & ~VRING_DESC_F_NEXT;

	/* Update queue, only a single descriptor was used */
	q->free_num -= 1;
	q->free_head = vd->next;
}
/*
 * Describe the num buffers in bufs with descriptors taken directly from
 * the free list of q, starting at q->free_head.
 *
 * num must be at least 1; the caller makes sure enough descriptors are
 * free.
 */
static void
set_direct_descriptors(struct virtio_queue *q, struct vumap_phys *bufs,
	size_t num)
{
	u16_t i;
	size_t count;
	struct vring *vring = &q->vring;
	struct vring_desc *vd = NULL;

	/* Without a buffer there is no last descriptor to terminate */
	assert(num > 0);

	for (i = q->free_head, count = 0; count < num; count++) {

		/* The next free descriptor */
		vd = &vring->desc[i];

		/* The descriptor is linked in the free list, so
		 * it always has the next bit set.
		 */
		assert(vd->flags & VRING_DESC_F_NEXT);

		use_vring_desc(vd, &bufs[count]);

		/* The link to the next free descriptor is kept and becomes
		 * the link within the chain.
		 */
		i = vd->next;
	}

	/* Unset the next bit of the last descriptor */
	vd->flags = vd->flags & ~VRING_DESC_F_NEXT;

	/* Update queue */
	q->free_num -= num;
	q->free_head = i;
}
/*
 * Hand the num buffers in bufs to the host as one descriptor chain on
 * queue qidx and notify the host if it asked for notifications.
 *
 * Bit 0 of each vp_addr marks the buffer as writable for the host. data is
 * an opaque, non-NULL cookie that virtio_from_queue() returns once the host
 * is done with the chain.
 *
 * If using direct descriptors would leave fewer than dev->threads
 * descriptors free, an indirect descriptor table is used instead.
 *
 * Always returns 0; panics if data is NULL.
 */
int
virtio_to_queue(struct virtio_device *dev, int qidx, struct vumap_phys *bufs,
	size_t num, void *data)
{
	u16_t free_first;
	int left;
	struct virtio_queue *q;
	struct vring *vring;

	/* Valid indices are 0 .. num_queues - 1; check before indexing */
	assert(0 <= qidx && qidx < dev->num_queues);

	q = &dev->queues[qidx];
	vring = &q->vring;

	if (!data)
		panic("%s: NULL data received queue %d", dev->name, qidx);

	free_first = q->free_head;

	left = (int)q->free_num - (int)num;

	if (left < dev->threads)
		set_indirect_descriptors(dev, q, bufs, num);
	else
		set_direct_descriptors(q, bufs, num);

	/* Next index for host is old free_head */
	vring->avail->ring[vring->avail->idx % q->num] = free_first;

	/* Provided by the caller to identify this slot */
	q->data[free_first] = data;

	/* Make sure the host sees the new descriptors */
	__insn_barrier();

	/* advance last idx */
	vring->avail->idx += 1;

	/* Make sure the host sees the avail->idx */
	__insn_barrier();

	/* kick it! */
	kick_queue(dev, qidx);

	return 0;
}
/*
 * Fetch one descriptor chain the host is done with from queue qidx.
 *
 * If the host has used a chain, its descriptors are put back on the free
 * list, *data is set to the cookie that was passed to virtio_to_queue()
 * and 0 is returned. If there is nothing new, -1 is returned and *data is
 * left alone.
 *
 * Panics if the chain does not end within q->num descriptors.
 */
int
virtio_from_queue(struct virtio_device *dev, int qidx, void **data)
{
	struct virtio_queue *q;
	struct vring *vring;
	struct vring_used_elem *uel;
	struct vring_desc *vd;
	int count = 0;
	u16_t idx, head;
	u16_t used_idx;

	assert(0 <= qidx && qidx < dev->num_queues);

	q = &dev->queues[qidx];
	vring = &q->vring;

	/* Make sure we see changes done by the host */
	__insn_barrier();

	/* The index from the host */
	used_idx = vring->used->idx % q->num;

	/* We already saw this one, nothing to do here */
	if (q->last_used == used_idx)
		return -1;

	/* Get the vring_used element */
	uel = &q->vring.used->ring[q->last_used];

	/* Update the last used element */
	q->last_used = (q->last_used + 1) % q->num;

	/* Index of the head of the used chain. The same, range-limited
	 * index is used for the descriptor and for the cookie below.
	 */
	head = idx = uel->id % q->num;
	assert(q->data[head] != NULL);

	/* Get the descriptor */
	vd = &vring->desc[idx];

	/* Unconditionally set the tail->next to the first used one */
	assert(vring->desc[q->free_tail].flags & VRING_DESC_F_NEXT);
	vring->desc[q->free_tail].next = idx;

	/* Find the last index, eventually there has to be one
	 * without the next flag.
	 */
	while (vd->flags & VRING_DESC_F_NEXT) {

		if (vd->flags & VRING_DESC_F_INDIRECT)
			clear_indirect_table(dev, vd);

		idx = vd->next;
		vd = &vring->desc[idx];
		count++;

		/* A chain cannot be longer than the ring; anything else
		 * means the descriptors are corrupt and we would loop
		 * forever.
		 */
		if (count >= q->num)
			panic("%s: Endless descriptor chain in queue %d",
			    dev->name, qidx);
	}

	/* Didn't count the last one */
	count++;

	if (vd->flags & VRING_DESC_F_INDIRECT)
		clear_indirect_table(dev, vd);

	/* idx points to the tail now, update the queue */
	q->free_tail = idx;
	assert(!(vd->flags & VRING_DESC_F_NEXT));

	/* We can always connect the tail with the head */
	vring->desc[q->free_tail].next = q->free_head;
	vring->desc[q->free_tail].flags = VRING_DESC_F_NEXT;

	q->free_num += count;

	assert(q->free_num <= q->num);

	/* Hand back the cookie and release the slot */
	*data = q->data[head];
	q->data[head] = NULL;

	return 0;
}
int
virtio_had_irq(struct virtio_device *dev)
{
return virtio_read8(dev, VIRTIO_ISR_STATUS_OFF) & 1;
}
void
virtio_reset_device(struct virtio_device *dev)
{
virtio_irq_unregister(dev);
virtio_write8(dev, VIRTIO_DEV_STATUS_OFF, 0);
}
/* Enable the interrupt of dev; panics with the error code on failure. */
void
virtio_irq_enable(struct virtio_device *dev)
{
	int r;

	/* Parenthesize the assignment so that r holds the error code and
	 * not the result of the comparison.
	 */
	if ((r = sys_irqenable(&dev->irq_hook)) != OK)
		panic("%s Unable to enable IRQ %d", dev->name, r);
}
/* Disable the interrupt of dev; panics with the error code on failure. */
void
virtio_irq_disable(struct virtio_device *dev)
{
	int r;

	/* Parenthesize the assignment so that r holds the error code and
	 * not the result of the comparison.
	 */
	if ((r = sys_irqdisable(&dev->irq_hook)) != OK)
		panic("%s: Unable to disable IRQ %d", dev->name, r);
}
/*
 * Tell whether the host wants to be notified about new buffers in q.
 *
 * Returns 0 if the host set VRING_USED_F_NO_NOTIFY in the flags of the
 * used ring, 1 otherwise.
 */
static int
wants_kick(struct virtio_queue *q)
{
	assert(q != NULL);

	if (q->vring.used->flags & VRING_USED_F_NO_NOTIFY)
		return 0;

	return 1;
}
/*
 * Notify the host about new buffers in queue qidx, unless the host asked
 * not to be notified.
 */
static void
kick_queue(struct virtio_device *dev, int qidx)
{
	assert(0 <= qidx && qidx < dev->num_queues);

	if (!wants_kick(&dev->queues[qidx]))
		return;

	/* The notify register takes the index of the queue */
	virtio_write16(dev, VIRTIO_QNOTFIY_OFF, qidx);
}
/*
 * Returns non-zero if vid is the virtio vendor id and the subsystem
 * device id sdid is the expected one, 0 otherwise.
 */
static int
is_matching_device(u16_t expected_sdid, u16_t vid, u16_t sdid)
{
	if (vid != VIRTIO_VENDOR_ID)
		return 0;

	return sdid == expected_sdid;
}
/*
 * Install an IRQ policy for the interrupt line of dev; the hook id is
 * stored in dev->irq_hook. Panics with the error code on failure.
 */
static void
virtio_irq_register(struct virtio_device *dev)
{
	int r;

	/* Parenthesize the assignment so that r holds the error code and
	 * not the result of the comparison.
	 */
	if ((r = sys_irqsetpolicy(dev->irq, 0, &dev->irq_hook)) != OK)
		panic("%s: Unable to register IRQ %d", dev->name, r);
}
/*
 * Remove the IRQ policy identified by dev->irq_hook. Panics with the
 * error code on failure.
 */
static void
virtio_irq_unregister(struct virtio_device *dev)
{
	int r;

	/* Parenthesize the assignment so that r holds the error code and
	 * not the result of the comparison.
	 */
	if ((r = sys_irqrmpolicy(&dev->irq_hook)) != OK)
		panic("%s: Unable to unregister IRQ %d", dev->name, r);
}
/*
 * Look up the feature with bit number bit in the feature table of dev.
 *
 * Returns its host_support field if host is non-zero and its guest_support
 * field otherwise. Panics if the table has no entry for bit.
 */
static int
_supports(struct virtio_device *dev, int bit, int host)
{
	int i = 0;

	while (i < dev->num_features) {
		struct virtio_feature *entry = &dev->features[i++];

		if (entry->bit != bit)
			continue;

		if (host)
			return entry->host_support;

		return entry->guest_support;
	}

	panic("%s: Feature not found bit=%d", dev->name, bit);
}
/*
 * Returns the host_support value recorded for feature bit; panics if the
 * feature table of dev has no entry for bit.
 */
int
virtio_host_supports(struct virtio_device *dev, int bit)
{
	const int ask_host = 1;

	return _supports(dev, bit, ask_host);
}
/*
 * Returns the guest_support value recorded for feature bit; panics if the
 * feature table of dev has no entry for bit.
 */
int
virtio_guest_supports(struct virtio_device *dev, int bit)
{
	const int ask_host = 0;

	return _supports(dev, bit, ask_host);
}
/*
 * Generates virtio_read32(), virtio_read16() and virtio_read8().
 *
 * Each of them reads the register at offset off from the I/O port base of
 * the device with sys_inl(), sys_inw() or sys_inb() respectively and
 * returns the value. The functions panic if the port read fails.
 */
#define VIRTIO_READ_XX(xx, suff) \
u##xx##_t \
virtio_read##xx(struct virtio_device *dev, off_t off) \
{ \
int r; \
u32_t ret; \
if ((r = sys_in##suff(dev->port + off, &ret)) != OK) \
panic("%s: Read failed %d %d r=%d", dev->name, \
dev->port, \
off, \
r); \
\
return ret; \
}
VIRTIO_READ_XX(32, l)
VIRTIO_READ_XX(16, w)
VIRTIO_READ_XX(8, b)
/*
 * Generates virtio_write32(), virtio_write16() and virtio_write8().
 *
 * Each of them writes val to the register at offset off from the I/O port
 * base of the device with sys_outl(), sys_outw() or sys_outb()
 * respectively. The functions panic if the port write fails.
 */
#define VIRTIO_WRITE_XX(xx, suff) \
void \
virtio_write##xx(struct virtio_device *dev, off_t off, u##xx##_t val) \
{ \
int r; \
if ((r = sys_out##suff(dev->port + off, val)) != OK) \
panic("%s: Write failed %d %d r=%d", dev->name, \
dev->port, \
off, \
r); \
}
VIRTIO_WRITE_XX(32, l)
VIRTIO_WRITE_XX(16, w)
VIRTIO_WRITE_XX(8, b)
/*
 * Generates virtio_sread32(), virtio_sread16() and virtio_sread8(), the
 * readers for the device specific configuration area.
 *
 * off is relative to that area: VIRTIO_DEV_SPECIFIC_OFF is added to it,
 * plus VIRTIO_MSI_ADD_OFF if dev->msi is set. The functions panic if the
 * port read fails.
 */
#define VIRTIO_SREAD_XX(xx, suff) \
u##xx##_t \
virtio_sread##xx(struct virtio_device *dev, off_t off) \
{ \
int r; \
u32_t ret; \
off += VIRTIO_DEV_SPECIFIC_OFF; \
\
if (dev->msi) \
off += VIRTIO_MSI_ADD_OFF; \
\
if ((r = sys_in##suff(dev->port + off, &ret)) != OK) \
panic("%s: Read failed %d %d r=%d", dev->name, \
dev->port, \
off, \
r); \
\
return ret; \
}
VIRTIO_SREAD_XX(32, l)
VIRTIO_SREAD_XX(16, w)
VIRTIO_SREAD_XX(8, b)
/*
 * Generates virtio_swrite32(), virtio_swrite16() and virtio_swrite8(), the
 * writers for the device specific configuration area.
 *
 * off is relative to that area: VIRTIO_DEV_SPECIFIC_OFF is added to it,
 * plus VIRTIO_MSI_ADD_OFF if dev->msi is set. The functions panic if the
 * port write fails.
 */
#define VIRTIO_SWRITE_XX(xx, suff) \
void \
virtio_swrite##xx(struct virtio_device *dev, off_t off, u##xx##_t val) \
{ \
int r; \
off += VIRTIO_DEV_SPECIFIC_OFF; \
\
if (dev->msi) \
off += VIRTIO_MSI_ADD_OFF; \
\
if ((r = sys_out##suff(dev->port + off, val)) != OK) \
panic("%s: Write failed %d %d r=%d", dev->name, \
dev->port, \
off, \
r); \
}
VIRTIO_SWRITE_XX(32, l)
VIRTIO_SWRITE_XX(16, w)
VIRTIO_SWRITE_XX(8, b)

183
lib/libvirtio/virtio_ring.h Normal file
View file

@ -0,0 +1,183 @@
#ifndef _LINUX_VIRTIO_RING_H
#define _LINUX_VIRTIO_RING_H
/* An interface for efficient virtio implementation, currently for use by KVM
* and lguest, but hopefully others soon. Do NOT change this since it will
* break existing servers and clients.
*
* This header is BSD licensed so anyone can use the definitions to implement
* compatible drivers/servers.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of IBM nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Copyright Rusty Russell IBM Corporation 2007. */
/* This marks a buffer as continuing via the next field. */
#define VRING_DESC_F_NEXT 1
/* This marks a buffer as write-only (otherwise read-only). */
#define VRING_DESC_F_WRITE 2
/* This means the buffer contains a list of buffer descriptors. */
#define VRING_DESC_F_INDIRECT 4
/* The Host uses this in used->flags to advise the Guest: don't kick me when
* you add a buffer. It's unreliable, so it's simply an optimization. Guest
* will still kick if it's out of buffers. */
#define VRING_USED_F_NO_NOTIFY 1
/* The Guest uses this in avail->flags to advise the Host: don't interrupt me
* when you consume a buffer. It's unreliable, so it's simply an
* optimization. */
#define VRING_AVAIL_F_NO_INTERRUPT 1
/* We support indirect buffer descriptors */
#define VIRTIO_RING_F_INDIRECT_DESC 28
/* The Guest publishes the used index for which it expects an interrupt
* at the end of the avail ring. Host should ignore the avail->flags field. */
/* The Host publishes the avail index for which it expects a kick
* at the end of the used ring. Guest should ignore the used->flags field. */
#define VIRTIO_RING_F_EVENT_IDX 29
/* Virtio ring descriptors: 16 bytes. These can chain together via "next".
 * The layout is shared with the host and must not be changed. */
struct vring_desc {
/* Address (guest-physical). */
u64_t addr;
/* Length of the buffer at addr in bytes. */
u32_t len;
/* The flags as indicated above (VRING_DESC_F_*). */
u16_t flags;
/* Index of the next descriptor of the chain, valid if VRING_DESC_F_NEXT
 * is set. We chain unused descriptors via this, too */
u16_t next;
};
/* The available ring: descriptor chains the guest offers to the host. */
struct vring_avail {
/* VRING_AVAIL_F_* flags. */
u16_t flags;
/* Free-running index, incremented by the guest for each chain added. */
u16_t idx;
/* Indices of the head descriptors of the offered chains. */
u16_t ring[];
};
/* One entry of the used ring.
 * u32 is used here for ids for padding reasons. */
struct vring_used_elem {
/* Index of start of used descriptor chain. */
u32_t id;
/* Total length of the descriptor chain which was used (written to) */
u32_t len;
};
/* The used ring: descriptor chains the host has finished with. */
struct vring_used {
/* VRING_USED_F_* flags. */
u16_t flags;
/* Free-running index, advanced by the host. */
u16_t idx;
/* One element per returned chain. */
struct vring_used_elem ring[];
};
/* Guest-side view of a ring: pointers to the three parts of the ring
 * memory, as set up by vring_init(). */
struct vring {
/* Number of descriptors. */
unsigned int num;
/* The descriptor table. */
struct vring_desc *desc;
/* The available ring. */
struct vring_avail *avail;
/* The used ring. */
struct vring_used *used;
};
/* The standard layout for the ring is a continuous chunk of memory which looks
* like this. We assume num is a power of 2.
*
* struct vring
* {
* // The actual descriptors (16 bytes each)
* struct vring_desc desc[num];
*
* // A ring of available descriptor heads with free-running index.
* u16_t avail_flags;
* u16_t avail_idx;
* u16_t available[num];
* u16_t used_event_idx;
*
* // Padding to the next align boundary.
* char pad[];
*
* // A ring of used descriptor heads with free-running index.
* u16_t used_flags;
* u16_t used_idx;
* struct vring_used_elem used[num];
* u16_t avail_event_idx;
* };
*/
/* We publish the used event index at the end of the available ring, and vice
* versa. They are at the end for backwards compatibility. */
#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num])
#define vring_avail_event(vr) (*(u16_t *)&(vr)->used->ring[(vr)->num])
/*
 * Let vr describe a ring with num descriptors that lives in the memory at
 * p. The descriptor table starts at p, the available ring follows it
 * directly and the used ring starts at the next multiple of align behind
 * the available ring. align has to be a power of 2.
 */
static inline void vring_init(struct vring *vr, unsigned int num, void *p,
	unsigned long align)
{
	vr->num = num;
	vr->desc = p;
	/* Do the arithmetic on a char pointer: arithmetic on void * is a
	 * compiler extension and not part of standard C.
	 */
	vr->avail = (struct vring_avail *)((char *)p
	    + num * sizeof(struct vring_desc));
	/* Skip ring[num] and the trailing u16_t, then round up to align */
	vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + sizeof(u16_t)
	    + align - 1) & ~(align - 1));
}
/*
 * Compute the number of bytes needed for a ring with num descriptors
 * whose used ring is aligned to align (a power of 2).
 */
static inline unsigned vring_size(unsigned int num, unsigned long align)
{
	unsigned long first_part, used_part;

	/* Descriptor table plus available ring (3 + num u16_t fields) ... */
	first_part = sizeof(struct vring_desc) * num;
	first_part += sizeof(u16_t) * (3 + num);

	/* ... rounded up to the alignment of the used ring */
	first_part = (first_part + align - 1) & ~(align - 1);

	/* Used ring: 3 u16_t fields and num elements */
	used_part = sizeof(u16_t) * 3;
	used_part += sizeof(struct vring_used_elem) * num;

	return first_part + used_part;
}
#if 0
/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */
/* Assuming a given event_idx value from the other side, if
* we have just incremented index from old to new_idx,
* should we trigger an event? */
static inline int vring_need_event(u16_t event_idx, u16_t new_idx, u16_t old)
{
/* Note: Xen has similar logic for notification hold-off
* in include/xen/interface/io/ring.h with req_event and req_prod
* corresponding to event_idx + 1 and new_idx respectively.
* Note also that req_event and req_prod in Xen start at 1,
* event indexes in virtio start at 0. */
return (u16_t)(new_idx - event_idx - 1) < (u16_t)(new_idx - old);
}
#ifdef __KERNEL__
#include <linux/irqreturn.h>
struct virtio_device;
struct virtqueue;
struct virtqueue *vring_new_virtqueue(unsigned int num,
unsigned int vring_align,
struct virtio_device *vdev,
bool weak_barriers,
void *pages,
void (*notify)(struct virtqueue *vq),
void (*callback)(struct virtqueue *vq),
const char *name);
void vring_del_virtqueue(struct virtqueue *vq);
/* Filter out transport-specific feature bits. */
void vring_transport_features(struct virtio_device *vdev);
irqreturn_t vring_interrupt(int irq, void *_vq);
#endif /* __KERNEL__ */
#endif /* 0 */
#endif /* _LINUX_VIRTIO_RING_H */