summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/Makefile1
-rw-r--r--drivers/acpi/power.c22
-rw-r--r--drivers/gpio/gpio-max7301.c12
-rw-r--r--drivers/gpu/drm/drm_ioctl.c10
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_kms.c2
-rw-r--r--drivers/hv/vmbus_drv.c20
-rw-r--r--drivers/hwtracing/intel_th/msu.c3
-rw-r--r--drivers/i2c/busses/Kconfig6
-rw-r--r--drivers/i2c/busses/Makefile1
-rw-r--r--drivers/i2c/busses/virtio-i2c.c356
-rw-r--r--drivers/i2c/i2c-dev.c6
-rw-r--r--drivers/input/keyboard/omap4-keypad.c16
-rw-r--r--drivers/input/mouse/elan_i2c_core.c1
-rw-r--r--drivers/iommu/dma-mapping-fast.c14
-rw-r--r--drivers/iommu/intel-iommu.c4
-rw-r--r--drivers/isdn/capi/kcapi.c4
-rw-r--r--drivers/md/Kconfig16
-rw-r--r--drivers/md/dm-verity-target.c9
-rw-r--r--drivers/media/platform/vivid/vivid-vid-cap.c2
-rw-r--r--drivers/misc/genwqe/card_utils.c2
-rw-r--r--drivers/misc/mic/card/mic_virtio.c2
-rw-r--r--drivers/mmc/core/mmc.c4
-rw-r--r--drivers/mmc/host/omap_hsmmc.c12
-rw-r--r--drivers/net/caif/Kconfig2
-rw-r--r--drivers/net/ethernet/ibm/ibmveth.c6
-rw-r--r--drivers/net/usb/hso.c18
-rw-r--r--drivers/net/wireless/b43/phy_common.c2
-rw-r--r--drivers/net/xen-netfront.c2
-rw-r--r--drivers/pci/host/pcie-altera.c201
-rw-r--r--drivers/power/olpc_battery.c4
-rw-r--r--drivers/remoteproc/remoteproc_virtio.c2
-rw-r--r--drivers/rpmsg/virtio_rpmsg_bus.c2
-rw-r--r--drivers/s390/scsi/zfcp_aux.c6
-rw-r--r--drivers/s390/virtio/kvm_virtio.c2
-rw-r--r--drivers/s390/virtio/virtio_ccw.c2
-rw-r--r--drivers/scsi/bnx2fc/bnx2fc_fcoe.c2
-rw-r--r--drivers/spi/spi-bcm2835.c16
-rw-r--r--drivers/staging/android/sync.c2
-rw-r--r--drivers/usb/class/cdc-acm.c17
-rw-r--r--drivers/usb/class/cdc-acm.h1
-rw-r--r--drivers/usb/core/quirks.c3
-rw-r--r--drivers/usb/host/r8a66597-hcd.c5
-rw-r--r--drivers/usb/host/xhci-hub.c3
-rw-r--r--drivers/usb/misc/ks_bridge.c3
-rw-r--r--drivers/usb/serial/option.c20
-rw-r--r--drivers/usb/serial/pl2303.c5
-rw-r--r--drivers/usb/serial/pl2303.h5
-rw-r--r--drivers/usb/storage/scsiglue.c8
-rw-r--r--drivers/usb/storage/unusual_devs.h12
-rw-r--r--drivers/vhost/Kconfig18
-rw-r--r--drivers/vhost/Kconfig.vringh5
-rw-r--r--drivers/vhost/Makefile4
-rw-r--r--drivers/vhost/net.c214
-rw-r--r--drivers/vhost/scsi.c2
-rw-r--r--drivers/vhost/test.c2
-rw-r--r--drivers/vhost/vhost.c1023
-rw-r--r--drivers/vhost/vhost.h69
-rw-r--r--drivers/vhost/vsock.c797
-rw-r--r--drivers/virtio/Kconfig2
-rw-r--r--drivers/virtio/virtio_balloon.c2
-rw-r--r--drivers/virtio/virtio_input.c2
-rw-r--r--drivers/virtio/virtio_mmio.c2
-rw-r--r--drivers/virtio/virtio_pci_common.c4
-rw-r--r--drivers/virtio/virtio_pci_common.h2
-rw-r--r--drivers/virtio/virtio_pci_modern.c2
-rw-r--r--drivers/virtio/virtio_ring.c249
66 files changed, 2478 insertions, 797 deletions
diff --git a/drivers/Makefile b/drivers/Makefile
index d563f5c13544..d3f690ab5b27 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -141,6 +141,7 @@ obj-$(CONFIG_OF) += of/
obj-$(CONFIG_SSB) += ssb/
obj-$(CONFIG_BCMA) += bcma/
obj-$(CONFIG_VHOST_RING) += vhost/
+obj-$(CONFIG_VHOST) += vhost/
obj-$(CONFIG_VLYNQ) += vlynq/
obj-$(CONFIG_STAGING) += staging/
obj-y += platform/
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 1c2b846c5776..f28b4949cb9d 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -131,6 +131,23 @@ void acpi_power_resources_list_free(struct list_head *list)
}
}
+static bool acpi_power_resource_is_dup(union acpi_object *package,
+ unsigned int start, unsigned int i)
+{
+ acpi_handle rhandle, dup;
+ unsigned int j;
+
+ /* The caller is expected to check the package element types */
+ rhandle = package->package.elements[i].reference.handle;
+ for (j = start; j < i; j++) {
+ dup = package->package.elements[j].reference.handle;
+ if (dup == rhandle)
+ return true;
+ }
+
+ return false;
+}
+
int acpi_extract_power_resources(union acpi_object *package, unsigned int start,
struct list_head *list)
{
@@ -150,6 +167,11 @@ int acpi_extract_power_resources(union acpi_object *package, unsigned int start,
err = -ENODEV;
break;
}
+
+ /* Some ACPI tables contain duplicate power resource references */
+ if (acpi_power_resource_is_dup(package, start, i))
+ continue;
+
err = acpi_add_power_resource(rhandle);
if (err)
break;
diff --git a/drivers/gpio/gpio-max7301.c b/drivers/gpio/gpio-max7301.c
index 05813fbf3daf..647dfbbc4e1c 100644
--- a/drivers/gpio/gpio-max7301.c
+++ b/drivers/gpio/gpio-max7301.c
@@ -25,7 +25,7 @@ static int max7301_spi_write(struct device *dev, unsigned int reg,
struct spi_device *spi = to_spi_device(dev);
u16 word = ((reg & 0x7F) << 8) | (val & 0xFF);
- return spi_write(spi, (const u8 *)&word, sizeof(word));
+ return spi_write_then_read(spi, &word, sizeof(word), NULL, 0);
}
/* A read from the MAX7301 means two transfers; here, one message each */
@@ -37,14 +37,8 @@ static int max7301_spi_read(struct device *dev, unsigned int reg)
struct spi_device *spi = to_spi_device(dev);
word = 0x8000 | (reg << 8);
- ret = spi_write(spi, (const u8 *)&word, sizeof(word));
- if (ret)
- return ret;
- /*
- * This relies on the fact, that a transfer with NULL tx_buf shifts out
- * zero bytes (=NOOP for MAX7301)
- */
- ret = spi_read(spi, (u8 *)&word, sizeof(word));
+ ret = spi_write_then_read(spi, &word, sizeof(word), &word,
+ sizeof(word));
if (ret)
return ret;
return word & 0xff;
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index ebb7e1d1778c..b31c02783d69 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -36,6 +36,7 @@
#include <linux/pci.h>
#include <linux/export.h>
+#include <linux/nospec.h>
static int drm_version(struct drm_device *dev, void *data,
struct drm_file *file_priv);
@@ -705,13 +706,17 @@ long drm_ioctl(struct file *filp,
if (is_driver_ioctl) {
/* driver ioctl */
- if (nr - DRM_COMMAND_BASE >= dev->driver->num_ioctls)
+ unsigned int index = nr - DRM_COMMAND_BASE;
+
+ if (index >= dev->driver->num_ioctls)
goto err_i1;
- ioctl = &dev->driver->ioctls[nr - DRM_COMMAND_BASE];
+ index = array_index_nospec(index, dev->driver->num_ioctls);
+ ioctl = &dev->driver->ioctls[index];
} else {
/* core ioctl */
if (nr >= DRM_CORE_IOCTL_COUNT)
goto err_i1;
+ nr = array_index_nospec(nr, DRM_CORE_IOCTL_COUNT);
ioctl = &drm_ioctls[nr];
}
@@ -813,6 +818,7 @@ bool drm_ioctl_flags(unsigned int nr, unsigned int *flags)
if (nr >= DRM_CORE_IOCTL_COUNT)
return false;
+ nr = array_index_nospec(nr, DRM_CORE_IOCTL_COUNT);
*flags = drm_ioctls[nr].flags;
return true;
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 06496a128162..4150873d432e 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -130,7 +130,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags)
static vq_callback_t *callbacks[] = {
virtio_gpu_ctrl_ack, virtio_gpu_cursor_ack
};
- static const char *names[] = { "control", "cursor" };
+ static const char * const names[] = { "control", "cursor" };
struct virtio_gpu_device *vgdev;
/* this will expand later */
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 802dcb409030..b877cce0409b 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -316,6 +316,8 @@ static ssize_t out_intr_mask_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
+ if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
+ return -EINVAL;
hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
}
@@ -329,6 +331,8 @@ static ssize_t out_read_index_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
+ if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
+ return -EINVAL;
hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
return sprintf(buf, "%d\n", outbound.current_read_index);
}
@@ -343,6 +347,8 @@ static ssize_t out_write_index_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
+ if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
+ return -EINVAL;
hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
return sprintf(buf, "%d\n", outbound.current_write_index);
}
@@ -357,6 +363,8 @@ static ssize_t out_read_bytes_avail_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
+ if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
+ return -EINVAL;
hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
}
@@ -371,6 +379,8 @@ static ssize_t out_write_bytes_avail_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
+ if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
+ return -EINVAL;
hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
}
@@ -384,6 +394,8 @@ static ssize_t in_intr_mask_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
+ if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
+ return -EINVAL;
hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
}
@@ -397,6 +409,8 @@ static ssize_t in_read_index_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
+ if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
+ return -EINVAL;
hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
return sprintf(buf, "%d\n", inbound.current_read_index);
}
@@ -410,6 +424,8 @@ static ssize_t in_write_index_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
+ if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
+ return -EINVAL;
hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
return sprintf(buf, "%d\n", inbound.current_write_index);
}
@@ -424,6 +440,8 @@ static ssize_t in_read_bytes_avail_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
+ if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
+ return -EINVAL;
hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
}
@@ -438,6 +456,8 @@ static ssize_t in_write_bytes_avail_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
+ if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
+ return -EINVAL;
hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
}
diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c
index 70ca27e45602..9d9e47eb0842 100644
--- a/drivers/hwtracing/intel_th/msu.c
+++ b/drivers/hwtracing/intel_th/msu.c
@@ -1418,7 +1418,8 @@ nr_pages_store(struct device *dev, struct device_attribute *attr,
if (!end)
break;
- len -= end - p;
+ /* consume the number and the following comma, hence +1 */
+ len -= end - p + 1;
p = end + 1;
} while (len);
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 192c850a8b92..5b57c7edbc72 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -1209,11 +1209,5 @@ config I2C_MSM_V2
This driver can also be built as a module. If so, the module
will be called i2c-msm-v2.
-config VIRTIO_I2C
- tristate "VIRTIO_I2C"
- depends on VIRTIO
- help
- If you say yes to this option, the i2c virtualization will be
- supported.
endmenu
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 47e5ed02ea7f..0c9891812aa8 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -97,7 +97,6 @@ obj-$(CONFIG_I2C_XLR) += i2c-xlr.o
obj-$(CONFIG_I2C_XLP9XX) += i2c-xlp9xx.o
obj-$(CONFIG_I2C_RCAR) += i2c-rcar.o
obj-$(CONFIG_I2C_MSM_V2) += i2c-msm-v2.o
-obj-$(CONFIG_VIRTIO_I2C) += virtio-i2c.o
# External I2C/SMBus adapter drivers
obj-$(CONFIG_I2C_DIOLAN_U2C) += i2c-diolan-u2c.o
diff --git a/drivers/i2c/busses/virtio-i2c.c b/drivers/i2c/busses/virtio-i2c.c
deleted file mode 100644
index 84d045266143..000000000000
--- a/drivers/i2c/busses/virtio-i2c.c
+++ /dev/null
@@ -1,356 +0,0 @@
-/* Copyright (c) 2019, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/kthread.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/idr.h>
-#include <linux/jiffies.h>
-#include <linux/sched.h>
-#include <linux/wait.h>
-#include <linux/spinlock.h>
-#include <linux/virtio.h>
-#include <uapi/linux/virtio_ids.h>
-#include <linux/virtio_config.h>
-#include <linux/i2c.h>
-
-#define I2C_ADAPTER_NR 0x00
-
-#define I2C_VIRTIO_RD 0x01
-#define I2C_VIRTIO_WR 0x02
-#define I2C_VIRTIO_RDWR 0x03
-
-/**
- * struct virtio_i2c - virtio i2c device
- * @adapter: i2c adapter
- * @vdev: the virtio device
- * @vq: i2c virtqueue
- */
-struct virtio_i2c {
- struct i2c_adapter adapter;
- struct virtio_device *vdev;
- struct virtqueue *vq;
- wait_queue_head_t inq;
-};
-
-struct i2c_transfer_head {
- u32 type; /* read or write from or to slave */
- u32 addr; /* slave addr */
- u32 length; /* buffer length */
- u32 total_length; /* merge write and read will use this segment */
-};
-
-struct i2c_transfer_end {
- u32 result; /* return value from backend */
-};
-
-struct virtio_i2c_req {
- struct i2c_transfer_head head;
- char *buf;
- struct i2c_transfer_end end;
-};
-
-static int virti2c_transfer(struct virtio_i2c *vi2c,
- struct virtio_i2c_req *i2c_req)
-{
- struct virtqueue *vq = vi2c->vq;
- struct scatterlist outhdr, bufhdr, inhdr, *sgs[3];
- unsigned int num_out = 0, num_in = 0, err, len;
- struct virtio_i2c_req *req_handled = NULL;
-
- /* send the head queue to the backend */
- sg_init_one(&outhdr, &i2c_req->head, sizeof(i2c_req->head));
- sgs[num_out++] = &outhdr;
-
- /* send the buffer queue to the backend */
- sg_init_one(&bufhdr, i2c_req->buf,
- (i2c_req->head.type == I2C_VIRTIO_RDWR) ?
- i2c_req->head.total_length : i2c_req->head.length);
- if (i2c_req->head.type & I2C_VIRTIO_WR)
- sgs[num_out++] = &bufhdr;
- else
- sgs[num_out + num_in++] = &bufhdr;
-
- /* send the result queue to the backend */
- sg_init_one(&inhdr, &i2c_req->end, sizeof(i2c_req->end));
- sgs[num_out + num_in++] = &inhdr;
-
- /* call the virtqueue function */
- err = virtqueue_add_sgs(vq, sgs, num_out, num_in, i2c_req, GFP_KERNEL);
- if (err)
- goto req_exit;
-
- /* Tell Host to go! */
- err = virtqueue_kick(vq);
-
- wait_event(vi2c->inq,
- (req_handled = virtqueue_get_buf(vq, &len)));
-
- if (i2c_req->head.type == I2C_VIRTIO_RDWR) {
- if (i2c_req->end.result ==
- i2c_req->head.total_length - i2c_req->head.length)
- err = 0;
- else
- err = -EINVAL;
- } else {
- if (i2c_req->end.result == i2c_req->head.length)
- err = 0;
- else
- err = -EINVAL;
- }
-req_exit:
- return err;
-}
-
-/* prepare the transfer req */
-static struct virtio_i2c_req *virti2c_transfer_prepare(struct i2c_msg *msg_1,
- struct i2c_msg *msg_2)
-{
- char *ptr = NULL;
- int merge = 0;
- struct virtio_i2c_req *i2c_req;
-
- if (msg_1 == NULL)
- return NULL;
-
- if (msg_2)
- merge = 1;
-
- i2c_req = kzalloc(sizeof(struct virtio_i2c_req), GFP_KERNEL);
- if (i2c_req == NULL)
- return NULL;
-
- if (merge)
- ptr = kzalloc((msg_1->len + msg_2->len), GFP_KERNEL);
- else
- ptr = msg_1->buf;
- if (ptr == NULL)
- goto err_mem;
-
- /* prepare the head */
- i2c_req->head.type = merge ?
- I2C_VIRTIO_RDWR : ((msg_1->flags & I2C_M_RD) ?
- I2C_VIRTIO_RD : I2C_VIRTIO_WR);
- i2c_req->head.addr = msg_1->addr;
- i2c_req->head.length = msg_1->len;
- if (merge)
- i2c_req->head.total_length = msg_1->len + msg_2->len;
-
- /* prepare the buf */
- if (merge)
- memcpy(ptr, msg_1->buf, msg_1->len);
- i2c_req->buf = ptr;
-
- return i2c_req;
-err_mem:
- kfree(i2c_req);
- i2c_req = NULL;
- return NULL;
-}
-
-static void virti2c_transfer_end(struct virtio_i2c_req *req,
- struct i2c_msg *msg)
-{
- if (req->head.type == I2C_VIRTIO_RDWR) {
- memcpy(msg->buf, req->buf + req->head.length, msg->len);
- kfree(req->buf);
- req->buf = NULL;
- }
-
- kfree(req);
- req = NULL;
-}
-
-static int virtio_i2c_master_xfer(struct i2c_adapter *adap,
- struct i2c_msg *msgs, int num)
-{
- int i, ret;
- struct virtio_i2c_req *i2c_req;
- struct virtio_i2c *vi2c = i2c_get_adapdata(adap);
-
- if (num < 1) {
- dev_err(&vi2c->vdev->dev,
- "error on number of msgs(%d) received\n", num);
- return -EINVAL;
- }
-
- if (IS_ERR_OR_NULL(msgs)) {
- dev_err(&vi2c->vdev->dev, " error no msgs Accessing invalid pointer location\n");
- return PTR_ERR(msgs);
- }
-
- for (i = 0; i < num; i++) {
-
- if (msgs[i].flags & I2C_M_RD) {
- /* read the data from slave to master*/
- i2c_req = virti2c_transfer_prepare(&msgs[i], NULL);
-
- } else if ((i + 1 < num) && (msgs[i + 1].flags & I2C_M_RD) &&
- (msgs[i].addr == msgs[i + 1].addr)) {
- /* write then read from same address*/
- i2c_req = virti2c_transfer_prepare(&msgs[i],
- &msgs[i+1]);
- i += 1;
-
- } else {
- /* write the data to slave */
- i2c_req = virti2c_transfer_prepare(&msgs[i], NULL);
- }
-
- if (i2c_req == NULL) {
- ret = -ENOMEM;
- goto err;
- }
- ret = virti2c_transfer(vi2c, i2c_req);
- virti2c_transfer_end(i2c_req, &msgs[i]);
- if (ret)
- goto err;
- }
- return 0;
-err:
- return ret;
-}
-
-static u32 virtio_i2c_functionality(struct i2c_adapter *adapter)
-{
- return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
-}
-
-static struct i2c_algorithm virtio_i2c_algorithm = {
- .master_xfer = virtio_i2c_master_xfer,
- .functionality = virtio_i2c_functionality,
-};
-
-/* virtqueue incoming data interrupt IRQ */
-static void virti2c_vq_isr(struct virtqueue *vq)
-{
- struct virtio_i2c *vi2c = vq->vdev->priv;
-
- wake_up(&vi2c->inq);
-}
-
-static int virti2c_init_vqs(struct virtio_i2c *vi2c)
-{
- struct virtqueue *vqs[1];
- vq_callback_t *cbs[] = { virti2c_vq_isr };
- static const char * const names[] = { "virti2c_vq_isr" };
- int err;
-
- err = vi2c->vdev->config->find_vqs(vi2c->vdev, 1, vqs, cbs, names);
- if (err)
- return err;
- vi2c->vq = vqs[0];
-
- return 0;
-}
-
-static void virti2c_del_vqs(struct virtio_i2c *vi2c)
-{
- vi2c->vdev->config->del_vqs(vi2c->vdev);
-}
-
-static int virti2c_init_hw(struct virtio_device *vdev,
- struct virtio_i2c *vi2c)
-{
- int err;
-
- i2c_set_adapdata(&vi2c->adapter, vi2c);
- vi2c->adapter.algo = &virtio_i2c_algorithm;
-
- vi2c->adapter.owner = THIS_MODULE;
- vi2c->adapter.dev.parent = &vdev->dev;
- vi2c->adapter.dev.of_node = NULL;
-
- /* read virtio i2c config info */
- vi2c->adapter.nr = virtio_cread32(vdev, I2C_ADAPTER_NR);
- snprintf(vi2c->adapter.name, sizeof(vi2c->adapter.name),
- "virtio_i2c_%d", vi2c->adapter.nr);
-
- err = i2c_add_numbered_adapter(&vi2c->adapter);
- if (err)
- return err;
- return 0;
-}
-
-static int virti2c_probe(struct virtio_device *vdev)
-{
- struct virtio_i2c *vi2c;
- int err = 0;
-
- if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
- return -ENODEV;
-
- vi2c = kzalloc(sizeof(*vi2c), GFP_KERNEL);
- if (!vi2c)
- return -ENOMEM;
-
- vi2c->vdev = vdev;
- vdev->priv = vi2c;
- init_waitqueue_head(&vi2c->inq);
-
- err = virti2c_init_vqs(vi2c);
- if (err)
- goto err_init_vq;
-
- err = virti2c_init_hw(vdev, vi2c);
- if (err)
- goto err_init_hw;
-
- virtio_device_ready(vdev);
-
- virtqueue_enable_cb(vi2c->vq);
- return 0;
-
-err_init_hw:
- virti2c_del_vqs(vi2c);
-err_init_vq:
- kfree(vi2c);
- return err;
-}
-static void virti2c_remove(struct virtio_device *vdev)
-{
- struct virtio_i2c *vi2c = vdev->priv;
-
- i2c_del_adapter(&vi2c->adapter);
- vdev->config->reset(vdev);
- virti2c_del_vqs(vi2c);
- kfree(vi2c);
-}
-
-static unsigned int features[] = {
- /* none */
-};
-static struct virtio_device_id id_table[] = {
- { VIRTIO_ID_I2C, VIRTIO_DEV_ANY_ID },
- { 0 },
-};
-
-static struct virtio_driver virtio_i2c_driver = {
- .driver.name = KBUILD_MODNAME,
- .driver.owner = THIS_MODULE,
- .feature_table = features,
- .feature_table_size = ARRAY_SIZE(features),
- .id_table = id_table,
- .probe = virti2c_probe,
- .remove = virti2c_remove,
-};
-
-module_virtio_driver(virtio_i2c_driver);
-MODULE_DEVICE_TABLE(virtio, id_table);
-
-MODULE_DESCRIPTION("Virtio i2c frontend driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
index 94c837046786..57e3790c87b1 100644
--- a/drivers/i2c/i2c-dev.c
+++ b/drivers/i2c/i2c-dev.c
@@ -459,9 +459,15 @@ static long i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return i2cdev_ioctl_smbus(client, arg);
case I2C_RETRIES:
+ if (arg > INT_MAX)
+ return -EINVAL;
+
client->adapter->retries = arg;
break;
case I2C_TIMEOUT:
+ if (arg > INT_MAX)
+ return -EINVAL;
+
/* For historical reasons, user-space sets the timeout
* value in units of 10 ms.
*/
diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c
index f78c464899db..3d2c60c8de83 100644
--- a/drivers/input/keyboard/omap4-keypad.c
+++ b/drivers/input/keyboard/omap4-keypad.c
@@ -126,12 +126,8 @@ static irqreturn_t omap4_keypad_irq_handler(int irq, void *dev_id)
{
struct omap4_keypad *keypad_data = dev_id;
- if (kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS)) {
- /* Disable interrupts */
- kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQENABLE,
- OMAP4_VAL_IRQDISABLE);
+ if (kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS))
return IRQ_WAKE_THREAD;
- }
return IRQ_NONE;
}
@@ -173,11 +169,6 @@ static irqreturn_t omap4_keypad_irq_thread_fn(int irq, void *dev_id)
kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS,
kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS));
- /* enable interrupts */
- kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQENABLE,
- OMAP4_DEF_IRQENABLE_EVENTEN |
- OMAP4_DEF_IRQENABLE_LONGKEY);
-
return IRQ_HANDLED;
}
@@ -214,9 +205,10 @@ static void omap4_keypad_close(struct input_dev *input)
disable_irq(keypad_data->irq);
- /* Disable interrupts */
+ /* Disable interrupts and wake-up events */
kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQENABLE,
OMAP4_VAL_IRQDISABLE);
+ kbd_writel(keypad_data, OMAP4_KBD_WAKEUPENABLE, 0);
/* clear pending interrupts */
kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS,
@@ -364,7 +356,7 @@ static int omap4_keypad_probe(struct platform_device *pdev)
}
error = request_threaded_irq(keypad_data->irq, omap4_keypad_irq_handler,
- omap4_keypad_irq_thread_fn, 0,
+ omap4_keypad_irq_thread_fn, IRQF_ONESHOT,
"omap4-keypad", keypad_data);
if (error) {
dev_err(&pdev->dev, "failed to register interrupt\n");
diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index 471984ec2db0..30adc5745cba 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -1240,6 +1240,7 @@ MODULE_DEVICE_TABLE(i2c, elan_id);
static const struct acpi_device_id elan_acpi_id[] = {
{ "ELAN0000", 0 },
{ "ELAN0100", 0 },
+ { "ELAN0501", 0 },
{ "ELAN0600", 0 },
{ "ELAN0602", 0 },
{ "ELAN0605", 0 },
diff --git a/drivers/iommu/dma-mapping-fast.c b/drivers/iommu/dma-mapping-fast.c
index 0881d68f34d8..66c2abd358f8 100644
--- a/drivers/iommu/dma-mapping-fast.c
+++ b/drivers/iommu/dma-mapping-fast.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2016-2017,2019, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -513,12 +513,22 @@ static void *fast_smmu_alloc(struct device *dev, size_t size,
av8l_fast_iopte *ptep;
unsigned long flags;
struct sg_mapping_iter miter;
- unsigned int count = ALIGN(size, SZ_4K) >> PAGE_SHIFT;
+ size_t count = ALIGN(size, SZ_4K) >> PAGE_SHIFT;
int prot = IOMMU_READ | IOMMU_WRITE; /* TODO: extract from attrs */
bool is_coherent = is_dma_coherent(dev, attrs);
pgprot_t remap_prot = __get_dma_pgprot(attrs, PAGE_KERNEL, is_coherent);
struct page **pages;
+ /*
+ * sg_alloc_table_from_pages accepts unsigned int value for count
+ * so check count doesn't exceed UINT_MAX.
+ */
+
+ if (count > UINT_MAX) {
+ dev_err(dev, "count: %zx exceeds UNIT_MAX\n", count);
+ return NULL;
+ }
+
prot = __get_iommu_pgprot(attrs, prot, is_coherent);
*handle = DMA_ERROR_CODE;
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 7feaa82f8c7c..8b4a4d95669a 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2041,7 +2041,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
* than default. Unnecessary for PT mode.
*/
if (translation != CONTEXT_TT_PASS_THROUGH) {
- for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
+ for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
ret = -ENOMEM;
pgd = phys_to_virt(dma_pte_addr(pgd));
if (!dma_pte_present(pgd))
@@ -2055,7 +2055,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
translation = CONTEXT_TT_MULTI_LEVEL;
context_set_address_root(context, virt_to_phys(pgd));
- context_set_address_width(context, iommu->agaw);
+ context_set_address_width(context, agaw);
} else {
/*
* In pass through mode, AW must be programmed to
diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index dd7e38ac29bd..d15347de415a 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -851,7 +851,7 @@ u16 capi20_get_manufacturer(u32 contr, u8 *buf)
u16 ret;
if (contr == 0) {
- strlcpy(buf, capi_manufakturer, CAPI_MANUFACTURER_LEN);
+ strncpy(buf, capi_manufakturer, CAPI_MANUFACTURER_LEN);
return CAPI_NOERROR;
}
@@ -859,7 +859,7 @@ u16 capi20_get_manufacturer(u32 contr, u8 *buf)
ctr = get_capi_ctr_by_nr(contr);
if (ctr && ctr->state == CAPI_CTR_RUNNING) {
- strlcpy(buf, ctr->manu, CAPI_MANUFACTURER_LEN);
+ strncpy(buf, ctr->manu, CAPI_MANUFACTURER_LEN);
ret = CAPI_NOERROR;
} else
ret = CAPI_REGNOTINSTALLED;
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 0ad06670fa99..7bf1cdb582f5 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -476,21 +476,6 @@ config DM_VERITY
If unsure, say N.
-config DM_VERITY_HASH_PREFETCH_MIN_SIZE_128
- bool "Prefetch size 128"
-
-config DM_VERITY_HASH_PREFETCH_MIN_SIZE
- int "Verity hash prefetch minimum size"
- depends on DM_VERITY
- range 1 4096
- default 128 if DM_VERITY_HASH_PREFETCH_MIN_SIZE_128
- default 1
- ---help---
- This sets minimum number of hash blocks to prefetch for dm-verity.
- For devices like eMMC, having larger prefetch size like 128 can improve
- performance with increased memory consumption for keeping more hashes
- in RAM.
-
config DM_VERITY_FEC
bool "Verity forward error correction support"
depends on DM_VERITY
@@ -554,7 +539,6 @@ config DM_ANDROID_VERITY
depends on ASYMMETRIC_KEY_TYPE
depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE
depends on MD_LINEAR=y
- select DM_VERITY_HASH_PREFETCH_MIN_SIZE_128
---help---
This device-mapper target is virtually a VERITY target. This
target is setup by reading the metadata contents piggybacked
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index d2e3abc182b3..131077aabd08 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -529,7 +529,6 @@ static void verity_prefetch_io(struct work_struct *work)
container_of(work, struct dm_verity_prefetch_work, work);
struct dm_verity *v = pw->v;
int i;
- sector_t prefetch_size;
for (i = v->levels - 2; i >= 0; i--) {
sector_t hash_block_start;
@@ -552,14 +551,8 @@ static void verity_prefetch_io(struct work_struct *work)
hash_block_end = v->hash_blocks - 1;
}
no_prefetch_cluster:
- // for emmc, it is more efficient to send bigger read
- prefetch_size = max((sector_t)CONFIG_DM_VERITY_HASH_PREFETCH_MIN_SIZE,
- hash_block_end - hash_block_start + 1);
- if ((hash_block_start + prefetch_size) >= (v->hash_start + v->hash_blocks)) {
- prefetch_size = hash_block_end - hash_block_start + 1;
- }
dm_bufio_prefetch(v->bufio, hash_block_start,
- prefetch_size);
+ hash_block_end - hash_block_start + 1);
}
kfree(pw);
diff --git a/drivers/media/platform/vivid/vivid-vid-cap.c b/drivers/media/platform/vivid/vivid-vid-cap.c
index ef5412311b2f..a84954f1be34 100644
--- a/drivers/media/platform/vivid/vivid-vid-cap.c
+++ b/drivers/media/platform/vivid/vivid-vid-cap.c
@@ -461,6 +461,8 @@ void vivid_update_format_cap(struct vivid_dev *dev, bool keep_controls)
tpg_s_rgb_range(&dev->tpg, v4l2_ctrl_g_ctrl(dev->rgb_range_cap));
break;
}
+ vfree(dev->bitmap_cap);
+ dev->bitmap_cap = NULL;
vivid_update_quality(dev);
tpg_reset_source(&dev->tpg, dev->src_rect.width, dev->src_rect.height, dev->field_cap);
dev->crop_cap = dev->src_rect;
diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c
index 524660510599..0c15ba21fa54 100644
--- a/drivers/misc/genwqe/card_utils.c
+++ b/drivers/misc/genwqe/card_utils.c
@@ -217,7 +217,7 @@ u32 genwqe_crc32(u8 *buff, size_t len, u32 init)
void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size,
dma_addr_t *dma_handle)
{
- if (get_order(size) > MAX_ORDER)
+ if (get_order(size) >= MAX_ORDER)
return NULL;
return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle,
diff --git a/drivers/misc/mic/card/mic_virtio.c b/drivers/misc/mic/card/mic_virtio.c
index e486a0c26267..f6ed57d3125c 100644
--- a/drivers/misc/mic/card/mic_virtio.c
+++ b/drivers/misc/mic/card/mic_virtio.c
@@ -311,7 +311,7 @@ unmap:
static int mic_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct mic_vdev *mvdev = to_micvdev(vdev);
struct mic_device_ctrl __iomem *dc = mvdev->dc;
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 999fdb9bad7d..1ba28b350e91 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -2087,9 +2087,11 @@ reinit:
if (err) {
pr_warn("%s: Enabling HPI failed\n",
mmc_hostname(card->host));
+ card->ext_csd.hpi_en = 0;
err = 0;
- } else
+ } else {
card->ext_csd.hpi_en = 1;
+ }
}
/*
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 6b814d7d6560..af937d3e8c3e 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -2117,7 +2117,6 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
mmc->max_blk_size = 512; /* Block Length at max can be 1024 */
mmc->max_blk_count = 0xFFFF; /* No. of Blocks is 16 bits */
mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
- mmc->max_seg_size = mmc->max_req_size;
mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED |
MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_ERASE;
@@ -2174,6 +2173,17 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
goto err_irq;
}
+ /*
+ * Limit the maximum segment size to the lower of the request size
+ * and the DMA engine device segment size limits. In reality, with
+ * 32-bit transfers, the DMA engine can do longer segments than this
+ * but there is no way to represent that in the DMA model - if we
+ * increase this figure here, we get warnings from the DMA API debug.
+ */
+ mmc->max_seg_size = min3(mmc->max_req_size,
+ dma_get_max_seg_size(host->rx_chan->device->dev),
+ dma_get_max_seg_size(host->tx_chan->device->dev));
+
/* Request IRQ for MMC operations */
ret = devm_request_irq(&pdev->dev, host->irq, omap_hsmmc_irq, 0,
mmc_hostname(mmc), host);
diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig
index 547098086773..f81df91a9ce1 100644
--- a/drivers/net/caif/Kconfig
+++ b/drivers/net/caif/Kconfig
@@ -52,5 +52,5 @@ config CAIF_VIRTIO
The caif driver for CAIF over Virtio.
if CAIF_VIRTIO
-source "drivers/vhost/Kconfig"
+source "drivers/vhost/Kconfig.vringh"
endif
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 2f9b12cf9ee5..61a9ab4fe047 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1163,11 +1163,15 @@ out:
map_failed_frags:
last = i+1;
- for (i = 0; i < last; i++)
+ for (i = 1; i < last; i++)
dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address,
descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK,
DMA_TO_DEVICE);
+ dma_unmap_single(&adapter->vdev->dev,
+ descs[0].fields.address,
+ descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK,
+ DMA_TO_DEVICE);
map_failed:
if (!firmware_has_feature(FW_FEATURE_CMO))
netdev_err(netdev, "tx: unable to map xmit buffer\n");
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 111d907e0c11..79cede19e0c4 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2825,6 +2825,12 @@ static int hso_get_config_data(struct usb_interface *interface)
return -EIO;
}
+ /* check if we have a valid interface */
+ if (if_num > 16) {
+ kfree(config_data);
+ return -EINVAL;
+ }
+
switch (config_data[if_num]) {
case 0x0:
result = 0;
@@ -2895,10 +2901,18 @@ static int hso_probe(struct usb_interface *interface,
/* Get the interface/port specification from either driver_info or from
* the device itself */
- if (id->driver_info)
+ if (id->driver_info) {
+ /* if_num is controlled by the device, driver_info is a 0 terminated
+ * array. Make sure, the access is in bounds! */
+ for (i = 0; i <= if_num; ++i)
+ if (((u32 *)(id->driver_info))[i] == 0)
+ goto exit;
port_spec = ((u32 *)(id->driver_info))[if_num];
- else
+ } else {
port_spec = hso_get_config_data(interface);
+ if (port_spec < 0)
+ goto exit;
+ }
/* Check if we need to switch to alt interfaces prior to port
* configuration */
diff --git a/drivers/net/wireless/b43/phy_common.c b/drivers/net/wireless/b43/phy_common.c
index ec2b9c577b90..3644c9edaf81 100644
--- a/drivers/net/wireless/b43/phy_common.c
+++ b/drivers/net/wireless/b43/phy_common.c
@@ -616,7 +616,7 @@ struct b43_c32 b43_cordic(int theta)
u8 i;
s32 tmp;
s8 signx = 1;
- u32 angle = 0;
+ s32 angle = 0;
struct b43_c32 ret = { .i = 39797, .q = 0, };
while (theta > (180 << 16))
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 0a4bd73caae5..6f55ab4f7959 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -889,7 +889,7 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
- BUG_ON(pull_to <= skb_headlen(skb));
+ BUG_ON(pull_to < skb_headlen(skb));
__pskb_pull_tail(skb, pull_to - skb_headlen(skb));
}
if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
diff --git a/drivers/pci/host/pcie-altera.c b/drivers/pci/host/pcie-altera.c
index 99da549d5d06..0118287a8a10 100644
--- a/drivers/pci/host/pcie-altera.c
+++ b/drivers/pci/host/pcie-altera.c
@@ -40,8 +40,10 @@
#define P2A_INT_ENABLE 0x3070
#define P2A_INT_ENA_ALL 0xf
#define RP_LTSSM 0x3c64
+#define RP_LTSSM_MASK 0x1f
#define LTSSM_L0 0xf
+#define PCIE_CAP_OFFSET 0x80
/* TLP configuration type 0 and 1 */
#define TLP_FMTTYPE_CFGRD0 0x04 /* Configuration Read Type 0 */
#define TLP_FMTTYPE_CFGWR0 0x44 /* Configuration Write Type 0 */
@@ -60,6 +62,9 @@
#define TLP_LOOP 500
#define RP_DEVFN 0
+#define LINK_UP_TIMEOUT HZ
+#define LINK_RETRAIN_TIMEOUT HZ
+
#define INTX_NUM 4
#define DWORD_MASK 3
@@ -80,25 +85,21 @@ struct tlp_rp_regpair_t {
u32 reg1;
};
-static void altera_pcie_retrain(struct pci_dev *dev)
+static inline void cra_writel(struct altera_pcie *pcie, const u32 value,
+ const u32 reg)
{
- u16 linkcap, linkstat;
-
- /*
- * Set the retrain bit if the PCIe rootport support > 2.5GB/s, but
- * current speed is 2.5 GB/s.
- */
- pcie_capability_read_word(dev, PCI_EXP_LNKCAP, &linkcap);
+ writel_relaxed(value, pcie->cra_base + reg);
+}
- if ((linkcap & PCI_EXP_LNKCAP_SLS) <= PCI_EXP_LNKCAP_SLS_2_5GB)
- return;
+static inline u32 cra_readl(struct altera_pcie *pcie, const u32 reg)
+{
+ return readl_relaxed(pcie->cra_base + reg);
+}
- pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &linkstat);
- if ((linkstat & PCI_EXP_LNKSTA_CLS) == PCI_EXP_LNKSTA_CLS_2_5GB)
- pcie_capability_set_word(dev, PCI_EXP_LNKCTL,
- PCI_EXP_LNKCTL_RL);
+static bool altera_pcie_link_is_up(struct altera_pcie *pcie)
+{
+ return !!((cra_readl(pcie, RP_LTSSM) & RP_LTSSM_MASK) == LTSSM_L0);
}
-DECLARE_PCI_FIXUP_EARLY(0x1172, PCI_ANY_ID, altera_pcie_retrain);
/*
* Altera PCIe port uses BAR0 of RC's configuration space as the translation
@@ -119,17 +120,6 @@ static bool altera_pcie_hide_rc_bar(struct pci_bus *bus, unsigned int devfn,
return false;
}
-static inline void cra_writel(struct altera_pcie *pcie, const u32 value,
- const u32 reg)
-{
- writel_relaxed(value, pcie->cra_base + reg);
-}
-
-static inline u32 cra_readl(struct altera_pcie *pcie, const u32 reg)
-{
- return readl_relaxed(pcie->cra_base + reg);
-}
-
static void tlp_write_tx(struct altera_pcie *pcie,
struct tlp_rp_regpair_t *tlp_rp_regdata)
{
@@ -138,11 +128,6 @@ static void tlp_write_tx(struct altera_pcie *pcie,
cra_writel(pcie, tlp_rp_regdata->ctrl, RP_TX_CNTRL);
}
-static bool altera_pcie_link_is_up(struct altera_pcie *pcie)
-{
- return !!(cra_readl(pcie, RP_LTSSM) & LTSSM_L0);
-}
-
static bool altera_pcie_valid_config(struct altera_pcie *pcie,
struct pci_bus *bus, int dev)
{
@@ -286,22 +271,14 @@ static int tlp_cfg_dword_write(struct altera_pcie *pcie, u8 bus, u32 devfn,
return PCIBIOS_SUCCESSFUL;
}
-static int altera_pcie_cfg_read(struct pci_bus *bus, unsigned int devfn,
- int where, int size, u32 *value)
+static int _altera_pcie_cfg_read(struct altera_pcie *pcie, u8 busno,
+ unsigned int devfn, int where, int size,
+ u32 *value)
{
- struct altera_pcie *pcie = bus->sysdata;
int ret;
u32 data;
u8 byte_en;
- if (altera_pcie_hide_rc_bar(bus, devfn, where))
- return PCIBIOS_BAD_REGISTER_NUMBER;
-
- if (!altera_pcie_valid_config(pcie, bus, PCI_SLOT(devfn))) {
- *value = 0xffffffff;
- return PCIBIOS_DEVICE_NOT_FOUND;
- }
-
switch (size) {
case 1:
byte_en = 1 << (where & 3);
@@ -314,7 +291,7 @@ static int altera_pcie_cfg_read(struct pci_bus *bus, unsigned int devfn,
break;
}
- ret = tlp_cfg_dword_read(pcie, bus->number, devfn,
+ ret = tlp_cfg_dword_read(pcie, busno, devfn,
(where & ~DWORD_MASK), byte_en, &data);
if (ret != PCIBIOS_SUCCESSFUL)
return ret;
@@ -334,20 +311,14 @@ static int altera_pcie_cfg_read(struct pci_bus *bus, unsigned int devfn,
return PCIBIOS_SUCCESSFUL;
}
-static int altera_pcie_cfg_write(struct pci_bus *bus, unsigned int devfn,
- int where, int size, u32 value)
+static int _altera_pcie_cfg_write(struct altera_pcie *pcie, u8 busno,
+ unsigned int devfn, int where, int size,
+ u32 value)
{
- struct altera_pcie *pcie = bus->sysdata;
u32 data32;
u32 shift = 8 * (where & 3);
u8 byte_en;
- if (altera_pcie_hide_rc_bar(bus, devfn, where))
- return PCIBIOS_BAD_REGISTER_NUMBER;
-
- if (!altera_pcie_valid_config(pcie, bus, PCI_SLOT(devfn)))
- return PCIBIOS_DEVICE_NOT_FOUND;
-
switch (size) {
case 1:
data32 = (value & 0xff) << shift;
@@ -363,8 +334,40 @@ static int altera_pcie_cfg_write(struct pci_bus *bus, unsigned int devfn,
break;
}
- return tlp_cfg_dword_write(pcie, bus->number, devfn,
- (where & ~DWORD_MASK), byte_en, data32);
+ return tlp_cfg_dword_write(pcie, busno, devfn, (where & ~DWORD_MASK),
+ byte_en, data32);
+}
+
+static int altera_pcie_cfg_read(struct pci_bus *bus, unsigned int devfn,
+ int where, int size, u32 *value)
+{
+ struct altera_pcie *pcie = bus->sysdata;
+
+ if (altera_pcie_hide_rc_bar(bus, devfn, where))
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+
+ if (!altera_pcie_valid_config(pcie, bus, PCI_SLOT(devfn))) {
+ *value = 0xffffffff;
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ }
+
+ return _altera_pcie_cfg_read(pcie, bus->number, devfn, where, size,
+ value);
+}
+
+static int altera_pcie_cfg_write(struct pci_bus *bus, unsigned int devfn,
+ int where, int size, u32 value)
+{
+ struct altera_pcie *pcie = bus->sysdata;
+
+ if (altera_pcie_hide_rc_bar(bus, devfn, where))
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+
+ if (!altera_pcie_valid_config(pcie, bus, PCI_SLOT(devfn)))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return _altera_pcie_cfg_write(pcie, bus->number, devfn, where, size,
+ value);
}
static struct pci_ops altera_pcie_ops = {
@@ -372,6 +375,90 @@ static struct pci_ops altera_pcie_ops = {
.write = altera_pcie_cfg_write,
};
+static int altera_read_cap_word(struct altera_pcie *pcie, u8 busno,
+ unsigned int devfn, int offset, u16 *value)
+{
+ u32 data;
+ int ret;
+
+ ret = _altera_pcie_cfg_read(pcie, busno, devfn,
+ PCIE_CAP_OFFSET + offset, sizeof(*value),
+ &data);
+ *value = data;
+ return ret;
+}
+
+static int altera_write_cap_word(struct altera_pcie *pcie, u8 busno,
+ unsigned int devfn, int offset, u16 value)
+{
+ return _altera_pcie_cfg_write(pcie, busno, devfn,
+ PCIE_CAP_OFFSET + offset, sizeof(value),
+ value);
+}
+
+static void altera_wait_link_retrain(struct altera_pcie *pcie)
+{
+ u16 reg16;
+ unsigned long start_jiffies;
+
+ /* Wait for link training end. */
+ start_jiffies = jiffies;
+ for (;;) {
+ altera_read_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN,
+ PCI_EXP_LNKSTA, &reg16);
+ if (!(reg16 & PCI_EXP_LNKSTA_LT))
+ break;
+
+ if (time_after(jiffies, start_jiffies + LINK_RETRAIN_TIMEOUT)) {
+ dev_err(&pcie->pdev->dev, "link retrain timeout\n");
+ break;
+ }
+ udelay(100);
+ }
+
+ /* Wait for link is up */
+ start_jiffies = jiffies;
+ for (;;) {
+ if (altera_pcie_link_is_up(pcie))
+ break;
+
+ if (time_after(jiffies, start_jiffies + LINK_UP_TIMEOUT)) {
+ dev_err(&pcie->pdev->dev, "link up timeout\n");
+ break;
+ }
+ udelay(100);
+ }
+}
+
+static void altera_pcie_retrain(struct altera_pcie *pcie)
+{
+ u16 linkcap, linkstat, linkctl;
+
+ if (!altera_pcie_link_is_up(pcie))
+ return;
+
+ /*
+ * Set the retrain bit if the PCIe rootport support > 2.5GB/s, but
+ * current speed is 2.5 GB/s.
+ */
+ altera_read_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN, PCI_EXP_LNKCAP,
+ &linkcap);
+ if ((linkcap & PCI_EXP_LNKCAP_SLS) <= PCI_EXP_LNKCAP_SLS_2_5GB)
+ return;
+
+ altera_read_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN, PCI_EXP_LNKSTA,
+ &linkstat);
+ if ((linkstat & PCI_EXP_LNKSTA_CLS) == PCI_EXP_LNKSTA_CLS_2_5GB) {
+ altera_read_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN,
+ PCI_EXP_LNKCTL, &linkctl);
+ linkctl |= PCI_EXP_LNKCTL_RL;
+ altera_write_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN,
+ PCI_EXP_LNKCTL, linkctl);
+
+ altera_wait_link_retrain(pcie);
+ }
+}
+
static int altera_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
irq_hw_number_t hwirq)
{
@@ -506,6 +593,11 @@ static int altera_pcie_parse_dt(struct altera_pcie *pcie)
return 0;
}
+static void altera_pcie_host_init(struct altera_pcie *pcie)
+{
+ altera_pcie_retrain(pcie);
+}
+
static int altera_pcie_probe(struct platform_device *pdev)
{
struct altera_pcie *pcie;
@@ -543,6 +635,7 @@ static int altera_pcie_probe(struct platform_device *pdev)
cra_writel(pcie, P2A_INT_STS_ALL, P2A_INT_STATUS);
/* enable all interrupts */
cra_writel(pcie, P2A_INT_ENA_ALL, P2A_INT_ENABLE);
+ altera_pcie_host_init(pcie);
bus = pci_scan_root_bus(&pdev->dev, pcie->root_bus_nr, &altera_pcie_ops,
pcie, &pcie->resources);
diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c
index 9e29b1321648..15783869e1a0 100644
--- a/drivers/power/olpc_battery.c
+++ b/drivers/power/olpc_battery.c
@@ -427,14 +427,14 @@ static int olpc_bat_get_property(struct power_supply *psy,
if (ret)
return ret;
- val->intval = (s16)be16_to_cpu(ec_word) * 100 / 256;
+ val->intval = (s16)be16_to_cpu(ec_word) * 10 / 256;
break;
case POWER_SUPPLY_PROP_TEMP_AMBIENT:
ret = olpc_ec_cmd(EC_AMB_TEMP, NULL, 0, (void *)&ec_word, 2);
if (ret)
return ret;
- val->intval = (int)be16_to_cpu(ec_word) * 100 / 256;
+ val->intval = (int)be16_to_cpu(ec_word) * 10 / 256;
break;
case POWER_SUPPLY_PROP_CHARGE_COUNTER:
ret = olpc_ec_cmd(EC_BAT_ACR, NULL, 0, (void *)&ec_word, 2);
diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index e1a10232a943..e44872fb9e5e 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -147,7 +147,7 @@ static void rproc_virtio_del_vqs(struct virtio_device *vdev)
static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct rproc *rproc = vdev_to_rproc(vdev);
int i, ret;
diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c
index 73354ee27877..1fcd27c1f183 100644
--- a/drivers/rpmsg/virtio_rpmsg_bus.c
+++ b/drivers/rpmsg/virtio_rpmsg_bus.c
@@ -945,7 +945,7 @@ static void rpmsg_ns_cb(struct rpmsg_channel *rpdev, void *data, int len,
static int rpmsg_probe(struct virtio_device *vdev)
{
vq_callback_t *vq_cbs[] = { rpmsg_recv_done, rpmsg_xmit_done };
- const char *names[] = { "input", "output" };
+ static const char * const names[] = { "input", "output" };
struct virtqueue *vqs[2];
struct virtproc_info *vrp;
void *bufs_va;
diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index 38c8e308d4c8..a96c98e3fc73 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -275,16 +275,16 @@ static void zfcp_free_low_mem_buffers(struct zfcp_adapter *adapter)
*/
int zfcp_status_read_refill(struct zfcp_adapter *adapter)
{
- while (atomic_read(&adapter->stat_miss) > 0)
+ while (atomic_add_unless(&adapter->stat_miss, -1, 0))
if (zfcp_fsf_status_read(adapter->qdio)) {
+ atomic_inc(&adapter->stat_miss); /* undo add -1 */
if (atomic_read(&adapter->stat_miss) >=
adapter->stat_read_buf_num) {
zfcp_erp_adapter_reopen(adapter, 0, "axsref1");
return 1;
}
break;
- } else
- atomic_dec(&adapter->stat_miss);
+ }
return 0;
}
diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c
index 53fb975c404b..1d060fd293a3 100644
--- a/drivers/s390/virtio/kvm_virtio.c
+++ b/drivers/s390/virtio/kvm_virtio.c
@@ -255,7 +255,7 @@ static void kvm_del_vqs(struct virtio_device *vdev)
static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct kvm_device *kdev = to_kvmdev(vdev);
int i;
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index ff06bdfd2b20..9e685246b98d 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -639,7 +639,7 @@ out:
static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
unsigned long *indicatorp = NULL;
diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index d0b227ffbd5f..573aeec7a02b 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -2279,7 +2279,7 @@ static int _bnx2fc_create(struct net_device *netdev,
if (!interface) {
printk(KERN_ERR PFX "bnx2fc_interface_create failed\n");
rc = -ENOMEM;
- goto ifput_err;
+ goto netdev_err;
}
if (netdev->priv_flags & IFF_802_1Q_VLAN) {
diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index cf04960cc3e6..1a1368f5863c 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -88,7 +88,7 @@ struct bcm2835_spi {
u8 *rx_buf;
int tx_len;
int rx_len;
- bool dma_pending;
+ unsigned int dma_pending;
};
static inline u32 bcm2835_rd(struct bcm2835_spi *bs, unsigned reg)
@@ -155,8 +155,7 @@ static irqreturn_t bcm2835_spi_interrupt(int irq, void *dev_id)
/* Write as many bytes as possible to FIFO */
bcm2835_wr_fifo(bs);
- /* based on flags decide if we can finish the transfer */
- if (bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE) {
+ if (!bs->rx_len) {
/* Transfer complete - reset SPI HW */
bcm2835_spi_reset_hw(master);
/* wake up the framework */
@@ -233,10 +232,9 @@ static void bcm2835_spi_dma_done(void *data)
* is called the tx-dma must have finished - can't get to this
* situation otherwise...
*/
- dmaengine_terminate_all(master->dma_tx);
-
- /* mark as no longer pending */
- bs->dma_pending = 0;
+ if (cmpxchg(&bs->dma_pending, true, false)) {
+ dmaengine_terminate_all(master->dma_tx);
+ }
/* and mark as completed */;
complete(&master->xfer_completion);
@@ -342,6 +340,7 @@ static int bcm2835_spi_transfer_one_dma(struct spi_master *master,
if (ret) {
/* need to reset on errors */
dmaengine_terminate_all(master->dma_tx);
+ bs->dma_pending = false;
bcm2835_spi_reset_hw(master);
return ret;
}
@@ -617,10 +616,9 @@ static void bcm2835_spi_handle_err(struct spi_master *master,
struct bcm2835_spi *bs = spi_master_get_devdata(master);
/* if an error occurred and we have an active dma, then terminate */
- if (bs->dma_pending) {
+ if (cmpxchg(&bs->dma_pending, true, false)) {
dmaengine_terminate_all(master->dma_tx);
dmaengine_terminate_all(master->dma_rx);
- bs->dma_pending = 0;
}
/* and reset */
bcm2835_spi_reset_hw(master);
diff --git a/drivers/staging/android/sync.c b/drivers/staging/android/sync.c
index 5238d67490ce..39b99740a6d8 100644
--- a/drivers/staging/android/sync.c
+++ b/drivers/staging/android/sync.c
@@ -451,6 +451,8 @@ static bool android_fence_signaled(struct fence *fence)
int ret;
ret = parent->ops->has_signaled(pt);
+ if (!ret && parent->destroyed)
+ ret = -ENOENT;
if (ret < 0)
fence->status = ret;
return ret;
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 0a8e5ac891d4..736de1021d8b 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -507,6 +507,13 @@ static int acm_tty_install(struct tty_driver *driver, struct tty_struct *tty)
if (retval)
goto error_init_termios;
+ /*
+ * Suppress initial echoing for some devices which might send data
+ * immediately after acm driver has been installed.
+ */
+ if (acm->quirks & DISABLE_ECHO)
+ tty->termios.c_lflag &= ~ECHO;
+
tty->driver_data = acm;
return 0;
@@ -1677,6 +1684,9 @@ static const struct usb_device_id acm_ids[] = {
{ USB_DEVICE(0x0e8d, 0x0003), /* FIREFLY, MediaTek Inc; andrey.arapov@gmail.com */
.driver_info = NO_UNION_NORMAL, /* has no union descriptor */
},
+ { USB_DEVICE(0x0e8d, 0x2000), /* MediaTek Inc Preloader */
+ .driver_info = DISABLE_ECHO, /* DISABLE ECHO in termios flag */
+ },
{ USB_DEVICE(0x0e8d, 0x3329), /* MediaTek Inc GPS */
.driver_info = NO_UNION_NORMAL, /* has no union descriptor */
},
@@ -1875,6 +1885,13 @@ static const struct usb_device_id acm_ids[] = {
.driver_info = IGNORE_DEVICE,
},
+ { USB_DEVICE(0x1bc7, 0x0021), /* Telit 3G ACM only composition */
+ .driver_info = SEND_ZERO_PACKET,
+ },
+ { USB_DEVICE(0x1bc7, 0x0023), /* Telit 3G ACM + ECM composition */
+ .driver_info = SEND_ZERO_PACKET,
+ },
+
/* control interfaces without any protocol set */
{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
USB_CDC_PROTO_NONE) },
diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h
index b30ac5fcde68..1ad9ff9f493d 100644
--- a/drivers/usb/class/cdc-acm.h
+++ b/drivers/usb/class/cdc-acm.h
@@ -134,3 +134,4 @@ struct acm {
#define QUIRK_CONTROL_LINE_STATE BIT(6)
#define CLEAR_HALT_CONDITIONS BIT(7)
#define SEND_ZERO_PACKET BIT(8)
+#define DISABLE_ECHO BIT(9)
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index cf378b1ed373..733479ddf8a7 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -240,7 +240,8 @@ static const struct usb_device_id usb_quirk_list[] = {
USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
/* Corsair K70 RGB */
- { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT },
+ { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT |
+ USB_QUIRK_DELAY_CTRL_MSG },
/* Corsair Strafe */
{ USB_DEVICE(0x1b1c, 0x1b15), .driver_info = USB_QUIRK_DELAY_INIT |
diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c
index a11c2c8bda53..a217f71b45c6 100644
--- a/drivers/usb/host/r8a66597-hcd.c
+++ b/drivers/usb/host/r8a66597-hcd.c
@@ -1990,6 +1990,8 @@ static int r8a66597_urb_dequeue(struct usb_hcd *hcd, struct urb *urb,
static void r8a66597_endpoint_disable(struct usb_hcd *hcd,
struct usb_host_endpoint *hep)
+__acquires(r8a66597->lock)
+__releases(r8a66597->lock)
{
struct r8a66597 *r8a66597 = hcd_to_r8a66597(hcd);
struct r8a66597_pipe *pipe = (struct r8a66597_pipe *)hep->hcpriv;
@@ -2002,13 +2004,14 @@ static void r8a66597_endpoint_disable(struct usb_hcd *hcd,
return;
pipenum = pipe->info.pipenum;
+ spin_lock_irqsave(&r8a66597->lock, flags);
if (pipenum == 0) {
kfree(hep->hcpriv);
hep->hcpriv = NULL;
+ spin_unlock_irqrestore(&r8a66597->lock, flags);
return;
}
- spin_lock_irqsave(&r8a66597->lock, flags);
pipe_stop(r8a66597, pipe);
pipe_irq_disable(r8a66597, pipenum);
disable_irq_empty(r8a66597, pipenum);
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 61da1a22b98a..5293596577bf 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -1504,7 +1504,8 @@ int xhci_bus_suspend(struct usb_hcd *hcd)
portsc_buf[port_index] = 0;
/* Bail out if a USB3 port has a new device in link training */
- if ((t1 & PORT_PLS_MASK) == XDEV_POLLING) {
+ if ((hcd->speed >= HCD_USB3) &&
+ (t1 & PORT_PLS_MASK) == XDEV_POLLING) {
bus_state->bus_suspended = 0;
spin_unlock_irqrestore(&xhci->lock, flags);
xhci_dbg(xhci, "Bus suspend bailout, port in polling\n");
diff --git a/drivers/usb/misc/ks_bridge.c b/drivers/usb/misc/ks_bridge.c
index dad51be0820e..c6fd30349ecc 100644
--- a/drivers/usb/misc/ks_bridge.c
+++ b/drivers/usb/misc/ks_bridge.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012-2014, 2017-2018, Linux Foundation. All rights reserved.
+ * Copyright (c) 2012-2014, 2017-2019, Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -700,6 +700,7 @@ ksb_usb_probe(struct usb_interface *ifc, const struct usb_device_id *id)
case 0x9025:
case 0x9091:
case 0x901D:
+ case 0x901F:
/* 1-1 mapping between ksb and udev port which starts with 1 */
ksb_port_num = udev->portnum - 1;
dev_dbg(&udev->dev, "ifc_count: %u, port_num:%u\n", ifc_count,
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 2b81939fecd7..7bc2c9fef605 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1163,6 +1163,10 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1213, 0xff) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1214),
.driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) },
+ { USB_DEVICE(TELIT_VENDOR_ID, 0x1900), /* Telit LN940 (QMI) */
+ .driver_info = NCTRL(0) | RSVD(1) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1901, 0xff), /* Telit LN940 (MBIM) */
+ .driver_info = NCTRL(0) },
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff),
.driver_info = RSVD(1) },
@@ -1327,6 +1331,7 @@ static const struct usb_device_id option_ids[] = {
.driver_info = RSVD(4) },
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0414, 0xff, 0xff, 0xff) },
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0417, 0xff, 0xff, 0xff) },
+ { USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x0602, 0xff) }, /* GosunCn ZTE WeLink ME3630 (MBIM mode) */
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1008, 0xff, 0xff, 0xff),
.driver_info = RSVD(4) },
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1010, 0xff, 0xff, 0xff),
@@ -1530,6 +1535,7 @@ static const struct usb_device_id option_ids[] = {
.driver_info = RSVD(2) },
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1428, 0xff, 0xff, 0xff), /* Telewell TW-LTE 4G v2 */
.driver_info = RSVD(2) },
+ { USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x1476, 0xff) }, /* GosunCn ZTE WeLink ME3630 (ECM/NCM mode) */
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1533, 0xff, 0xff, 0xff) },
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1534, 0xff, 0xff, 0xff) },
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1535, 0xff, 0xff, 0xff) },
@@ -1757,6 +1763,7 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE_AND_INTERFACE_INFO(ALINK_VENDOR_ID, ALINK_PRODUCT_3GU, 0xff, 0xff, 0xff) },
{ USB_DEVICE(ALINK_VENDOR_ID, SIMCOM_PRODUCT_SIM7100E),
.driver_info = RSVD(5) | RSVD(6) },
+ { USB_DEVICE_INTERFACE_CLASS(0x1e0e, 0x9003, 0xff) }, /* Simcom SIM7500/SIM7600 MBIM mode */
{ USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S_X200),
.driver_info = NCTRL(0) | NCTRL(1) | RSVD(4) },
{ USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X220_X500D),
@@ -1941,7 +1948,18 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) },
{ USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_6802, 0xff, 0xff, 0xff) },
{ USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD300, 0xff, 0xff, 0xff) },
- { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x421d, 0xff, 0xff, 0xff) }, /* HP lt2523 (Novatel E371) */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x421d, 0xff, 0xff, 0xff) }, /* HP lt2523 (Novatel E371) */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x10) }, /* HP lt4132 (Huawei ME906s-158) */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x12) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x13) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x14) },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x1b) },
+ { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 */
+ .driver_info = RSVD(4) | RSVD(5) | RSVD(6) },
+ { USB_DEVICE(0x2cb7, 0x0104), /* Fibocom NL678 series */
+ .driver_info = RSVD(4) | RSVD(5) },
+ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */
+ .driver_info = RSVD(6) },
{ } /* Terminating entry */
};
MODULE_DEVICE_TABLE(usb, option_ids);
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 3da25ad267a2..4966768d3c98 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -86,9 +86,14 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(YCCABLE_VENDOR_ID, YCCABLE_PRODUCT_ID) },
{ USB_DEVICE(SUPERIAL_VENDOR_ID, SUPERIAL_PRODUCT_ID) },
{ USB_DEVICE(HP_VENDOR_ID, HP_LD220_PRODUCT_ID) },
+ { USB_DEVICE(HP_VENDOR_ID, HP_LD220TA_PRODUCT_ID) },
{ USB_DEVICE(HP_VENDOR_ID, HP_LD960_PRODUCT_ID) },
+ { USB_DEVICE(HP_VENDOR_ID, HP_LD960TA_PRODUCT_ID) },
{ USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) },
{ USB_DEVICE(HP_VENDOR_ID, HP_LCM960_PRODUCT_ID) },
+ { USB_DEVICE(HP_VENDOR_ID, HP_LM920_PRODUCT_ID) },
+ { USB_DEVICE(HP_VENDOR_ID, HP_LM940_PRODUCT_ID) },
+ { USB_DEVICE(HP_VENDOR_ID, HP_TD620_PRODUCT_ID) },
{ USB_DEVICE(CRESSI_VENDOR_ID, CRESSI_EDY_PRODUCT_ID) },
{ USB_DEVICE(ZEAGLE_VENDOR_ID, ZEAGLE_N2ITION3_PRODUCT_ID) },
{ USB_DEVICE(SONY_VENDOR_ID, SONY_QN3USB_PRODUCT_ID) },
diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h
index 123289085ee2..a84f0959ab34 100644
--- a/drivers/usb/serial/pl2303.h
+++ b/drivers/usb/serial/pl2303.h
@@ -123,10 +123,15 @@
/* Hewlett-Packard POS Pole Displays */
#define HP_VENDOR_ID 0x03f0
+#define HP_LM920_PRODUCT_ID 0x026b
+#define HP_TD620_PRODUCT_ID 0x0956
#define HP_LD960_PRODUCT_ID 0x0b39
#define HP_LCM220_PRODUCT_ID 0x3139
#define HP_LCM960_PRODUCT_ID 0x3239
#define HP_LD220_PRODUCT_ID 0x3524
+#define HP_LD220TA_PRODUCT_ID 0x4349
+#define HP_LD960TA_PRODUCT_ID 0x4439
+#define HP_LM940_PRODUCT_ID 0x5039
/* Cressi Edy (diving computer) PC interface */
#define CRESSI_VENDOR_ID 0x04b8
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 6c186b4df94a..b3344a77dcce 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -223,8 +223,12 @@ static int slave_configure(struct scsi_device *sdev)
if (!(us->fflags & US_FL_NEEDS_CAP16))
sdev->try_rc_10_first = 1;
- /* assume SPC3 or latter devices support sense size > 18 */
- if (sdev->scsi_level > SCSI_SPC_2)
+ /*
+ * assume SPC3 or latter devices support sense size > 18
+ * unless US_FL_BAD_SENSE quirk is specified.
+ */
+ if (sdev->scsi_level > SCSI_SPC_2 &&
+ !(us->fflags & US_FL_BAD_SENSE))
us->fflags |= US_FL_SANE_SENSE;
/* USB-IDE bridges tend to report SK = 0x04 (Non-recoverable
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 898215cad351..d92b974f0635 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -1393,6 +1393,18 @@ UNUSUAL_DEV( 0x0d49, 0x7310, 0x0000, 0x9999,
US_FL_SANE_SENSE),
/*
+ * Reported by Icenowy Zheng <icenowy@aosc.io>
+ * The SMI SM3350 USB-UFS bridge controller will enter a wrong state
+ * that do not process read/write command if a long sense is requested,
+ * so force to use 18-byte sense.
+ */
+UNUSUAL_DEV( 0x090c, 0x3350, 0x0000, 0xffff,
+ "SMI",
+ "SM3350 UFS-to-USB-Mass-Storage bridge",
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_BAD_SENSE ),
+
+/*
* Pete Zaitcev <zaitcev@yahoo.com>, bz#164688.
* The device blatantly ignores LUN and returns 1 in GetMaxLUN.
*/
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index 533eaf04f12f..40764ecad9ce 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -2,7 +2,6 @@ config VHOST_NET
tristate "Host kernel accelerator for virtio net"
depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP)
select VHOST
- select VHOST_RING
---help---
This kernel module can be loaded in host kernel to accelerate
guest networking with virtio_net. Not to be confused with virtio_net
@@ -15,17 +14,24 @@ config VHOST_SCSI
tristate "VHOST_SCSI TCM fabric driver"
depends on TARGET_CORE && EVENTFD && m
select VHOST
- select VHOST_RING
default n
---help---
Say M here to enable the vhost_scsi TCM fabric module
for use with virtio-scsi guests
-config VHOST_RING
- tristate
+config VHOST_VSOCK
+ tristate "vhost virtio-vsock driver"
+ depends on VSOCKETS && EVENTFD
+ select VIRTIO_VSOCKETS_COMMON
+ select VHOST
+ default n
---help---
- This option is selected by any driver which needs to access
- the host side of a virtio ring.
+ This kernel module can be loaded in the host kernel to provide AF_VSOCK
+ sockets for communicating with guests. The guests must have the
+ virtio_transport.ko driver loaded to use the virtio-vsock device.
+
+ To compile this driver as a module, choose M here: the module will be called
+ vhost_vsock.
config VHOST
tristate
diff --git a/drivers/vhost/Kconfig.vringh b/drivers/vhost/Kconfig.vringh
new file mode 100644
index 000000000000..6a4490c09d7f
--- /dev/null
+++ b/drivers/vhost/Kconfig.vringh
@@ -0,0 +1,5 @@
+config VHOST_RING
+ tristate
+ ---help---
+ This option is selected by any driver which needs to access
+ the host side of a virtio ring.
diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile
index e0441c34db1c..6b012b986b57 100644
--- a/drivers/vhost/Makefile
+++ b/drivers/vhost/Makefile
@@ -4,5 +4,9 @@ vhost_net-y := net.o
obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o
vhost_scsi-y := scsi.o
+obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o
+vhost_vsock-y := vsock.o
+
obj-$(CONFIG_VHOST_RING) += vringh.o
+
obj-$(CONFIG_VHOST) += vhost.o
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 645b2197930e..53cf130922f3 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -61,7 +61,8 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
enum {
VHOST_NET_FEATURES = VHOST_FEATURES |
(1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
- (1ULL << VIRTIO_NET_F_MRG_RXBUF)
+ (1ULL << VIRTIO_NET_F_MRG_RXBUF) |
+ (1ULL << VIRTIO_F_IOMMU_PLATFORM)
};
enum {
@@ -287,6 +288,69 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
rcu_read_unlock_bh();
}
+static inline unsigned long busy_clock(void)
+{
+ return local_clock() >> 10;
+}
+
+static bool vhost_can_busy_poll(struct vhost_dev *dev,
+ unsigned long endtime)
+{
+ return likely(!need_resched()) &&
+ likely(!time_after(busy_clock(), endtime)) &&
+ likely(!signal_pending(current)) &&
+ !vhost_has_work(dev);
+}
+
+static void vhost_net_disable_vq(struct vhost_net *n,
+ struct vhost_virtqueue *vq)
+{
+ struct vhost_net_virtqueue *nvq =
+ container_of(vq, struct vhost_net_virtqueue, vq);
+ struct vhost_poll *poll = n->poll + (nvq - n->vqs);
+ if (!vq->private_data)
+ return;
+ vhost_poll_stop(poll);
+}
+
+static int vhost_net_enable_vq(struct vhost_net *n,
+ struct vhost_virtqueue *vq)
+{
+ struct vhost_net_virtqueue *nvq =
+ container_of(vq, struct vhost_net_virtqueue, vq);
+ struct vhost_poll *poll = n->poll + (nvq - n->vqs);
+ struct socket *sock;
+
+ sock = vq->private_data;
+ if (!sock)
+ return 0;
+
+ return vhost_poll_start(poll, sock->file);
+}
+
+static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
+ struct vhost_virtqueue *vq,
+ struct iovec iov[], unsigned int iov_size,
+ unsigned int *out_num, unsigned int *in_num)
+{
+ unsigned long uninitialized_var(endtime);
+ int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+ out_num, in_num, NULL, NULL);
+
+ if (r == vq->num && vq->busyloop_timeout) {
+ preempt_disable();
+ endtime = busy_clock() + vq->busyloop_timeout;
+ while (vhost_can_busy_poll(vq->dev, endtime) &&
+ vhost_vq_avail_empty(vq->dev, vq))
+ cpu_relax_lowlatency();
+ preempt_enable();
+ r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+ out_num, in_num, NULL, NULL);
+ }
+
+ return r;
+}
+
/* Expects to be always run from workqueue - which acts as
* read-size critical section for our kind of RCU. */
static void handle_tx(struct vhost_net *net)
@@ -314,6 +378,9 @@ static void handle_tx(struct vhost_net *net)
if (!sock)
goto out;
+ if (!vq_iotlb_prefetch(vq))
+ goto out;
+
vhost_disable_notify(&net->dev, vq);
hdr_size = nvq->vhost_hlen;
@@ -331,10 +398,9 @@ static void handle_tx(struct vhost_net *net)
% UIO_MAXIOV == nvq->done_idx))
break;
- head = vhost_get_vq_desc(vq, vq->iov,
- ARRAY_SIZE(vq->iov),
- &out, &in,
- NULL, NULL);
+ head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
+ ARRAY_SIZE(vq->iov),
+ &out, &in);
/* On error, stop handling until the next kick. */
if (unlikely(head < 0))
break;
@@ -435,6 +501,43 @@ static int peek_head_len(struct sock *sk)
return len;
}
+static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
+{
+ struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
+ struct vhost_virtqueue *vq = &nvq->vq;
+ unsigned long uninitialized_var(endtime);
+ int len = peek_head_len(sk);
+
+ if (!len && vq->busyloop_timeout) {
+ /* Both tx vq and rx socket were polled here */
+ mutex_lock_nested(&vq->mutex, 1);
+ vhost_disable_notify(&net->dev, vq);
+
+ preempt_disable();
+ endtime = busy_clock() + vq->busyloop_timeout;
+
+ while (vhost_can_busy_poll(&net->dev, endtime) &&
+ skb_queue_empty(&sk->sk_receive_queue) &&
+ vhost_vq_avail_empty(&net->dev, vq))
+ cpu_relax_lowlatency();
+
+ preempt_enable();
+
+ if (!vhost_vq_avail_empty(&net->dev, vq))
+ vhost_poll_queue(&vq->poll);
+ else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+ vhost_disable_notify(&net->dev, vq);
+ vhost_poll_queue(&vq->poll);
+ }
+
+ mutex_unlock(&vq->mutex);
+
+ len = peek_head_len(sk);
+ }
+
+ return len;
+}
+
/* This is a multi-buffer version of vhost_get_desc, that works if
* vq has read descriptors only.
* @vq - the relevant virtqueue
@@ -540,11 +643,16 @@ static void handle_rx(struct vhost_net *net)
struct iov_iter fixup;
__virtio16 num_buffers;
- mutex_lock(&vq->mutex);
+ mutex_lock_nested(&vq->mutex, 0);
sock = vq->private_data;
if (!sock)
goto out;
+
+ if (!vq_iotlb_prefetch(vq))
+ goto out;
+
vhost_disable_notify(&net->dev, vq);
+ vhost_net_disable_vq(net, vq);
vhost_hlen = nvq->vhost_hlen;
sock_hlen = nvq->sock_hlen;
@@ -553,7 +661,7 @@ static void handle_rx(struct vhost_net *net)
vq->log : NULL;
mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
- while ((sock_len = peek_head_len(sock->sk))) {
+ while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) {
sock_len += sock_hlen;
vhost_len = sock_len + vhost_hlen;
headcount = get_rx_bufs(vq, vq->heads, vhost_len,
@@ -561,7 +669,7 @@ static void handle_rx(struct vhost_net *net)
likely(mergeable) ? UIO_MAXIOV : 1);
/* On error, stop handling until the next kick. */
if (unlikely(headcount < 0))
- break;
+ goto out;
/* On overrun, truncate and discard */
if (unlikely(headcount > UIO_MAXIOV)) {
iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
@@ -580,7 +688,7 @@ static void handle_rx(struct vhost_net *net)
}
/* Nothing new? Wait for eventfd to tell us
* they refilled. */
- break;
+ goto out;
}
/* We don't need to be notified again. */
iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len);
@@ -608,7 +716,7 @@ static void handle_rx(struct vhost_net *net)
&fixup) != sizeof(hdr)) {
vq_err(vq, "Unable to write vnet_hdr "
"at addr %p\n", vq->iov->iov_base);
- break;
+ goto out;
}
} else {
/* Header came from socket; we'll need to patch
@@ -624,7 +732,7 @@ static void handle_rx(struct vhost_net *net)
&fixup) != sizeof num_buffers) {
vq_err(vq, "Failed num_buffers write");
vhost_discard_vq_desc(vq, headcount);
- break;
+ goto out;
}
vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
headcount);
@@ -633,9 +741,10 @@ static void handle_rx(struct vhost_net *net)
total_len += vhost_len;
if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
vhost_poll_queue(&vq->poll);
- break;
+ goto out;
}
}
+ vhost_net_enable_vq(net, vq);
out:
mutex_unlock(&vq->mutex);
}
@@ -714,32 +823,6 @@ static int vhost_net_open(struct inode *inode, struct file *f)
return 0;
}
-static void vhost_net_disable_vq(struct vhost_net *n,
- struct vhost_virtqueue *vq)
-{
- struct vhost_net_virtqueue *nvq =
- container_of(vq, struct vhost_net_virtqueue, vq);
- struct vhost_poll *poll = n->poll + (nvq - n->vqs);
- if (!vq->private_data)
- return;
- vhost_poll_stop(poll);
-}
-
-static int vhost_net_enable_vq(struct vhost_net *n,
- struct vhost_virtqueue *vq)
-{
- struct vhost_net_virtqueue *nvq =
- container_of(vq, struct vhost_net_virtqueue, vq);
- struct vhost_poll *poll = n->poll + (nvq - n->vqs);
- struct socket *sock;
-
- sock = vq->private_data;
- if (!sock)
- return 0;
-
- return vhost_poll_start(poll, sock->file);
-}
-
static struct socket *vhost_net_stop_vq(struct vhost_net *n,
struct vhost_virtqueue *vq)
{
@@ -917,7 +1000,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
vhost_net_disable_vq(n, vq);
vq->private_data = sock;
- r = vhost_init_used(vq);
+ r = vhost_vq_init_access(vq);
if (r)
goto err_used;
r = vhost_net_enable_vq(n, vq);
@@ -969,21 +1052,21 @@ static long vhost_net_reset_owner(struct vhost_net *n)
struct socket *tx_sock = NULL;
struct socket *rx_sock = NULL;
long err;
- struct vhost_memory *memory;
+ struct vhost_umem *umem;
mutex_lock(&n->dev.mutex);
err = vhost_dev_check_owner(&n->dev);
if (err)
goto done;
- memory = vhost_dev_reset_owner_prepare();
- if (!memory) {
+ umem = vhost_dev_reset_owner_prepare();
+ if (!umem) {
err = -ENOMEM;
goto done;
}
vhost_net_stop(n, &tx_sock, &rx_sock);
vhost_net_flush(n);
vhost_dev_stop(&n->dev);
- vhost_dev_reset_owner(&n->dev, memory);
+ vhost_dev_reset_owner(&n->dev, umem);
vhost_net_vq_reset(n);
done:
mutex_unlock(&n->dev.mutex);
@@ -1014,10 +1097,14 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
}
mutex_lock(&n->dev.mutex);
if ((features & (1 << VHOST_F_LOG_ALL)) &&
- !vhost_log_access_ok(&n->dev)) {
- mutex_unlock(&n->dev.mutex);
- return -EFAULT;
+ !vhost_log_access_ok(&n->dev))
+ goto out_unlock;
+
+ if ((features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) {
+ if (vhost_init_device_iotlb(&n->dev, true))
+ goto out_unlock;
}
+
for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
mutex_lock(&n->vqs[i].vq.mutex);
n->vqs[i].vq.acked_features = features;
@@ -1027,6 +1114,10 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
}
mutex_unlock(&n->dev.mutex);
return 0;
+
+out_unlock:
+ mutex_unlock(&n->dev.mutex);
+ return -EFAULT;
}
static long vhost_net_set_owner(struct vhost_net *n)
@@ -1100,9 +1191,40 @@ static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl,
}
#endif
+static ssize_t vhost_net_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct file *file = iocb->ki_filp;
+ struct vhost_net *n = file->private_data;
+ struct vhost_dev *dev = &n->dev;
+ int noblock = file->f_flags & O_NONBLOCK;
+
+ return vhost_chr_read_iter(dev, to, noblock);
+}
+
+static ssize_t vhost_net_chr_write_iter(struct kiocb *iocb,
+ struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct vhost_net *n = file->private_data;
+ struct vhost_dev *dev = &n->dev;
+
+ return vhost_chr_write_iter(dev, from);
+}
+
+static unsigned int vhost_net_chr_poll(struct file *file, poll_table *wait)
+{
+ struct vhost_net *n = file->private_data;
+ struct vhost_dev *dev = &n->dev;
+
+ return vhost_chr_poll(file, dev, wait);
+}
+
static const struct file_operations vhost_net_fops = {
.owner = THIS_MODULE,
.release = vhost_net_release,
+ .read_iter = vhost_net_chr_read_iter,
+ .write_iter = vhost_net_chr_write_iter,
+ .poll = vhost_net_chr_poll,
.unlocked_ioctl = vhost_net_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = vhost_net_compat_ioctl,
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 8fc62a03637a..009315f006bf 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1277,7 +1277,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex);
vq->private_data = vs_tpg;
- vhost_init_used(vq);
+ vhost_vq_init_access(vq);
mutex_unlock(&vq->mutex);
}
ret = 0;
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index f2882ac98726..388eec4e1a90 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -196,7 +196,7 @@ static long vhost_test_run(struct vhost_test *n, int test)
oldpriv = vq->private_data;
vq->private_data = priv;
- r = vhost_init_used(&n->vqs[index]);
+ r = vhost_vq_init_access(&n->vqs[index]);
mutex_unlock(&vq->mutex);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index c54d388310f0..53b1b3cfce84 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -27,6 +27,7 @@
#include <linux/cgroup.h>
#include <linux/module.h>
#include <linux/sort.h>
+#include <linux/interval_tree_generic.h>
#include <linux/nospec.h>
#include "vhost.h"
@@ -35,6 +36,10 @@ static ushort max_mem_regions = 64;
module_param(max_mem_regions, ushort, 0444);
MODULE_PARM_DESC(max_mem_regions,
"Maximum number of memory regions in memory map. (default: 64)");
+static int max_iotlb_entries = 2048;
+module_param(max_iotlb_entries, int, 0444);
+MODULE_PARM_DESC(max_iotlb_entries,
+ "Maximum number of iotlb entries. (default: 2048)");
enum {
VHOST_MEMORY_F_LOG = 0x1,
@@ -43,12 +48,26 @@ enum {
#define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num])
#define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num])
+INTERVAL_TREE_DEFINE(struct vhost_umem_node,
+ rb, __u64, __subtree_last,
+ START, LAST, , vhost_umem_interval_tree);
+
#ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
-static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq)
+static void vhost_disable_cross_endian(struct vhost_virtqueue *vq)
{
vq->user_be = !virtio_legacy_is_little_endian();
}
+static void vhost_enable_cross_endian_big(struct vhost_virtqueue *vq)
+{
+ vq->user_be = true;
+}
+
+static void vhost_enable_cross_endian_little(struct vhost_virtqueue *vq)
+{
+ vq->user_be = false;
+}
+
static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp)
{
struct vhost_vring_state s;
@@ -63,7 +82,10 @@ static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp)
s.num != VHOST_VRING_BIG_ENDIAN)
return -EINVAL;
- vq->user_be = s.num;
+ if (s.num == VHOST_VRING_BIG_ENDIAN)
+ vhost_enable_cross_endian_big(vq);
+ else
+ vhost_enable_cross_endian_little(vq);
return 0;
}
@@ -92,7 +114,7 @@ static void vhost_init_is_le(struct vhost_virtqueue *vq)
vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be;
}
#else
-static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq)
+static void vhost_disable_cross_endian(struct vhost_virtqueue *vq)
{
}
@@ -109,11 +131,29 @@ static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx,
static void vhost_init_is_le(struct vhost_virtqueue *vq)
{
- if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
- vq->is_le = true;
+ vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1)
+ || virtio_legacy_is_little_endian();
}
#endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */
+static void vhost_reset_is_le(struct vhost_virtqueue *vq)
+{
+ vhost_init_is_le(vq);
+}
+
+struct vhost_flush_struct {
+ struct vhost_work work;
+ struct completion wait_event;
+};
+
+static void vhost_flush_work(struct vhost_work *work)
+{
+ struct vhost_flush_struct *s;
+
+ s = container_of(work, struct vhost_flush_struct, work);
+ complete(&s->wait_event);
+}
+
static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
poll_table *pt)
{
@@ -138,11 +178,9 @@ static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
{
- INIT_LIST_HEAD(&work->node);
+ clear_bit(VHOST_WORK_QUEUED, &work->flags);
work->fn = fn;
init_waitqueue_head(&work->done);
- work->flushing = 0;
- work->queue_seq = work->done_seq = 0;
}
EXPORT_SYMBOL_GPL(vhost_work_init);
@@ -193,31 +231,17 @@ void vhost_poll_stop(struct vhost_poll *poll)
}
EXPORT_SYMBOL_GPL(vhost_poll_stop);
-static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work,
- unsigned seq)
-{
- int left;
-
- spin_lock_irq(&dev->work_lock);
- left = seq - work->done_seq;
- spin_unlock_irq(&dev->work_lock);
- return left <= 0;
-}
-
void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
{
- unsigned seq;
- int flushing;
+ struct vhost_flush_struct flush;
+
+ if (dev->worker) {
+ init_completion(&flush.wait_event);
+ vhost_work_init(&flush.work, vhost_flush_work);
- spin_lock_irq(&dev->work_lock);
- seq = work->queue_seq;
- work->flushing++;
- spin_unlock_irq(&dev->work_lock);
- wait_event(work->done, vhost_work_seq_done(dev, work, seq));
- spin_lock_irq(&dev->work_lock);
- flushing = --work->flushing;
- spin_unlock_irq(&dev->work_lock);
- BUG_ON(flushing < 0);
+ vhost_work_queue(dev, &flush.work);
+ wait_for_completion(&flush.wait_event);
+ }
}
EXPORT_SYMBOL_GPL(vhost_work_flush);
@@ -231,20 +255,27 @@ EXPORT_SYMBOL_GPL(vhost_poll_flush);
void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
{
- unsigned long flags;
+ if (!dev->worker)
+ return;
- spin_lock_irqsave(&dev->work_lock, flags);
- if (list_empty(&work->node)) {
- list_add_tail(&work->node, &dev->work_list);
- work->queue_seq++;
- spin_unlock_irqrestore(&dev->work_lock, flags);
+ if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
+ /* We can only add the work to the list after we're
+ * sure it was not in the list.
+ */
+ smp_mb();
+ llist_add(&work->node, &dev->work_list);
wake_up_process(dev->worker);
- } else {
- spin_unlock_irqrestore(&dev->work_lock, flags);
}
}
EXPORT_SYMBOL_GPL(vhost_work_queue);
+/* A lockless hint for busy polling code to exit the loop */
+bool vhost_has_work(struct vhost_dev *dev)
+{
+ return !llist_empty(&dev->work_list);
+}
+EXPORT_SYMBOL_GPL(vhost_has_work);
+
void vhost_poll_queue(struct vhost_poll *poll)
{
vhost_work_queue(poll->dev, &poll->work);
@@ -275,16 +306,18 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->call_ctx = NULL;
vq->call = NULL;
vq->log_ctx = NULL;
- vq->memory = NULL;
- vq->is_le = virtio_legacy_is_little_endian();
- vhost_vq_reset_user_be(vq);
+ vhost_reset_is_le(vq);
+ vhost_disable_cross_endian(vq);
+ vq->busyloop_timeout = 0;
+ vq->umem = NULL;
+ vq->iotlb = NULL;
}
static int vhost_worker(void *data)
{
struct vhost_dev *dev = data;
- struct vhost_work *work = NULL;
- unsigned uninitialized_var(seq);
+ struct vhost_work *work, *work_next;
+ struct llist_node *node;
mm_segment_t oldfs = get_fs();
set_fs(USER_DS);
@@ -294,35 +327,25 @@ static int vhost_worker(void *data)
/* mb paired w/ kthread_stop */
set_current_state(TASK_INTERRUPTIBLE);
- spin_lock_irq(&dev->work_lock);
- if (work) {
- work->done_seq = seq;
- if (work->flushing)
- wake_up_all(&work->done);
- }
-
if (kthread_should_stop()) {
- spin_unlock_irq(&dev->work_lock);
__set_current_state(TASK_RUNNING);
break;
}
- if (!list_empty(&dev->work_list)) {
- work = list_first_entry(&dev->work_list,
- struct vhost_work, node);
- list_del_init(&work->node);
- seq = work->queue_seq;
- } else
- work = NULL;
- spin_unlock_irq(&dev->work_lock);
- if (work) {
+ node = llist_del_all(&dev->work_list);
+ if (!node)
+ schedule();
+
+ node = llist_reverse_order(node);
+ /* make sure flag is seen after deletion */
+ smp_wmb();
+ llist_for_each_entry_safe(work, work_next, node, node) {
+ clear_bit(VHOST_WORK_QUEUED, &work->flags);
__set_current_state(TASK_RUNNING);
work->fn(work);
if (need_resched())
schedule();
- } else
- schedule();
-
+ }
}
unuse_mm(dev->mm);
set_fs(oldfs);
@@ -381,11 +404,16 @@ void vhost_dev_init(struct vhost_dev *dev,
mutex_init(&dev->mutex);
dev->log_ctx = NULL;
dev->log_file = NULL;
- dev->memory = NULL;
+ dev->umem = NULL;
+ dev->iotlb = NULL;
dev->mm = NULL;
- spin_lock_init(&dev->work_lock);
- INIT_LIST_HEAD(&dev->work_list);
dev->worker = NULL;
+ init_llist_head(&dev->work_list);
+ init_waitqueue_head(&dev->wait);
+ INIT_LIST_HEAD(&dev->read_list);
+ INIT_LIST_HEAD(&dev->pending_list);
+ spin_lock_init(&dev->iotlb_lock);
+
for (i = 0; i < dev->nvqs; ++i) {
vq = dev->vqs[i];
@@ -486,27 +514,36 @@ err_mm:
}
EXPORT_SYMBOL_GPL(vhost_dev_set_owner);
-struct vhost_memory *vhost_dev_reset_owner_prepare(void)
+static void *vhost_kvzalloc(unsigned long size)
+{
+ void *n = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+
+ if (!n)
+ n = vzalloc(size);
+ return n;
+}
+
+struct vhost_umem *vhost_dev_reset_owner_prepare(void)
{
- return kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL);
+ return vhost_kvzalloc(sizeof(struct vhost_umem));
}
EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare);
/* Caller should have device mutex */
-void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory)
+void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_umem *umem)
{
int i;
vhost_dev_cleanup(dev, true);
/* Restore memory to default empty mapping. */
- memory->nregions = 0;
- dev->memory = memory;
+ INIT_LIST_HEAD(&umem->umem_list);
+ dev->umem = umem;
/* We don't need VQ locks below since vhost_dev_cleanup makes sure
* VQs aren't running.
*/
for (i = 0; i < dev->nvqs; ++i)
- dev->vqs[i]->memory = memory;
+ dev->vqs[i]->umem = umem;
}
EXPORT_SYMBOL_GPL(vhost_dev_reset_owner);
@@ -523,6 +560,47 @@ void vhost_dev_stop(struct vhost_dev *dev)
}
EXPORT_SYMBOL_GPL(vhost_dev_stop);
+static void vhost_umem_free(struct vhost_umem *umem,
+ struct vhost_umem_node *node)
+{
+ vhost_umem_interval_tree_remove(node, &umem->umem_tree);
+ list_del(&node->link);
+ kfree(node);
+ umem->numem--;
+}
+
+static void vhost_umem_clean(struct vhost_umem *umem)
+{
+ struct vhost_umem_node *node, *tmp;
+
+ if (!umem)
+ return;
+
+ list_for_each_entry_safe(node, tmp, &umem->umem_list, link)
+ vhost_umem_free(umem, node);
+
+ kvfree(umem);
+}
+
+static void vhost_clear_msg(struct vhost_dev *dev)
+{
+ struct vhost_msg_node *node, *n;
+
+ spin_lock(&dev->iotlb_lock);
+
+ list_for_each_entry_safe(node, n, &dev->read_list, node) {
+ list_del(&node->node);
+ kfree(node);
+ }
+
+ list_for_each_entry_safe(node, n, &dev->pending_list, node) {
+ list_del(&node->node);
+ kfree(node);
+ }
+
+ spin_unlock(&dev->iotlb_lock);
+}
+
/* Caller should have device mutex if and only if locked is set */
void vhost_dev_cleanup(struct vhost_dev *dev, bool locked)
{
@@ -549,9 +627,13 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked)
fput(dev->log_file);
dev->log_file = NULL;
/* No one will access memory at this point */
- kvfree(dev->memory);
- dev->memory = NULL;
- WARN_ON(!list_empty(&dev->work_list));
+ vhost_umem_clean(dev->umem);
+ dev->umem = NULL;
+ vhost_umem_clean(dev->iotlb);
+ dev->iotlb = NULL;
+ vhost_clear_msg(dev);
+ wake_up_interruptible_poll(&dev->wait, POLLIN | POLLRDNORM);
+ WARN_ON(!llist_empty(&dev->work_list));
if (dev->worker) {
kthread_stop(dev->worker);
dev->worker = NULL;
@@ -575,26 +657,34 @@ static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
(sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8);
}
+static bool vhost_overflow(u64 uaddr, u64 size)
+{
+ /* Make sure 64 bit math will not overflow. */
+ return uaddr > ULONG_MAX || size > ULONG_MAX || uaddr > ULONG_MAX - size;
+}
+
/* Caller should have vq mutex and device mutex. */
-static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem,
+static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem,
int log_all)
{
- int i;
+ struct vhost_umem_node *node;
- if (!mem)
+ if (!umem)
return 0;
- for (i = 0; i < mem->nregions; ++i) {
- struct vhost_memory_region *m = mem->regions + i;
- unsigned long a = m->userspace_addr;
- if (m->memory_size > ULONG_MAX)
+ list_for_each_entry(node, &umem->umem_list, link) {
+ unsigned long a = node->userspace_addr;
+
+ if (vhost_overflow(node->userspace_addr, node->size))
return 0;
- else if (!access_ok(VERIFY_WRITE, (void __user *)a,
- m->memory_size))
+
+
+ if (!access_ok(VERIFY_WRITE, (void __user *)a,
+ node->size))
return 0;
else if (log_all && !log_access_ok(log_base,
- m->guest_phys_addr,
- m->memory_size))
+ node->start,
+ node->size))
return 0;
}
return 1;
@@ -602,7 +692,7 @@ static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem,
/* Can we switch to this memory table? */
/* Caller should have device mutex but not vq mutex */
-static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
+static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
int log_all)
{
int i;
@@ -615,7 +705,8 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL);
/* If ring is inactive, will check when it's enabled. */
if (d->vqs[i]->private_data)
- ok = vq_memory_access_ok(d->vqs[i]->log_base, mem, log);
+ ok = vq_memory_access_ok(d->vqs[i]->log_base,
+ umem, log);
else
ok = 1;
mutex_unlock(&d->vqs[i]->mutex);
@@ -625,12 +716,388 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
return 1;
}
+static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
+ struct iovec iov[], int iov_size, int access);
+
+static int vhost_copy_to_user(struct vhost_virtqueue *vq, void *to,
+ const void *from, unsigned size)
+{
+ int ret;
+
+ if (!vq->iotlb)
+ return __copy_to_user(to, from, size);
+ else {
+ /* This function should be called after iotlb
+ * prefetch, which means we're sure that all vq
+ * could be access through iotlb. So -EAGAIN should
+ * not happen in this case.
+ */
+ /* TODO: more fast path */
+ struct iov_iter t;
+ ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov,
+ ARRAY_SIZE(vq->iotlb_iov),
+ VHOST_ACCESS_WO);
+ if (ret < 0)
+ goto out;
+ iov_iter_init(&t, WRITE, vq->iotlb_iov, ret, size);
+ ret = copy_to_iter(from, size, &t);
+ if (ret == size)
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
+ void *from, unsigned size)
+{
+ int ret;
+
+ if (!vq->iotlb)
+ return __copy_from_user(to, from, size);
+ else {
+ /* This function should be called after iotlb
+ * prefetch, which means we're sure that vq
+ * could be access through iotlb. So -EAGAIN should
+ * not happen in this case.
+ */
+ /* TODO: more fast path */
+ struct iov_iter f;
+ ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov,
+ ARRAY_SIZE(vq->iotlb_iov),
+ VHOST_ACCESS_RO);
+ if (ret < 0) {
+ vq_err(vq, "IOTLB translation failure: uaddr "
+ "%p size 0x%llx\n", from,
+ (unsigned long long) size);
+ goto out;
+ }
+ iov_iter_init(&f, READ, vq->iotlb_iov, ret, size);
+ ret = copy_from_iter(to, size, &f);
+ if (ret == size)
+ ret = 0;
+ }
+
+out:
+ return ret;
+}
+
+static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
+ void *addr, unsigned size)
+{
+ int ret;
+
+ /* This function should be called after iotlb
+ * prefetch, which means we're sure that vq
+ * could be access through iotlb. So -EAGAIN should
+ * not happen in this case.
+ */
+ /* TODO: more fast path */
+ ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov,
+ ARRAY_SIZE(vq->iotlb_iov),
+ VHOST_ACCESS_RO);
+ if (ret < 0) {
+ vq_err(vq, "IOTLB translation failure: uaddr "
+ "%p size 0x%llx\n", addr,
+ (unsigned long long) size);
+ return NULL;
+ }
+
+ if (ret != 1 || vq->iotlb_iov[0].iov_len != size) {
+ vq_err(vq, "Non atomic userspace memory access: uaddr "
+ "%p size 0x%llx\n", addr,
+ (unsigned long long) size);
+ return NULL;
+ }
+
+ return vq->iotlb_iov[0].iov_base;
+}
+
+#define vhost_put_user(vq, x, ptr) \
+({ \
+ int ret = -EFAULT; \
+ if (!vq->iotlb) { \
+ ret = __put_user(x, ptr); \
+ } else { \
+ __typeof__(ptr) to = \
+ (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \
+ if (to != NULL) \
+ ret = __put_user(x, to); \
+ else \
+ ret = -EFAULT; \
+ } \
+ ret; \
+})
+
+#define vhost_get_user(vq, x, ptr) \
+({ \
+ int ret; \
+ if (!vq->iotlb) { \
+ ret = __get_user(x, ptr); \
+ } else { \
+ __typeof__(ptr) from = \
+ (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \
+ if (from != NULL) \
+ ret = __get_user(x, from); \
+ else \
+ ret = -EFAULT; \
+ } \
+ ret; \
+})
+
+static void vhost_dev_lock_vqs(struct vhost_dev *d)
+{
+ int i = 0;
+ for (i = 0; i < d->nvqs; ++i)
+ mutex_lock_nested(&d->vqs[i]->mutex, i);
+}
+
+static void vhost_dev_unlock_vqs(struct vhost_dev *d)
+{
+ int i = 0;
+ for (i = 0; i < d->nvqs; ++i)
+ mutex_unlock(&d->vqs[i]->mutex);
+}
+
+static int vhost_new_umem_range(struct vhost_umem *umem,
+ u64 start, u64 size, u64 end,
+ u64 userspace_addr, int perm)
+{
+ struct vhost_umem_node *tmp, *node = kmalloc(sizeof(*node), GFP_ATOMIC);
+
+ if (!node)
+ return -ENOMEM;
+
+ if (umem->numem == max_iotlb_entries) {
+ tmp = list_first_entry(&umem->umem_list, typeof(*tmp), link);
+ vhost_umem_free(umem, tmp);
+ }
+
+ node->start = start;
+ node->size = size;
+ node->last = end;
+ node->userspace_addr = userspace_addr;
+ node->perm = perm;
+ INIT_LIST_HEAD(&node->link);
+ list_add_tail(&node->link, &umem->umem_list);
+ vhost_umem_interval_tree_insert(node, &umem->umem_tree);
+ umem->numem++;
+
+ return 0;
+}
+
+static void vhost_del_umem_range(struct vhost_umem *umem,
+ u64 start, u64 end)
+{
+ struct vhost_umem_node *node;
+
+ while ((node = vhost_umem_interval_tree_iter_first(&umem->umem_tree,
+ start, end)))
+ vhost_umem_free(umem, node);
+}
+
+static void vhost_iotlb_notify_vq(struct vhost_dev *d,
+ struct vhost_iotlb_msg *msg)
+{
+ struct vhost_msg_node *node, *n;
+
+ spin_lock(&d->iotlb_lock);
+
+ list_for_each_entry_safe(node, n, &d->pending_list, node) {
+ struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb;
+ if (msg->iova <= vq_msg->iova &&
+ msg->iova + msg->size - 1 >= vq_msg->iova &&
+ vq_msg->type == VHOST_IOTLB_MISS) {
+ vhost_poll_queue(&node->vq->poll);
+ list_del(&node->node);
+ kfree(node);
+ }
+ }
+
+ spin_unlock(&d->iotlb_lock);
+}
+
+static int umem_access_ok(u64 uaddr, u64 size, int access)
+{
+ unsigned long a = uaddr;
+
+ /* Make sure 64 bit math will not overflow. */
+ if (vhost_overflow(uaddr, size))
+ return -EFAULT;
+
+ if ((access & VHOST_ACCESS_RO) &&
+ !access_ok(VERIFY_READ, (void __user *)a, size))
+ return -EFAULT;
+ if ((access & VHOST_ACCESS_WO) &&
+ !access_ok(VERIFY_WRITE, (void __user *)a, size))
+ return -EFAULT;
+ return 0;
+}
+
+int vhost_process_iotlb_msg(struct vhost_dev *dev,
+ struct vhost_iotlb_msg *msg)
+{
+ int ret = 0;
+
+ mutex_lock(&dev->mutex);
+ vhost_dev_lock_vqs(dev);
+ switch (msg->type) {
+ case VHOST_IOTLB_UPDATE:
+ if (!dev->iotlb) {
+ ret = -EFAULT;
+ break;
+ }
+ if (umem_access_ok(msg->uaddr, msg->size, msg->perm)) {
+ ret = -EFAULT;
+ break;
+ }
+ if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size,
+ msg->iova + msg->size - 1,
+ msg->uaddr, msg->perm)) {
+ ret = -ENOMEM;
+ break;
+ }
+ vhost_iotlb_notify_vq(dev, msg);
+ break;
+ case VHOST_IOTLB_INVALIDATE:
+ vhost_del_umem_range(dev->iotlb, msg->iova,
+ msg->iova + msg->size - 1);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ vhost_dev_unlock_vqs(dev);
+ mutex_unlock(&dev->mutex);
+
+ return ret;
+}
+ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
+ struct iov_iter *from)
+{
+ struct vhost_msg_node node;
+ unsigned size = sizeof(struct vhost_msg);
+ size_t ret;
+ int err;
+
+ if (iov_iter_count(from) < size)
+ return 0;
+ ret = copy_from_iter(&node.msg, size, from);
+ if (ret != size)
+ goto done;
+
+ switch (node.msg.type) {
+ case VHOST_IOTLB_MSG:
+ err = vhost_process_iotlb_msg(dev, &node.msg.iotlb);
+ if (err)
+ ret = err;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+done:
+ return ret;
+}
+EXPORT_SYMBOL(vhost_chr_write_iter);
+
+unsigned int vhost_chr_poll(struct file *file, struct vhost_dev *dev,
+ poll_table *wait)
+{
+ unsigned int mask = 0;
+
+ poll_wait(file, &dev->wait, wait);
+
+ if (!list_empty(&dev->read_list))
+ mask |= POLLIN | POLLRDNORM;
+
+ return mask;
+}
+EXPORT_SYMBOL(vhost_chr_poll);
+
+ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to,
+ int noblock)
+{
+ DEFINE_WAIT(wait);
+ struct vhost_msg_node *node;
+ ssize_t ret = 0;
+ unsigned size = sizeof(struct vhost_msg);
+
+ if (iov_iter_count(to) < size)
+ return 0;
+
+ while (1) {
+ if (!noblock)
+ prepare_to_wait(&dev->wait, &wait,
+ TASK_INTERRUPTIBLE);
+
+ node = vhost_dequeue_msg(dev, &dev->read_list);
+ if (node)
+ break;
+ if (noblock) {
+ ret = -EAGAIN;
+ break;
+ }
+ if (signal_pending(current)) {
+ ret = -ERESTARTSYS;
+ break;
+ }
+ if (!dev->iotlb) {
+ ret = -EBADFD;
+ break;
+ }
+
+ schedule();
+ }
+
+ if (!noblock)
+ finish_wait(&dev->wait, &wait);
+
+ if (node) {
+ ret = copy_to_iter(&node->msg, size, to);
+
+ if (ret != size || node->msg.type != VHOST_IOTLB_MISS) {
+ kfree(node);
+ return ret;
+ }
+
+ vhost_enqueue_msg(dev, &dev->pending_list, node);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(vhost_chr_read_iter);
+
+static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access)
+{
+ struct vhost_dev *dev = vq->dev;
+ struct vhost_msg_node *node;
+ struct vhost_iotlb_msg *msg;
+
+ node = vhost_new_msg(vq, VHOST_IOTLB_MISS);
+ if (!node)
+ return -ENOMEM;
+
+ msg = &node->msg.iotlb;
+ msg->type = VHOST_IOTLB_MISS;
+ msg->iova = iova;
+ msg->perm = access;
+
+ vhost_enqueue_msg(dev, &dev->read_list, node);
+
+ return 0;
+}
+
static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
struct vring_desc __user *desc,
struct vring_avail __user *avail,
struct vring_used __user *used)
+
{
size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+
return access_ok(VERIFY_READ, desc, num * sizeof *desc) &&
access_ok(VERIFY_READ, avail,
sizeof *avail + num * sizeof *avail->ring + s) &&
@@ -638,11 +1105,59 @@ static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
sizeof *used + num * sizeof *used->ring + s);
}
+static int iotlb_access_ok(struct vhost_virtqueue *vq,
+ int access, u64 addr, u64 len)
+{
+ const struct vhost_umem_node *node;
+ struct vhost_umem *umem = vq->iotlb;
+ u64 s = 0, size;
+
+ while (len > s) {
+ node = vhost_umem_interval_tree_iter_first(&umem->umem_tree,
+ addr,
+ addr + len - 1);
+ if (node == NULL || node->start > addr) {
+ vhost_iotlb_miss(vq, addr, access);
+ return false;
+ } else if (!(node->perm & access)) {
+ /* Report the possible access violation by
+ * request another translation from userspace.
+ */
+ return false;
+ }
+
+ size = node->size - addr + node->start;
+ s += size;
+ addr += size;
+ }
+
+ return true;
+}
+
+int vq_iotlb_prefetch(struct vhost_virtqueue *vq)
+{
+ size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+ unsigned int num = vq->num;
+
+ if (!vq->iotlb)
+ return 1;
+
+ return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
+ num * sizeof *vq->desc) &&
+ iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail,
+ sizeof *vq->avail +
+ num * sizeof *vq->avail->ring + s) &&
+ iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used,
+ sizeof *vq->used +
+ num * sizeof *vq->used->ring + s);
+}
+EXPORT_SYMBOL_GPL(vq_iotlb_prefetch);
+
/* Can we log writes? */
/* Caller should have device mutex but not vq mutex */
int vhost_log_access_ok(struct vhost_dev *dev)
{
- return memory_access_ok(dev, dev->memory, 1);
+ return memory_access_ok(dev, dev->umem, 1);
}
EXPORT_SYMBOL_GPL(vhost_log_access_ok);
@@ -653,7 +1168,7 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq,
{
size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
- return vq_memory_access_ok(log_base, vq->memory,
+ return vq_memory_access_ok(log_base, vq->umem,
vhost_has_feature(vq, VHOST_F_LOG_ALL)) &&
(!vq->log_used || log_access_ok(log_base, vq->log_addr,
sizeof *vq->used +
@@ -664,33 +1179,36 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq,
/* Caller should have vq mutex and device mutex */
int vhost_vq_access_ok(struct vhost_virtqueue *vq)
{
- return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used) &&
- vq_log_access_ok(vq, vq->log_base);
-}
-EXPORT_SYMBOL_GPL(vhost_vq_access_ok);
+ if (!vq_log_access_ok(vq, vq->log_base))
+ return 0;
-static int vhost_memory_reg_sort_cmp(const void *p1, const void *p2)
-{
- const struct vhost_memory_region *r1 = p1, *r2 = p2;
- if (r1->guest_phys_addr < r2->guest_phys_addr)
+ /* Access validation occurs at prefetch time with IOTLB */
+ if (vq->iotlb)
return 1;
- if (r1->guest_phys_addr > r2->guest_phys_addr)
- return -1;
- return 0;
+
+ return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used);
}
+EXPORT_SYMBOL_GPL(vhost_vq_access_ok);
-static void *vhost_kvzalloc(unsigned long size)
+static struct vhost_umem *vhost_umem_alloc(void)
{
- void *n = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+ struct vhost_umem *umem = vhost_kvzalloc(sizeof(*umem));
- if (!n)
- n = vzalloc(size);
- return n;
+ if (!umem)
+ return NULL;
+
+ umem->umem_tree = RB_ROOT;
+ umem->numem = 0;
+ INIT_LIST_HEAD(&umem->umem_list);
+
+ return umem;
}
static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
{
- struct vhost_memory mem, *newmem, *oldmem;
+ struct vhost_memory mem, *newmem;
+ struct vhost_memory_region *region;
+ struct vhost_umem *newumem, *oldumem;
unsigned long size = offsetof(struct vhost_memory, regions);
int i;
@@ -710,24 +1228,47 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
kvfree(newmem);
return -EFAULT;
}
- sort(newmem->regions, newmem->nregions, sizeof(*newmem->regions),
- vhost_memory_reg_sort_cmp, NULL);
- if (!memory_access_ok(d, newmem, 0)) {
+ newumem = vhost_umem_alloc();
+ if (!newumem) {
kvfree(newmem);
- return -EFAULT;
+ return -ENOMEM;
+ }
+
+ for (region = newmem->regions;
+ region < newmem->regions + mem.nregions;
+ region++) {
+ if (vhost_new_umem_range(newumem,
+ region->guest_phys_addr,
+ region->memory_size,
+ region->guest_phys_addr +
+ region->memory_size - 1,
+ region->userspace_addr,
+ VHOST_ACCESS_RW))
+ goto err;
}
- oldmem = d->memory;
- d->memory = newmem;
+
+ if (!memory_access_ok(d, newumem, 0))
+ goto err;
+
+ oldumem = d->umem;
+ d->umem = newumem;
/* All memory accesses are done under some VQ mutex. */
for (i = 0; i < d->nvqs; ++i) {
mutex_lock(&d->vqs[i]->mutex);
- d->vqs[i]->memory = newmem;
+ d->vqs[i]->umem = newumem;
mutex_unlock(&d->vqs[i]->mutex);
}
- kvfree(oldmem);
+
+ kvfree(newmem);
+ vhost_umem_clean(oldumem);
return 0;
+
+err:
+ vhost_umem_clean(newumem);
+ kvfree(newmem);
+ return -EFAULT;
}
long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
@@ -913,6 +1454,19 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
case VHOST_GET_VRING_ENDIAN:
r = vhost_get_vring_endian(vq, idx, argp);
break;
+ case VHOST_SET_VRING_BUSYLOOP_TIMEOUT:
+ if (copy_from_user(&s, argp, sizeof(s))) {
+ r = -EFAULT;
+ break;
+ }
+ vq->busyloop_timeout = s.num;
+ break;
+ case VHOST_GET_VRING_BUSYLOOP_TIMEOUT:
+ s.index = idx;
+ s.num = vq->busyloop_timeout;
+ if (copy_to_user(argp, &s, sizeof(s)))
+ r = -EFAULT;
+ break;
default:
r = -ENOIOCTLCMD;
}
@@ -936,6 +1490,30 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
}
EXPORT_SYMBOL_GPL(vhost_vring_ioctl);
+int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled)
+{
+ struct vhost_umem *niotlb, *oiotlb;
+ int i;
+
+ niotlb = vhost_umem_alloc();
+ if (!niotlb)
+ return -ENOMEM;
+
+ oiotlb = d->iotlb;
+ d->iotlb = niotlb;
+
+ for (i = 0; i < d->nvqs; ++i) {
+ mutex_lock(&d->vqs[i]->mutex);
+ d->vqs[i]->iotlb = niotlb;
+ mutex_unlock(&d->vqs[i]->mutex);
+ }
+
+ vhost_umem_clean(oiotlb);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vhost_init_device_iotlb);
+
/* Caller must have device mutex */
long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
{
@@ -1018,28 +1596,6 @@ done:
}
EXPORT_SYMBOL_GPL(vhost_dev_ioctl);
-static const struct vhost_memory_region *find_region(struct vhost_memory *mem,
- __u64 addr, __u32 len)
-{
- const struct vhost_memory_region *reg;
- int start = 0, end = mem->nregions;
-
- while (start < end) {
- int slot = start + (end - start) / 2;
- reg = mem->regions + slot;
- if (addr >= reg->guest_phys_addr)
- end = slot;
- else
- start = slot + 1;
- }
-
- reg = mem->regions + start;
- if (addr >= reg->guest_phys_addr &&
- reg->guest_phys_addr + reg->memory_size > addr)
- return reg;
- return NULL;
-}
-
/* TODO: This is really inefficient. We need something like get_user()
* (instruction directly accesses the data, with an exception table entry
* returning -EFAULT). See Documentation/x86/exception-tables.txt.
@@ -1118,7 +1674,8 @@ EXPORT_SYMBOL_GPL(vhost_log_write);
static int vhost_update_used_flags(struct vhost_virtqueue *vq)
{
void __user *used;
- if (__put_user(cpu_to_vhost16(vq, vq->used_flags), &vq->used->flags) < 0)
+ if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags),
+ &vq->used->flags) < 0)
return -EFAULT;
if (unlikely(vq->log_used)) {
/* Make sure the flag is seen before log. */
@@ -1136,7 +1693,8 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq)
static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event)
{
- if (__put_user(cpu_to_vhost16(vq, vq->avail_idx), vhost_avail_event(vq)))
+ if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
+ vhost_avail_event(vq)))
return -EFAULT;
if (unlikely(vq->log_used)) {
void __user *used;
@@ -1153,62 +1711,84 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event)
return 0;
}
-int vhost_init_used(struct vhost_virtqueue *vq)
+int vhost_vq_init_access(struct vhost_virtqueue *vq)
{
__virtio16 last_used_idx;
int r;
- if (!vq->private_data) {
- vq->is_le = virtio_legacy_is_little_endian();
+ bool is_le = vq->is_le;
+
+ if (!vq->private_data)
return 0;
- }
vhost_init_is_le(vq);
r = vhost_update_used_flags(vq);
if (r)
- return r;
+ goto err;
vq->signalled_used_valid = false;
- if (!access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx))
- return -EFAULT;
- r = __get_user(last_used_idx, &vq->used->idx);
- if (r)
- return r;
+ if (!vq->iotlb &&
+ !access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx)) {
+ r = -EFAULT;
+ goto err;
+ }
+ r = vhost_get_user(vq, last_used_idx, &vq->used->idx);
+ if (r) {
+ vq_err(vq, "Can't access used idx at %p\n",
+ &vq->used->idx);
+ goto err;
+ }
vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx);
return 0;
+
+err:
+ vq->is_le = is_le;
+ return r;
}
-EXPORT_SYMBOL_GPL(vhost_init_used);
+EXPORT_SYMBOL_GPL(vhost_vq_init_access);
static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
- struct iovec iov[], int iov_size)
+ struct iovec iov[], int iov_size, int access)
{
- const struct vhost_memory_region *reg;
- struct vhost_memory *mem;
+ const struct vhost_umem_node *node;
+ struct vhost_dev *dev = vq->dev;
+ struct vhost_umem *umem = dev->iotlb ? dev->iotlb : dev->umem;
struct iovec *_iov;
u64 s = 0;
int ret = 0;
- mem = vq->memory;
while ((u64)len > s) {
u64 size;
if (unlikely(ret >= iov_size)) {
ret = -ENOBUFS;
break;
}
- reg = find_region(mem, addr, len);
- if (unlikely(!reg)) {
- ret = -EFAULT;
+
+ node = vhost_umem_interval_tree_iter_first(&umem->umem_tree,
+ addr, addr + len - 1);
+ if (node == NULL || node->start > addr) {
+ if (umem != dev->iotlb) {
+ ret = -EFAULT;
+ break;
+ }
+ ret = -EAGAIN;
+ break;
+ } else if (!(node->perm & access)) {
+ ret = -EPERM;
break;
}
+
_iov = iov + ret;
- size = reg->memory_size - addr + reg->guest_phys_addr;
+ size = node->size - addr + node->start;
_iov->iov_len = min((u64)len - s, size);
_iov->iov_base = (void __user *)(unsigned long)
- (reg->userspace_addr + addr - reg->guest_phys_addr);
+ (node->userspace_addr + addr - node->start);
s += size;
addr += size;
++ret;
}
+ if (ret == -EAGAIN)
+ vhost_iotlb_miss(vq, addr, access);
return ret;
}
@@ -1243,7 +1823,7 @@ static int get_indirect(struct vhost_virtqueue *vq,
unsigned int i = 0, count, found = 0;
u32 len = vhost32_to_cpu(vq, indirect->len);
struct iov_iter from;
- int ret;
+ int ret, access;
/* Sanity check */
if (unlikely(len % sizeof desc)) {
@@ -1255,9 +1835,10 @@ static int get_indirect(struct vhost_virtqueue *vq,
}
ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, vq->indirect,
- UIO_MAXIOV);
+ UIO_MAXIOV, VHOST_ACCESS_RO);
if (unlikely(ret < 0)) {
- vq_err(vq, "Translation failure %d in indirect.\n", ret);
+ if (ret != -EAGAIN)
+ vq_err(vq, "Translation failure %d in indirect.\n", ret);
return ret;
}
iov_iter_init(&from, READ, vq->indirect, ret, len);
@@ -1295,16 +1876,22 @@ static int get_indirect(struct vhost_virtqueue *vq,
return -EINVAL;
}
+ if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE))
+ access = VHOST_ACCESS_WO;
+ else
+ access = VHOST_ACCESS_RO;
+
ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr),
vhost32_to_cpu(vq, desc.len), iov + iov_count,
- iov_size - iov_count);
+ iov_size - iov_count, access);
if (unlikely(ret < 0)) {
- vq_err(vq, "Translation failure %d indirect idx %d\n",
- ret, i);
+ if (ret != -EAGAIN)
+ vq_err(vq, "Translation failure %d indirect idx %d\n",
+ ret, i);
return ret;
}
/* If this is an input descriptor, increment that count. */
- if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) {
+ if (access == VHOST_ACCESS_WO) {
*in_num += ret;
if (unlikely(log)) {
log[*log_num].addr = vhost64_to_cpu(vq, desc.addr);
@@ -1343,11 +1930,11 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
u16 last_avail_idx;
__virtio16 avail_idx;
__virtio16 ring_head;
- int ret;
+ int ret, access;
/* Check it isn't doing very strange things with descriptor numbers. */
last_avail_idx = vq->last_avail_idx;
- if (unlikely(__get_user(avail_idx, &vq->avail->idx))) {
+ if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) {
vq_err(vq, "Failed to access avail idx at %p\n",
&vq->avail->idx);
return -EFAULT;
@@ -1369,8 +1956,8 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
/* Grab the next descriptor number they're advertising, and increment
* the index we've seen. */
- if (unlikely(__get_user(ring_head,
- &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) {
+ if (unlikely(vhost_get_user(vq, ring_head,
+ &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) {
vq_err(vq, "Failed to read head: idx %d address %p\n",
last_avail_idx,
&vq->avail->ring[last_avail_idx % vq->num]);
@@ -1405,7 +1992,8 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
i, vq->num, head);
return -EINVAL;
}
- ret = __copy_from_user(&desc, vq->desc + i, sizeof desc);
+ ret = vhost_copy_from_user(vq, &desc, vq->desc + i,
+ sizeof desc);
if (unlikely(ret)) {
vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
i, vq->desc + i);
@@ -1416,22 +2004,28 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
out_num, in_num,
log, log_num, &desc);
if (unlikely(ret < 0)) {
- vq_err(vq, "Failure detected "
- "in indirect descriptor at idx %d\n", i);
+ if (ret != -EAGAIN)
+ vq_err(vq, "Failure detected "
+ "in indirect descriptor at idx %d\n", i);
return ret;
}
continue;
}
+ if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE))
+ access = VHOST_ACCESS_WO;
+ else
+ access = VHOST_ACCESS_RO;
ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr),
vhost32_to_cpu(vq, desc.len), iov + iov_count,
- iov_size - iov_count);
+ iov_size - iov_count, access);
if (unlikely(ret < 0)) {
- vq_err(vq, "Translation failure %d descriptor idx %d\n",
- ret, i);
+ if (ret != -EAGAIN)
+ vq_err(vq, "Translation failure %d descriptor idx %d\n",
+ ret, i);
return ret;
}
- if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) {
+ if (access == VHOST_ACCESS_WO) {
/* If this is an input descriptor,
* increment that count. */
*in_num += ret;
@@ -1493,15 +2087,15 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
start = vq->last_used_idx & (vq->num - 1);
used = vq->used->ring + start;
if (count == 1) {
- if (__put_user(heads[0].id, &used->id)) {
+ if (vhost_put_user(vq, heads[0].id, &used->id)) {
vq_err(vq, "Failed to write used id");
return -EFAULT;
}
- if (__put_user(heads[0].len, &used->len)) {
+ if (vhost_put_user(vq, heads[0].len, &used->len)) {
vq_err(vq, "Failed to write used len");
return -EFAULT;
}
- } else if (__copy_to_user(used, heads, count * sizeof *used)) {
+ } else if (vhost_copy_to_user(vq, used, heads, count * sizeof *used)) {
vq_err(vq, "Failed to write used");
return -EFAULT;
}
@@ -1545,11 +2139,14 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
/* Make sure buffer is written before we update index. */
smp_wmb();
- if (__put_user(cpu_to_vhost16(vq, vq->last_used_idx), &vq->used->idx)) {
+ if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
+ &vq->used->idx)) {
vq_err(vq, "Failed to increment used idx");
return -EFAULT;
}
if (unlikely(vq->log_used)) {
+ /* Make sure used idx is seen before log. */
+ smp_wmb();
/* Log used index update. */
log_write(vq->log_base,
vq->log_addr + offsetof(struct vring_used, idx),
@@ -1577,7 +2174,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
__virtio16 flags;
- if (__get_user(flags, &vq->avail->flags)) {
+ if (vhost_get_user(vq, flags, &vq->avail->flags)) {
vq_err(vq, "Failed to get flags");
return true;
}
@@ -1591,7 +2188,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
if (unlikely(!v))
return true;
- if (__get_user(event, vhost_used_event(vq))) {
+ if (vhost_get_user(vq, event, vhost_used_event(vq))) {
vq_err(vq, "Failed to get used event idx");
return true;
}
@@ -1627,6 +2224,20 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev,
}
EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
+/* return true if we're sure that avaiable ring is empty */
+bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
+{
+ __virtio16 avail_idx;
+ int r;
+
+ r = vhost_get_user(vq, avail_idx, &vq->avail->idx);
+ if (r)
+ return false;
+
+ return vhost16_to_cpu(vq, avail_idx) == vq->avail_idx;
+}
+EXPORT_SYMBOL_GPL(vhost_vq_avail_empty);
+
/* OK, now we need to know about added descriptors. */
bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
@@ -1654,7 +2265,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
/* They could have slipped one in as we were doing that: make
* sure it's written, then check again. */
smp_mb();
- r = __get_user(avail_idx, &vq->avail->idx);
+ r = vhost_get_user(vq, avail_idx, &vq->avail->idx);
if (r) {
vq_err(vq, "Failed to check avail idx at %p: %d\n",
&vq->avail->idx, r);
@@ -1682,6 +2293,50 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
}
EXPORT_SYMBOL_GPL(vhost_disable_notify);
+/* Create a new message. */
+struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type)
+{
+ struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL);
+ if (!node)
+ return NULL;
+
+ /* Make sure all padding within the structure is initialized. */
+ memset(&node->msg, 0, sizeof node->msg);
+ node->vq = vq;
+ node->msg.type = type;
+ return node;
+}
+EXPORT_SYMBOL_GPL(vhost_new_msg);
+
+void vhost_enqueue_msg(struct vhost_dev *dev, struct list_head *head,
+ struct vhost_msg_node *node)
+{
+ spin_lock(&dev->iotlb_lock);
+ list_add_tail(&node->node, head);
+ spin_unlock(&dev->iotlb_lock);
+
+ wake_up_interruptible_poll(&dev->wait, POLLIN | POLLRDNORM);
+}
+EXPORT_SYMBOL_GPL(vhost_enqueue_msg);
+
+struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev,
+ struct list_head *head)
+{
+ struct vhost_msg_node *node = NULL;
+
+ spin_lock(&dev->iotlb_lock);
+ if (!list_empty(head)) {
+ node = list_first_entry(head, struct vhost_msg_node,
+ node);
+ list_del(&node->node);
+ }
+ spin_unlock(&dev->iotlb_lock);
+
+ return node;
+}
+EXPORT_SYMBOL_GPL(vhost_dequeue_msg);
+
+
static int __init vhost_init(void)
{
return 0;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index d3f767448a72..78f3c5fc02e4 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -15,13 +15,15 @@
struct vhost_work;
typedef void (*vhost_work_fn_t)(struct vhost_work *work);
+#define VHOST_WORK_QUEUED 1
struct vhost_work {
- struct list_head node;
+ struct llist_node node;
vhost_work_fn_t fn;
wait_queue_head_t done;
int flushing;
unsigned queue_seq;
unsigned done_seq;
+ unsigned long flags;
};
/* Poll a file (eventfd or socket) */
@@ -37,6 +39,7 @@ struct vhost_poll {
void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work);
+bool vhost_has_work(struct vhost_dev *dev);
void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
unsigned long mask, struct vhost_dev *dev);
@@ -52,6 +55,27 @@ struct vhost_log {
u64 len;
};
+#define START(node) ((node)->start)
+#define LAST(node) ((node)->last)
+
+struct vhost_umem_node {
+ struct rb_node rb;
+ struct list_head link;
+ __u64 start;
+ __u64 last;
+ __u64 size;
+ __u64 userspace_addr;
+ __u32 perm;
+ __u32 flags_padding;
+ __u64 __subtree_last;
+};
+
+struct vhost_umem {
+ struct rb_root umem_tree;
+ struct list_head umem_list;
+ int numem;
+};
+
/* The virtqueue structure describes a queue attached to a device. */
struct vhost_virtqueue {
struct vhost_dev *dev;
@@ -97,10 +121,12 @@ struct vhost_virtqueue {
u64 log_addr;
struct iovec iov[UIO_MAXIOV];
+ struct iovec iotlb_iov[64];
struct iovec *indirect;
struct vring_used_elem *heads;
/* Protected by virtqueue mutex. */
- struct vhost_memory *memory;
+ struct vhost_umem *umem;
+ struct vhost_umem *iotlb;
void *private_data;
u64 acked_features;
/* Log write descriptors */
@@ -114,27 +140,38 @@ struct vhost_virtqueue {
/* Ring endianness requested by userspace for cross-endian support. */
bool user_be;
#endif
+ u32 busyloop_timeout;
+};
+
+struct vhost_msg_node {
+ struct vhost_msg msg;
+ struct vhost_virtqueue *vq;
+ struct list_head node;
};
struct vhost_dev {
- struct vhost_memory *memory;
struct mm_struct *mm;
struct mutex mutex;
struct vhost_virtqueue **vqs;
int nvqs;
struct file *log_file;
struct eventfd_ctx *log_ctx;
- spinlock_t work_lock;
- struct list_head work_list;
+ struct llist_head work_list;
struct task_struct *worker;
+ struct vhost_umem *umem;
+ struct vhost_umem *iotlb;
+ spinlock_t iotlb_lock;
+ struct list_head read_list;
+ struct list_head pending_list;
+ wait_queue_head_t wait;
};
void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs);
long vhost_dev_set_owner(struct vhost_dev *dev);
bool vhost_dev_has_owner(struct vhost_dev *dev);
long vhost_dev_check_owner(struct vhost_dev *);
-struct vhost_memory *vhost_dev_reset_owner_prepare(void);
-void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_memory *);
+struct vhost_umem *vhost_dev_reset_owner_prepare(void);
+void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_umem *);
void vhost_dev_cleanup(struct vhost_dev *, bool locked);
void vhost_dev_stop(struct vhost_dev *);
long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp);
@@ -148,7 +185,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *,
struct vhost_log *log, unsigned int *log_num);
void vhost_discard_vq_desc(struct vhost_virtqueue *, int n);
-int vhost_init_used(struct vhost_virtqueue *);
+int vhost_vq_init_access(struct vhost_virtqueue *);
int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
unsigned count);
@@ -158,10 +195,26 @@ void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *,
struct vring_used_elem *heads, unsigned count);
void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *);
+bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *);
bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
unsigned int log_num, u64 len);
+int vq_iotlb_prefetch(struct vhost_virtqueue *vq);
+
+struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type);
+void vhost_enqueue_msg(struct vhost_dev *dev,
+ struct list_head *head,
+ struct vhost_msg_node *node);
+struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev,
+ struct list_head *head);
+unsigned int vhost_chr_poll(struct file *file, struct vhost_dev *dev,
+ poll_table *wait);
+ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to,
+ int noblock);
+ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
+ struct iov_iter *from);
+int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled);
#define vq_err(vq, fmt, ...) do { \
pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
new file mode 100644
index 000000000000..72e914de473e
--- /dev/null
+++ b/drivers/vhost/vsock.c
@@ -0,0 +1,797 @@
+/*
+ * vhost transport for vsock
+ *
+ * Copyright (C) 2013-2015 Red Hat, Inc.
+ * Author: Asias He <asias@redhat.com>
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#include <linux/miscdevice.h>
+#include <linux/atomic.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/vmalloc.h>
+#include <net/sock.h>
+#include <linux/virtio_vsock.h>
+#include <linux/vhost.h>
+#include <linux/hashtable.h>
+
+#include <net/af_vsock.h>
+#include "vhost.h"
+
+#define VHOST_VSOCK_DEFAULT_HOST_CID 2
+
+enum {
+ VHOST_VSOCK_FEATURES = VHOST_FEATURES,
+};
+
+/* Used to track all the vhost_vsock instances on the system. */
+static DEFINE_SPINLOCK(vhost_vsock_lock);
+static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);
+
+struct vhost_vsock {
+ struct vhost_dev dev;
+ struct vhost_virtqueue vqs[2];
+
+ /* Link to global vhost_vsock_hash, writes use vhost_vsock_lock */
+ struct hlist_node hash;
+
+ struct vhost_work send_pkt_work;
+ spinlock_t send_pkt_list_lock;
+ struct list_head send_pkt_list; /* host->guest pending packets */
+
+ atomic_t queued_replies;
+
+ u32 guest_cid;
+};
+
+static u32 vhost_transport_get_local_cid(void)
+{
+ return VHOST_VSOCK_DEFAULT_HOST_CID;
+}
+
+/* Callers that dereference the return value must hold vhost_vsock_lock or the
+ * RCU read lock.
+ */
+static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
+{
+ struct vhost_vsock *vsock;
+
+ hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) {
+ u32 other_cid = vsock->guest_cid;
+
+ /* Skip instances that have no CID yet */
+ if (other_cid == 0)
+ continue;
+
+ if (other_cid == guest_cid) {
+ return vsock;
+ }
+ }
+
+ return NULL;
+}
+
+static void
+vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
+ struct vhost_virtqueue *vq)
+{
+ struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
+ bool added = false;
+ bool restart_tx = false;
+
+ mutex_lock(&vq->mutex);
+
+ if (!vq->private_data)
+ goto out;
+
+ /* Avoid further vmexits, we're already processing the virtqueue */
+ vhost_disable_notify(&vsock->dev, vq);
+
+ for (;;) {
+ struct virtio_vsock_pkt *pkt;
+ struct iov_iter iov_iter;
+ unsigned out, in;
+ size_t nbytes;
+ size_t len;
+ int head;
+
+ spin_lock_bh(&vsock->send_pkt_list_lock);
+ if (list_empty(&vsock->send_pkt_list)) {
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+ vhost_enable_notify(&vsock->dev, vq);
+ break;
+ }
+
+ pkt = list_first_entry(&vsock->send_pkt_list,
+ struct virtio_vsock_pkt, list);
+ list_del_init(&pkt->list);
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+ head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+ &out, &in, NULL, NULL);
+ if (head < 0) {
+ spin_lock_bh(&vsock->send_pkt_list_lock);
+ list_add(&pkt->list, &vsock->send_pkt_list);
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+ break;
+ }
+
+ if (head == vq->num) {
+ spin_lock_bh(&vsock->send_pkt_list_lock);
+ list_add(&pkt->list, &vsock->send_pkt_list);
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+ /* We cannot finish yet if more buffers snuck in while
+ * re-enabling notify.
+ */
+ if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
+ vhost_disable_notify(&vsock->dev, vq);
+ continue;
+ }
+ break;
+ }
+
+ if (out) {
+ virtio_transport_free_pkt(pkt);
+ vq_err(vq, "Expected 0 output buffers, got %u\n", out);
+ break;
+ }
+
+ len = iov_length(&vq->iov[out], in);
+ iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
+
+ nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
+ if (nbytes != sizeof(pkt->hdr)) {
+ virtio_transport_free_pkt(pkt);
+ vq_err(vq, "Faulted on copying pkt hdr\n");
+ break;
+ }
+
+ nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter);
+ if (nbytes != pkt->len) {
+ virtio_transport_free_pkt(pkt);
+ vq_err(vq, "Faulted on copying pkt buf\n");
+ break;
+ }
+
+ vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
+ added = true;
+
+ if (pkt->reply) {
+ int val;
+
+ val = atomic_dec_return(&vsock->queued_replies);
+
+ /* Do we have resources to resume tx processing? */
+ if (val + 1 == tx_vq->num)
+ restart_tx = true;
+ }
+
+ virtio_transport_free_pkt(pkt);
+ }
+ if (added)
+ vhost_signal(&vsock->dev, vq);
+
+out:
+ mutex_unlock(&vq->mutex);
+
+ if (restart_tx)
+ vhost_poll_queue(&tx_vq->poll);
+}
+
+static void vhost_transport_send_pkt_work(struct vhost_work *work)
+{
+ struct vhost_virtqueue *vq;
+ struct vhost_vsock *vsock;
+
+ vsock = container_of(work, struct vhost_vsock, send_pkt_work);
+ vq = &vsock->vqs[VSOCK_VQ_RX];
+
+ vhost_transport_do_send_pkt(vsock, vq);
+}
+
+static int
+vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
+{
+ struct vhost_vsock *vsock;
+ struct vhost_virtqueue *vq;
+ int len = pkt->len;
+
+ rcu_read_lock();
+
+ /* Find the vhost_vsock according to guest context id */
+ vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid));
+ if (!vsock) {
+ rcu_read_unlock();
+ virtio_transport_free_pkt(pkt);
+ return -ENODEV;
+ }
+
+ vq = &vsock->vqs[VSOCK_VQ_RX];
+
+ if (pkt->reply)
+ atomic_inc(&vsock->queued_replies);
+
+ spin_lock_bh(&vsock->send_pkt_list_lock);
+ list_add_tail(&pkt->list, &vsock->send_pkt_list);
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+ vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
+
+ rcu_read_unlock();
+ return len;
+}
+
+static int
+vhost_transport_cancel_pkt(struct vsock_sock *vsk)
+{
+ struct vhost_vsock *vsock;
+ struct virtio_vsock_pkt *pkt, *n;
+ int cnt = 0;
+ int ret = -ENODEV;
+ LIST_HEAD(freeme);
+
+ rcu_read_lock();
+
+ /* Find the vhost_vsock according to guest context id */
+ vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
+ if (!vsock)
+ goto out;
+
+ spin_lock_bh(&vsock->send_pkt_list_lock);
+ list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
+ if (pkt->vsk != vsk)
+ continue;
+ list_move(&pkt->list, &freeme);
+ }
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+ list_for_each_entry_safe(pkt, n, &freeme, list) {
+ if (pkt->reply)
+ cnt++;
+ list_del(&pkt->list);
+ virtio_transport_free_pkt(pkt);
+ }
+
+ if (cnt) {
+ struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
+ int new_cnt;
+
+ new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
+ if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
+ vhost_poll_queue(&tx_vq->poll);
+ }
+
+ ret = 0;
+out:
+ rcu_read_unlock();
+ return ret;
+}
+
+static struct virtio_vsock_pkt *
+vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
+ unsigned int out, unsigned int in)
+{
+ struct virtio_vsock_pkt *pkt;
+ struct iov_iter iov_iter;
+ size_t nbytes;
+ size_t len;
+
+ if (in != 0) {
+ vq_err(vq, "Expected 0 input buffers, got %u\n", in);
+ return NULL;
+ }
+
+ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
+ if (!pkt)
+ return NULL;
+
+ len = iov_length(vq->iov, out);
+ iov_iter_init(&iov_iter, WRITE, vq->iov, out, len);
+
+ nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
+ if (nbytes != sizeof(pkt->hdr)) {
+ vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
+ sizeof(pkt->hdr), nbytes);
+ kfree(pkt);
+ return NULL;
+ }
+
+ if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM)
+ pkt->len = le32_to_cpu(pkt->hdr.len);
+
+ /* No payload */
+ if (!pkt->len)
+ return pkt;
+
+ /* The pkt is too big */
+ if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
+ kfree(pkt);
+ return NULL;
+ }
+
+ pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
+ if (!pkt->buf) {
+ kfree(pkt);
+ return NULL;
+ }
+
+ nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
+ if (nbytes != pkt->len) {
+ vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
+ pkt->len, nbytes);
+ virtio_transport_free_pkt(pkt);
+ return NULL;
+ }
+
+ return pkt;
+}
+
+/* Is there space left for replies to rx packets? */
+static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
+{
+ struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX];
+ int val;
+
+ smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
+ val = atomic_read(&vsock->queued_replies);
+
+ return val < vq->num;
+}
+
+static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
+{
+ struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
+ poll.work);
+ struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
+ dev);
+ struct virtio_vsock_pkt *pkt;
+ int head;
+ unsigned int out, in;
+ bool added = false;
+
+ mutex_lock(&vq->mutex);
+
+ if (!vq->private_data)
+ goto out;
+
+ vhost_disable_notify(&vsock->dev, vq);
+ for (;;) {
+ u32 len;
+
+ if (!vhost_vsock_more_replies(vsock)) {
+ /* Stop tx until the device processes already
+ * pending replies. Leave tx virtqueue
+ * callbacks disabled.
+ */
+ goto no_more_replies;
+ }
+
+ head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+ &out, &in, NULL, NULL);
+ if (head < 0)
+ break;
+
+ if (head == vq->num) {
+ if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
+ vhost_disable_notify(&vsock->dev, vq);
+ continue;
+ }
+ break;
+ }
+
+ pkt = vhost_vsock_alloc_pkt(vq, out, in);
+ if (!pkt) {
+ vq_err(vq, "Faulted on pkt\n");
+ continue;
+ }
+
+ len = pkt->len;
+
+ /* Only accept correctly addressed packets */
+ if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
+ virtio_transport_recv_pkt(pkt);
+ else
+ virtio_transport_free_pkt(pkt);
+
+ vhost_add_used(vq, head, sizeof(pkt->hdr) + len);
+ added = true;
+ }
+
+no_more_replies:
+ if (added)
+ vhost_signal(&vsock->dev, vq);
+
+out:
+ mutex_unlock(&vq->mutex);
+}
+
+static void vhost_vsock_handle_rx_kick(struct vhost_work *work)
+{
+ struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
+ poll.work);
+ struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
+ dev);
+
+ vhost_transport_do_send_pkt(vsock, vq);
+}
+
+static int vhost_vsock_start(struct vhost_vsock *vsock)
+{
+ struct vhost_virtqueue *vq;
+ size_t i;
+ int ret;
+
+ mutex_lock(&vsock->dev.mutex);
+
+ ret = vhost_dev_check_owner(&vsock->dev);
+ if (ret)
+ goto err;
+
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
+ vq = &vsock->vqs[i];
+
+ mutex_lock(&vq->mutex);
+
+ if (!vhost_vq_access_ok(vq)) {
+ ret = -EFAULT;
+ goto err_vq;
+ }
+
+ if (!vq->private_data) {
+ vq->private_data = vsock;
+ ret = vhost_vq_init_access(vq);
+ if (ret)
+ goto err_vq;
+ }
+
+ mutex_unlock(&vq->mutex);
+ }
+
+ mutex_unlock(&vsock->dev.mutex);
+ return 0;
+
+err_vq:
+ vq->private_data = NULL;
+ mutex_unlock(&vq->mutex);
+
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
+ vq = &vsock->vqs[i];
+
+ mutex_lock(&vq->mutex);
+ vq->private_data = NULL;
+ mutex_unlock(&vq->mutex);
+ }
+err:
+ mutex_unlock(&vsock->dev.mutex);
+ return ret;
+}
+
+static int vhost_vsock_stop(struct vhost_vsock *vsock)
+{
+ size_t i;
+ int ret;
+
+ mutex_lock(&vsock->dev.mutex);
+
+ ret = vhost_dev_check_owner(&vsock->dev);
+ if (ret)
+ goto err;
+
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
+ struct vhost_virtqueue *vq = &vsock->vqs[i];
+
+ mutex_lock(&vq->mutex);
+ vq->private_data = NULL;
+ mutex_unlock(&vq->mutex);
+ }
+
+err:
+ mutex_unlock(&vsock->dev.mutex);
+ return ret;
+}
+
+static void vhost_vsock_free(struct vhost_vsock *vsock)
+{
+ kvfree(vsock);
+}
+
+static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
+{
+ struct vhost_virtqueue **vqs;
+ struct vhost_vsock *vsock;
+ int ret;
+
+ /* This struct is large and allocation could fail, fall back to vmalloc
+ * if there is no other way.
+ */
+ vsock = kzalloc(sizeof(*vsock), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+ if (!vsock) {
+ vsock = vmalloc(sizeof(*vsock));
+ if (!vsock)
+ return -ENOMEM;
+ }
+
+ vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
+ if (!vqs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ vsock->guest_cid = 0; /* no CID assigned yet */
+
+ atomic_set(&vsock->queued_replies, 0);
+
+ vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
+ vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
+ vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
+ vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
+
+ vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs));
+
+ file->private_data = vsock;
+ spin_lock_init(&vsock->send_pkt_list_lock);
+ INIT_LIST_HEAD(&vsock->send_pkt_list);
+ vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
+ return 0;
+
+out:
+ vhost_vsock_free(vsock);
+ return ret;
+}
+
+static void vhost_vsock_flush(struct vhost_vsock *vsock)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++)
+ if (vsock->vqs[i].handle_kick)
+ vhost_poll_flush(&vsock->vqs[i].poll);
+ vhost_work_flush(&vsock->dev, &vsock->send_pkt_work);
+}
+
+static void vhost_vsock_reset_orphans(struct sock *sk)
+{
+ struct vsock_sock *vsk = vsock_sk(sk);
+
+ /* vmci_transport.c doesn't take sk_lock here either. At least we're
+ * under vsock_table_lock so the sock cannot disappear while we're
+ * executing.
+ */
+
+ /* If the peer is still valid, no need to reset connection */
+ if (vhost_vsock_get(vsk->remote_addr.svm_cid))
+ return;
+
+ /* If the close timeout is pending, let it expire. This avoids races
+ * with the timeout callback.
+ */
+ if (vsk->close_work_scheduled)
+ return;
+
+ sock_set_flag(sk, SOCK_DONE);
+ vsk->peer_shutdown = SHUTDOWN_MASK;
+ sk->sk_state = SS_UNCONNECTED;
+ sk->sk_err = ECONNRESET;
+ sk->sk_error_report(sk);
+}
+
+static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
+{
+ struct vhost_vsock *vsock = file->private_data;
+
+ spin_lock_bh(&vhost_vsock_lock);
+ if (vsock->guest_cid)
+ hash_del_rcu(&vsock->hash);
+ spin_unlock_bh(&vhost_vsock_lock);
+
+ /* Wait for other CPUs to finish using vsock */
+ synchronize_rcu();
+
+ /* Iterating over all connections for all CIDs to find orphans is
+ * inefficient. Room for improvement here. */
+ vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
+
+ vhost_vsock_stop(vsock);
+ vhost_vsock_flush(vsock);
+ vhost_dev_stop(&vsock->dev);
+
+ spin_lock_bh(&vsock->send_pkt_list_lock);
+ while (!list_empty(&vsock->send_pkt_list)) {
+ struct virtio_vsock_pkt *pkt;
+
+ pkt = list_first_entry(&vsock->send_pkt_list,
+ struct virtio_vsock_pkt, list);
+ list_del_init(&pkt->list);
+ virtio_transport_free_pkt(pkt);
+ }
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+ vhost_dev_cleanup(&vsock->dev, false);
+ kfree(vsock->dev.vqs);
+ vhost_vsock_free(vsock);
+ return 0;
+}
+
+static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
+{
+ struct vhost_vsock *other;
+
+ /* Refuse reserved CIDs */
+ if (guest_cid <= VMADDR_CID_HOST ||
+ guest_cid == U32_MAX)
+ return -EINVAL;
+
+ /* 64-bit CIDs are not yet supported */
+ if (guest_cid > U32_MAX)
+ return -EINVAL;
+
+ /* Refuse if CID is already in use */
+ spin_lock_bh(&vhost_vsock_lock);
+ other = vhost_vsock_get(guest_cid);
+ if (other && other != vsock) {
+ spin_unlock_bh(&vhost_vsock_lock);
+ return -EADDRINUSE;
+ }
+
+ if (vsock->guest_cid)
+ hash_del_rcu(&vsock->hash);
+
+ vsock->guest_cid = guest_cid;
+ hash_add_rcu(vhost_vsock_hash, &vsock->hash, guest_cid);
+ spin_unlock_bh(&vhost_vsock_lock);
+
+ return 0;
+}
+
+static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
+{
+ struct vhost_virtqueue *vq;
+ int i;
+
+ if (features & ~VHOST_VSOCK_FEATURES)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&vsock->dev.mutex);
+ if ((features & (1 << VHOST_F_LOG_ALL)) &&
+ !vhost_log_access_ok(&vsock->dev)) {
+ mutex_unlock(&vsock->dev.mutex);
+ return -EFAULT;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
+ vq = &vsock->vqs[i];
+ mutex_lock(&vq->mutex);
+ vq->acked_features = features;
+ mutex_unlock(&vq->mutex);
+ }
+ mutex_unlock(&vsock->dev.mutex);
+ return 0;
+}
+
+static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
+ unsigned long arg)
+{
+ struct vhost_vsock *vsock = f->private_data;
+ void __user *argp = (void __user *)arg;
+ u64 guest_cid;
+ u64 features;
+ int start;
+ int r;
+
+ switch (ioctl) {
+ case VHOST_VSOCK_SET_GUEST_CID:
+ if (copy_from_user(&guest_cid, argp, sizeof(guest_cid)))
+ return -EFAULT;
+ return vhost_vsock_set_cid(vsock, guest_cid);
+ case VHOST_VSOCK_SET_RUNNING:
+ if (copy_from_user(&start, argp, sizeof(start)))
+ return -EFAULT;
+ if (start)
+ return vhost_vsock_start(vsock);
+ else
+ return vhost_vsock_stop(vsock);
+ case VHOST_GET_FEATURES:
+ features = VHOST_VSOCK_FEATURES;
+ if (copy_to_user(argp, &features, sizeof(features)))
+ return -EFAULT;
+ return 0;
+ case VHOST_SET_FEATURES:
+ if (copy_from_user(&features, argp, sizeof(features)))
+ return -EFAULT;
+ return vhost_vsock_set_features(vsock, features);
+ default:
+ mutex_lock(&vsock->dev.mutex);
+ r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
+ if (r == -ENOIOCTLCMD)
+ r = vhost_vring_ioctl(&vsock->dev, ioctl, argp);
+ else
+ vhost_vsock_flush(vsock);
+ mutex_unlock(&vsock->dev.mutex);
+ return r;
+ }
+}
+
+static const struct file_operations vhost_vsock_fops = {
+ .owner = THIS_MODULE,
+ .open = vhost_vsock_dev_open,
+ .release = vhost_vsock_dev_release,
+ .llseek = noop_llseek,
+ .unlocked_ioctl = vhost_vsock_dev_ioctl,
+};
+
+static struct miscdevice vhost_vsock_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "vhost-vsock",
+ .fops = &vhost_vsock_fops,
+};
+
+static struct virtio_transport vhost_transport = {
+ .transport = {
+ .get_local_cid = vhost_transport_get_local_cid,
+
+ .init = virtio_transport_do_socket_init,
+ .destruct = virtio_transport_destruct,
+ .release = virtio_transport_release,
+ .connect = virtio_transport_connect,
+ .shutdown = virtio_transport_shutdown,
+ .cancel_pkt = vhost_transport_cancel_pkt,
+
+ .dgram_enqueue = virtio_transport_dgram_enqueue,
+ .dgram_dequeue = virtio_transport_dgram_dequeue,
+ .dgram_bind = virtio_transport_dgram_bind,
+ .dgram_allow = virtio_transport_dgram_allow,
+
+ .stream_enqueue = virtio_transport_stream_enqueue,
+ .stream_dequeue = virtio_transport_stream_dequeue,
+ .stream_has_data = virtio_transport_stream_has_data,
+ .stream_has_space = virtio_transport_stream_has_space,
+ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
+ .stream_is_active = virtio_transport_stream_is_active,
+ .stream_allow = virtio_transport_stream_allow,
+
+ .notify_poll_in = virtio_transport_notify_poll_in,
+ .notify_poll_out = virtio_transport_notify_poll_out,
+ .notify_recv_init = virtio_transport_notify_recv_init,
+ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
+ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
+ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
+ .notify_send_init = virtio_transport_notify_send_init,
+ .notify_send_pre_block = virtio_transport_notify_send_pre_block,
+ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
+ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
+
+ .set_buffer_size = virtio_transport_set_buffer_size,
+ .set_min_buffer_size = virtio_transport_set_min_buffer_size,
+ .set_max_buffer_size = virtio_transport_set_max_buffer_size,
+ .get_buffer_size = virtio_transport_get_buffer_size,
+ .get_min_buffer_size = virtio_transport_get_min_buffer_size,
+ .get_max_buffer_size = virtio_transport_get_max_buffer_size,
+ },
+
+ .send_pkt = vhost_transport_send_pkt,
+};
+
+static int __init vhost_vsock_init(void)
+{
+ int ret;
+
+ ret = vsock_core_init(&vhost_transport.transport);
+ if (ret < 0)
+ return ret;
+ return misc_register(&vhost_vsock_misc);
+};
+
+static void __exit vhost_vsock_exit(void)
+{
+ misc_deregister(&vhost_vsock_misc);
+ vsock_core_exit();
+};
+
+module_init(vhost_vsock_init);
+module_exit(vhost_vsock_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Asias He");
+MODULE_DESCRIPTION("vhost transport for vsock ");
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index cab9f3f63a38..77590320d44c 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -60,7 +60,7 @@ config VIRTIO_INPUT
config VIRTIO_MMIO
tristate "Platform bus driver for memory mapped virtio devices"
- depends on HAS_IOMEM
+ depends on HAS_IOMEM && HAS_DMA
select VIRTIO
---help---
This drivers provides support for memory mapped virtio
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 7d4c7f35e5cf..f77358f08930 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -401,7 +401,7 @@ static int init_vqs(struct virtio_balloon *vb)
{
struct virtqueue *vqs[3];
vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
- const char *names[] = { "inflate", "deflate", "stats" };
+ static const char * const names[] = { "inflate", "deflate", "stats" };
int err, nvqs;
/*
diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c
index c96944b59856..350a2a5a49db 100644
--- a/drivers/virtio/virtio_input.c
+++ b/drivers/virtio/virtio_input.c
@@ -170,7 +170,7 @@ static int virtinput_init_vqs(struct virtio_input *vi)
struct virtqueue *vqs[2];
vq_callback_t *cbs[] = { virtinput_recv_events,
virtinput_recv_status };
- static const char *names[] = { "events", "status" };
+ static const char * const names[] = { "events", "status" };
int err;
err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names);
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index f499d9da7237..745c6ee1bb3e 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -482,7 +482,7 @@ error_available:
static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
unsigned int irq = platform_get_irq(vm_dev->pdev, 0);
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 2046a68ad0ba..f6bed86c17f9 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -296,7 +296,7 @@ void vp_del_vqs(struct virtio_device *vdev)
static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[],
+ const char * const names[],
bool use_msix,
bool per_vq_vectors)
{
@@ -376,7 +376,7 @@ error_find:
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
int err;
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index b976d968e793..2cc252270b2d 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -139,7 +139,7 @@ void vp_del_vqs(struct virtio_device *vdev);
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[]);
+ const char * const names[]);
const char *vp_bus_name(struct virtio_device *vdev);
/* Setup the affinity for a virtqueue:
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index 4469202eaa8e..631021cfc740 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -423,7 +423,7 @@ err_new_queue:
static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtqueue *vq;
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index a01a41a41269..761f28ffd40e 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -24,6 +24,8 @@
#include <linux/module.h>
#include <linux/hrtimer.h>
#include <linux/kmemleak.h>
+#include <linux/dma-mapping.h>
+#include <xen/xen.h>
#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
@@ -54,6 +56,11 @@
#define END_USE(vq)
#endif
+struct vring_desc_state {
+ void *data; /* Data for callback. */
+ struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
+};
+
struct vring_virtqueue {
struct virtqueue vq;
@@ -98,12 +105,131 @@ struct vring_virtqueue {
ktime_t last_add_time;
#endif
- /* Tokens for callbacks. */
- void *data[];
+ /* Per-descriptor state. */
+ struct vring_desc_state desc_state[];
};
#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
+/*
+ * Modern virtio devices have feature bits to specify whether they need a
+ * quirk and bypass the IOMMU. If not there, just use the DMA API.
+ *
+ * If there, the interaction between virtio and DMA API is messy.
+ *
+ * On most systems with virtio, physical addresses match bus addresses,
+ * and it doesn't particularly matter whether we use the DMA API.
+ *
+ * On some systems, including Xen and any system with a physical device
+ * that speaks virtio behind a physical IOMMU, we must use the DMA API
+ * for virtio DMA to work at all.
+ *
+ * On other systems, including SPARC and PPC64, virtio-pci devices are
+ * enumerated as though they are behind an IOMMU, but the virtio host
+ * ignores the IOMMU, so we must either pretend that the IOMMU isn't
+ * there or somehow map everything as the identity.
+ *
+ * For the time being, we preserve historic behavior and bypass the DMA
+ * API.
+ *
+ * TODO: install a per-device DMA ops structure that does the right thing
+ * taking into account all the above quirks, and use the DMA API
+ * unconditionally on data path.
+ */
+
+static bool vring_use_dma_api(struct virtio_device *vdev)
+{
+ if (!virtio_has_iommu_quirk(vdev))
+ return true;
+
+ /* Otherwise, we are left to guess. */
+ /*
+ * In theory, it's possible to have a buggy QEMU-supposed
+ * emulated Q35 IOMMU and Xen enabled at the same time. On
+ * such a configuration, virtio has never worked and will
+ * not work without an even larger kludge. Instead, enable
+ * the DMA API if we're a Xen guest, which at least allows
+ * all of the sensible Xen configurations to work correctly.
+ */
+ if (xen_domain())
+ return true;
+
+ return false;
+}
+
+/*
+ * The DMA ops on various arches are rather gnarly right now, and
+ * making all of the arch DMA ops work on the vring device itself
+ * is a mess. For now, we use the parent device for DMA ops.
+ */
+struct device *vring_dma_dev(const struct vring_virtqueue *vq)
+{
+ return vq->vq.vdev->dev.parent;
+}
+
+/* Map one sg entry. */
+static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
+ struct scatterlist *sg,
+ enum dma_data_direction direction)
+{
+ if (!vring_use_dma_api(vq->vq.vdev))
+ return (dma_addr_t)sg_phys(sg);
+
+ /*
+ * We can't use dma_map_sg, because we don't use scatterlists in
+ * the way it expects (we don't guarantee that the scatterlist
+ * will exist for the lifetime of the mapping).
+ */
+ return dma_map_page(vring_dma_dev(vq),
+ sg_page(sg), sg->offset, sg->length,
+ direction);
+}
+
+static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
+ void *cpu_addr, size_t size,
+ enum dma_data_direction direction)
+{
+ if (!vring_use_dma_api(vq->vq.vdev))
+ return (dma_addr_t)virt_to_phys(cpu_addr);
+
+ return dma_map_single(vring_dma_dev(vq),
+ cpu_addr, size, direction);
+}
+
+static void vring_unmap_one(const struct vring_virtqueue *vq,
+ struct vring_desc *desc)
+{
+ u16 flags;
+
+ if (!vring_use_dma_api(vq->vq.vdev))
+ return;
+
+ flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
+
+ if (flags & VRING_DESC_F_INDIRECT) {
+ dma_unmap_single(vring_dma_dev(vq),
+ virtio64_to_cpu(vq->vq.vdev, desc->addr),
+ virtio32_to_cpu(vq->vq.vdev, desc->len),
+ (flags & VRING_DESC_F_WRITE) ?
+ DMA_FROM_DEVICE : DMA_TO_DEVICE);
+ } else {
+ dma_unmap_page(vring_dma_dev(vq),
+ virtio64_to_cpu(vq->vq.vdev, desc->addr),
+ virtio32_to_cpu(vq->vq.vdev, desc->len),
+ (flags & VRING_DESC_F_WRITE) ?
+ DMA_FROM_DEVICE : DMA_TO_DEVICE);
+ }
+}
+
+static int vring_mapping_error(const struct vring_virtqueue *vq,
+ dma_addr_t addr)
+{
+ if (!vring_use_dma_api(vq->vq.vdev))
+ return 0;
+
+ return dma_mapping_error(vring_dma_dev(vq), addr);
+}
+
static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
unsigned int total_sg, gfp_t gfp)
{
@@ -137,7 +263,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
struct vring_virtqueue *vq = to_vvq(_vq);
struct scatterlist *sg;
struct vring_desc *desc;
- unsigned int i, n, avail, descs_used, uninitialized_var(prev);
+ unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx;
int head;
bool indirect;
@@ -177,21 +303,15 @@ static inline int virtqueue_add(struct virtqueue *_vq,
if (desc) {
/* Use a single buffer which doesn't continue */
- vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);
- vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, virt_to_phys(desc));
- /* avoid kmemleak false positive (hidden by virt_to_phys) */
- kmemleak_ignore(desc);
- vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc));
-
+ indirect = true;
/* Set up rest to use this indirect table. */
i = 0;
descs_used = 1;
- indirect = true;
} else {
+ indirect = false;
desc = vq->vring.desc;
i = head;
descs_used = total_sg;
- indirect = false;
}
if (vq->vq.num_free < descs_used) {
@@ -208,13 +328,14 @@ static inline int virtqueue_add(struct virtqueue *_vq,
return -ENOSPC;
}
- /* We're about to use some buffers from the free list. */
- vq->vq.num_free -= descs_used;
-
for (n = 0; n < out_sgs; n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+ dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
+ if (vring_mapping_error(vq, addr))
+ goto unmap_release;
+
desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT);
- desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg));
+ desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
prev = i;
i = virtio16_to_cpu(_vq->vdev, desc[i].next);
@@ -222,8 +343,12 @@ static inline int virtqueue_add(struct virtqueue *_vq,
}
for (; n < (out_sgs + in_sgs); n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+ dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
+ if (vring_mapping_error(vq, addr))
+ goto unmap_release;
+
desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE);
- desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg));
+ desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
prev = i;
i = virtio16_to_cpu(_vq->vdev, desc[i].next);
@@ -232,14 +357,33 @@ static inline int virtqueue_add(struct virtqueue *_vq,
/* Last one doesn't continue. */
desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
+ if (indirect) {
+ /* Now that the indirect table is filled in, map it. */
+ dma_addr_t addr = vring_map_single(
+ vq, desc, total_sg * sizeof(struct vring_desc),
+ DMA_TO_DEVICE);
+ if (vring_mapping_error(vq, addr))
+ goto unmap_release;
+
+ vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);
+ vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, addr);
+
+ vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc));
+ }
+
+ /* We're using some buffers from the free list. */
+ vq->vq.num_free -= descs_used;
+
/* Update free pointer */
if (indirect)
vq->free_head = virtio16_to_cpu(_vq->vdev, vq->vring.desc[head].next);
else
vq->free_head = i;
- /* Set token. */
- vq->data[head] = data;
+ /* Store token and indirect buffer state. */
+ vq->desc_state[head].data = data;
+ if (indirect)
+ vq->desc_state[head].indir_desc = desc;
/* Put entry in available array (but don't update avail->idx until they
* do sync). */
@@ -262,6 +406,24 @@ static inline int virtqueue_add(struct virtqueue *_vq,
virtqueue_kick(_vq);
return 0;
+
+unmap_release:
+ err_idx = i;
+ i = head;
+
+ for (n = 0; n < total_sg; n++) {
+ if (i == err_idx)
+ break;
+ vring_unmap_one(vq, &desc[i]);
+ i = vq->vring.desc[i].next;
+ }
+
+ vq->vq.num_free += total_sg;
+
+ if (indirect)
+ kfree(desc);
+
+ return -EIO;
}
/**
@@ -432,27 +594,43 @@ EXPORT_SYMBOL_GPL(virtqueue_kick);
static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
{
- unsigned int i;
+ unsigned int i, j;
+ u16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
/* Clear data ptr. */
- vq->data[head] = NULL;
+ vq->desc_state[head].data = NULL;
- /* Put back on free list: find end */
+ /* Put back on free list: unmap first-level descriptors and find end */
i = head;
- /* Free the indirect table */
- if (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))
- kfree(phys_to_virt(virtio64_to_cpu(vq->vq.vdev, vq->vring.desc[i].addr)));
-
- while (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT)) {
+ while (vq->vring.desc[i].flags & nextflag) {
+ vring_unmap_one(vq, &vq->vring.desc[i]);
i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
vq->vq.num_free++;
}
+ vring_unmap_one(vq, &vq->vring.desc[i]);
vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
vq->free_head = head;
+
/* Plus final descriptor */
vq->vq.num_free++;
+
+ /* Free the indirect table, if any, now that it's unmapped. */
+ if (vq->desc_state[head].indir_desc) {
+ struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
+ u32 len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
+
+ BUG_ON(!(vq->vring.desc[head].flags &
+ cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
+ BUG_ON(len == 0 || len % sizeof(struct vring_desc));
+
+ for (j = 0; j < len / sizeof(struct vring_desc); j++)
+ vring_unmap_one(vq, &indir_desc[j]);
+
+ kfree(vq->desc_state[head].indir_desc);
+ vq->desc_state[head].indir_desc = NULL;
+ }
}
static inline bool more_used(const struct vring_virtqueue *vq)
@@ -507,13 +685,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
BAD_RING(vq, "id %u out of range\n", i);
return NULL;
}
- if (unlikely(!vq->data[i])) {
+ if (unlikely(!vq->desc_state[i].data)) {
BAD_RING(vq, "id %u is not a head!\n", i);
return NULL;
}
/* detach_buf clears data, so grab it now. */
- ret = vq->data[i];
+ ret = vq->desc_state[i].data;
detach_buf(vq, i);
vq->last_used_idx++;
/* If we expect an interrupt for the next entry, tell host
@@ -687,10 +865,10 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
START_USE(vq);
for (i = 0; i < vq->vring.num; i++) {
- if (!vq->data[i])
+ if (!vq->desc_state[i].data)
continue;
/* detach_buf clears data, so grab it now. */
- buf = vq->data[i];
+ buf = vq->desc_state[i].data;
detach_buf(vq, i);
vq->avail_idx_shadow--;
vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
@@ -744,7 +922,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
return NULL;
}
- vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL);
+ vq = kmalloc(sizeof(*vq) + num * sizeof(struct vring_desc_state),
+ GFP_KERNEL);
if (!vq)
return NULL;
@@ -779,11 +958,9 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
/* Put everything in free lists. */
vq->free_head = 0;
- for (i = 0; i < num-1; i++) {
+ for (i = 0; i < num-1; i++)
vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
- vq->data[i] = NULL;
- }
- vq->data[i] = NULL;
+ memset(vq->desc_state, 0, num * sizeof(struct vring_desc_state));
return &vq->vq;
}
@@ -809,6 +986,8 @@ void vring_transport_features(struct virtio_device *vdev)
break;
case VIRTIO_F_VERSION_1:
break;
+ case VIRTIO_F_IOMMU_PLATFORM:
+ break;
default:
/* We don't understand this bit. */
__virtio_clear_bit(vdev, i);