diff options
Diffstat (limited to 'drivers')
66 files changed, 2478 insertions, 797 deletions
diff --git a/drivers/Makefile b/drivers/Makefile index d563f5c13544..d3f690ab5b27 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -141,6 +141,7 @@ obj-$(CONFIG_OF) += of/ obj-$(CONFIG_SSB) += ssb/ obj-$(CONFIG_BCMA) += bcma/ obj-$(CONFIG_VHOST_RING) += vhost/ +obj-$(CONFIG_VHOST) += vhost/ obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_STAGING) += staging/ obj-y += platform/ diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 1c2b846c5776..f28b4949cb9d 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -131,6 +131,23 @@ void acpi_power_resources_list_free(struct list_head *list) } } +static bool acpi_power_resource_is_dup(union acpi_object *package, + unsigned int start, unsigned int i) +{ + acpi_handle rhandle, dup; + unsigned int j; + + /* The caller is expected to check the package element types */ + rhandle = package->package.elements[i].reference.handle; + for (j = start; j < i; j++) { + dup = package->package.elements[j].reference.handle; + if (dup == rhandle) + return true; + } + + return false; +} + int acpi_extract_power_resources(union acpi_object *package, unsigned int start, struct list_head *list) { @@ -150,6 +167,11 @@ int acpi_extract_power_resources(union acpi_object *package, unsigned int start, err = -ENODEV; break; } + + /* Some ACPI tables contain duplicate power resource references */ + if (acpi_power_resource_is_dup(package, start, i)) + continue; + err = acpi_add_power_resource(rhandle); if (err) break; diff --git a/drivers/gpio/gpio-max7301.c b/drivers/gpio/gpio-max7301.c index 05813fbf3daf..647dfbbc4e1c 100644 --- a/drivers/gpio/gpio-max7301.c +++ b/drivers/gpio/gpio-max7301.c @@ -25,7 +25,7 @@ static int max7301_spi_write(struct device *dev, unsigned int reg, struct spi_device *spi = to_spi_device(dev); u16 word = ((reg & 0x7F) << 8) | (val & 0xFF); - return spi_write(spi, (const u8 *)&word, sizeof(word)); + return spi_write_then_read(spi, &word, sizeof(word), NULL, 0); } /* A read from the MAX7301 means two transfers; here, one message each */ @@ -37,14 +37,8 @@ static int max7301_spi_read(struct device *dev, unsigned int reg) struct spi_device *spi = to_spi_device(dev); word = 0x8000 | (reg << 8); - ret = spi_write(spi, (const u8 *)&word, sizeof(word)); - if (ret) - return ret; - /* - * This relies on the fact, that a transfer with NULL tx_buf shifts out - * zero bytes (=NOOP for MAX7301) - */ - ret = spi_read(spi, (u8 *)&word, sizeof(word)); + ret = spi_write_then_read(spi, &word, sizeof(word), &word, + sizeof(word)); if (ret) return ret; return word & 0xff; diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index ebb7e1d1778c..b31c02783d69 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -36,6 +36,7 @@ #include <linux/pci.h> #include <linux/export.h> +#include <linux/nospec.h> static int drm_version(struct drm_device *dev, void *data, struct drm_file *file_priv); @@ -705,13 +706,17 @@ long drm_ioctl(struct file *filp, if (is_driver_ioctl) { /* driver ioctl */ - if (nr - DRM_COMMAND_BASE >= dev->driver->num_ioctls) + unsigned int index = nr - DRM_COMMAND_BASE; + + if (index >= dev->driver->num_ioctls) goto err_i1; - ioctl = &dev->driver->ioctls[nr - DRM_COMMAND_BASE]; + index = array_index_nospec(index, dev->driver->num_ioctls); + ioctl = &dev->driver->ioctls[index]; } else { /* core ioctl */ if (nr >= DRM_CORE_IOCTL_COUNT) goto err_i1; + nr = array_index_nospec(nr, DRM_CORE_IOCTL_COUNT); ioctl = &drm_ioctls[nr]; } @@ -813,6 +818,7 @@ bool drm_ioctl_flags(unsigned int nr, unsigned int *flags) if (nr >= DRM_CORE_IOCTL_COUNT) return false; + nr = array_index_nospec(nr, DRM_CORE_IOCTL_COUNT); *flags = drm_ioctls[nr].flags; return true; diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 06496a128162..4150873d432e 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -130,7 +130,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) static vq_callback_t *callbacks[] = { virtio_gpu_ctrl_ack, virtio_gpu_cursor_ack }; - static const char *names[] = { "control", "cursor" }; + static const char * const names[] = { "control", "cursor" }; struct virtio_gpu_device *vgdev; /* this will expand later */ diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 802dcb409030..b877cce0409b 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -316,6 +316,8 @@ static ssize_t out_intr_mask_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); return sprintf(buf, "%d\n", outbound.current_interrupt_mask); } @@ -329,6 +331,8 @@ static ssize_t out_read_index_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); return sprintf(buf, "%d\n", outbound.current_read_index); } @@ -343,6 +347,8 @@ static ssize_t out_write_index_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); return sprintf(buf, "%d\n", outbound.current_write_index); } @@ -357,6 +363,8 @@ static ssize_t out_read_bytes_avail_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); return sprintf(buf, "%d\n", outbound.bytes_avail_toread); } @@ -371,6 +379,8 @@ static ssize_t out_write_bytes_avail_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); return sprintf(buf, "%d\n", outbound.bytes_avail_towrite); } @@ -384,6 +394,8 @@ static ssize_t in_intr_mask_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); return sprintf(buf, "%d\n", inbound.current_interrupt_mask); } @@ -397,6 +409,8 @@ static ssize_t in_read_index_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); return sprintf(buf, "%d\n", inbound.current_read_index); } @@ -410,6 +424,8 @@ static ssize_t in_write_index_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); return sprintf(buf, "%d\n", inbound.current_write_index); } @@ -424,6 +440,8 @@ static ssize_t in_read_bytes_avail_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); return sprintf(buf, "%d\n", inbound.bytes_avail_toread); } @@ -438,6 +456,8 @@ static ssize_t in_write_bytes_avail_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); return sprintf(buf, "%d\n", inbound.bytes_avail_towrite); } diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 70ca27e45602..9d9e47eb0842 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -1418,7 +1418,8 @@ nr_pages_store(struct device *dev, struct device_attribute *attr, if (!end) break; - len -= end - p; + /* consume the number and the following comma, hence +1 */ + len -= end - p + 1; p = end + 1; } while (len); diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 192c850a8b92..5b57c7edbc72 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -1209,11 +1209,5 @@ config I2C_MSM_V2 This driver can also be built as a module. If so, the module will be called i2c-msm-v2. -config VIRTIO_I2C - tristate "VIRTIO_I2C" - depends on VIRTIO - help - If you say yes to this option, the i2c virtualization will be - supported. endmenu diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index 47e5ed02ea7f..0c9891812aa8 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -97,7 +97,6 @@ obj-$(CONFIG_I2C_XLR) += i2c-xlr.o obj-$(CONFIG_I2C_XLP9XX) += i2c-xlp9xx.o obj-$(CONFIG_I2C_RCAR) += i2c-rcar.o obj-$(CONFIG_I2C_MSM_V2) += i2c-msm-v2.o -obj-$(CONFIG_VIRTIO_I2C) += virtio-i2c.o # External I2C/SMBus adapter drivers obj-$(CONFIG_I2C_DIOLAN_U2C) += i2c-diolan-u2c.o diff --git a/drivers/i2c/busses/virtio-i2c.c b/drivers/i2c/busses/virtio-i2c.c deleted file mode 100644 index 84d045266143..000000000000 --- a/drivers/i2c/busses/virtio-i2c.c +++ /dev/null @@ -1,356 +0,0 @@ -/* Copyright (c) 2019, The Linux Foundation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/kthread.h> -#include <linux/scatterlist.h> -#include <linux/dma-mapping.h> -#include <linux/interrupt.h> -#include <linux/slab.h> -#include <linux/idr.h> -#include <linux/jiffies.h> -#include <linux/sched.h> -#include <linux/wait.h> -#include <linux/spinlock.h> -#include <linux/virtio.h> -#include <uapi/linux/virtio_ids.h> -#include <linux/virtio_config.h> -#include <linux/i2c.h> - -#define I2C_ADAPTER_NR 0x00 - -#define I2C_VIRTIO_RD 0x01 -#define I2C_VIRTIO_WR 0x02 -#define I2C_VIRTIO_RDWR 0x03 - -/** - * struct virtio_i2c - virtio i2c device - * @adapter: i2c adapter - * @vdev: the virtio device - * @vq: i2c virtqueue - */ -struct virtio_i2c { - struct i2c_adapter adapter; - struct virtio_device *vdev; - struct virtqueue *vq; - wait_queue_head_t inq; -}; - -struct i2c_transfer_head { - u32 type; /* read or write from or to slave */ - u32 addr; /* slave addr */ - u32 length; /* buffer length */ - u32 total_length; /* merge write and read will use this segment */ -}; - -struct i2c_transfer_end { - u32 result; /* return value from backend */ -}; - -struct virtio_i2c_req { - struct i2c_transfer_head head; - char *buf; - struct i2c_transfer_end end; -}; - -static int virti2c_transfer(struct virtio_i2c *vi2c, - struct virtio_i2c_req *i2c_req) -{ - struct virtqueue *vq = vi2c->vq; - struct scatterlist outhdr, bufhdr, inhdr, *sgs[3]; - unsigned int num_out = 0, num_in = 0, err, len; - struct virtio_i2c_req *req_handled = NULL; - - /* send the head queue to the backend */ - sg_init_one(&outhdr, &i2c_req->head, sizeof(i2c_req->head)); - sgs[num_out++] = &outhdr; - - /* send the buffer queue to the backend */ - sg_init_one(&bufhdr, i2c_req->buf, - (i2c_req->head.type == I2C_VIRTIO_RDWR) ? - i2c_req->head.total_length : i2c_req->head.length); - if (i2c_req->head.type & I2C_VIRTIO_WR) - sgs[num_out++] = &bufhdr; - else - sgs[num_out + num_in++] = &bufhdr; - - /* send the result queue to the backend */ - sg_init_one(&inhdr, &i2c_req->end, sizeof(i2c_req->end)); - sgs[num_out + num_in++] = &inhdr; - - /* call the virtqueue function */ - err = virtqueue_add_sgs(vq, sgs, num_out, num_in, i2c_req, GFP_KERNEL); - if (err) - goto req_exit; - - /* Tell Host to go! */ - err = virtqueue_kick(vq); - - wait_event(vi2c->inq, - (req_handled = virtqueue_get_buf(vq, &len))); - - if (i2c_req->head.type == I2C_VIRTIO_RDWR) { - if (i2c_req->end.result == - i2c_req->head.total_length - i2c_req->head.length) - err = 0; - else - err = -EINVAL; - } else { - if (i2c_req->end.result == i2c_req->head.length) - err = 0; - else - err = -EINVAL; - } -req_exit: - return err; -} - -/* prepare the transfer req */ -static struct virtio_i2c_req *virti2c_transfer_prepare(struct i2c_msg *msg_1, - struct i2c_msg *msg_2) -{ - char *ptr = NULL; - int merge = 0; - struct virtio_i2c_req *i2c_req; - - if (msg_1 == NULL) - return NULL; - - if (msg_2) - merge = 1; - - i2c_req = kzalloc(sizeof(struct virtio_i2c_req), GFP_KERNEL); - if (i2c_req == NULL) - return NULL; - - if (merge) - ptr = kzalloc((msg_1->len + msg_2->len), GFP_KERNEL); - else - ptr = msg_1->buf; - if (ptr == NULL) - goto err_mem; - - /* prepare the head */ - i2c_req->head.type = merge ? - I2C_VIRTIO_RDWR : ((msg_1->flags & I2C_M_RD) ? - I2C_VIRTIO_RD : I2C_VIRTIO_WR); - i2c_req->head.addr = msg_1->addr; - i2c_req->head.length = msg_1->len; - if (merge) - i2c_req->head.total_length = msg_1->len + msg_2->len; - - /* prepare the buf */ - if (merge) - memcpy(ptr, msg_1->buf, msg_1->len); - i2c_req->buf = ptr; - - return i2c_req; -err_mem: - kfree(i2c_req); - i2c_req = NULL; - return NULL; -} - -static void virti2c_transfer_end(struct virtio_i2c_req *req, - struct i2c_msg *msg) -{ - if (req->head.type == I2C_VIRTIO_RDWR) { - memcpy(msg->buf, req->buf + req->head.length, msg->len); - kfree(req->buf); - req->buf = NULL; - } - - kfree(req); - req = NULL; -} - -static int virtio_i2c_master_xfer(struct i2c_adapter *adap, - struct i2c_msg *msgs, int num) -{ - int i, ret; - struct virtio_i2c_req *i2c_req; - struct virtio_i2c *vi2c = i2c_get_adapdata(adap); - - if (num < 1) { - dev_err(&vi2c->vdev->dev, - "error on number of msgs(%d) received\n", num); - return -EINVAL; - } - - if (IS_ERR_OR_NULL(msgs)) { - dev_err(&vi2c->vdev->dev, " error no msgs Accessing invalid pointer location\n"); - return PTR_ERR(msgs); - } - - for (i = 0; i < num; i++) { - - if (msgs[i].flags & I2C_M_RD) { - /* read the data from slave to master*/ - i2c_req = virti2c_transfer_prepare(&msgs[i], NULL); - - } else if ((i + 1 < num) && (msgs[i + 1].flags & I2C_M_RD) && - (msgs[i].addr == msgs[i + 1].addr)) { - /* write then read from same address*/ - i2c_req = virti2c_transfer_prepare(&msgs[i], - &msgs[i+1]); - i += 1; - - } else { - /* write the data to slave */ - i2c_req = virti2c_transfer_prepare(&msgs[i], NULL); - } - - if (i2c_req == NULL) { - ret = -ENOMEM; - goto err; - } - ret = virti2c_transfer(vi2c, i2c_req); - virti2c_transfer_end(i2c_req, &msgs[i]); - if (ret) - goto err; - } - return 0; -err: - return ret; -} - -static u32 virtio_i2c_functionality(struct i2c_adapter *adapter) -{ - return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; -} - -static struct i2c_algorithm virtio_i2c_algorithm = { - .master_xfer = virtio_i2c_master_xfer, - .functionality = virtio_i2c_functionality, -}; - -/* virtqueue incoming data interrupt IRQ */ -static void virti2c_vq_isr(struct virtqueue *vq) -{ - struct virtio_i2c *vi2c = vq->vdev->priv; - - wake_up(&vi2c->inq); -} - -static int virti2c_init_vqs(struct virtio_i2c *vi2c) -{ - struct virtqueue *vqs[1]; - vq_callback_t *cbs[] = { virti2c_vq_isr }; - static const char * const names[] = { "virti2c_vq_isr" }; - int err; - - err = vi2c->vdev->config->find_vqs(vi2c->vdev, 1, vqs, cbs, names); - if (err) - return err; - vi2c->vq = vqs[0]; - - return 0; -} - -static void virti2c_del_vqs(struct virtio_i2c *vi2c) -{ - vi2c->vdev->config->del_vqs(vi2c->vdev); -} - -static int virti2c_init_hw(struct virtio_device *vdev, - struct virtio_i2c *vi2c) -{ - int err; - - i2c_set_adapdata(&vi2c->adapter, vi2c); - vi2c->adapter.algo = &virtio_i2c_algorithm; - - vi2c->adapter.owner = THIS_MODULE; - vi2c->adapter.dev.parent = &vdev->dev; - vi2c->adapter.dev.of_node = NULL; - - /* read virtio i2c config info */ - vi2c->adapter.nr = virtio_cread32(vdev, I2C_ADAPTER_NR); - snprintf(vi2c->adapter.name, sizeof(vi2c->adapter.name), - "virtio_i2c_%d", vi2c->adapter.nr); - - err = i2c_add_numbered_adapter(&vi2c->adapter); - if (err) - return err; - return 0; -} - -static int virti2c_probe(struct virtio_device *vdev) -{ - struct virtio_i2c *vi2c; - int err = 0; - - if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) - return -ENODEV; - - vi2c = kzalloc(sizeof(*vi2c), GFP_KERNEL); - if (!vi2c) - return -ENOMEM; - - vi2c->vdev = vdev; - vdev->priv = vi2c; - init_waitqueue_head(&vi2c->inq); - - err = virti2c_init_vqs(vi2c); - if (err) - goto err_init_vq; - - err = virti2c_init_hw(vdev, vi2c); - if (err) - goto err_init_hw; - - virtio_device_ready(vdev); - - virtqueue_enable_cb(vi2c->vq); - return 0; - -err_init_hw: - virti2c_del_vqs(vi2c); -err_init_vq: - kfree(vi2c); - return err; -} -static void virti2c_remove(struct virtio_device *vdev) -{ - struct virtio_i2c *vi2c = vdev->priv; - - i2c_del_adapter(&vi2c->adapter); - vdev->config->reset(vdev); - virti2c_del_vqs(vi2c); - kfree(vi2c); -} - -static unsigned int features[] = { - /* none */ -}; -static struct virtio_device_id id_table[] = { - { VIRTIO_ID_I2C, VIRTIO_DEV_ANY_ID }, - { 0 }, -}; - -static struct virtio_driver virtio_i2c_driver = { - .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, - .feature_table = features, - .feature_table_size = ARRAY_SIZE(features), - .id_table = id_table, - .probe = virti2c_probe, - .remove = virti2c_remove, -}; - -module_virtio_driver(virtio_i2c_driver); -MODULE_DEVICE_TABLE(virtio, id_table); - -MODULE_DESCRIPTION("Virtio i2c frontend driver"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 94c837046786..57e3790c87b1 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -459,9 +459,15 @@ static long i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return i2cdev_ioctl_smbus(client, arg); case I2C_RETRIES: + if (arg > INT_MAX) + return -EINVAL; + client->adapter->retries = arg; break; case I2C_TIMEOUT: + if (arg > INT_MAX) + return -EINVAL; + /* For historical reasons, user-space sets the timeout * value in units of 10 ms. */ diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c index f78c464899db..3d2c60c8de83 100644 --- a/drivers/input/keyboard/omap4-keypad.c +++ b/drivers/input/keyboard/omap4-keypad.c @@ -126,12 +126,8 @@ static irqreturn_t omap4_keypad_irq_handler(int irq, void *dev_id) { struct omap4_keypad *keypad_data = dev_id; - if (kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS)) { - /* Disable interrupts */ - kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQENABLE, - OMAP4_VAL_IRQDISABLE); + if (kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS)) return IRQ_WAKE_THREAD; - } return IRQ_NONE; } @@ -173,11 +169,6 @@ static irqreturn_t omap4_keypad_irq_thread_fn(int irq, void *dev_id) kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS, kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS)); - /* enable interrupts */ - kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQENABLE, - OMAP4_DEF_IRQENABLE_EVENTEN | - OMAP4_DEF_IRQENABLE_LONGKEY); - return IRQ_HANDLED; } @@ -214,9 +205,10 @@ static void omap4_keypad_close(struct input_dev *input) disable_irq(keypad_data->irq); - /* Disable interrupts */ + /* Disable interrupts and wake-up events */ kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQENABLE, OMAP4_VAL_IRQDISABLE); + kbd_writel(keypad_data, OMAP4_KBD_WAKEUPENABLE, 0); /* clear pending interrupts */ kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS, @@ -364,7 +356,7 @@ static int omap4_keypad_probe(struct platform_device *pdev) } error = request_threaded_irq(keypad_data->irq, omap4_keypad_irq_handler, - omap4_keypad_irq_thread_fn, 0, + omap4_keypad_irq_thread_fn, IRQF_ONESHOT, "omap4-keypad", keypad_data); if (error) { dev_err(&pdev->dev, "failed to register interrupt\n"); diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 471984ec2db0..30adc5745cba 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1240,6 +1240,7 @@ MODULE_DEVICE_TABLE(i2c, elan_id); static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0000", 0 }, { "ELAN0100", 0 }, + { "ELAN0501", 0 }, { "ELAN0600", 0 }, { "ELAN0602", 0 }, { "ELAN0605", 0 }, diff --git a/drivers/iommu/dma-mapping-fast.c b/drivers/iommu/dma-mapping-fast.c index 0881d68f34d8..66c2abd358f8 100644 --- a/drivers/iommu/dma-mapping-fast.c +++ b/drivers/iommu/dma-mapping-fast.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-2017, The Linux Foundation. All rights reserved. +/* Copyright (c) 2016-2017,2019, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -513,12 +513,22 @@ static void *fast_smmu_alloc(struct device *dev, size_t size, av8l_fast_iopte *ptep; unsigned long flags; struct sg_mapping_iter miter; - unsigned int count = ALIGN(size, SZ_4K) >> PAGE_SHIFT; + size_t count = ALIGN(size, SZ_4K) >> PAGE_SHIFT; int prot = IOMMU_READ | IOMMU_WRITE; /* TODO: extract from attrs */ bool is_coherent = is_dma_coherent(dev, attrs); pgprot_t remap_prot = __get_dma_pgprot(attrs, PAGE_KERNEL, is_coherent); struct page **pages; + /* + * sg_alloc_table_from_pages accepts unsigned int value for count + * so check count doesn't exceed UINT_MAX. + */ + + if (count > UINT_MAX) { + dev_err(dev, "count: %zx exceeds UNIT_MAX\n", count); + return NULL; + } + prot = __get_iommu_pgprot(attrs, prot, is_coherent); *handle = DMA_ERROR_CODE; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 7feaa82f8c7c..8b4a4d95669a 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2041,7 +2041,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, * than default. Unnecessary for PT mode. */ if (translation != CONTEXT_TT_PASS_THROUGH) { - for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { ret = -ENOMEM; pgd = phys_to_virt(dma_pte_addr(pgd)); if (!dma_pte_present(pgd)) @@ -2055,7 +2055,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, translation = CONTEXT_TT_MULTI_LEVEL; context_set_address_root(context, virt_to_phys(pgd)); - context_set_address_width(context, iommu->agaw); + context_set_address_width(context, agaw); } else { /* * In pass through mode, AW must be programmed to diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index dd7e38ac29bd..d15347de415a 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -851,7 +851,7 @@ u16 capi20_get_manufacturer(u32 contr, u8 *buf) u16 ret; if (contr == 0) { - strlcpy(buf, capi_manufakturer, CAPI_MANUFACTURER_LEN); + strncpy(buf, capi_manufakturer, CAPI_MANUFACTURER_LEN); return CAPI_NOERROR; } @@ -859,7 +859,7 @@ u16 capi20_get_manufacturer(u32 contr, u8 *buf) ctr = get_capi_ctr_by_nr(contr); if (ctr && ctr->state == CAPI_CTR_RUNNING) { - strlcpy(buf, ctr->manu, CAPI_MANUFACTURER_LEN); + strncpy(buf, ctr->manu, CAPI_MANUFACTURER_LEN); ret = CAPI_NOERROR; } else ret = CAPI_REGNOTINSTALLED; diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 0ad06670fa99..7bf1cdb582f5 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -476,21 +476,6 @@ config DM_VERITY If unsure, say N. -config DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 - bool "Prefetch size 128" - -config DM_VERITY_HASH_PREFETCH_MIN_SIZE - int "Verity hash prefetch minimum size" - depends on DM_VERITY - range 1 4096 - default 128 if DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 - default 1 - ---help--- - This sets minimum number of hash blocks to prefetch for dm-verity. - For devices like eMMC, having larger prefetch size like 128 can improve - performance with increased memory consumption for keeping more hashes - in RAM. - config DM_VERITY_FEC bool "Verity forward error correction support" depends on DM_VERITY @@ -554,7 +539,6 @@ config DM_ANDROID_VERITY depends on ASYMMETRIC_KEY_TYPE depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE depends on MD_LINEAR=y - select DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 ---help--- This device-mapper target is virtually a VERITY target. This target is setup by reading the metadata contents piggybacked diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index d2e3abc182b3..131077aabd08 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -529,7 +529,6 @@ static void verity_prefetch_io(struct work_struct *work) container_of(work, struct dm_verity_prefetch_work, work); struct dm_verity *v = pw->v; int i; - sector_t prefetch_size; for (i = v->levels - 2; i >= 0; i--) { sector_t hash_block_start; @@ -552,14 +551,8 @@ static void verity_prefetch_io(struct work_struct *work) hash_block_end = v->hash_blocks - 1; } no_prefetch_cluster: - // for emmc, it is more efficient to send bigger read - prefetch_size = max((sector_t)CONFIG_DM_VERITY_HASH_PREFETCH_MIN_SIZE, - hash_block_end - hash_block_start + 1); - if ((hash_block_start + prefetch_size) >= (v->hash_start + v->hash_blocks)) { - prefetch_size = hash_block_end - hash_block_start + 1; - } dm_bufio_prefetch(v->bufio, hash_block_start, - prefetch_size); + hash_block_end - hash_block_start + 1); } kfree(pw); diff --git a/drivers/media/platform/vivid/vivid-vid-cap.c b/drivers/media/platform/vivid/vivid-vid-cap.c index ef5412311b2f..a84954f1be34 100644 --- a/drivers/media/platform/vivid/vivid-vid-cap.c +++ b/drivers/media/platform/vivid/vivid-vid-cap.c @@ -461,6 +461,8 @@ void vivid_update_format_cap(struct vivid_dev *dev, bool keep_controls) tpg_s_rgb_range(&dev->tpg, v4l2_ctrl_g_ctrl(dev->rgb_range_cap)); break; } + vfree(dev->bitmap_cap); + dev->bitmap_cap = NULL; vivid_update_quality(dev); tpg_reset_source(&dev->tpg, dev->src_rect.width, dev->src_rect.height, dev->field_cap); dev->crop_cap = dev->src_rect; diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c index 524660510599..0c15ba21fa54 100644 --- a/drivers/misc/genwqe/card_utils.c +++ b/drivers/misc/genwqe/card_utils.c @@ -217,7 +217,7 @@ u32 genwqe_crc32(u8 *buff, size_t len, u32 init) void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size, dma_addr_t *dma_handle) { - if (get_order(size) > MAX_ORDER) + if (get_order(size) >= MAX_ORDER) return NULL; return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle, diff --git a/drivers/misc/mic/card/mic_virtio.c b/drivers/misc/mic/card/mic_virtio.c index e486a0c26267..f6ed57d3125c 100644 --- a/drivers/misc/mic/card/mic_virtio.c +++ b/drivers/misc/mic/card/mic_virtio.c @@ -311,7 +311,7 @@ unmap: static int mic_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[]) + const char * const names[]) { struct mic_vdev *mvdev = to_micvdev(vdev); struct mic_device_ctrl __iomem *dc = mvdev->dc; diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 999fdb9bad7d..1ba28b350e91 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -2087,9 +2087,11 @@ reinit: if (err) { pr_warn("%s: Enabling HPI failed\n", mmc_hostname(card->host)); + card->ext_csd.hpi_en = 0; err = 0; - } else + } else { card->ext_csd.hpi_en = 1; + } } /* diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 6b814d7d6560..af937d3e8c3e 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -2117,7 +2117,6 @@ static int omap_hsmmc_probe(struct platform_device *pdev) mmc->max_blk_size = 512; /* Block Length at max can be 1024 */ mmc->max_blk_count = 0xFFFF; /* No. of Blocks is 16 bits */ mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count; - mmc->max_seg_size = mmc->max_req_size; mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED | MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_ERASE; @@ -2174,6 +2173,17 @@ static int omap_hsmmc_probe(struct platform_device *pdev) goto err_irq; } + /* + * Limit the maximum segment size to the lower of the request size + * and the DMA engine device segment size limits. In reality, with + * 32-bit transfers, the DMA engine can do longer segments than this + * but there is no way to represent that in the DMA model - if we + * increase this figure here, we get warnings from the DMA API debug. + */ + mmc->max_seg_size = min3(mmc->max_req_size, + dma_get_max_seg_size(host->rx_chan->device->dev), + dma_get_max_seg_size(host->tx_chan->device->dev)); + /* Request IRQ for MMC operations */ ret = devm_request_irq(&pdev->dev, host->irq, omap_hsmmc_irq, 0, mmc_hostname(mmc), host); diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig index 547098086773..f81df91a9ce1 100644 --- a/drivers/net/caif/Kconfig +++ b/drivers/net/caif/Kconfig @@ -52,5 +52,5 @@ config CAIF_VIRTIO The caif driver for CAIF over Virtio. if CAIF_VIRTIO -source "drivers/vhost/Kconfig" +source "drivers/vhost/Kconfig.vringh" endif diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 2f9b12cf9ee5..61a9ab4fe047 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1163,11 +1163,15 @@ out: map_failed_frags: last = i+1; - for (i = 0; i < last; i++) + for (i = 1; i < last; i++) dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address, descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK, DMA_TO_DEVICE); + dma_unmap_single(&adapter->vdev->dev, + descs[0].fields.address, + descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK, + DMA_TO_DEVICE); map_failed: if (!firmware_has_feature(FW_FEATURE_CMO)) netdev_err(netdev, "tx: unable to map xmit buffer\n"); diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 111d907e0c11..79cede19e0c4 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2825,6 +2825,12 @@ static int hso_get_config_data(struct usb_interface *interface) return -EIO; } + /* check if we have a valid interface */ + if (if_num > 16) { + kfree(config_data); + return -EINVAL; + } + switch (config_data[if_num]) { case 0x0: result = 0; @@ -2895,10 +2901,18 @@ static int hso_probe(struct usb_interface *interface, /* Get the interface/port specification from either driver_info or from * the device itself */ - if (id->driver_info) + if (id->driver_info) { + /* if_num is controlled by the device, driver_info is a 0 terminated + * array. Make sure, the access is in bounds! */ + for (i = 0; i <= if_num; ++i) + if (((u32 *)(id->driver_info))[i] == 0) + goto exit; port_spec = ((u32 *)(id->driver_info))[if_num]; - else + } else { port_spec = hso_get_config_data(interface); + if (port_spec < 0) + goto exit; + } /* Check if we need to switch to alt interfaces prior to port * configuration */ diff --git a/drivers/net/wireless/b43/phy_common.c b/drivers/net/wireless/b43/phy_common.c index ec2b9c577b90..3644c9edaf81 100644 --- a/drivers/net/wireless/b43/phy_common.c +++ b/drivers/net/wireless/b43/phy_common.c @@ -616,7 +616,7 @@ struct b43_c32 b43_cordic(int theta) u8 i; s32 tmp; s8 signx = 1; - u32 angle = 0; + s32 angle = 0; struct b43_c32 ret = { .i = 39797, .q = 0, }; while (theta > (180 << 16)) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 0a4bd73caae5..6f55ab4f7959 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -889,7 +889,7 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue, if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) { unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to; - BUG_ON(pull_to <= skb_headlen(skb)); + BUG_ON(pull_to < skb_headlen(skb)); __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); } if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) { diff --git a/drivers/pci/host/pcie-altera.c b/drivers/pci/host/pcie-altera.c index 99da549d5d06..0118287a8a10 100644 --- a/drivers/pci/host/pcie-altera.c +++ b/drivers/pci/host/pcie-altera.c @@ -40,8 +40,10 @@ #define P2A_INT_ENABLE 0x3070 #define P2A_INT_ENA_ALL 0xf #define RP_LTSSM 0x3c64 +#define RP_LTSSM_MASK 0x1f #define LTSSM_L0 0xf +#define PCIE_CAP_OFFSET 0x80 /* TLP configuration type 0 and 1 */ #define TLP_FMTTYPE_CFGRD0 0x04 /* Configuration Read Type 0 */ #define TLP_FMTTYPE_CFGWR0 0x44 /* Configuration Write Type 0 */ @@ -60,6 +62,9 @@ #define TLP_LOOP 500 #define RP_DEVFN 0 +#define LINK_UP_TIMEOUT HZ +#define LINK_RETRAIN_TIMEOUT HZ + #define INTX_NUM 4 #define DWORD_MASK 3 @@ -80,25 +85,21 @@ struct tlp_rp_regpair_t { u32 reg1; }; -static void altera_pcie_retrain(struct pci_dev *dev) +static inline void cra_writel(struct altera_pcie *pcie, const u32 value, + const u32 reg) { - u16 linkcap, linkstat; - - /* - * Set the retrain bit if the PCIe rootport support > 2.5GB/s, but - * current speed is 2.5 GB/s. - */ - pcie_capability_read_word(dev, PCI_EXP_LNKCAP, &linkcap); + writel_relaxed(value, pcie->cra_base + reg); +} - if ((linkcap & PCI_EXP_LNKCAP_SLS) <= PCI_EXP_LNKCAP_SLS_2_5GB) - return; +static inline u32 cra_readl(struct altera_pcie *pcie, const u32 reg) +{ + return readl_relaxed(pcie->cra_base + reg); +} - pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &linkstat); - if ((linkstat & PCI_EXP_LNKSTA_CLS) == PCI_EXP_LNKSTA_CLS_2_5GB) - pcie_capability_set_word(dev, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_RL); +static bool altera_pcie_link_is_up(struct altera_pcie *pcie) +{ + return !!((cra_readl(pcie, RP_LTSSM) & RP_LTSSM_MASK) == LTSSM_L0); } -DECLARE_PCI_FIXUP_EARLY(0x1172, PCI_ANY_ID, altera_pcie_retrain); /* * Altera PCIe port uses BAR0 of RC's configuration space as the translation @@ -119,17 +120,6 @@ static bool altera_pcie_hide_rc_bar(struct pci_bus *bus, unsigned int devfn, return false; } -static inline void cra_writel(struct altera_pcie *pcie, const u32 value, - const u32 reg) -{ - writel_relaxed(value, pcie->cra_base + reg); -} - -static inline u32 cra_readl(struct altera_pcie *pcie, const u32 reg) -{ - return readl_relaxed(pcie->cra_base + reg); -} - static void tlp_write_tx(struct altera_pcie *pcie, struct tlp_rp_regpair_t *tlp_rp_regdata) { @@ -138,11 +128,6 @@ static void tlp_write_tx(struct altera_pcie *pcie, cra_writel(pcie, tlp_rp_regdata->ctrl, RP_TX_CNTRL); } -static bool altera_pcie_link_is_up(struct altera_pcie *pcie) -{ - return !!(cra_readl(pcie, RP_LTSSM) & LTSSM_L0); -} - static bool altera_pcie_valid_config(struct altera_pcie *pcie, struct pci_bus *bus, int dev) { @@ -286,22 +271,14 @@ static int tlp_cfg_dword_write(struct altera_pcie *pcie, u8 bus, u32 devfn, return PCIBIOS_SUCCESSFUL; } -static int altera_pcie_cfg_read(struct pci_bus *bus, unsigned int devfn, - int where, int size, u32 *value) +static int _altera_pcie_cfg_read(struct altera_pcie *pcie, u8 busno, + unsigned int devfn, int where, int size, + u32 *value) { - struct altera_pcie *pcie = bus->sysdata; int ret; u32 data; u8 byte_en; - if (altera_pcie_hide_rc_bar(bus, devfn, where)) - return PCIBIOS_BAD_REGISTER_NUMBER; - - if (!altera_pcie_valid_config(pcie, bus, PCI_SLOT(devfn))) { - *value = 0xffffffff; - return PCIBIOS_DEVICE_NOT_FOUND; - } - switch (size) { case 1: byte_en = 1 << (where & 3); @@ -314,7 +291,7 @@ static int altera_pcie_cfg_read(struct pci_bus *bus, unsigned int devfn, break; } - ret = tlp_cfg_dword_read(pcie, bus->number, devfn, + ret = tlp_cfg_dword_read(pcie, busno, devfn, (where & ~DWORD_MASK), byte_en, &data); if (ret != PCIBIOS_SUCCESSFUL) return ret; @@ -334,20 +311,14 @@ static int altera_pcie_cfg_read(struct pci_bus *bus, unsigned int devfn, return PCIBIOS_SUCCESSFUL; } -static int altera_pcie_cfg_write(struct pci_bus *bus, unsigned int devfn, - int where, int size, u32 value) +static int _altera_pcie_cfg_write(struct altera_pcie *pcie, u8 busno, + unsigned int devfn, int where, int size, + u32 value) { - struct altera_pcie *pcie = bus->sysdata; u32 data32; u32 shift = 8 * (where & 3); u8 byte_en; - if (altera_pcie_hide_rc_bar(bus, devfn, where)) - return PCIBIOS_BAD_REGISTER_NUMBER; - - if (!altera_pcie_valid_config(pcie, bus, PCI_SLOT(devfn))) - return PCIBIOS_DEVICE_NOT_FOUND; - switch (size) { case 1: data32 = (value & 0xff) << shift; @@ -363,8 +334,40 @@ static int altera_pcie_cfg_write(struct pci_bus *bus, unsigned int devfn, break; } - return tlp_cfg_dword_write(pcie, bus->number, devfn, - (where & ~DWORD_MASK), byte_en, data32); + return tlp_cfg_dword_write(pcie, busno, devfn, (where & ~DWORD_MASK), + byte_en, data32); +} + +static int altera_pcie_cfg_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *value) +{ + struct altera_pcie *pcie = bus->sysdata; + + if (altera_pcie_hide_rc_bar(bus, devfn, where)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + if (!altera_pcie_valid_config(pcie, bus, PCI_SLOT(devfn))) { + *value = 0xffffffff; + return PCIBIOS_DEVICE_NOT_FOUND; + } + + return _altera_pcie_cfg_read(pcie, bus->number, devfn, where, size, + value); +} + +static int altera_pcie_cfg_write(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 value) +{ + struct altera_pcie *pcie = bus->sysdata; + + if (altera_pcie_hide_rc_bar(bus, devfn, where)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + if (!altera_pcie_valid_config(pcie, bus, PCI_SLOT(devfn))) + return PCIBIOS_DEVICE_NOT_FOUND; + + return _altera_pcie_cfg_write(pcie, bus->number, devfn, where, size, + value); } static struct pci_ops altera_pcie_ops = { @@ -372,6 +375,90 @@ static struct pci_ops altera_pcie_ops = { .write = altera_pcie_cfg_write, }; +static int altera_read_cap_word(struct altera_pcie *pcie, u8 busno, + unsigned int devfn, int offset, u16 *value) +{ + u32 data; + int ret; + + ret = _altera_pcie_cfg_read(pcie, busno, devfn, + PCIE_CAP_OFFSET + offset, sizeof(*value), + &data); + *value = data; + return ret; +} + +static int altera_write_cap_word(struct altera_pcie *pcie, u8 busno, + unsigned int devfn, int offset, u16 value) +{ + return _altera_pcie_cfg_write(pcie, busno, devfn, + PCIE_CAP_OFFSET + offset, sizeof(value), + value); +} + +static void altera_wait_link_retrain(struct altera_pcie *pcie) +{ + u16 reg16; + unsigned long start_jiffies; + + /* Wait for link training end. */ + start_jiffies = jiffies; + for (;;) { + altera_read_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN, + PCI_EXP_LNKSTA, ®16); + if (!(reg16 & PCI_EXP_LNKSTA_LT)) + break; + + if (time_after(jiffies, start_jiffies + LINK_RETRAIN_TIMEOUT)) { + dev_err(&pcie->pdev->dev, "link retrain timeout\n"); + break; + } + udelay(100); + } + + /* Wait for link is up */ + start_jiffies = jiffies; + for (;;) { + if (altera_pcie_link_is_up(pcie)) + break; + + if (time_after(jiffies, start_jiffies + LINK_UP_TIMEOUT)) { + dev_err(&pcie->pdev->dev, "link up timeout\n"); + break; + } + udelay(100); + } +} + +static void altera_pcie_retrain(struct altera_pcie *pcie) +{ + u16 linkcap, linkstat, linkctl; + + if (!altera_pcie_link_is_up(pcie)) + return; + + /* + * Set the retrain bit if the PCIe rootport support > 2.5GB/s, but + * current speed is 2.5 GB/s. + */ + altera_read_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN, PCI_EXP_LNKCAP, + &linkcap); + if ((linkcap & PCI_EXP_LNKCAP_SLS) <= PCI_EXP_LNKCAP_SLS_2_5GB) + return; + + altera_read_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN, PCI_EXP_LNKSTA, + &linkstat); + if ((linkstat & PCI_EXP_LNKSTA_CLS) == PCI_EXP_LNKSTA_CLS_2_5GB) { + altera_read_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN, + PCI_EXP_LNKCTL, &linkctl); + linkctl |= PCI_EXP_LNKCTL_RL; + altera_write_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN, + PCI_EXP_LNKCTL, linkctl); + + altera_wait_link_retrain(pcie); + } +} + static int altera_pcie_intx_map(struct irq_domain *domain, unsigned int irq, irq_hw_number_t hwirq) { @@ -506,6 +593,11 @@ static int altera_pcie_parse_dt(struct altera_pcie *pcie) return 0; } +static void altera_pcie_host_init(struct altera_pcie *pcie) +{ + altera_pcie_retrain(pcie); +} + static int altera_pcie_probe(struct platform_device *pdev) { struct altera_pcie *pcie; @@ -543,6 +635,7 @@ static int altera_pcie_probe(struct platform_device *pdev) cra_writel(pcie, P2A_INT_STS_ALL, P2A_INT_STATUS); /* enable all interrupts */ cra_writel(pcie, P2A_INT_ENA_ALL, P2A_INT_ENABLE); + altera_pcie_host_init(pcie); bus = pci_scan_root_bus(&pdev->dev, pcie->root_bus_nr, &altera_pcie_ops, pcie, &pcie->resources); diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c index 9e29b1321648..15783869e1a0 100644 --- a/drivers/power/olpc_battery.c +++ b/drivers/power/olpc_battery.c @@ -427,14 +427,14 @@ static int olpc_bat_get_property(struct power_supply *psy, if (ret) return ret; - val->intval = (s16)be16_to_cpu(ec_word) * 100 / 256; + val->intval = (s16)be16_to_cpu(ec_word) * 10 / 256; break; case POWER_SUPPLY_PROP_TEMP_AMBIENT: ret = olpc_ec_cmd(EC_AMB_TEMP, NULL, 0, (void *)&ec_word, 2); if (ret) return ret; - val->intval = (int)be16_to_cpu(ec_word) * 100 / 256; + val->intval = (int)be16_to_cpu(ec_word) * 10 / 256; break; case POWER_SUPPLY_PROP_CHARGE_COUNTER: ret = olpc_ec_cmd(EC_BAT_ACR, NULL, 0, (void *)&ec_word, 2); diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index e1a10232a943..e44872fb9e5e 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -147,7 +147,7 @@ static void rproc_virtio_del_vqs(struct virtio_device *vdev) static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[]) + const char * const names[]) { struct rproc *rproc = vdev_to_rproc(vdev); int i, ret; diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 73354ee27877..1fcd27c1f183 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -945,7 +945,7 @@ static void rpmsg_ns_cb(struct rpmsg_channel *rpdev, void *data, int len, static int rpmsg_probe(struct virtio_device *vdev) { vq_callback_t *vq_cbs[] = { rpmsg_recv_done, rpmsg_xmit_done }; - const char *names[] = { "input", "output" }; + static const char * const names[] = { "input", "output" }; struct virtqueue *vqs[2]; struct virtproc_info *vrp; void *bufs_va; diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index 38c8e308d4c8..a96c98e3fc73 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -275,16 +275,16 @@ static void zfcp_free_low_mem_buffers(struct zfcp_adapter *adapter) */ int zfcp_status_read_refill(struct zfcp_adapter *adapter) { - while (atomic_read(&adapter->stat_miss) > 0) + while (atomic_add_unless(&adapter->stat_miss, -1, 0)) if (zfcp_fsf_status_read(adapter->qdio)) { + atomic_inc(&adapter->stat_miss); /* undo add -1 */ if (atomic_read(&adapter->stat_miss) >= adapter->stat_read_buf_num) { zfcp_erp_adapter_reopen(adapter, 0, "axsref1"); return 1; } break; - } else - atomic_dec(&adapter->stat_miss); + } return 0; } diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c index 53fb975c404b..1d060fd293a3 100644 --- a/drivers/s390/virtio/kvm_virtio.c +++ b/drivers/s390/virtio/kvm_virtio.c @@ -255,7 +255,7 @@ static void kvm_del_vqs(struct virtio_device *vdev) static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[]) + const char * const names[]) { struct kvm_device *kdev = to_kvmdev(vdev); int i; diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index ff06bdfd2b20..9e685246b98d 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -639,7 +639,7 @@ out: static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[]) + const char * const names[]) { struct virtio_ccw_device *vcdev = to_vc_device(vdev); unsigned long *indicatorp = NULL; diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index d0b227ffbd5f..573aeec7a02b 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -2279,7 +2279,7 @@ static int _bnx2fc_create(struct net_device *netdev, if (!interface) { printk(KERN_ERR PFX "bnx2fc_interface_create failed\n"); rc = -ENOMEM; - goto ifput_err; + goto netdev_err; } if (netdev->priv_flags & IFF_802_1Q_VLAN) { diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index cf04960cc3e6..1a1368f5863c 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -88,7 +88,7 @@ struct bcm2835_spi { u8 *rx_buf; int tx_len; int rx_len; - bool dma_pending; + unsigned int dma_pending; }; static inline u32 bcm2835_rd(struct bcm2835_spi *bs, unsigned reg) @@ -155,8 +155,7 @@ static irqreturn_t bcm2835_spi_interrupt(int irq, void *dev_id) /* Write as many bytes as possible to FIFO */ bcm2835_wr_fifo(bs); - /* based on flags decide if we can finish the transfer */ - if (bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE) { + if (!bs->rx_len) { /* Transfer complete - reset SPI HW */ bcm2835_spi_reset_hw(master); /* wake up the framework */ @@ -233,10 +232,9 @@ static void bcm2835_spi_dma_done(void *data) * is called the tx-dma must have finished - can't get to this * situation otherwise... */ - dmaengine_terminate_all(master->dma_tx); - - /* mark as no longer pending */ - bs->dma_pending = 0; + if (cmpxchg(&bs->dma_pending, true, false)) { + dmaengine_terminate_all(master->dma_tx); + } /* and mark as completed */; complete(&master->xfer_completion); @@ -342,6 +340,7 @@ static int bcm2835_spi_transfer_one_dma(struct spi_master *master, if (ret) { /* need to reset on errors */ dmaengine_terminate_all(master->dma_tx); + bs->dma_pending = false; bcm2835_spi_reset_hw(master); return ret; } @@ -617,10 +616,9 @@ static void bcm2835_spi_handle_err(struct spi_master *master, struct bcm2835_spi *bs = spi_master_get_devdata(master); /* if an error occurred and we have an active dma, then terminate */ - if (bs->dma_pending) { + if (cmpxchg(&bs->dma_pending, true, false)) { dmaengine_terminate_all(master->dma_tx); dmaengine_terminate_all(master->dma_rx); - bs->dma_pending = 0; } /* and reset */ bcm2835_spi_reset_hw(master); diff --git a/drivers/staging/android/sync.c b/drivers/staging/android/sync.c index 5238d67490ce..39b99740a6d8 100644 --- a/drivers/staging/android/sync.c +++ b/drivers/staging/android/sync.c @@ -451,6 +451,8 @@ static bool android_fence_signaled(struct fence *fence) int ret; ret = parent->ops->has_signaled(pt); + if (!ret && parent->destroyed) + ret = -ENOENT; if (ret < 0) fence->status = ret; return ret; diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 0a8e5ac891d4..736de1021d8b 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -507,6 +507,13 @@ static int acm_tty_install(struct tty_driver *driver, struct tty_struct *tty) if (retval) goto error_init_termios; + /* + * Suppress initial echoing for some devices which might send data + * immediately after acm driver has been installed. + */ + if (acm->quirks & DISABLE_ECHO) + tty->termios.c_lflag &= ~ECHO; + tty->driver_data = acm; return 0; @@ -1677,6 +1684,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x0e8d, 0x0003), /* FIREFLY, MediaTek Inc; andrey.arapov@gmail.com */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, + { USB_DEVICE(0x0e8d, 0x2000), /* MediaTek Inc Preloader */ + .driver_info = DISABLE_ECHO, /* DISABLE ECHO in termios flag */ + }, { USB_DEVICE(0x0e8d, 0x3329), /* MediaTek Inc GPS */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, @@ -1875,6 +1885,13 @@ static const struct usb_device_id acm_ids[] = { .driver_info = IGNORE_DEVICE, }, + { USB_DEVICE(0x1bc7, 0x0021), /* Telit 3G ACM only composition */ + .driver_info = SEND_ZERO_PACKET, + }, + { USB_DEVICE(0x1bc7, 0x0023), /* Telit 3G ACM + ECM composition */ + .driver_info = SEND_ZERO_PACKET, + }, + /* control interfaces without any protocol set */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_PROTO_NONE) }, diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index b30ac5fcde68..1ad9ff9f493d 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -134,3 +134,4 @@ struct acm { #define QUIRK_CONTROL_LINE_STATE BIT(6) #define CLEAR_HALT_CONDITIONS BIT(7) #define SEND_ZERO_PACKET BIT(8) +#define DISABLE_ECHO BIT(9) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index cf378b1ed373..733479ddf8a7 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -240,7 +240,8 @@ static const struct usb_device_id usb_quirk_list[] = { USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, /* Corsair K70 RGB */ - { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT | + USB_QUIRK_DELAY_CTRL_MSG }, /* Corsair Strafe */ { USB_DEVICE(0x1b1c, 0x1b15), .driver_info = USB_QUIRK_DELAY_INIT | diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index a11c2c8bda53..a217f71b45c6 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -1990,6 +1990,8 @@ static int r8a66597_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, static void r8a66597_endpoint_disable(struct usb_hcd *hcd, struct usb_host_endpoint *hep) +__acquires(r8a66597->lock) +__releases(r8a66597->lock) { struct r8a66597 *r8a66597 = hcd_to_r8a66597(hcd); struct r8a66597_pipe *pipe = (struct r8a66597_pipe *)hep->hcpriv; @@ -2002,13 +2004,14 @@ static void r8a66597_endpoint_disable(struct usb_hcd *hcd, return; pipenum = pipe->info.pipenum; + spin_lock_irqsave(&r8a66597->lock, flags); if (pipenum == 0) { kfree(hep->hcpriv); hep->hcpriv = NULL; + spin_unlock_irqrestore(&r8a66597->lock, flags); return; } - spin_lock_irqsave(&r8a66597->lock, flags); pipe_stop(r8a66597, pipe); pipe_irq_disable(r8a66597, pipenum); disable_irq_empty(r8a66597, pipenum); diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 61da1a22b98a..5293596577bf 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1504,7 +1504,8 @@ int xhci_bus_suspend(struct usb_hcd *hcd) portsc_buf[port_index] = 0; /* Bail out if a USB3 port has a new device in link training */ - if ((t1 & PORT_PLS_MASK) == XDEV_POLLING) { + if ((hcd->speed >= HCD_USB3) && + (t1 & PORT_PLS_MASK) == XDEV_POLLING) { bus_state->bus_suspended = 0; spin_unlock_irqrestore(&xhci->lock, flags); xhci_dbg(xhci, "Bus suspend bailout, port in polling\n"); diff --git a/drivers/usb/misc/ks_bridge.c b/drivers/usb/misc/ks_bridge.c index dad51be0820e..c6fd30349ecc 100644 --- a/drivers/usb/misc/ks_bridge.c +++ b/drivers/usb/misc/ks_bridge.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2014, 2017-2018, Linux Foundation. All rights reserved. + * Copyright (c) 2012-2014, 2017-2019, Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -700,6 +700,7 @@ ksb_usb_probe(struct usb_interface *ifc, const struct usb_device_id *id) case 0x9025: case 0x9091: case 0x901D: + case 0x901F: /* 1-1 mapping between ksb and udev port which starts with 1 */ ksb_port_num = udev->portnum - 1; dev_dbg(&udev->dev, "ifc_count: %u, port_num:%u\n", ifc_count, diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 2b81939fecd7..7bc2c9fef605 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1163,6 +1163,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1213, 0xff) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1214), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) }, + { USB_DEVICE(TELIT_VENDOR_ID, 0x1900), /* Telit LN940 (QMI) */ + .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1901, 0xff), /* Telit LN940 (MBIM) */ + .driver_info = NCTRL(0) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff), .driver_info = RSVD(1) }, @@ -1327,6 +1331,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0414, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0417, 0xff, 0xff, 0xff) }, + { USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x0602, 0xff) }, /* GosunCn ZTE WeLink ME3630 (MBIM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1008, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1010, 0xff, 0xff, 0xff), @@ -1530,6 +1535,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(2) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1428, 0xff, 0xff, 0xff), /* Telewell TW-LTE 4G v2 */ .driver_info = RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x1476, 0xff) }, /* GosunCn ZTE WeLink ME3630 (ECM/NCM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1533, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1534, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1535, 0xff, 0xff, 0xff) }, @@ -1757,6 +1763,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(ALINK_VENDOR_ID, ALINK_PRODUCT_3GU, 0xff, 0xff, 0xff) }, { USB_DEVICE(ALINK_VENDOR_ID, SIMCOM_PRODUCT_SIM7100E), .driver_info = RSVD(5) | RSVD(6) }, + { USB_DEVICE_INTERFACE_CLASS(0x1e0e, 0x9003, 0xff) }, /* Simcom SIM7500/SIM7600 MBIM mode */ { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S_X200), .driver_info = NCTRL(0) | NCTRL(1) | RSVD(4) }, { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X220_X500D), @@ -1941,7 +1948,18 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_6802, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD300, 0xff, 0xff, 0xff) }, - { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x421d, 0xff, 0xff, 0xff) }, /* HP lt2523 (Novatel E371) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x421d, 0xff, 0xff, 0xff) }, /* HP lt2523 (Novatel E371) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x10) }, /* HP lt4132 (Huawei ME906s-158) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x12) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x13) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x14) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x1b) }, + { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 */ + .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, + { USB_DEVICE(0x2cb7, 0x0104), /* Fibocom NL678 series */ + .driver_info = RSVD(4) | RSVD(5) }, + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ + .driver_info = RSVD(6) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 3da25ad267a2..4966768d3c98 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -86,9 +86,14 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(YCCABLE_VENDOR_ID, YCCABLE_PRODUCT_ID) }, { USB_DEVICE(SUPERIAL_VENDOR_ID, SUPERIAL_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD220_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LD220TA_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD960_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LD960TA_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LCM960_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LM920_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LM940_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_TD620_PRODUCT_ID) }, { USB_DEVICE(CRESSI_VENDOR_ID, CRESSI_EDY_PRODUCT_ID) }, { USB_DEVICE(ZEAGLE_VENDOR_ID, ZEAGLE_N2ITION3_PRODUCT_ID) }, { USB_DEVICE(SONY_VENDOR_ID, SONY_QN3USB_PRODUCT_ID) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index 123289085ee2..a84f0959ab34 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -123,10 +123,15 @@ /* Hewlett-Packard POS Pole Displays */ #define HP_VENDOR_ID 0x03f0 +#define HP_LM920_PRODUCT_ID 0x026b +#define HP_TD620_PRODUCT_ID 0x0956 #define HP_LD960_PRODUCT_ID 0x0b39 #define HP_LCM220_PRODUCT_ID 0x3139 #define HP_LCM960_PRODUCT_ID 0x3239 #define HP_LD220_PRODUCT_ID 0x3524 +#define HP_LD220TA_PRODUCT_ID 0x4349 +#define HP_LD960TA_PRODUCT_ID 0x4439 +#define HP_LM940_PRODUCT_ID 0x5039 /* Cressi Edy (diving computer) PC interface */ #define CRESSI_VENDOR_ID 0x04b8 diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index 6c186b4df94a..b3344a77dcce 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -223,8 +223,12 @@ static int slave_configure(struct scsi_device *sdev) if (!(us->fflags & US_FL_NEEDS_CAP16)) sdev->try_rc_10_first = 1; - /* assume SPC3 or latter devices support sense size > 18 */ - if (sdev->scsi_level > SCSI_SPC_2) + /* + * assume SPC3 or latter devices support sense size > 18 + * unless US_FL_BAD_SENSE quirk is specified. + */ + if (sdev->scsi_level > SCSI_SPC_2 && + !(us->fflags & US_FL_BAD_SENSE)) us->fflags |= US_FL_SANE_SENSE; /* USB-IDE bridges tend to report SK = 0x04 (Non-recoverable diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 898215cad351..d92b974f0635 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1393,6 +1393,18 @@ UNUSUAL_DEV( 0x0d49, 0x7310, 0x0000, 0x9999, US_FL_SANE_SENSE), /* + * Reported by Icenowy Zheng <icenowy@aosc.io> + * The SMI SM3350 USB-UFS bridge controller will enter a wrong state + * that do not process read/write command if a long sense is requested, + * so force to use 18-byte sense. + */ +UNUSUAL_DEV( 0x090c, 0x3350, 0x0000, 0xffff, + "SMI", + "SM3350 UFS-to-USB-Mass-Storage bridge", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_BAD_SENSE ), + +/* * Pete Zaitcev <zaitcev@yahoo.com>, bz#164688. * The device blatantly ignores LUN and returns 1 in GetMaxLUN. */ diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 533eaf04f12f..40764ecad9ce 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -2,7 +2,6 @@ config VHOST_NET tristate "Host kernel accelerator for virtio net" depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP) select VHOST - select VHOST_RING ---help--- This kernel module can be loaded in host kernel to accelerate guest networking with virtio_net. Not to be confused with virtio_net @@ -15,17 +14,24 @@ config VHOST_SCSI tristate "VHOST_SCSI TCM fabric driver" depends on TARGET_CORE && EVENTFD && m select VHOST - select VHOST_RING default n ---help--- Say M here to enable the vhost_scsi TCM fabric module for use with virtio-scsi guests -config VHOST_RING - tristate +config VHOST_VSOCK + tristate "vhost virtio-vsock driver" + depends on VSOCKETS && EVENTFD + select VIRTIO_VSOCKETS_COMMON + select VHOST + default n ---help--- - This option is selected by any driver which needs to access - the host side of a virtio ring. + This kernel module can be loaded in the host kernel to provide AF_VSOCK + sockets for communicating with guests. The guests must have the + virtio_transport.ko driver loaded to use the virtio-vsock device. + + To compile this driver as a module, choose M here: the module will be called + vhost_vsock. config VHOST tristate diff --git a/drivers/vhost/Kconfig.vringh b/drivers/vhost/Kconfig.vringh new file mode 100644 index 000000000000..6a4490c09d7f --- /dev/null +++ b/drivers/vhost/Kconfig.vringh @@ -0,0 +1,5 @@ +config VHOST_RING + tristate + ---help--- + This option is selected by any driver which needs to access + the host side of a virtio ring. diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile index e0441c34db1c..6b012b986b57 100644 --- a/drivers/vhost/Makefile +++ b/drivers/vhost/Makefile @@ -4,5 +4,9 @@ vhost_net-y := net.o obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o vhost_scsi-y := scsi.o +obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o +vhost_vsock-y := vsock.o + obj-$(CONFIG_VHOST_RING) += vringh.o + obj-$(CONFIG_VHOST) += vhost.o diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 645b2197930e..53cf130922f3 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -61,7 +61,8 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" enum { VHOST_NET_FEATURES = VHOST_FEATURES | (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | - (1ULL << VIRTIO_NET_F_MRG_RXBUF) + (1ULL << VIRTIO_NET_F_MRG_RXBUF) | + (1ULL << VIRTIO_F_IOMMU_PLATFORM) }; enum { @@ -287,6 +288,69 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) rcu_read_unlock_bh(); } +static inline unsigned long busy_clock(void) +{ + return local_clock() >> 10; +} + +static bool vhost_can_busy_poll(struct vhost_dev *dev, + unsigned long endtime) +{ + return likely(!need_resched()) && + likely(!time_after(busy_clock(), endtime)) && + likely(!signal_pending(current)) && + !vhost_has_work(dev); +} + +static void vhost_net_disable_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + struct vhost_net_virtqueue *nvq = + container_of(vq, struct vhost_net_virtqueue, vq); + struct vhost_poll *poll = n->poll + (nvq - n->vqs); + if (!vq->private_data) + return; + vhost_poll_stop(poll); +} + +static int vhost_net_enable_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + struct vhost_net_virtqueue *nvq = + container_of(vq, struct vhost_net_virtqueue, vq); + struct vhost_poll *poll = n->poll + (nvq - n->vqs); + struct socket *sock; + + sock = vq->private_data; + if (!sock) + return 0; + + return vhost_poll_start(poll, sock->file); +} + +static int vhost_net_tx_get_vq_desc(struct vhost_net *net, + struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num) +{ + unsigned long uninitialized_var(endtime); + int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + out_num, in_num, NULL, NULL); + + if (r == vq->num && vq->busyloop_timeout) { + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + while (vhost_can_busy_poll(vq->dev, endtime) && + vhost_vq_avail_empty(vq->dev, vq)) + cpu_relax_lowlatency(); + preempt_enable(); + r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + out_num, in_num, NULL, NULL); + } + + return r; +} + /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) @@ -314,6 +378,9 @@ static void handle_tx(struct vhost_net *net) if (!sock) goto out; + if (!vq_iotlb_prefetch(vq)) + goto out; + vhost_disable_notify(&net->dev, vq); hdr_size = nvq->vhost_hlen; @@ -331,10 +398,9 @@ static void handle_tx(struct vhost_net *net) % UIO_MAXIOV == nvq->done_idx)) break; - head = vhost_get_vq_desc(vq, vq->iov, - ARRAY_SIZE(vq->iov), - &out, &in, - NULL, NULL); + head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in); /* On error, stop handling until the next kick. */ if (unlikely(head < 0)) break; @@ -435,6 +501,43 @@ static int peek_head_len(struct sock *sk) return len; } +static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) +{ + struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; + struct vhost_virtqueue *vq = &nvq->vq; + unsigned long uninitialized_var(endtime); + int len = peek_head_len(sk); + + if (!len && vq->busyloop_timeout) { + /* Both tx vq and rx socket were polled here */ + mutex_lock_nested(&vq->mutex, 1); + vhost_disable_notify(&net->dev, vq); + + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + + while (vhost_can_busy_poll(&net->dev, endtime) && + skb_queue_empty(&sk->sk_receive_queue) && + vhost_vq_avail_empty(&net->dev, vq)) + cpu_relax_lowlatency(); + + preempt_enable(); + + if (!vhost_vq_avail_empty(&net->dev, vq)) + vhost_poll_queue(&vq->poll); + else if (unlikely(vhost_enable_notify(&net->dev, vq))) { + vhost_disable_notify(&net->dev, vq); + vhost_poll_queue(&vq->poll); + } + + mutex_unlock(&vq->mutex); + + len = peek_head_len(sk); + } + + return len; +} + /* This is a multi-buffer version of vhost_get_desc, that works if * vq has read descriptors only. * @vq - the relevant virtqueue @@ -540,11 +643,16 @@ static void handle_rx(struct vhost_net *net) struct iov_iter fixup; __virtio16 num_buffers; - mutex_lock(&vq->mutex); + mutex_lock_nested(&vq->mutex, 0); sock = vq->private_data; if (!sock) goto out; + + if (!vq_iotlb_prefetch(vq)) + goto out; + vhost_disable_notify(&net->dev, vq); + vhost_net_disable_vq(net, vq); vhost_hlen = nvq->vhost_hlen; sock_hlen = nvq->sock_hlen; @@ -553,7 +661,7 @@ static void handle_rx(struct vhost_net *net) vq->log : NULL; mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF); - while ((sock_len = peek_head_len(sock->sk))) { + while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) { sock_len += sock_hlen; vhost_len = sock_len + vhost_hlen; headcount = get_rx_bufs(vq, vq->heads, vhost_len, @@ -561,7 +669,7 @@ static void handle_rx(struct vhost_net *net) likely(mergeable) ? UIO_MAXIOV : 1); /* On error, stop handling until the next kick. */ if (unlikely(headcount < 0)) - break; + goto out; /* On overrun, truncate and discard */ if (unlikely(headcount > UIO_MAXIOV)) { iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1); @@ -580,7 +688,7 @@ static void handle_rx(struct vhost_net *net) } /* Nothing new? Wait for eventfd to tell us * they refilled. */ - break; + goto out; } /* We don't need to be notified again. */ iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len); @@ -608,7 +716,7 @@ static void handle_rx(struct vhost_net *net) &fixup) != sizeof(hdr)) { vq_err(vq, "Unable to write vnet_hdr " "at addr %p\n", vq->iov->iov_base); - break; + goto out; } } else { /* Header came from socket; we'll need to patch @@ -624,7 +732,7 @@ static void handle_rx(struct vhost_net *net) &fixup) != sizeof num_buffers) { vq_err(vq, "Failed num_buffers write"); vhost_discard_vq_desc(vq, headcount); - break; + goto out; } vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, headcount); @@ -633,9 +741,10 @@ static void handle_rx(struct vhost_net *net) total_len += vhost_len; if (unlikely(total_len >= VHOST_NET_WEIGHT)) { vhost_poll_queue(&vq->poll); - break; + goto out; } } + vhost_net_enable_vq(net, vq); out: mutex_unlock(&vq->mutex); } @@ -714,32 +823,6 @@ static int vhost_net_open(struct inode *inode, struct file *f) return 0; } -static void vhost_net_disable_vq(struct vhost_net *n, - struct vhost_virtqueue *vq) -{ - struct vhost_net_virtqueue *nvq = - container_of(vq, struct vhost_net_virtqueue, vq); - struct vhost_poll *poll = n->poll + (nvq - n->vqs); - if (!vq->private_data) - return; - vhost_poll_stop(poll); -} - -static int vhost_net_enable_vq(struct vhost_net *n, - struct vhost_virtqueue *vq) -{ - struct vhost_net_virtqueue *nvq = - container_of(vq, struct vhost_net_virtqueue, vq); - struct vhost_poll *poll = n->poll + (nvq - n->vqs); - struct socket *sock; - - sock = vq->private_data; - if (!sock) - return 0; - - return vhost_poll_start(poll, sock->file); -} - static struct socket *vhost_net_stop_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { @@ -917,7 +1000,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) vhost_net_disable_vq(n, vq); vq->private_data = sock; - r = vhost_init_used(vq); + r = vhost_vq_init_access(vq); if (r) goto err_used; r = vhost_net_enable_vq(n, vq); @@ -969,21 +1052,21 @@ static long vhost_net_reset_owner(struct vhost_net *n) struct socket *tx_sock = NULL; struct socket *rx_sock = NULL; long err; - struct vhost_memory *memory; + struct vhost_umem *umem; mutex_lock(&n->dev.mutex); err = vhost_dev_check_owner(&n->dev); if (err) goto done; - memory = vhost_dev_reset_owner_prepare(); - if (!memory) { + umem = vhost_dev_reset_owner_prepare(); + if (!umem) { err = -ENOMEM; goto done; } vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); vhost_dev_stop(&n->dev); - vhost_dev_reset_owner(&n->dev, memory); + vhost_dev_reset_owner(&n->dev, umem); vhost_net_vq_reset(n); done: mutex_unlock(&n->dev.mutex); @@ -1014,10 +1097,14 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) } mutex_lock(&n->dev.mutex); if ((features & (1 << VHOST_F_LOG_ALL)) && - !vhost_log_access_ok(&n->dev)) { - mutex_unlock(&n->dev.mutex); - return -EFAULT; + !vhost_log_access_ok(&n->dev)) + goto out_unlock; + + if ((features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) { + if (vhost_init_device_iotlb(&n->dev, true)) + goto out_unlock; } + for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { mutex_lock(&n->vqs[i].vq.mutex); n->vqs[i].vq.acked_features = features; @@ -1027,6 +1114,10 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) } mutex_unlock(&n->dev.mutex); return 0; + +out_unlock: + mutex_unlock(&n->dev.mutex); + return -EFAULT; } static long vhost_net_set_owner(struct vhost_net *n) @@ -1100,9 +1191,40 @@ static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl, } #endif +static ssize_t vhost_net_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct vhost_net *n = file->private_data; + struct vhost_dev *dev = &n->dev; + int noblock = file->f_flags & O_NONBLOCK; + + return vhost_chr_read_iter(dev, to, noblock); +} + +static ssize_t vhost_net_chr_write_iter(struct kiocb *iocb, + struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct vhost_net *n = file->private_data; + struct vhost_dev *dev = &n->dev; + + return vhost_chr_write_iter(dev, from); +} + +static unsigned int vhost_net_chr_poll(struct file *file, poll_table *wait) +{ + struct vhost_net *n = file->private_data; + struct vhost_dev *dev = &n->dev; + + return vhost_chr_poll(file, dev, wait); +} + static const struct file_operations vhost_net_fops = { .owner = THIS_MODULE, .release = vhost_net_release, + .read_iter = vhost_net_chr_read_iter, + .write_iter = vhost_net_chr_write_iter, + .poll = vhost_net_chr_poll, .unlocked_ioctl = vhost_net_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = vhost_net_compat_ioctl, diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 8fc62a03637a..009315f006bf 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1277,7 +1277,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, vq = &vs->vqs[i].vq; mutex_lock(&vq->mutex); vq->private_data = vs_tpg; - vhost_init_used(vq); + vhost_vq_init_access(vq); mutex_unlock(&vq->mutex); } ret = 0; diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index f2882ac98726..388eec4e1a90 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -196,7 +196,7 @@ static long vhost_test_run(struct vhost_test *n, int test) oldpriv = vq->private_data; vq->private_data = priv; - r = vhost_init_used(&n->vqs[index]); + r = vhost_vq_init_access(&n->vqs[index]); mutex_unlock(&vq->mutex); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index c54d388310f0..53b1b3cfce84 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -27,6 +27,7 @@ #include <linux/cgroup.h> #include <linux/module.h> #include <linux/sort.h> +#include <linux/interval_tree_generic.h> #include <linux/nospec.h> #include "vhost.h" @@ -35,6 +36,10 @@ static ushort max_mem_regions = 64; module_param(max_mem_regions, ushort, 0444); MODULE_PARM_DESC(max_mem_regions, "Maximum number of memory regions in memory map. (default: 64)"); +static int max_iotlb_entries = 2048; +module_param(max_iotlb_entries, int, 0444); +MODULE_PARM_DESC(max_iotlb_entries, + "Maximum number of iotlb entries. (default: 2048)"); enum { VHOST_MEMORY_F_LOG = 0x1, @@ -43,12 +48,26 @@ enum { #define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num]) #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) +INTERVAL_TREE_DEFINE(struct vhost_umem_node, + rb, __u64, __subtree_last, + START, LAST, , vhost_umem_interval_tree); + #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY -static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq) +static void vhost_disable_cross_endian(struct vhost_virtqueue *vq) { vq->user_be = !virtio_legacy_is_little_endian(); } +static void vhost_enable_cross_endian_big(struct vhost_virtqueue *vq) +{ + vq->user_be = true; +} + +static void vhost_enable_cross_endian_little(struct vhost_virtqueue *vq) +{ + vq->user_be = false; +} + static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) { struct vhost_vring_state s; @@ -63,7 +82,10 @@ static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) s.num != VHOST_VRING_BIG_ENDIAN) return -EINVAL; - vq->user_be = s.num; + if (s.num == VHOST_VRING_BIG_ENDIAN) + vhost_enable_cross_endian_big(vq); + else + vhost_enable_cross_endian_little(vq); return 0; } @@ -92,7 +114,7 @@ static void vhost_init_is_le(struct vhost_virtqueue *vq) vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be; } #else -static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq) +static void vhost_disable_cross_endian(struct vhost_virtqueue *vq) { } @@ -109,11 +131,29 @@ static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, static void vhost_init_is_le(struct vhost_virtqueue *vq) { - if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) - vq->is_le = true; + vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) + || virtio_legacy_is_little_endian(); } #endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */ +static void vhost_reset_is_le(struct vhost_virtqueue *vq) +{ + vhost_init_is_le(vq); +} + +struct vhost_flush_struct { + struct vhost_work work; + struct completion wait_event; +}; + +static void vhost_flush_work(struct vhost_work *work) +{ + struct vhost_flush_struct *s; + + s = container_of(work, struct vhost_flush_struct, work); + complete(&s->wait_event); +} + static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { @@ -138,11 +178,9 @@ static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync, void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn) { - INIT_LIST_HEAD(&work->node); + clear_bit(VHOST_WORK_QUEUED, &work->flags); work->fn = fn; init_waitqueue_head(&work->done); - work->flushing = 0; - work->queue_seq = work->done_seq = 0; } EXPORT_SYMBOL_GPL(vhost_work_init); @@ -193,31 +231,17 @@ void vhost_poll_stop(struct vhost_poll *poll) } EXPORT_SYMBOL_GPL(vhost_poll_stop); -static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work, - unsigned seq) -{ - int left; - - spin_lock_irq(&dev->work_lock); - left = seq - work->done_seq; - spin_unlock_irq(&dev->work_lock); - return left <= 0; -} - void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work) { - unsigned seq; - int flushing; + struct vhost_flush_struct flush; + + if (dev->worker) { + init_completion(&flush.wait_event); + vhost_work_init(&flush.work, vhost_flush_work); - spin_lock_irq(&dev->work_lock); - seq = work->queue_seq; - work->flushing++; - spin_unlock_irq(&dev->work_lock); - wait_event(work->done, vhost_work_seq_done(dev, work, seq)); - spin_lock_irq(&dev->work_lock); - flushing = --work->flushing; - spin_unlock_irq(&dev->work_lock); - BUG_ON(flushing < 0); + vhost_work_queue(dev, &flush.work); + wait_for_completion(&flush.wait_event); + } } EXPORT_SYMBOL_GPL(vhost_work_flush); @@ -231,20 +255,27 @@ EXPORT_SYMBOL_GPL(vhost_poll_flush); void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) { - unsigned long flags; + if (!dev->worker) + return; - spin_lock_irqsave(&dev->work_lock, flags); - if (list_empty(&work->node)) { - list_add_tail(&work->node, &dev->work_list); - work->queue_seq++; - spin_unlock_irqrestore(&dev->work_lock, flags); + if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) { + /* We can only add the work to the list after we're + * sure it was not in the list. + */ + smp_mb(); + llist_add(&work->node, &dev->work_list); wake_up_process(dev->worker); - } else { - spin_unlock_irqrestore(&dev->work_lock, flags); } } EXPORT_SYMBOL_GPL(vhost_work_queue); +/* A lockless hint for busy polling code to exit the loop */ +bool vhost_has_work(struct vhost_dev *dev) +{ + return !llist_empty(&dev->work_list); +} +EXPORT_SYMBOL_GPL(vhost_has_work); + void vhost_poll_queue(struct vhost_poll *poll) { vhost_work_queue(poll->dev, &poll->work); @@ -275,16 +306,18 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->call_ctx = NULL; vq->call = NULL; vq->log_ctx = NULL; - vq->memory = NULL; - vq->is_le = virtio_legacy_is_little_endian(); - vhost_vq_reset_user_be(vq); + vhost_reset_is_le(vq); + vhost_disable_cross_endian(vq); + vq->busyloop_timeout = 0; + vq->umem = NULL; + vq->iotlb = NULL; } static int vhost_worker(void *data) { struct vhost_dev *dev = data; - struct vhost_work *work = NULL; - unsigned uninitialized_var(seq); + struct vhost_work *work, *work_next; + struct llist_node *node; mm_segment_t oldfs = get_fs(); set_fs(USER_DS); @@ -294,35 +327,25 @@ static int vhost_worker(void *data) /* mb paired w/ kthread_stop */ set_current_state(TASK_INTERRUPTIBLE); - spin_lock_irq(&dev->work_lock); - if (work) { - work->done_seq = seq; - if (work->flushing) - wake_up_all(&work->done); - } - if (kthread_should_stop()) { - spin_unlock_irq(&dev->work_lock); __set_current_state(TASK_RUNNING); break; } - if (!list_empty(&dev->work_list)) { - work = list_first_entry(&dev->work_list, - struct vhost_work, node); - list_del_init(&work->node); - seq = work->queue_seq; - } else - work = NULL; - spin_unlock_irq(&dev->work_lock); - if (work) { + node = llist_del_all(&dev->work_list); + if (!node) + schedule(); + + node = llist_reverse_order(node); + /* make sure flag is seen after deletion */ + smp_wmb(); + llist_for_each_entry_safe(work, work_next, node, node) { + clear_bit(VHOST_WORK_QUEUED, &work->flags); __set_current_state(TASK_RUNNING); work->fn(work); if (need_resched()) schedule(); - } else - schedule(); - + } } unuse_mm(dev->mm); set_fs(oldfs); @@ -381,11 +404,16 @@ void vhost_dev_init(struct vhost_dev *dev, mutex_init(&dev->mutex); dev->log_ctx = NULL; dev->log_file = NULL; - dev->memory = NULL; + dev->umem = NULL; + dev->iotlb = NULL; dev->mm = NULL; - spin_lock_init(&dev->work_lock); - INIT_LIST_HEAD(&dev->work_list); dev->worker = NULL; + init_llist_head(&dev->work_list); + init_waitqueue_head(&dev->wait); + INIT_LIST_HEAD(&dev->read_list); + INIT_LIST_HEAD(&dev->pending_list); + spin_lock_init(&dev->iotlb_lock); + for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; @@ -486,27 +514,36 @@ err_mm: } EXPORT_SYMBOL_GPL(vhost_dev_set_owner); -struct vhost_memory *vhost_dev_reset_owner_prepare(void) +static void *vhost_kvzalloc(unsigned long size) +{ + void *n = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + + if (!n) + n = vzalloc(size); + return n; +} + +struct vhost_umem *vhost_dev_reset_owner_prepare(void) { - return kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL); + return vhost_kvzalloc(sizeof(struct vhost_umem)); } EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare); /* Caller should have device mutex */ -void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory) +void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_umem *umem) { int i; vhost_dev_cleanup(dev, true); /* Restore memory to default empty mapping. */ - memory->nregions = 0; - dev->memory = memory; + INIT_LIST_HEAD(&umem->umem_list); + dev->umem = umem; /* We don't need VQ locks below since vhost_dev_cleanup makes sure * VQs aren't running. */ for (i = 0; i < dev->nvqs; ++i) - dev->vqs[i]->memory = memory; + dev->vqs[i]->umem = umem; } EXPORT_SYMBOL_GPL(vhost_dev_reset_owner); @@ -523,6 +560,47 @@ void vhost_dev_stop(struct vhost_dev *dev) } EXPORT_SYMBOL_GPL(vhost_dev_stop); +static void vhost_umem_free(struct vhost_umem *umem, + struct vhost_umem_node *node) +{ + vhost_umem_interval_tree_remove(node, &umem->umem_tree); + list_del(&node->link); + kfree(node); + umem->numem--; +} + +static void vhost_umem_clean(struct vhost_umem *umem) +{ + struct vhost_umem_node *node, *tmp; + + if (!umem) + return; + + list_for_each_entry_safe(node, tmp, &umem->umem_list, link) + vhost_umem_free(umem, node); + + kvfree(umem); +} + +static void vhost_clear_msg(struct vhost_dev *dev) +{ + struct vhost_msg_node *node, *n; + + spin_lock(&dev->iotlb_lock); + + list_for_each_entry_safe(node, n, &dev->read_list, node) { + list_del(&node->node); + kfree(node); + } + + list_for_each_entry_safe(node, n, &dev->pending_list, node) { + list_del(&node->node); + kfree(node); + } + + spin_unlock(&dev->iotlb_lock); +} + /* Caller should have device mutex if and only if locked is set */ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) { @@ -549,9 +627,13 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) fput(dev->log_file); dev->log_file = NULL; /* No one will access memory at this point */ - kvfree(dev->memory); - dev->memory = NULL; - WARN_ON(!list_empty(&dev->work_list)); + vhost_umem_clean(dev->umem); + dev->umem = NULL; + vhost_umem_clean(dev->iotlb); + dev->iotlb = NULL; + vhost_clear_msg(dev); + wake_up_interruptible_poll(&dev->wait, POLLIN | POLLRDNORM); + WARN_ON(!llist_empty(&dev->work_list)); if (dev->worker) { kthread_stop(dev->worker); dev->worker = NULL; @@ -575,26 +657,34 @@ static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz) (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8); } +static bool vhost_overflow(u64 uaddr, u64 size) +{ + /* Make sure 64 bit math will not overflow. */ + return uaddr > ULONG_MAX || size > ULONG_MAX || uaddr > ULONG_MAX - size; +} + /* Caller should have vq mutex and device mutex. */ -static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem, +static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem, int log_all) { - int i; + struct vhost_umem_node *node; - if (!mem) + if (!umem) return 0; - for (i = 0; i < mem->nregions; ++i) { - struct vhost_memory_region *m = mem->regions + i; - unsigned long a = m->userspace_addr; - if (m->memory_size > ULONG_MAX) + list_for_each_entry(node, &umem->umem_list, link) { + unsigned long a = node->userspace_addr; + + if (vhost_overflow(node->userspace_addr, node->size)) return 0; - else if (!access_ok(VERIFY_WRITE, (void __user *)a, - m->memory_size)) + + + if (!access_ok(VERIFY_WRITE, (void __user *)a, + node->size)) return 0; else if (log_all && !log_access_ok(log_base, - m->guest_phys_addr, - m->memory_size)) + node->start, + node->size)) return 0; } return 1; @@ -602,7 +692,7 @@ static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem, /* Can we switch to this memory table? */ /* Caller should have device mutex but not vq mutex */ -static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, +static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem, int log_all) { int i; @@ -615,7 +705,8 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL); /* If ring is inactive, will check when it's enabled. */ if (d->vqs[i]->private_data) - ok = vq_memory_access_ok(d->vqs[i]->log_base, mem, log); + ok = vq_memory_access_ok(d->vqs[i]->log_base, + umem, log); else ok = 1; mutex_unlock(&d->vqs[i]->mutex); @@ -625,12 +716,388 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, return 1; } +static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, + struct iovec iov[], int iov_size, int access); + +static int vhost_copy_to_user(struct vhost_virtqueue *vq, void *to, + const void *from, unsigned size) +{ + int ret; + + if (!vq->iotlb) + return __copy_to_user(to, from, size); + else { + /* This function should be called after iotlb + * prefetch, which means we're sure that all vq + * could be access through iotlb. So -EAGAIN should + * not happen in this case. + */ + /* TODO: more fast path */ + struct iov_iter t; + ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov, + ARRAY_SIZE(vq->iotlb_iov), + VHOST_ACCESS_WO); + if (ret < 0) + goto out; + iov_iter_init(&t, WRITE, vq->iotlb_iov, ret, size); + ret = copy_to_iter(from, size, &t); + if (ret == size) + ret = 0; + } +out: + return ret; +} + +static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to, + void *from, unsigned size) +{ + int ret; + + if (!vq->iotlb) + return __copy_from_user(to, from, size); + else { + /* This function should be called after iotlb + * prefetch, which means we're sure that vq + * could be access through iotlb. So -EAGAIN should + * not happen in this case. + */ + /* TODO: more fast path */ + struct iov_iter f; + ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov, + ARRAY_SIZE(vq->iotlb_iov), + VHOST_ACCESS_RO); + if (ret < 0) { + vq_err(vq, "IOTLB translation failure: uaddr " + "%p size 0x%llx\n", from, + (unsigned long long) size); + goto out; + } + iov_iter_init(&f, READ, vq->iotlb_iov, ret, size); + ret = copy_from_iter(to, size, &f); + if (ret == size) + ret = 0; + } + +out: + return ret; +} + +static void __user *__vhost_get_user(struct vhost_virtqueue *vq, + void *addr, unsigned size) +{ + int ret; + + /* This function should be called after iotlb + * prefetch, which means we're sure that vq + * could be access through iotlb. So -EAGAIN should + * not happen in this case. + */ + /* TODO: more fast path */ + ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov, + ARRAY_SIZE(vq->iotlb_iov), + VHOST_ACCESS_RO); + if (ret < 0) { + vq_err(vq, "IOTLB translation failure: uaddr " + "%p size 0x%llx\n", addr, + (unsigned long long) size); + return NULL; + } + + if (ret != 1 || vq->iotlb_iov[0].iov_len != size) { + vq_err(vq, "Non atomic userspace memory access: uaddr " + "%p size 0x%llx\n", addr, + (unsigned long long) size); + return NULL; + } + + return vq->iotlb_iov[0].iov_base; +} + +#define vhost_put_user(vq, x, ptr) \ +({ \ + int ret = -EFAULT; \ + if (!vq->iotlb) { \ + ret = __put_user(x, ptr); \ + } else { \ + __typeof__(ptr) to = \ + (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \ + if (to != NULL) \ + ret = __put_user(x, to); \ + else \ + ret = -EFAULT; \ + } \ + ret; \ +}) + +#define vhost_get_user(vq, x, ptr) \ +({ \ + int ret; \ + if (!vq->iotlb) { \ + ret = __get_user(x, ptr); \ + } else { \ + __typeof__(ptr) from = \ + (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \ + if (from != NULL) \ + ret = __get_user(x, from); \ + else \ + ret = -EFAULT; \ + } \ + ret; \ +}) + +static void vhost_dev_lock_vqs(struct vhost_dev *d) +{ + int i = 0; + for (i = 0; i < d->nvqs; ++i) + mutex_lock_nested(&d->vqs[i]->mutex, i); +} + +static void vhost_dev_unlock_vqs(struct vhost_dev *d) +{ + int i = 0; + for (i = 0; i < d->nvqs; ++i) + mutex_unlock(&d->vqs[i]->mutex); +} + +static int vhost_new_umem_range(struct vhost_umem *umem, + u64 start, u64 size, u64 end, + u64 userspace_addr, int perm) +{ + struct vhost_umem_node *tmp, *node = kmalloc(sizeof(*node), GFP_ATOMIC); + + if (!node) + return -ENOMEM; + + if (umem->numem == max_iotlb_entries) { + tmp = list_first_entry(&umem->umem_list, typeof(*tmp), link); + vhost_umem_free(umem, tmp); + } + + node->start = start; + node->size = size; + node->last = end; + node->userspace_addr = userspace_addr; + node->perm = perm; + INIT_LIST_HEAD(&node->link); + list_add_tail(&node->link, &umem->umem_list); + vhost_umem_interval_tree_insert(node, &umem->umem_tree); + umem->numem++; + + return 0; +} + +static void vhost_del_umem_range(struct vhost_umem *umem, + u64 start, u64 end) +{ + struct vhost_umem_node *node; + + while ((node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, + start, end))) + vhost_umem_free(umem, node); +} + +static void vhost_iotlb_notify_vq(struct vhost_dev *d, + struct vhost_iotlb_msg *msg) +{ + struct vhost_msg_node *node, *n; + + spin_lock(&d->iotlb_lock); + + list_for_each_entry_safe(node, n, &d->pending_list, node) { + struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb; + if (msg->iova <= vq_msg->iova && + msg->iova + msg->size - 1 >= vq_msg->iova && + vq_msg->type == VHOST_IOTLB_MISS) { + vhost_poll_queue(&node->vq->poll); + list_del(&node->node); + kfree(node); + } + } + + spin_unlock(&d->iotlb_lock); +} + +static int umem_access_ok(u64 uaddr, u64 size, int access) +{ + unsigned long a = uaddr; + + /* Make sure 64 bit math will not overflow. */ + if (vhost_overflow(uaddr, size)) + return -EFAULT; + + if ((access & VHOST_ACCESS_RO) && + !access_ok(VERIFY_READ, (void __user *)a, size)) + return -EFAULT; + if ((access & VHOST_ACCESS_WO) && + !access_ok(VERIFY_WRITE, (void __user *)a, size)) + return -EFAULT; + return 0; +} + +int vhost_process_iotlb_msg(struct vhost_dev *dev, + struct vhost_iotlb_msg *msg) +{ + int ret = 0; + + mutex_lock(&dev->mutex); + vhost_dev_lock_vqs(dev); + switch (msg->type) { + case VHOST_IOTLB_UPDATE: + if (!dev->iotlb) { + ret = -EFAULT; + break; + } + if (umem_access_ok(msg->uaddr, msg->size, msg->perm)) { + ret = -EFAULT; + break; + } + if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size, + msg->iova + msg->size - 1, + msg->uaddr, msg->perm)) { + ret = -ENOMEM; + break; + } + vhost_iotlb_notify_vq(dev, msg); + break; + case VHOST_IOTLB_INVALIDATE: + vhost_del_umem_range(dev->iotlb, msg->iova, + msg->iova + msg->size - 1); + break; + default: + ret = -EINVAL; + break; + } + + vhost_dev_unlock_vqs(dev); + mutex_unlock(&dev->mutex); + + return ret; +} +ssize_t vhost_chr_write_iter(struct vhost_dev *dev, + struct iov_iter *from) +{ + struct vhost_msg_node node; + unsigned size = sizeof(struct vhost_msg); + size_t ret; + int err; + + if (iov_iter_count(from) < size) + return 0; + ret = copy_from_iter(&node.msg, size, from); + if (ret != size) + goto done; + + switch (node.msg.type) { + case VHOST_IOTLB_MSG: + err = vhost_process_iotlb_msg(dev, &node.msg.iotlb); + if (err) + ret = err; + break; + default: + ret = -EINVAL; + break; + } + +done: + return ret; +} +EXPORT_SYMBOL(vhost_chr_write_iter); + +unsigned int vhost_chr_poll(struct file *file, struct vhost_dev *dev, + poll_table *wait) +{ + unsigned int mask = 0; + + poll_wait(file, &dev->wait, wait); + + if (!list_empty(&dev->read_list)) + mask |= POLLIN | POLLRDNORM; + + return mask; +} +EXPORT_SYMBOL(vhost_chr_poll); + +ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to, + int noblock) +{ + DEFINE_WAIT(wait); + struct vhost_msg_node *node; + ssize_t ret = 0; + unsigned size = sizeof(struct vhost_msg); + + if (iov_iter_count(to) < size) + return 0; + + while (1) { + if (!noblock) + prepare_to_wait(&dev->wait, &wait, + TASK_INTERRUPTIBLE); + + node = vhost_dequeue_msg(dev, &dev->read_list); + if (node) + break; + if (noblock) { + ret = -EAGAIN; + break; + } + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + if (!dev->iotlb) { + ret = -EBADFD; + break; + } + + schedule(); + } + + if (!noblock) + finish_wait(&dev->wait, &wait); + + if (node) { + ret = copy_to_iter(&node->msg, size, to); + + if (ret != size || node->msg.type != VHOST_IOTLB_MISS) { + kfree(node); + return ret; + } + + vhost_enqueue_msg(dev, &dev->pending_list, node); + } + + return ret; +} +EXPORT_SYMBOL_GPL(vhost_chr_read_iter); + +static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access) +{ + struct vhost_dev *dev = vq->dev; + struct vhost_msg_node *node; + struct vhost_iotlb_msg *msg; + + node = vhost_new_msg(vq, VHOST_IOTLB_MISS); + if (!node) + return -ENOMEM; + + msg = &node->msg.iotlb; + msg->type = VHOST_IOTLB_MISS; + msg->iova = iova; + msg->perm = access; + + vhost_enqueue_msg(dev, &dev->read_list, node); + + return 0; +} + static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, struct vring_desc __user *desc, struct vring_avail __user *avail, struct vring_used __user *used) + { size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; + return access_ok(VERIFY_READ, desc, num * sizeof *desc) && access_ok(VERIFY_READ, avail, sizeof *avail + num * sizeof *avail->ring + s) && @@ -638,11 +1105,59 @@ static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, sizeof *used + num * sizeof *used->ring + s); } +static int iotlb_access_ok(struct vhost_virtqueue *vq, + int access, u64 addr, u64 len) +{ + const struct vhost_umem_node *node; + struct vhost_umem *umem = vq->iotlb; + u64 s = 0, size; + + while (len > s) { + node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, + addr, + addr + len - 1); + if (node == NULL || node->start > addr) { + vhost_iotlb_miss(vq, addr, access); + return false; + } else if (!(node->perm & access)) { + /* Report the possible access violation by + * request another translation from userspace. + */ + return false; + } + + size = node->size - addr + node->start; + s += size; + addr += size; + } + + return true; +} + +int vq_iotlb_prefetch(struct vhost_virtqueue *vq) +{ + size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; + unsigned int num = vq->num; + + if (!vq->iotlb) + return 1; + + return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc, + num * sizeof *vq->desc) && + iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail, + sizeof *vq->avail + + num * sizeof *vq->avail->ring + s) && + iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used, + sizeof *vq->used + + num * sizeof *vq->used->ring + s); +} +EXPORT_SYMBOL_GPL(vq_iotlb_prefetch); + /* Can we log writes? */ /* Caller should have device mutex but not vq mutex */ int vhost_log_access_ok(struct vhost_dev *dev) { - return memory_access_ok(dev, dev->memory, 1); + return memory_access_ok(dev, dev->umem, 1); } EXPORT_SYMBOL_GPL(vhost_log_access_ok); @@ -653,7 +1168,7 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq, { size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; - return vq_memory_access_ok(log_base, vq->memory, + return vq_memory_access_ok(log_base, vq->umem, vhost_has_feature(vq, VHOST_F_LOG_ALL)) && (!vq->log_used || log_access_ok(log_base, vq->log_addr, sizeof *vq->used + @@ -664,33 +1179,36 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq, /* Caller should have vq mutex and device mutex */ int vhost_vq_access_ok(struct vhost_virtqueue *vq) { - return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used) && - vq_log_access_ok(vq, vq->log_base); -} -EXPORT_SYMBOL_GPL(vhost_vq_access_ok); + if (!vq_log_access_ok(vq, vq->log_base)) + return 0; -static int vhost_memory_reg_sort_cmp(const void *p1, const void *p2) -{ - const struct vhost_memory_region *r1 = p1, *r2 = p2; - if (r1->guest_phys_addr < r2->guest_phys_addr) + /* Access validation occurs at prefetch time with IOTLB */ + if (vq->iotlb) return 1; - if (r1->guest_phys_addr > r2->guest_phys_addr) - return -1; - return 0; + + return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used); } +EXPORT_SYMBOL_GPL(vhost_vq_access_ok); -static void *vhost_kvzalloc(unsigned long size) +static struct vhost_umem *vhost_umem_alloc(void) { - void *n = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + struct vhost_umem *umem = vhost_kvzalloc(sizeof(*umem)); - if (!n) - n = vzalloc(size); - return n; + if (!umem) + return NULL; + + umem->umem_tree = RB_ROOT; + umem->numem = 0; + INIT_LIST_HEAD(&umem->umem_list); + + return umem; } static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) { - struct vhost_memory mem, *newmem, *oldmem; + struct vhost_memory mem, *newmem; + struct vhost_memory_region *region; + struct vhost_umem *newumem, *oldumem; unsigned long size = offsetof(struct vhost_memory, regions); int i; @@ -710,24 +1228,47 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) kvfree(newmem); return -EFAULT; } - sort(newmem->regions, newmem->nregions, sizeof(*newmem->regions), - vhost_memory_reg_sort_cmp, NULL); - if (!memory_access_ok(d, newmem, 0)) { + newumem = vhost_umem_alloc(); + if (!newumem) { kvfree(newmem); - return -EFAULT; + return -ENOMEM; + } + + for (region = newmem->regions; + region < newmem->regions + mem.nregions; + region++) { + if (vhost_new_umem_range(newumem, + region->guest_phys_addr, + region->memory_size, + region->guest_phys_addr + + region->memory_size - 1, + region->userspace_addr, + VHOST_ACCESS_RW)) + goto err; } - oldmem = d->memory; - d->memory = newmem; + + if (!memory_access_ok(d, newumem, 0)) + goto err; + + oldumem = d->umem; + d->umem = newumem; /* All memory accesses are done under some VQ mutex. */ for (i = 0; i < d->nvqs; ++i) { mutex_lock(&d->vqs[i]->mutex); - d->vqs[i]->memory = newmem; + d->vqs[i]->umem = newumem; mutex_unlock(&d->vqs[i]->mutex); } - kvfree(oldmem); + + kvfree(newmem); + vhost_umem_clean(oldumem); return 0; + +err: + vhost_umem_clean(newumem); + kvfree(newmem); + return -EFAULT; } long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) @@ -913,6 +1454,19 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) case VHOST_GET_VRING_ENDIAN: r = vhost_get_vring_endian(vq, idx, argp); break; + case VHOST_SET_VRING_BUSYLOOP_TIMEOUT: + if (copy_from_user(&s, argp, sizeof(s))) { + r = -EFAULT; + break; + } + vq->busyloop_timeout = s.num; + break; + case VHOST_GET_VRING_BUSYLOOP_TIMEOUT: + s.index = idx; + s.num = vq->busyloop_timeout; + if (copy_to_user(argp, &s, sizeof(s))) + r = -EFAULT; + break; default: r = -ENOIOCTLCMD; } @@ -936,6 +1490,30 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) } EXPORT_SYMBOL_GPL(vhost_vring_ioctl); +int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled) +{ + struct vhost_umem *niotlb, *oiotlb; + int i; + + niotlb = vhost_umem_alloc(); + if (!niotlb) + return -ENOMEM; + + oiotlb = d->iotlb; + d->iotlb = niotlb; + + for (i = 0; i < d->nvqs; ++i) { + mutex_lock(&d->vqs[i]->mutex); + d->vqs[i]->iotlb = niotlb; + mutex_unlock(&d->vqs[i]->mutex); + } + + vhost_umem_clean(oiotlb); + + return 0; +} +EXPORT_SYMBOL_GPL(vhost_init_device_iotlb); + /* Caller must have device mutex */ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) { @@ -1018,28 +1596,6 @@ done: } EXPORT_SYMBOL_GPL(vhost_dev_ioctl); -static const struct vhost_memory_region *find_region(struct vhost_memory *mem, - __u64 addr, __u32 len) -{ - const struct vhost_memory_region *reg; - int start = 0, end = mem->nregions; - - while (start < end) { - int slot = start + (end - start) / 2; - reg = mem->regions + slot; - if (addr >= reg->guest_phys_addr) - end = slot; - else - start = slot + 1; - } - - reg = mem->regions + start; - if (addr >= reg->guest_phys_addr && - reg->guest_phys_addr + reg->memory_size > addr) - return reg; - return NULL; -} - /* TODO: This is really inefficient. We need something like get_user() * (instruction directly accesses the data, with an exception table entry * returning -EFAULT). See Documentation/x86/exception-tables.txt. @@ -1118,7 +1674,8 @@ EXPORT_SYMBOL_GPL(vhost_log_write); static int vhost_update_used_flags(struct vhost_virtqueue *vq) { void __user *used; - if (__put_user(cpu_to_vhost16(vq, vq->used_flags), &vq->used->flags) < 0) + if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags), + &vq->used->flags) < 0) return -EFAULT; if (unlikely(vq->log_used)) { /* Make sure the flag is seen before log. */ @@ -1136,7 +1693,8 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq) static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) { - if (__put_user(cpu_to_vhost16(vq, vq->avail_idx), vhost_avail_event(vq))) + if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx), + vhost_avail_event(vq))) return -EFAULT; if (unlikely(vq->log_used)) { void __user *used; @@ -1153,62 +1711,84 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) return 0; } -int vhost_init_used(struct vhost_virtqueue *vq) +int vhost_vq_init_access(struct vhost_virtqueue *vq) { __virtio16 last_used_idx; int r; - if (!vq->private_data) { - vq->is_le = virtio_legacy_is_little_endian(); + bool is_le = vq->is_le; + + if (!vq->private_data) return 0; - } vhost_init_is_le(vq); r = vhost_update_used_flags(vq); if (r) - return r; + goto err; vq->signalled_used_valid = false; - if (!access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx)) - return -EFAULT; - r = __get_user(last_used_idx, &vq->used->idx); - if (r) - return r; + if (!vq->iotlb && + !access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx)) { + r = -EFAULT; + goto err; + } + r = vhost_get_user(vq, last_used_idx, &vq->used->idx); + if (r) { + vq_err(vq, "Can't access used idx at %p\n", + &vq->used->idx); + goto err; + } vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx); return 0; + +err: + vq->is_le = is_le; + return r; } -EXPORT_SYMBOL_GPL(vhost_init_used); +EXPORT_SYMBOL_GPL(vhost_vq_init_access); static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, - struct iovec iov[], int iov_size) + struct iovec iov[], int iov_size, int access) { - const struct vhost_memory_region *reg; - struct vhost_memory *mem; + const struct vhost_umem_node *node; + struct vhost_dev *dev = vq->dev; + struct vhost_umem *umem = dev->iotlb ? dev->iotlb : dev->umem; struct iovec *_iov; u64 s = 0; int ret = 0; - mem = vq->memory; while ((u64)len > s) { u64 size; if (unlikely(ret >= iov_size)) { ret = -ENOBUFS; break; } - reg = find_region(mem, addr, len); - if (unlikely(!reg)) { - ret = -EFAULT; + + node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, + addr, addr + len - 1); + if (node == NULL || node->start > addr) { + if (umem != dev->iotlb) { + ret = -EFAULT; + break; + } + ret = -EAGAIN; + break; + } else if (!(node->perm & access)) { + ret = -EPERM; break; } + _iov = iov + ret; - size = reg->memory_size - addr + reg->guest_phys_addr; + size = node->size - addr + node->start; _iov->iov_len = min((u64)len - s, size); _iov->iov_base = (void __user *)(unsigned long) - (reg->userspace_addr + addr - reg->guest_phys_addr); + (node->userspace_addr + addr - node->start); s += size; addr += size; ++ret; } + if (ret == -EAGAIN) + vhost_iotlb_miss(vq, addr, access); return ret; } @@ -1243,7 +1823,7 @@ static int get_indirect(struct vhost_virtqueue *vq, unsigned int i = 0, count, found = 0; u32 len = vhost32_to_cpu(vq, indirect->len); struct iov_iter from; - int ret; + int ret, access; /* Sanity check */ if (unlikely(len % sizeof desc)) { @@ -1255,9 +1835,10 @@ static int get_indirect(struct vhost_virtqueue *vq, } ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, vq->indirect, - UIO_MAXIOV); + UIO_MAXIOV, VHOST_ACCESS_RO); if (unlikely(ret < 0)) { - vq_err(vq, "Translation failure %d in indirect.\n", ret); + if (ret != -EAGAIN) + vq_err(vq, "Translation failure %d in indirect.\n", ret); return ret; } iov_iter_init(&from, READ, vq->indirect, ret, len); @@ -1295,16 +1876,22 @@ static int get_indirect(struct vhost_virtqueue *vq, return -EINVAL; } + if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) + access = VHOST_ACCESS_WO; + else + access = VHOST_ACCESS_RO; + ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), vhost32_to_cpu(vq, desc.len), iov + iov_count, - iov_size - iov_count); + iov_size - iov_count, access); if (unlikely(ret < 0)) { - vq_err(vq, "Translation failure %d indirect idx %d\n", - ret, i); + if (ret != -EAGAIN) + vq_err(vq, "Translation failure %d indirect idx %d\n", + ret, i); return ret; } /* If this is an input descriptor, increment that count. */ - if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) { + if (access == VHOST_ACCESS_WO) { *in_num += ret; if (unlikely(log)) { log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); @@ -1343,11 +1930,11 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, u16 last_avail_idx; __virtio16 avail_idx; __virtio16 ring_head; - int ret; + int ret, access; /* Check it isn't doing very strange things with descriptor numbers. */ last_avail_idx = vq->last_avail_idx; - if (unlikely(__get_user(avail_idx, &vq->avail->idx))) { + if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) { vq_err(vq, "Failed to access avail idx at %p\n", &vq->avail->idx); return -EFAULT; @@ -1369,8 +1956,8 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ - if (unlikely(__get_user(ring_head, - &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) { + if (unlikely(vhost_get_user(vq, ring_head, + &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) { vq_err(vq, "Failed to read head: idx %d address %p\n", last_avail_idx, &vq->avail->ring[last_avail_idx % vq->num]); @@ -1405,7 +1992,8 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, i, vq->num, head); return -EINVAL; } - ret = __copy_from_user(&desc, vq->desc + i, sizeof desc); + ret = vhost_copy_from_user(vq, &desc, vq->desc + i, + sizeof desc); if (unlikely(ret)) { vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", i, vq->desc + i); @@ -1416,22 +2004,28 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, out_num, in_num, log, log_num, &desc); if (unlikely(ret < 0)) { - vq_err(vq, "Failure detected " - "in indirect descriptor at idx %d\n", i); + if (ret != -EAGAIN) + vq_err(vq, "Failure detected " + "in indirect descriptor at idx %d\n", i); return ret; } continue; } + if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) + access = VHOST_ACCESS_WO; + else + access = VHOST_ACCESS_RO; ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), vhost32_to_cpu(vq, desc.len), iov + iov_count, - iov_size - iov_count); + iov_size - iov_count, access); if (unlikely(ret < 0)) { - vq_err(vq, "Translation failure %d descriptor idx %d\n", - ret, i); + if (ret != -EAGAIN) + vq_err(vq, "Translation failure %d descriptor idx %d\n", + ret, i); return ret; } - if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) { + if (access == VHOST_ACCESS_WO) { /* If this is an input descriptor, * increment that count. */ *in_num += ret; @@ -1493,15 +2087,15 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, start = vq->last_used_idx & (vq->num - 1); used = vq->used->ring + start; if (count == 1) { - if (__put_user(heads[0].id, &used->id)) { + if (vhost_put_user(vq, heads[0].id, &used->id)) { vq_err(vq, "Failed to write used id"); return -EFAULT; } - if (__put_user(heads[0].len, &used->len)) { + if (vhost_put_user(vq, heads[0].len, &used->len)) { vq_err(vq, "Failed to write used len"); return -EFAULT; } - } else if (__copy_to_user(used, heads, count * sizeof *used)) { + } else if (vhost_copy_to_user(vq, used, heads, count * sizeof *used)) { vq_err(vq, "Failed to write used"); return -EFAULT; } @@ -1545,11 +2139,14 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, /* Make sure buffer is written before we update index. */ smp_wmb(); - if (__put_user(cpu_to_vhost16(vq, vq->last_used_idx), &vq->used->idx)) { + if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), + &vq->used->idx)) { vq_err(vq, "Failed to increment used idx"); return -EFAULT; } if (unlikely(vq->log_used)) { + /* Make sure used idx is seen before log. */ + smp_wmb(); /* Log used index update. */ log_write(vq->log_base, vq->log_addr + offsetof(struct vring_used, idx), @@ -1577,7 +2174,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { __virtio16 flags; - if (__get_user(flags, &vq->avail->flags)) { + if (vhost_get_user(vq, flags, &vq->avail->flags)) { vq_err(vq, "Failed to get flags"); return true; } @@ -1591,7 +2188,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) if (unlikely(!v)) return true; - if (__get_user(event, vhost_used_event(vq))) { + if (vhost_get_user(vq, event, vhost_used_event(vq))) { vq_err(vq, "Failed to get used event idx"); return true; } @@ -1627,6 +2224,20 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev, } EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n); +/* return true if we're sure that avaiable ring is empty */ +bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) +{ + __virtio16 avail_idx; + int r; + + r = vhost_get_user(vq, avail_idx, &vq->avail->idx); + if (r) + return false; + + return vhost16_to_cpu(vq, avail_idx) == vq->avail_idx; +} +EXPORT_SYMBOL_GPL(vhost_vq_avail_empty); + /* OK, now we need to know about added descriptors. */ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { @@ -1654,7 +2265,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) /* They could have slipped one in as we were doing that: make * sure it's written, then check again. */ smp_mb(); - r = __get_user(avail_idx, &vq->avail->idx); + r = vhost_get_user(vq, avail_idx, &vq->avail->idx); if (r) { vq_err(vq, "Failed to check avail idx at %p: %d\n", &vq->avail->idx, r); @@ -1682,6 +2293,50 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) } EXPORT_SYMBOL_GPL(vhost_disable_notify); +/* Create a new message. */ +struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type) +{ + struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL); + if (!node) + return NULL; + + /* Make sure all padding within the structure is initialized. */ + memset(&node->msg, 0, sizeof node->msg); + node->vq = vq; + node->msg.type = type; + return node; +} +EXPORT_SYMBOL_GPL(vhost_new_msg); + +void vhost_enqueue_msg(struct vhost_dev *dev, struct list_head *head, + struct vhost_msg_node *node) +{ + spin_lock(&dev->iotlb_lock); + list_add_tail(&node->node, head); + spin_unlock(&dev->iotlb_lock); + + wake_up_interruptible_poll(&dev->wait, POLLIN | POLLRDNORM); +} +EXPORT_SYMBOL_GPL(vhost_enqueue_msg); + +struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev, + struct list_head *head) +{ + struct vhost_msg_node *node = NULL; + + spin_lock(&dev->iotlb_lock); + if (!list_empty(head)) { + node = list_first_entry(head, struct vhost_msg_node, + node); + list_del(&node->node); + } + spin_unlock(&dev->iotlb_lock); + + return node; +} +EXPORT_SYMBOL_GPL(vhost_dequeue_msg); + + static int __init vhost_init(void) { return 0; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index d3f767448a72..78f3c5fc02e4 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -15,13 +15,15 @@ struct vhost_work; typedef void (*vhost_work_fn_t)(struct vhost_work *work); +#define VHOST_WORK_QUEUED 1 struct vhost_work { - struct list_head node; + struct llist_node node; vhost_work_fn_t fn; wait_queue_head_t done; int flushing; unsigned queue_seq; unsigned done_seq; + unsigned long flags; }; /* Poll a file (eventfd or socket) */ @@ -37,6 +39,7 @@ struct vhost_poll { void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn); void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work); +bool vhost_has_work(struct vhost_dev *dev); void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, unsigned long mask, struct vhost_dev *dev); @@ -52,6 +55,27 @@ struct vhost_log { u64 len; }; +#define START(node) ((node)->start) +#define LAST(node) ((node)->last) + +struct vhost_umem_node { + struct rb_node rb; + struct list_head link; + __u64 start; + __u64 last; + __u64 size; + __u64 userspace_addr; + __u32 perm; + __u32 flags_padding; + __u64 __subtree_last; +}; + +struct vhost_umem { + struct rb_root umem_tree; + struct list_head umem_list; + int numem; +}; + /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; @@ -97,10 +121,12 @@ struct vhost_virtqueue { u64 log_addr; struct iovec iov[UIO_MAXIOV]; + struct iovec iotlb_iov[64]; struct iovec *indirect; struct vring_used_elem *heads; /* Protected by virtqueue mutex. */ - struct vhost_memory *memory; + struct vhost_umem *umem; + struct vhost_umem *iotlb; void *private_data; u64 acked_features; /* Log write descriptors */ @@ -114,27 +140,38 @@ struct vhost_virtqueue { /* Ring endianness requested by userspace for cross-endian support. */ bool user_be; #endif + u32 busyloop_timeout; +}; + +struct vhost_msg_node { + struct vhost_msg msg; + struct vhost_virtqueue *vq; + struct list_head node; }; struct vhost_dev { - struct vhost_memory *memory; struct mm_struct *mm; struct mutex mutex; struct vhost_virtqueue **vqs; int nvqs; struct file *log_file; struct eventfd_ctx *log_ctx; - spinlock_t work_lock; - struct list_head work_list; + struct llist_head work_list; struct task_struct *worker; + struct vhost_umem *umem; + struct vhost_umem *iotlb; + spinlock_t iotlb_lock; + struct list_head read_list; + struct list_head pending_list; + wait_queue_head_t wait; }; void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs); long vhost_dev_set_owner(struct vhost_dev *dev); bool vhost_dev_has_owner(struct vhost_dev *dev); long vhost_dev_check_owner(struct vhost_dev *); -struct vhost_memory *vhost_dev_reset_owner_prepare(void); -void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_memory *); +struct vhost_umem *vhost_dev_reset_owner_prepare(void); +void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_umem *); void vhost_dev_cleanup(struct vhost_dev *, bool locked); void vhost_dev_stop(struct vhost_dev *); long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp); @@ -148,7 +185,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *, struct vhost_log *log, unsigned int *log_num); void vhost_discard_vq_desc(struct vhost_virtqueue *, int n); -int vhost_init_used(struct vhost_virtqueue *); +int vhost_vq_init_access(struct vhost_virtqueue *); int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len); int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads, unsigned count); @@ -158,10 +195,26 @@ void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *, struct vring_used_elem *heads, unsigned count); void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *); void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *); +bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *); bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, unsigned int log_num, u64 len); +int vq_iotlb_prefetch(struct vhost_virtqueue *vq); + +struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type); +void vhost_enqueue_msg(struct vhost_dev *dev, + struct list_head *head, + struct vhost_msg_node *node); +struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev, + struct list_head *head); +unsigned int vhost_chr_poll(struct file *file, struct vhost_dev *dev, + poll_table *wait); +ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to, + int noblock); +ssize_t vhost_chr_write_iter(struct vhost_dev *dev, + struct iov_iter *from); +int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled); #define vq_err(vq, fmt, ...) do { \ pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c new file mode 100644 index 000000000000..72e914de473e --- /dev/null +++ b/drivers/vhost/vsock.c @@ -0,0 +1,797 @@ +/* + * vhost transport for vsock + * + * Copyright (C) 2013-2015 Red Hat, Inc. + * Author: Asias He <asias@redhat.com> + * Stefan Hajnoczi <stefanha@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ +#include <linux/miscdevice.h> +#include <linux/atomic.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/vmalloc.h> +#include <net/sock.h> +#include <linux/virtio_vsock.h> +#include <linux/vhost.h> +#include <linux/hashtable.h> + +#include <net/af_vsock.h> +#include "vhost.h" + +#define VHOST_VSOCK_DEFAULT_HOST_CID 2 + +enum { + VHOST_VSOCK_FEATURES = VHOST_FEATURES, +}; + +/* Used to track all the vhost_vsock instances on the system. */ +static DEFINE_SPINLOCK(vhost_vsock_lock); +static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8); + +struct vhost_vsock { + struct vhost_dev dev; + struct vhost_virtqueue vqs[2]; + + /* Link to global vhost_vsock_hash, writes use vhost_vsock_lock */ + struct hlist_node hash; + + struct vhost_work send_pkt_work; + spinlock_t send_pkt_list_lock; + struct list_head send_pkt_list; /* host->guest pending packets */ + + atomic_t queued_replies; + + u32 guest_cid; +}; + +static u32 vhost_transport_get_local_cid(void) +{ + return VHOST_VSOCK_DEFAULT_HOST_CID; +} + +/* Callers that dereference the return value must hold vhost_vsock_lock or the + * RCU read lock. + */ +static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) +{ + struct vhost_vsock *vsock; + + hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) { + u32 other_cid = vsock->guest_cid; + + /* Skip instances that have no CID yet */ + if (other_cid == 0) + continue; + + if (other_cid == guest_cid) { + return vsock; + } + } + + return NULL; +} + +static void +vhost_transport_do_send_pkt(struct vhost_vsock *vsock, + struct vhost_virtqueue *vq) +{ + struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; + bool added = false; + bool restart_tx = false; + + mutex_lock(&vq->mutex); + + if (!vq->private_data) + goto out; + + /* Avoid further vmexits, we're already processing the virtqueue */ + vhost_disable_notify(&vsock->dev, vq); + + for (;;) { + struct virtio_vsock_pkt *pkt; + struct iov_iter iov_iter; + unsigned out, in; + size_t nbytes; + size_t len; + int head; + + spin_lock_bh(&vsock->send_pkt_list_lock); + if (list_empty(&vsock->send_pkt_list)) { + spin_unlock_bh(&vsock->send_pkt_list_lock); + vhost_enable_notify(&vsock->dev, vq); + break; + } + + pkt = list_first_entry(&vsock->send_pkt_list, + struct virtio_vsock_pkt, list); + list_del_init(&pkt->list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + + head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + &out, &in, NULL, NULL); + if (head < 0) { + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + break; + } + + if (head == vq->num) { + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + + /* We cannot finish yet if more buffers snuck in while + * re-enabling notify. + */ + if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { + vhost_disable_notify(&vsock->dev, vq); + continue; + } + break; + } + + if (out) { + virtio_transport_free_pkt(pkt); + vq_err(vq, "Expected 0 output buffers, got %u\n", out); + break; + } + + len = iov_length(&vq->iov[out], in); + iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len); + + nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); + if (nbytes != sizeof(pkt->hdr)) { + virtio_transport_free_pkt(pkt); + vq_err(vq, "Faulted on copying pkt hdr\n"); + break; + } + + nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter); + if (nbytes != pkt->len) { + virtio_transport_free_pkt(pkt); + vq_err(vq, "Faulted on copying pkt buf\n"); + break; + } + + vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); + added = true; + + if (pkt->reply) { + int val; + + val = atomic_dec_return(&vsock->queued_replies); + + /* Do we have resources to resume tx processing? */ + if (val + 1 == tx_vq->num) + restart_tx = true; + } + + virtio_transport_free_pkt(pkt); + } + if (added) + vhost_signal(&vsock->dev, vq); + +out: + mutex_unlock(&vq->mutex); + + if (restart_tx) + vhost_poll_queue(&tx_vq->poll); +} + +static void vhost_transport_send_pkt_work(struct vhost_work *work) +{ + struct vhost_virtqueue *vq; + struct vhost_vsock *vsock; + + vsock = container_of(work, struct vhost_vsock, send_pkt_work); + vq = &vsock->vqs[VSOCK_VQ_RX]; + + vhost_transport_do_send_pkt(vsock, vq); +} + +static int +vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) +{ + struct vhost_vsock *vsock; + struct vhost_virtqueue *vq; + int len = pkt->len; + + rcu_read_lock(); + + /* Find the vhost_vsock according to guest context id */ + vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid)); + if (!vsock) { + rcu_read_unlock(); + virtio_transport_free_pkt(pkt); + return -ENODEV; + } + + vq = &vsock->vqs[VSOCK_VQ_RX]; + + if (pkt->reply) + atomic_inc(&vsock->queued_replies); + + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add_tail(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + + vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); + + rcu_read_unlock(); + return len; +} + +static int +vhost_transport_cancel_pkt(struct vsock_sock *vsk) +{ + struct vhost_vsock *vsock; + struct virtio_vsock_pkt *pkt, *n; + int cnt = 0; + int ret = -ENODEV; + LIST_HEAD(freeme); + + rcu_read_lock(); + + /* Find the vhost_vsock according to guest context id */ + vsock = vhost_vsock_get(vsk->remote_addr.svm_cid); + if (!vsock) + goto out; + + spin_lock_bh(&vsock->send_pkt_list_lock); + list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) { + if (pkt->vsk != vsk) + continue; + list_move(&pkt->list, &freeme); + } + spin_unlock_bh(&vsock->send_pkt_list_lock); + + list_for_each_entry_safe(pkt, n, &freeme, list) { + if (pkt->reply) + cnt++; + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + + if (cnt) { + struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; + int new_cnt; + + new_cnt = atomic_sub_return(cnt, &vsock->queued_replies); + if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num) + vhost_poll_queue(&tx_vq->poll); + } + + ret = 0; +out: + rcu_read_unlock(); + return ret; +} + +static struct virtio_vsock_pkt * +vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, + unsigned int out, unsigned int in) +{ + struct virtio_vsock_pkt *pkt; + struct iov_iter iov_iter; + size_t nbytes; + size_t len; + + if (in != 0) { + vq_err(vq, "Expected 0 input buffers, got %u\n", in); + return NULL; + } + + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return NULL; + + len = iov_length(vq->iov, out); + iov_iter_init(&iov_iter, WRITE, vq->iov, out, len); + + nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); + if (nbytes != sizeof(pkt->hdr)) { + vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n", + sizeof(pkt->hdr), nbytes); + kfree(pkt); + return NULL; + } + + if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM) + pkt->len = le32_to_cpu(pkt->hdr.len); + + /* No payload */ + if (!pkt->len) + return pkt; + + /* The pkt is too big */ + if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { + kfree(pkt); + return NULL; + } + + pkt->buf = kmalloc(pkt->len, GFP_KERNEL); + if (!pkt->buf) { + kfree(pkt); + return NULL; + } + + nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter); + if (nbytes != pkt->len) { + vq_err(vq, "Expected %u byte payload, got %zu bytes\n", + pkt->len, nbytes); + virtio_transport_free_pkt(pkt); + return NULL; + } + + return pkt; +} + +/* Is there space left for replies to rx packets? */ +static bool vhost_vsock_more_replies(struct vhost_vsock *vsock) +{ + struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX]; + int val; + + smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */ + val = atomic_read(&vsock->queued_replies); + + return val < vq->num; +} + +static void vhost_vsock_handle_tx_kick(struct vhost_work *work) +{ + struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, + poll.work); + struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, + dev); + struct virtio_vsock_pkt *pkt; + int head; + unsigned int out, in; + bool added = false; + + mutex_lock(&vq->mutex); + + if (!vq->private_data) + goto out; + + vhost_disable_notify(&vsock->dev, vq); + for (;;) { + u32 len; + + if (!vhost_vsock_more_replies(vsock)) { + /* Stop tx until the device processes already + * pending replies. Leave tx virtqueue + * callbacks disabled. + */ + goto no_more_replies; + } + + head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + &out, &in, NULL, NULL); + if (head < 0) + break; + + if (head == vq->num) { + if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { + vhost_disable_notify(&vsock->dev, vq); + continue; + } + break; + } + + pkt = vhost_vsock_alloc_pkt(vq, out, in); + if (!pkt) { + vq_err(vq, "Faulted on pkt\n"); + continue; + } + + len = pkt->len; + + /* Only accept correctly addressed packets */ + if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid) + virtio_transport_recv_pkt(pkt); + else + virtio_transport_free_pkt(pkt); + + vhost_add_used(vq, head, sizeof(pkt->hdr) + len); + added = true; + } + +no_more_replies: + if (added) + vhost_signal(&vsock->dev, vq); + +out: + mutex_unlock(&vq->mutex); +} + +static void vhost_vsock_handle_rx_kick(struct vhost_work *work) +{ + struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, + poll.work); + struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, + dev); + + vhost_transport_do_send_pkt(vsock, vq); +} + +static int vhost_vsock_start(struct vhost_vsock *vsock) +{ + struct vhost_virtqueue *vq; + size_t i; + int ret; + + mutex_lock(&vsock->dev.mutex); + + ret = vhost_dev_check_owner(&vsock->dev); + if (ret) + goto err; + + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { + vq = &vsock->vqs[i]; + + mutex_lock(&vq->mutex); + + if (!vhost_vq_access_ok(vq)) { + ret = -EFAULT; + goto err_vq; + } + + if (!vq->private_data) { + vq->private_data = vsock; + ret = vhost_vq_init_access(vq); + if (ret) + goto err_vq; + } + + mutex_unlock(&vq->mutex); + } + + mutex_unlock(&vsock->dev.mutex); + return 0; + +err_vq: + vq->private_data = NULL; + mutex_unlock(&vq->mutex); + + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { + vq = &vsock->vqs[i]; + + mutex_lock(&vq->mutex); + vq->private_data = NULL; + mutex_unlock(&vq->mutex); + } +err: + mutex_unlock(&vsock->dev.mutex); + return ret; +} + +static int vhost_vsock_stop(struct vhost_vsock *vsock) +{ + size_t i; + int ret; + + mutex_lock(&vsock->dev.mutex); + + ret = vhost_dev_check_owner(&vsock->dev); + if (ret) + goto err; + + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { + struct vhost_virtqueue *vq = &vsock->vqs[i]; + + mutex_lock(&vq->mutex); + vq->private_data = NULL; + mutex_unlock(&vq->mutex); + } + +err: + mutex_unlock(&vsock->dev.mutex); + return ret; +} + +static void vhost_vsock_free(struct vhost_vsock *vsock) +{ + kvfree(vsock); +} + +static int vhost_vsock_dev_open(struct inode *inode, struct file *file) +{ + struct vhost_virtqueue **vqs; + struct vhost_vsock *vsock; + int ret; + + /* This struct is large and allocation could fail, fall back to vmalloc + * if there is no other way. + */ + vsock = kzalloc(sizeof(*vsock), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!vsock) { + vsock = vmalloc(sizeof(*vsock)); + if (!vsock) + return -ENOMEM; + } + + vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL); + if (!vqs) { + ret = -ENOMEM; + goto out; + } + + vsock->guest_cid = 0; /* no CID assigned yet */ + + atomic_set(&vsock->queued_replies, 0); + + vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX]; + vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX]; + vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick; + vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick; + + vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs)); + + file->private_data = vsock; + spin_lock_init(&vsock->send_pkt_list_lock); + INIT_LIST_HEAD(&vsock->send_pkt_list); + vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work); + return 0; + +out: + vhost_vsock_free(vsock); + return ret; +} + +static void vhost_vsock_flush(struct vhost_vsock *vsock) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) + if (vsock->vqs[i].handle_kick) + vhost_poll_flush(&vsock->vqs[i].poll); + vhost_work_flush(&vsock->dev, &vsock->send_pkt_work); +} + +static void vhost_vsock_reset_orphans(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + /* vmci_transport.c doesn't take sk_lock here either. At least we're + * under vsock_table_lock so the sock cannot disappear while we're + * executing. + */ + + /* If the peer is still valid, no need to reset connection */ + if (vhost_vsock_get(vsk->remote_addr.svm_cid)) + return; + + /* If the close timeout is pending, let it expire. This avoids races + * with the timeout callback. + */ + if (vsk->close_work_scheduled) + return; + + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ECONNRESET; + sk->sk_error_report(sk); +} + +static int vhost_vsock_dev_release(struct inode *inode, struct file *file) +{ + struct vhost_vsock *vsock = file->private_data; + + spin_lock_bh(&vhost_vsock_lock); + if (vsock->guest_cid) + hash_del_rcu(&vsock->hash); + spin_unlock_bh(&vhost_vsock_lock); + + /* Wait for other CPUs to finish using vsock */ + synchronize_rcu(); + + /* Iterating over all connections for all CIDs to find orphans is + * inefficient. Room for improvement here. */ + vsock_for_each_connected_socket(vhost_vsock_reset_orphans); + + vhost_vsock_stop(vsock); + vhost_vsock_flush(vsock); + vhost_dev_stop(&vsock->dev); + + spin_lock_bh(&vsock->send_pkt_list_lock); + while (!list_empty(&vsock->send_pkt_list)) { + struct virtio_vsock_pkt *pkt; + + pkt = list_first_entry(&vsock->send_pkt_list, + struct virtio_vsock_pkt, list); + list_del_init(&pkt->list); + virtio_transport_free_pkt(pkt); + } + spin_unlock_bh(&vsock->send_pkt_list_lock); + + vhost_dev_cleanup(&vsock->dev, false); + kfree(vsock->dev.vqs); + vhost_vsock_free(vsock); + return 0; +} + +static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid) +{ + struct vhost_vsock *other; + + /* Refuse reserved CIDs */ + if (guest_cid <= VMADDR_CID_HOST || + guest_cid == U32_MAX) + return -EINVAL; + + /* 64-bit CIDs are not yet supported */ + if (guest_cid > U32_MAX) + return -EINVAL; + + /* Refuse if CID is already in use */ + spin_lock_bh(&vhost_vsock_lock); + other = vhost_vsock_get(guest_cid); + if (other && other != vsock) { + spin_unlock_bh(&vhost_vsock_lock); + return -EADDRINUSE; + } + + if (vsock->guest_cid) + hash_del_rcu(&vsock->hash); + + vsock->guest_cid = guest_cid; + hash_add_rcu(vhost_vsock_hash, &vsock->hash, guest_cid); + spin_unlock_bh(&vhost_vsock_lock); + + return 0; +} + +static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) +{ + struct vhost_virtqueue *vq; + int i; + + if (features & ~VHOST_VSOCK_FEATURES) + return -EOPNOTSUPP; + + mutex_lock(&vsock->dev.mutex); + if ((features & (1 << VHOST_F_LOG_ALL)) && + !vhost_log_access_ok(&vsock->dev)) { + mutex_unlock(&vsock->dev.mutex); + return -EFAULT; + } + + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { + vq = &vsock->vqs[i]; + mutex_lock(&vq->mutex); + vq->acked_features = features; + mutex_unlock(&vq->mutex); + } + mutex_unlock(&vsock->dev.mutex); + return 0; +} + +static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, + unsigned long arg) +{ + struct vhost_vsock *vsock = f->private_data; + void __user *argp = (void __user *)arg; + u64 guest_cid; + u64 features; + int start; + int r; + + switch (ioctl) { + case VHOST_VSOCK_SET_GUEST_CID: + if (copy_from_user(&guest_cid, argp, sizeof(guest_cid))) + return -EFAULT; + return vhost_vsock_set_cid(vsock, guest_cid); + case VHOST_VSOCK_SET_RUNNING: + if (copy_from_user(&start, argp, sizeof(start))) + return -EFAULT; + if (start) + return vhost_vsock_start(vsock); + else + return vhost_vsock_stop(vsock); + case VHOST_GET_FEATURES: + features = VHOST_VSOCK_FEATURES; + if (copy_to_user(argp, &features, sizeof(features))) + return -EFAULT; + return 0; + case VHOST_SET_FEATURES: + if (copy_from_user(&features, argp, sizeof(features))) + return -EFAULT; + return vhost_vsock_set_features(vsock, features); + default: + mutex_lock(&vsock->dev.mutex); + r = vhost_dev_ioctl(&vsock->dev, ioctl, argp); + if (r == -ENOIOCTLCMD) + r = vhost_vring_ioctl(&vsock->dev, ioctl, argp); + else + vhost_vsock_flush(vsock); + mutex_unlock(&vsock->dev.mutex); + return r; + } +} + +static const struct file_operations vhost_vsock_fops = { + .owner = THIS_MODULE, + .open = vhost_vsock_dev_open, + .release = vhost_vsock_dev_release, + .llseek = noop_llseek, + .unlocked_ioctl = vhost_vsock_dev_ioctl, +}; + +static struct miscdevice vhost_vsock_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "vhost-vsock", + .fops = &vhost_vsock_fops, +}; + +static struct virtio_transport vhost_transport = { + .transport = { + .get_local_cid = vhost_transport_get_local_cid, + + .init = virtio_transport_do_socket_init, + .destruct = virtio_transport_destruct, + .release = virtio_transport_release, + .connect = virtio_transport_connect, + .shutdown = virtio_transport_shutdown, + .cancel_pkt = vhost_transport_cancel_pkt, + + .dgram_enqueue = virtio_transport_dgram_enqueue, + .dgram_dequeue = virtio_transport_dgram_dequeue, + .dgram_bind = virtio_transport_dgram_bind, + .dgram_allow = virtio_transport_dgram_allow, + + .stream_enqueue = virtio_transport_stream_enqueue, + .stream_dequeue = virtio_transport_stream_dequeue, + .stream_has_data = virtio_transport_stream_has_data, + .stream_has_space = virtio_transport_stream_has_space, + .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, + .stream_is_active = virtio_transport_stream_is_active, + .stream_allow = virtio_transport_stream_allow, + + .notify_poll_in = virtio_transport_notify_poll_in, + .notify_poll_out = virtio_transport_notify_poll_out, + .notify_recv_init = virtio_transport_notify_recv_init, + .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, + .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, + .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, + .notify_send_init = virtio_transport_notify_send_init, + .notify_send_pre_block = virtio_transport_notify_send_pre_block, + .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, + .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, + + .set_buffer_size = virtio_transport_set_buffer_size, + .set_min_buffer_size = virtio_transport_set_min_buffer_size, + .set_max_buffer_size = virtio_transport_set_max_buffer_size, + .get_buffer_size = virtio_transport_get_buffer_size, + .get_min_buffer_size = virtio_transport_get_min_buffer_size, + .get_max_buffer_size = virtio_transport_get_max_buffer_size, + }, + + .send_pkt = vhost_transport_send_pkt, +}; + +static int __init vhost_vsock_init(void) +{ + int ret; + + ret = vsock_core_init(&vhost_transport.transport); + if (ret < 0) + return ret; + return misc_register(&vhost_vsock_misc); +}; + +static void __exit vhost_vsock_exit(void) +{ + misc_deregister(&vhost_vsock_misc); + vsock_core_exit(); +}; + +module_init(vhost_vsock_init); +module_exit(vhost_vsock_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Asias He"); +MODULE_DESCRIPTION("vhost transport for vsock "); diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index cab9f3f63a38..77590320d44c 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -60,7 +60,7 @@ config VIRTIO_INPUT config VIRTIO_MMIO tristate "Platform bus driver for memory mapped virtio devices" - depends on HAS_IOMEM + depends on HAS_IOMEM && HAS_DMA select VIRTIO ---help--- This drivers provides support for memory mapped virtio diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 7d4c7f35e5cf..f77358f08930 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -401,7 +401,7 @@ static int init_vqs(struct virtio_balloon *vb) { struct virtqueue *vqs[3]; vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; - const char *names[] = { "inflate", "deflate", "stats" }; + static const char * const names[] = { "inflate", "deflate", "stats" }; int err, nvqs; /* diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c index c96944b59856..350a2a5a49db 100644 --- a/drivers/virtio/virtio_input.c +++ b/drivers/virtio/virtio_input.c @@ -170,7 +170,7 @@ static int virtinput_init_vqs(struct virtio_input *vi) struct virtqueue *vqs[2]; vq_callback_t *cbs[] = { virtinput_recv_events, virtinput_recv_status }; - static const char *names[] = { "events", "status" }; + static const char * const names[] = { "events", "status" }; int err; err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names); diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index f499d9da7237..745c6ee1bb3e 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -482,7 +482,7 @@ error_available: static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[]) + const char * const names[]) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); unsigned int irq = platform_get_irq(vm_dev->pdev, 0); diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 2046a68ad0ba..f6bed86c17f9 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -296,7 +296,7 @@ void vp_del_vqs(struct virtio_device *vdev) static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[], + const char * const names[], bool use_msix, bool per_vq_vectors) { @@ -376,7 +376,7 @@ error_find: int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[]) + const char * const names[]) { int err; diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index b976d968e793..2cc252270b2d 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -139,7 +139,7 @@ void vp_del_vqs(struct virtio_device *vdev); int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[]); + const char * const names[]); const char *vp_bus_name(struct virtio_device *vdev); /* Setup the affinity for a virtqueue: diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 4469202eaa8e..631021cfc740 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -423,7 +423,7 @@ err_new_queue: static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[]) + const char * const names[]) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq; diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index a01a41a41269..761f28ffd40e 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -24,6 +24,8 @@ #include <linux/module.h> #include <linux/hrtimer.h> #include <linux/kmemleak.h> +#include <linux/dma-mapping.h> +#include <xen/xen.h> #ifdef DEBUG /* For development, we want to crash whenever the ring is screwed. */ @@ -54,6 +56,11 @@ #define END_USE(vq) #endif +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + struct vring_virtqueue { struct virtqueue vq; @@ -98,12 +105,131 @@ struct vring_virtqueue { ktime_t last_add_time; #endif - /* Tokens for callbacks. */ - void *data[]; + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; }; #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) +/* + * Modern virtio devices have feature bits to specify whether they need a + * quirk and bypass the IOMMU. If not there, just use the DMA API. + * + * If there, the interaction between virtio and DMA API is messy. + * + * On most systems with virtio, physical addresses match bus addresses, + * and it doesn't particularly matter whether we use the DMA API. + * + * On some systems, including Xen and any system with a physical device + * that speaks virtio behind a physical IOMMU, we must use the DMA API + * for virtio DMA to work at all. + * + * On other systems, including SPARC and PPC64, virtio-pci devices are + * enumerated as though they are behind an IOMMU, but the virtio host + * ignores the IOMMU, so we must either pretend that the IOMMU isn't + * there or somehow map everything as the identity. + * + * For the time being, we preserve historic behavior and bypass the DMA + * API. + * + * TODO: install a per-device DMA ops structure that does the right thing + * taking into account all the above quirks, and use the DMA API + * unconditionally on data path. + */ + +static bool vring_use_dma_api(struct virtio_device *vdev) +{ + if (!virtio_has_iommu_quirk(vdev)) + return true; + + /* Otherwise, we are left to guess. */ + /* + * In theory, it's possible to have a buggy QEMU-supposed + * emulated Q35 IOMMU and Xen enabled at the same time. On + * such a configuration, virtio has never worked and will + * not work without an even larger kludge. Instead, enable + * the DMA API if we're a Xen guest, which at least allows + * all of the sensible Xen configurations to work correctly. + */ + if (xen_domain()) + return true; + + return false; +} + +/* + * The DMA ops on various arches are rather gnarly right now, and + * making all of the arch DMA ops work on the vring device itself + * is a mess. For now, we use the parent device for DMA ops. + */ +struct device *vring_dma_dev(const struct vring_virtqueue *vq) +{ + return vq->vq.vdev->dev.parent; +} + +/* Map one sg entry. */ +static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, + struct scatterlist *sg, + enum dma_data_direction direction) +{ + if (!vring_use_dma_api(vq->vq.vdev)) + return (dma_addr_t)sg_phys(sg); + + /* + * We can't use dma_map_sg, because we don't use scatterlists in + * the way it expects (we don't guarantee that the scatterlist + * will exist for the lifetime of the mapping). + */ + return dma_map_page(vring_dma_dev(vq), + sg_page(sg), sg->offset, sg->length, + direction); +} + +static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, + void *cpu_addr, size_t size, + enum dma_data_direction direction) +{ + if (!vring_use_dma_api(vq->vq.vdev)) + return (dma_addr_t)virt_to_phys(cpu_addr); + + return dma_map_single(vring_dma_dev(vq), + cpu_addr, size, direction); +} + +static void vring_unmap_one(const struct vring_virtqueue *vq, + struct vring_desc *desc) +{ + u16 flags; + + if (!vring_use_dma_api(vq->vq.vdev)) + return; + + flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); + + if (flags & VRING_DESC_F_INDIRECT) { + dma_unmap_single(vring_dma_dev(vq), + virtio64_to_cpu(vq->vq.vdev, desc->addr), + virtio32_to_cpu(vq->vq.vdev, desc->len), + (flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); + } else { + dma_unmap_page(vring_dma_dev(vq), + virtio64_to_cpu(vq->vq.vdev, desc->addr), + virtio32_to_cpu(vq->vq.vdev, desc->len), + (flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); + } +} + +static int vring_mapping_error(const struct vring_virtqueue *vq, + dma_addr_t addr) +{ + if (!vring_use_dma_api(vq->vq.vdev)) + return 0; + + return dma_mapping_error(vring_dma_dev(vq), addr); +} + static struct vring_desc *alloc_indirect(struct virtqueue *_vq, unsigned int total_sg, gfp_t gfp) { @@ -137,7 +263,7 @@ static inline int virtqueue_add(struct virtqueue *_vq, struct vring_virtqueue *vq = to_vvq(_vq); struct scatterlist *sg; struct vring_desc *desc; - unsigned int i, n, avail, descs_used, uninitialized_var(prev); + unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx; int head; bool indirect; @@ -177,21 +303,15 @@ static inline int virtqueue_add(struct virtqueue *_vq, if (desc) { /* Use a single buffer which doesn't continue */ - vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT); - vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, virt_to_phys(desc)); - /* avoid kmemleak false positive (hidden by virt_to_phys) */ - kmemleak_ignore(desc); - vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc)); - + indirect = true; /* Set up rest to use this indirect table. */ i = 0; descs_used = 1; - indirect = true; } else { + indirect = false; desc = vq->vring.desc; i = head; descs_used = total_sg; - indirect = false; } if (vq->vq.num_free < descs_used) { @@ -208,13 +328,14 @@ static inline int virtqueue_add(struct virtqueue *_vq, return -ENOSPC; } - /* We're about to use some buffers from the free list. */ - vq->vq.num_free -= descs_used; - for (n = 0; n < out_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { + dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); + if (vring_mapping_error(vq, addr)) + goto unmap_release; + desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT); - desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg)); + desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); prev = i; i = virtio16_to_cpu(_vq->vdev, desc[i].next); @@ -222,8 +343,12 @@ static inline int virtqueue_add(struct virtqueue *_vq, } for (; n < (out_sgs + in_sgs); n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { + dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); + if (vring_mapping_error(vq, addr)) + goto unmap_release; + desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE); - desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg)); + desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); prev = i; i = virtio16_to_cpu(_vq->vdev, desc[i].next); @@ -232,14 +357,33 @@ static inline int virtqueue_add(struct virtqueue *_vq, /* Last one doesn't continue. */ desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); + if (indirect) { + /* Now that the indirect table is filled in, map it. */ + dma_addr_t addr = vring_map_single( + vq, desc, total_sg * sizeof(struct vring_desc), + DMA_TO_DEVICE); + if (vring_mapping_error(vq, addr)) + goto unmap_release; + + vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT); + vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, addr); + + vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc)); + } + + /* We're using some buffers from the free list. */ + vq->vq.num_free -= descs_used; + /* Update free pointer */ if (indirect) vq->free_head = virtio16_to_cpu(_vq->vdev, vq->vring.desc[head].next); else vq->free_head = i; - /* Set token. */ - vq->data[head] = data; + /* Store token and indirect buffer state. */ + vq->desc_state[head].data = data; + if (indirect) + vq->desc_state[head].indir_desc = desc; /* Put entry in available array (but don't update avail->idx until they * do sync). */ @@ -262,6 +406,24 @@ static inline int virtqueue_add(struct virtqueue *_vq, virtqueue_kick(_vq); return 0; + +unmap_release: + err_idx = i; + i = head; + + for (n = 0; n < total_sg; n++) { + if (i == err_idx) + break; + vring_unmap_one(vq, &desc[i]); + i = vq->vring.desc[i].next; + } + + vq->vq.num_free += total_sg; + + if (indirect) + kfree(desc); + + return -EIO; } /** @@ -432,27 +594,43 @@ EXPORT_SYMBOL_GPL(virtqueue_kick); static void detach_buf(struct vring_virtqueue *vq, unsigned int head) { - unsigned int i; + unsigned int i, j; + u16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); /* Clear data ptr. */ - vq->data[head] = NULL; + vq->desc_state[head].data = NULL; - /* Put back on free list: find end */ + /* Put back on free list: unmap first-level descriptors and find end */ i = head; - /* Free the indirect table */ - if (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)) - kfree(phys_to_virt(virtio64_to_cpu(vq->vq.vdev, vq->vring.desc[i].addr))); - - while (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT)) { + while (vq->vring.desc[i].flags & nextflag) { + vring_unmap_one(vq, &vq->vring.desc[i]); i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next); vq->vq.num_free++; } + vring_unmap_one(vq, &vq->vring.desc[i]); vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head); vq->free_head = head; + /* Plus final descriptor */ vq->vq.num_free++; + + /* Free the indirect table, if any, now that it's unmapped. */ + if (vq->desc_state[head].indir_desc) { + struct vring_desc *indir_desc = vq->desc_state[head].indir_desc; + u32 len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len); + + BUG_ON(!(vq->vring.desc[head].flags & + cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); + BUG_ON(len == 0 || len % sizeof(struct vring_desc)); + + for (j = 0; j < len / sizeof(struct vring_desc); j++) + vring_unmap_one(vq, &indir_desc[j]); + + kfree(vq->desc_state[head].indir_desc); + vq->desc_state[head].indir_desc = NULL; + } } static inline bool more_used(const struct vring_virtqueue *vq) @@ -507,13 +685,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) BAD_RING(vq, "id %u out of range\n", i); return NULL; } - if (unlikely(!vq->data[i])) { + if (unlikely(!vq->desc_state[i].data)) { BAD_RING(vq, "id %u is not a head!\n", i); return NULL; } /* detach_buf clears data, so grab it now. */ - ret = vq->data[i]; + ret = vq->desc_state[i].data; detach_buf(vq, i); vq->last_used_idx++; /* If we expect an interrupt for the next entry, tell host @@ -687,10 +865,10 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq) START_USE(vq); for (i = 0; i < vq->vring.num; i++) { - if (!vq->data[i]) + if (!vq->desc_state[i].data) continue; /* detach_buf clears data, so grab it now. */ - buf = vq->data[i]; + buf = vq->desc_state[i].data; detach_buf(vq, i); vq->avail_idx_shadow--; vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow); @@ -744,7 +922,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, return NULL; } - vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL); + vq = kmalloc(sizeof(*vq) + num * sizeof(struct vring_desc_state), + GFP_KERNEL); if (!vq) return NULL; @@ -779,11 +958,9 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, /* Put everything in free lists. */ vq->free_head = 0; - for (i = 0; i < num-1; i++) { + for (i = 0; i < num-1; i++) vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1); - vq->data[i] = NULL; - } - vq->data[i] = NULL; + memset(vq->desc_state, 0, num * sizeof(struct vring_desc_state)); return &vq->vq; } @@ -809,6 +986,8 @@ void vring_transport_features(struct virtio_device *vdev) break; case VIRTIO_F_VERSION_1: break; + case VIRTIO_F_IOMMU_PLATFORM: + break; default: /* We don't understand this bit. */ __virtio_clear_bit(vdev, i); |