diff options
Diffstat (limited to 'drivers/xen')
-rw-r--r-- | drivers/xen/events/events_2l.c | 31 | ||||
-rw-r--r-- | drivers/xen/events/events_base.c | 597 | ||||
-rw-r--r-- | drivers/xen/events/events_fifo.c | 107 | ||||
-rw-r--r-- | drivers/xen/events/events_internal.h | 42 | ||||
-rw-r--r-- | drivers/xen/evtchn.c | 7 | ||||
-rw-r--r-- | drivers/xen/gntdev.c | 33 | ||||
-rw-r--r-- | drivers/xen/xen-pciback/pci_stub.c | 14 | ||||
-rw-r--r-- | drivers/xen/xen-pciback/pciback.h | 12 | ||||
-rw-r--r-- | drivers/xen/xen-pciback/pciback_ops.c | 48 | ||||
-rw-r--r-- | drivers/xen/xen-pciback/vpci.c | 14 | ||||
-rw-r--r-- | drivers/xen/xen-pciback/xenbus.c | 26 | ||||
-rw-r--r-- | drivers/xen/xen-scsiback.c | 27 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_client.c | 8 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe.c | 1 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe.h | 2 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe_backend.c | 7 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_xs.c | 38 |
17 files changed, 789 insertions, 225 deletions
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c index 7dd46312c180..ca729f19061d 100644 --- a/drivers/xen/events/events_2l.c +++ b/drivers/xen/events/events_2l.c @@ -46,6 +46,11 @@ static unsigned evtchn_2l_max_channels(void) return EVTCHN_2L_NR_CHANNELS; } +static void evtchn_2l_remove(evtchn_port_t evtchn, unsigned int cpu) +{ + clear_bit(evtchn, BM(per_cpu(cpu_evtchn_mask, cpu))); +} + static void evtchn_2l_bind_to_cpu(struct irq_info *info, unsigned cpu) { clear_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, info->cpu))); @@ -70,12 +75,6 @@ static bool evtchn_2l_is_pending(unsigned port) return sync_test_bit(port, BM(&s->evtchn_pending[0])); } -static bool evtchn_2l_test_and_set_mask(unsigned port) -{ - struct shared_info *s = HYPERVISOR_shared_info; - return sync_test_and_set_bit(port, BM(&s->evtchn_mask[0])); -} - static void evtchn_2l_mask(unsigned port) { struct shared_info *s = HYPERVISOR_shared_info; @@ -90,6 +89,8 @@ static void evtchn_2l_unmask(unsigned port) BUG_ON(!irqs_disabled()); + smp_wmb(); /* All writes before unmask must be visible. */ + if (unlikely((cpu != cpu_from_evtchn(port)))) do_hypercall = 1; else { @@ -158,7 +159,7 @@ static inline xen_ulong_t active_evtchns(unsigned int cpu, * a bitset of words which contain pending event bits. The second * level is a bitset of pending events themselves. */ -static void evtchn_2l_handle_events(unsigned cpu) +static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl) { int irq; xen_ulong_t pending_words; @@ -239,10 +240,7 @@ static void evtchn_2l_handle_events(unsigned cpu) /* Process port. */ port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx; - irq = get_evtchn_to_irq(port); - - if (irq != -1) - generic_handle_irq(irq); + handle_irq_for_port(port, ctrl); bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD; @@ -354,18 +352,27 @@ static void evtchn_2l_resume(void) EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD); } +static int evtchn_2l_percpu_deinit(unsigned int cpu) +{ + memset(per_cpu(cpu_evtchn_mask, cpu), 0, sizeof(xen_ulong_t) * + EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD); + + return 0; +} + static const struct evtchn_ops evtchn_ops_2l = { .max_channels = evtchn_2l_max_channels, .nr_channels = evtchn_2l_max_channels, + .remove = evtchn_2l_remove, .bind_to_cpu = evtchn_2l_bind_to_cpu, .clear_pending = evtchn_2l_clear_pending, .set_pending = evtchn_2l_set_pending, .is_pending = evtchn_2l_is_pending, - .test_and_set_mask = evtchn_2l_test_and_set_mask, .mask = evtchn_2l_mask, .unmask = evtchn_2l_unmask, .handle_events = evtchn_2l_handle_events, .resume = evtchn_2l_resume, + .percpu_deinit = evtchn_2l_percpu_deinit, }; void __init xen_evtchn_2l_init(void) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index e4dd991e2888..56bf952de411 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -32,6 +32,10 @@ #include <linux/slab.h> #include <linux/irqnr.h> #include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/cpu.h> +#include <linux/atomic.h> +#include <linux/ktime.h> #ifdef CONFIG_X86 #include <asm/desc.h> @@ -62,6 +66,15 @@ #include "events_internal.h" +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "xen." + +static uint __read_mostly event_loop_timeout = 2; +module_param(event_loop_timeout, uint, 0644); + +static uint __read_mostly event_eoi_delay = 10; +module_param(event_eoi_delay, uint, 0644); + const struct evtchn_ops *evtchn_ops; /* @@ -70,6 +83,25 @@ const struct evtchn_ops *evtchn_ops; */ static DEFINE_MUTEX(irq_mapping_update_lock); +/* + * Lock protecting event handling loop against removing event channels. + * Adding of event channels is no issue as the associated IRQ becomes active + * only after everything is setup (before request_[threaded_]irq() the handler + * can't be entered for an event, as the event channel will be unmasked only + * then). + */ +static DEFINE_RWLOCK(evtchn_rwlock); + +/* + * Lock hierarchy: + * + * irq_mapping_update_lock + * evtchn_rwlock + * IRQ-desc lock + * percpu eoi_list_lock + * irq_info->lock + */ + static LIST_HEAD(xen_irq_list_head); /* IRQ <-> VIRQ mapping. */ @@ -91,18 +123,23 @@ static bool (*pirq_needs_eoi)(unsigned irq); /* Xen will never allocate port zero for any purpose. */ #define VALID_EVTCHN(chn) ((chn) != 0) +static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY]; + static struct irq_chip xen_dynamic_chip; +static struct irq_chip xen_lateeoi_chip; static struct irq_chip xen_percpu_chip; static struct irq_chip xen_pirq_chip; static void enable_dynirq(struct irq_data *data); static void disable_dynirq(struct irq_data *data); +static DEFINE_PER_CPU(unsigned int, irq_epoch); + static void clear_evtchn_to_irq_row(unsigned row) { unsigned col; for (col = 0; col < EVTCHN_PER_ROW; col++) - evtchn_to_irq[row][col] = -1; + WRITE_ONCE(evtchn_to_irq[row][col], -1); } static void clear_evtchn_to_irq_all(void) @@ -139,7 +176,7 @@ static int set_evtchn_to_irq(unsigned evtchn, unsigned irq) clear_evtchn_to_irq_row(row); } - evtchn_to_irq[row][col] = irq; + WRITE_ONCE(evtchn_to_irq[row][col], irq); return 0; } @@ -149,13 +186,24 @@ int get_evtchn_to_irq(unsigned evtchn) return -1; if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) return -1; - return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]; + return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); } /* Get info for IRQ */ struct irq_info *info_for_irq(unsigned irq) { - return irq_get_chip_data(irq); + if (irq < nr_legacy_irqs()) + return legacy_info_ptrs[irq]; + else + return irq_get_chip_data(irq); +} + +static void set_info_for_irq(unsigned int irq, struct irq_info *info) +{ + if (irq < nr_legacy_irqs()) + legacy_info_ptrs[irq] = info; + else + irq_set_chip_data(irq, info); } /* Constructors for packed IRQ information. */ @@ -173,6 +221,8 @@ static int xen_irq_info_common_setup(struct irq_info *info, info->irq = irq; info->evtchn = evtchn; info->cpu = cpu; + info->mask_reason = EVT_MASK_REASON_EXPLICIT; + raw_spin_lock_init(&info->lock); ret = set_evtchn_to_irq(evtchn, irq); if (ret < 0) @@ -239,6 +289,7 @@ static int xen_irq_info_pirq_setup(unsigned irq, static void xen_irq_info_cleanup(struct irq_info *info) { set_evtchn_to_irq(info->evtchn, -1); + xen_evtchn_port_remove(info->evtchn, info->cpu); info->evtchn = 0; } @@ -247,10 +298,14 @@ static void xen_irq_info_cleanup(struct irq_info *info) */ unsigned int evtchn_from_irq(unsigned irq) { - if (unlikely(WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq))) + const struct irq_info *info = NULL; + + if (likely(irq < nr_irqs)) + info = info_for_irq(irq); + if (!info) return 0; - return info_for_irq(irq)->evtchn; + return info->evtchn; } unsigned irq_from_evtchn(unsigned int evtchn) @@ -315,6 +370,34 @@ unsigned int cpu_from_evtchn(unsigned int evtchn) return ret; } +static void do_mask(struct irq_info *info, u8 reason) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&info->lock, flags); + + if (!info->mask_reason) + mask_evtchn(info->evtchn); + + info->mask_reason |= reason; + + raw_spin_unlock_irqrestore(&info->lock, flags); +} + +static void do_unmask(struct irq_info *info, u8 reason) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&info->lock, flags); + + info->mask_reason &= ~reason; + + if (!info->mask_reason) + unmask_evtchn(info->evtchn); + + raw_spin_unlock_irqrestore(&info->lock, flags); +} + #ifdef CONFIG_X86 static bool pirq_check_eoi_map(unsigned irq) { @@ -369,9 +452,157 @@ void notify_remote_via_irq(int irq) } EXPORT_SYMBOL_GPL(notify_remote_via_irq); +struct lateeoi_work { + struct delayed_work delayed; + spinlock_t eoi_list_lock; + struct list_head eoi_list; +}; + +static DEFINE_PER_CPU(struct lateeoi_work, lateeoi); + +static void lateeoi_list_del(struct irq_info *info) +{ + struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu); + unsigned long flags; + + spin_lock_irqsave(&eoi->eoi_list_lock, flags); + list_del_init(&info->eoi_list); + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); +} + +static void lateeoi_list_add(struct irq_info *info) +{ + struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu); + struct irq_info *elem; + u64 now = get_jiffies_64(); + unsigned long delay; + unsigned long flags; + + if (now < info->eoi_time) + delay = info->eoi_time - now; + else + delay = 1; + + spin_lock_irqsave(&eoi->eoi_list_lock, flags); + + if (list_empty(&eoi->eoi_list)) { + list_add(&info->eoi_list, &eoi->eoi_list); + mod_delayed_work_on(info->eoi_cpu, system_wq, + &eoi->delayed, delay); + } else { + list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) { + if (elem->eoi_time <= info->eoi_time) + break; + } + list_add(&info->eoi_list, &elem->eoi_list); + } + + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); +} + +static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious) +{ + evtchn_port_t evtchn; + unsigned int cpu; + unsigned int delay = 0; + + evtchn = info->evtchn; + if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list)) + return; + + if (spurious) { + if ((1 << info->spurious_cnt) < (HZ << 2)) + info->spurious_cnt++; + if (info->spurious_cnt > 1) { + delay = 1 << (info->spurious_cnt - 2); + if (delay > HZ) + delay = HZ; + if (!info->eoi_time) + info->eoi_cpu = smp_processor_id(); + info->eoi_time = get_jiffies_64() + delay; + } + } else { + info->spurious_cnt = 0; + } + + cpu = info->eoi_cpu; + if (info->eoi_time && + (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) { + lateeoi_list_add(info); + return; + } + + info->eoi_time = 0; + do_unmask(info, EVT_MASK_REASON_EOI_PENDING); +} + +static void xen_irq_lateeoi_worker(struct work_struct *work) +{ + struct lateeoi_work *eoi; + struct irq_info *info; + u64 now = get_jiffies_64(); + unsigned long flags; + + eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed); + + read_lock_irqsave(&evtchn_rwlock, flags); + + while (true) { + spin_lock(&eoi->eoi_list_lock); + + info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, + eoi_list); + + if (info == NULL || now < info->eoi_time) { + spin_unlock(&eoi->eoi_list_lock); + break; + } + + list_del_init(&info->eoi_list); + + spin_unlock(&eoi->eoi_list_lock); + + info->eoi_time = 0; + + xen_irq_lateeoi_locked(info, false); + } + + if (info) + mod_delayed_work_on(info->eoi_cpu, system_wq, + &eoi->delayed, info->eoi_time - now); + + read_unlock_irqrestore(&evtchn_rwlock, flags); +} + +static void xen_cpu_init_eoi(unsigned int cpu) +{ + struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu); + + INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker); + spin_lock_init(&eoi->eoi_list_lock); + INIT_LIST_HEAD(&eoi->eoi_list); +} + +void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) +{ + struct irq_info *info; + unsigned long flags; + + read_lock_irqsave(&evtchn_rwlock, flags); + + info = info_for_irq(irq); + + if (info) + xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS); + + read_unlock_irqrestore(&evtchn_rwlock, flags); +} +EXPORT_SYMBOL_GPL(xen_irq_lateeoi); + static void xen_irq_init(unsigned irq) { struct irq_info *info; + #ifdef CONFIG_SMP /* By default all event channels notify CPU#0. */ cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0)); @@ -384,8 +615,9 @@ static void xen_irq_init(unsigned irq) info->type = IRQT_UNBOUND; info->refcnt = -1; - irq_set_chip_data(irq, info); + set_info_for_irq(irq, info); + INIT_LIST_HEAD(&info->eoi_list); list_add_tail(&info->list, &xen_irq_list_head); } @@ -433,17 +665,25 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) static void xen_free_irq(unsigned irq) { - struct irq_info *info = irq_get_chip_data(irq); + struct irq_info *info = info_for_irq(irq); + unsigned long flags; if (WARN_ON(!info)) return; + write_lock_irqsave(&evtchn_rwlock, flags); + + if (!list_empty(&info->eoi_list)) + lateeoi_list_del(info); + list_del(&info->list); - irq_set_chip_data(irq, NULL); + set_info_for_irq(irq, NULL); WARN_ON(info->refcnt > 0); + write_unlock_irqrestore(&evtchn_rwlock, flags); + kfree(info); /* Legacy IRQ descriptors are managed by the arch. */ @@ -462,6 +702,12 @@ static void xen_evtchn_close(unsigned int port) BUG(); } +static void event_handler_exit(struct irq_info *info) +{ + smp_store_release(&info->is_active, 0); + clear_evtchn(info->evtchn); +} + static void pirq_query_unmask(int irq) { struct physdev_irq_status_query irq_status; @@ -480,7 +726,8 @@ static void pirq_query_unmask(int irq) static void eoi_pirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(data->irq); + struct irq_info *info = info_for_irq(data->irq); + int evtchn = info ? info->evtchn : 0; struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) }; int rc = 0; @@ -489,16 +736,15 @@ static void eoi_pirq(struct irq_data *data) if (unlikely(irqd_is_setaffinity_pending(data)) && likely(!irqd_irq_disabled(data))) { - int masked = test_and_set_mask(evtchn); + do_mask(info, EVT_MASK_REASON_TEMPORARY); - clear_evtchn(evtchn); + event_handler_exit(info); irq_move_masked_irq(data); - if (!masked) - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_TEMPORARY); } else - clear_evtchn(evtchn); + event_handler_exit(info); if (pirq_needs_eoi(data->irq)) { rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); @@ -549,7 +795,8 @@ static unsigned int __startup_pirq(unsigned int irq) goto err; out: - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_EXPLICIT); + eoi_pirq(irq_get_irq_data(irq)); return 0; @@ -576,7 +823,7 @@ static void shutdown_pirq(struct irq_data *data) if (!VALID_EVTCHN(evtchn)) return; - mask_evtchn(evtchn); + do_mask(info, EVT_MASK_REASON_EXPLICIT); xen_evtchn_close(evtchn); xen_irq_info_cleanup(info); } @@ -610,7 +857,7 @@ EXPORT_SYMBOL_GPL(xen_irq_from_gsi); static void __unbind_from_irq(unsigned int irq) { int evtchn = evtchn_from_irq(irq); - struct irq_info *info = irq_get_chip_data(irq); + struct irq_info *info = info_for_irq(irq); if (info->refcnt > 0) { info->refcnt--; @@ -835,7 +1082,7 @@ int xen_pirq_from_irq(unsigned irq) } EXPORT_SYMBOL_GPL(xen_pirq_from_irq); -int bind_evtchn_to_irq(unsigned int evtchn) +static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip) { int irq; int ret; @@ -852,7 +1099,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) if (irq < 0) goto out; - irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, + irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "event"); ret = xen_irq_info_evtchn_setup(irq, evtchn); @@ -873,8 +1120,19 @@ out: return irq; } + +int bind_evtchn_to_irq(evtchn_port_t evtchn) +{ + return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip); +} EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); +int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn) +{ + return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi); + static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) { struct evtchn_bind_ipi bind_ipi; @@ -916,8 +1174,9 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) return irq; } -int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, - unsigned int remote_port) +static int bind_interdomain_evtchn_to_irq_chip(unsigned int remote_domain, + evtchn_port_t remote_port, + struct irq_chip *chip) { struct evtchn_bind_interdomain bind_interdomain; int err; @@ -928,10 +1187,26 @@ int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, &bind_interdomain); - return err ? : bind_evtchn_to_irq(bind_interdomain.local_port); + return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port, + chip); +} + +int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, + evtchn_port_t remote_port) +{ + return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, + &xen_dynamic_chip); } EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq); +int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain, + evtchn_port_t remote_port) +{ + return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, + &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi); + static int find_virq(unsigned int virq, unsigned int cpu) { struct evtchn_status status; @@ -1027,14 +1302,15 @@ static void unbind_from_irq(unsigned int irq) mutex_unlock(&irq_mapping_update_lock); } -int bind_evtchn_to_irqhandler(unsigned int evtchn, - irq_handler_t handler, - unsigned long irqflags, - const char *devname, void *dev_id) +static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id, + struct irq_chip *chip) { int irq, retval; - irq = bind_evtchn_to_irq(evtchn); + irq = bind_evtchn_to_irq_chip(evtchn, chip); if (irq < 0) return irq; retval = request_irq(irq, handler, irqflags, devname, dev_id); @@ -1045,18 +1321,38 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, return irq; } + +int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id) +{ + return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags, + devname, dev_id, + &xen_dynamic_chip); +} EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); -int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, - irq_handler_t handler, - unsigned long irqflags, - const char *devname, - void *dev_id) +int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id) +{ + return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags, + devname, dev_id, + &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi); + +static int bind_interdomain_evtchn_to_irqhandler_chip( + unsigned int remote_domain, evtchn_port_t remote_port, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id, struct irq_chip *chip) { int irq, retval; - irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); + irq = bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, + chip); if (irq < 0) return irq; @@ -1068,8 +1364,33 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, return irq; } + +int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, + evtchn_port_t remote_port, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain, + remote_port, handler, irqflags, devname, + dev_id, &xen_dynamic_chip); +} EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler); +int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain, + evtchn_port_t remote_port, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain, + remote_port, handler, irqflags, devname, + dev_id, &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi); + int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) @@ -1114,7 +1435,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, void unbind_from_irqhandler(unsigned int irq, void *dev_id) { - struct irq_info *info = irq_get_chip_data(irq); + struct irq_info *info = info_for_irq(irq); if (WARN_ON(!info)) return; @@ -1148,7 +1469,7 @@ int evtchn_make_refcounted(unsigned int evtchn) if (irq == -1) return -ENOENT; - info = irq_get_chip_data(irq); + info = info_for_irq(irq); if (!info) return -ENOENT; @@ -1176,13 +1497,13 @@ int evtchn_get(unsigned int evtchn) if (irq == -1) goto done; - info = irq_get_chip_data(irq); + info = info_for_irq(irq); if (!info) goto done; err = -EINVAL; - if (info->refcnt <= 0) + if (info->refcnt <= 0 || info->refcnt == SHRT_MAX) goto done; info->refcnt++; @@ -1220,6 +1541,56 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) notify_remote_via_irq(irq); } +struct evtchn_loop_ctrl { + ktime_t timeout; + unsigned count; + bool defer_eoi; +}; + +void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) +{ + int irq; + struct irq_info *info; + + irq = get_evtchn_to_irq(port); + if (irq == -1) + return; + + /* + * Check for timeout every 256 events. + * We are setting the timeout value only after the first 256 + * events in order to not hurt the common case of few loop + * iterations. The 256 is basically an arbitrary value. + * + * In case we are hitting the timeout we need to defer all further + * EOIs in order to ensure to leave the event handling loop rather + * sooner than later. + */ + if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) { + ktime_t kt = ktime_get(); + + if (!ctrl->timeout.tv64) { + kt = ktime_add_ms(kt, + jiffies_to_msecs(event_loop_timeout)); + ctrl->timeout = kt; + } else if (kt.tv64 > ctrl->timeout.tv64) { + ctrl->defer_eoi = true; + } + } + + info = info_for_irq(irq); + if (xchg_acquire(&info->is_active, 1)) + return; + + if (ctrl->defer_eoi) { + info->eoi_cpu = smp_processor_id(); + info->irq_epoch = __this_cpu_read(irq_epoch); + info->eoi_time = get_jiffies_64() + event_eoi_delay; + } + + generic_handle_irq(irq); +} + static DEFINE_PER_CPU(unsigned, xed_nesting_count); static void __xen_evtchn_do_upcall(void) @@ -1227,6 +1598,9 @@ static void __xen_evtchn_do_upcall(void) struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); int cpu = get_cpu(); unsigned count; + struct evtchn_loop_ctrl ctrl = { 0 }; + + read_lock(&evtchn_rwlock); do { vcpu_info->evtchn_upcall_pending = 0; @@ -1234,7 +1608,7 @@ static void __xen_evtchn_do_upcall(void) if (__this_cpu_inc_return(xed_nesting_count) - 1) goto out; - xen_evtchn_handle_events(cpu); + xen_evtchn_handle_events(cpu, &ctrl); BUG_ON(!irqs_disabled()); @@ -1243,6 +1617,14 @@ static void __xen_evtchn_do_upcall(void) } while (count != 1 || vcpu_info->evtchn_upcall_pending); out: + read_unlock(&evtchn_rwlock); + + /* + * Increment irq_epoch only now to defer EOIs only for + * xen_irq_lateeoi() invocations occurring from inside the loop + * above. + */ + __this_cpu_inc(irq_epoch); put_cpu(); } @@ -1305,8 +1687,8 @@ void rebind_evtchn_irq(int evtchn, int irq) static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) { struct evtchn_bind_vcpu bind_vcpu; - int evtchn = evtchn_from_irq(irq); - int masked; + struct irq_info *info = info_for_irq(irq); + int evtchn = info ? info->evtchn : 0; if (!VALID_EVTCHN(evtchn)) return -1; @@ -1322,7 +1704,7 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) * Mask the event while changing the VCPU binding to prevent * it being delivered on an unexpected VCPU. */ - masked = test_and_set_mask(evtchn); + do_mask(info, EVT_MASK_REASON_TEMPORARY); /* * If this fails, it usually just indicates that we're dealing with a @@ -1332,8 +1714,7 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) bind_evtchn_to_cpu(evtchn, tcpu); - if (!masked) - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_TEMPORARY); return 0; } @@ -1348,39 +1729,41 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, static void enable_dynirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(data->irq); + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; if (VALID_EVTCHN(evtchn)) - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_EXPLICIT); } static void disable_dynirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(data->irq); + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; if (VALID_EVTCHN(evtchn)) - mask_evtchn(evtchn); + do_mask(info, EVT_MASK_REASON_EXPLICIT); } static void ack_dynirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(data->irq); + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; if (!VALID_EVTCHN(evtchn)) return; if (unlikely(irqd_is_setaffinity_pending(data)) && likely(!irqd_irq_disabled(data))) { - int masked = test_and_set_mask(evtchn); + do_mask(info, EVT_MASK_REASON_TEMPORARY); - clear_evtchn(evtchn); + event_handler_exit(info); irq_move_masked_irq(data); - if (!masked) - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_TEMPORARY); } else - clear_evtchn(evtchn); + event_handler_exit(info); } static void mask_ack_dynirq(struct irq_data *data) @@ -1389,18 +1772,39 @@ static void mask_ack_dynirq(struct irq_data *data) ack_dynirq(data); } +static void lateeoi_ack_dynirq(struct irq_data *data) +{ + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; + + if (VALID_EVTCHN(evtchn)) { + do_mask(info, EVT_MASK_REASON_EOI_PENDING); + ack_dynirq(data); + } +} + +static void lateeoi_mask_ack_dynirq(struct irq_data *data) +{ + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; + + if (VALID_EVTCHN(evtchn)) { + do_mask(info, EVT_MASK_REASON_EXPLICIT); + ack_dynirq(data); + } +} + static int retrigger_dynirq(struct irq_data *data) { - unsigned int evtchn = evtchn_from_irq(data->irq); - int masked; + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; if (!VALID_EVTCHN(evtchn)) return 0; - masked = test_and_set_mask(evtchn); + do_mask(info, EVT_MASK_REASON_TEMPORARY); set_evtchn(evtchn); - if (!masked) - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_TEMPORARY); return 1; } @@ -1495,10 +1899,11 @@ static void restore_cpu_ipis(unsigned int cpu) /* Clear an irq's pending state, in preparation for polling on it */ void xen_clear_irq_pending(int irq) { - int evtchn = evtchn_from_irq(irq); + struct irq_info *info = info_for_irq(irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; if (VALID_EVTCHN(evtchn)) - clear_evtchn(evtchn); + event_handler_exit(info); } EXPORT_SYMBOL(xen_clear_irq_pending); void xen_set_irq_pending(int irq) @@ -1599,6 +2004,21 @@ static struct irq_chip xen_dynamic_chip __read_mostly = { .irq_retrigger = retrigger_dynirq, }; +static struct irq_chip xen_lateeoi_chip __read_mostly = { + /* The chip name needs to contain "xen-dyn" for irqbalance to work. */ + .name = "xen-dyn-lateeoi", + + .irq_disable = disable_dynirq, + .irq_mask = disable_dynirq, + .irq_unmask = enable_dynirq, + + .irq_ack = lateeoi_ack_dynirq, + .irq_mask_ack = lateeoi_mask_ack_dynirq, + + .irq_set_affinity = set_affinity_irq, + .irq_retrigger = retrigger_dynirq, +}; + static struct irq_chip xen_pirq_chip __read_mostly = { .name = "xen-pirq", @@ -1666,12 +2086,55 @@ void xen_callback_vector(void) void xen_callback_vector(void) {} #endif -#undef MODULE_PARAM_PREFIX -#define MODULE_PARAM_PREFIX "xen." - static bool fifo_events = true; module_param(fifo_events, bool, 0); +static int xen_evtchn_cpu_prepare(unsigned int cpu) +{ + int ret = 0; + + xen_cpu_init_eoi(cpu); + + if (evtchn_ops->percpu_init) + ret = evtchn_ops->percpu_init(cpu); + + return ret; +} + +static int xen_evtchn_cpu_dead(unsigned int cpu) +{ + int ret = 0; + + if (evtchn_ops->percpu_deinit) + ret = evtchn_ops->percpu_deinit(cpu); + + return ret; +} + +static int evtchn_cpu_notification(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + int cpu = (long)hcpu; + int ret = 0; + + switch (action) { + case CPU_UP_PREPARE: + ret = xen_evtchn_cpu_prepare(cpu); + break; + case CPU_DEAD: + ret = xen_evtchn_cpu_dead(cpu); + break; + default: + break; + } + + return ret < 0 ? NOTIFY_BAD : NOTIFY_OK; +} + +static struct notifier_block evtchn_cpu_notifier = { + .notifier_call = evtchn_cpu_notification, +}; + void __init xen_init_IRQ(void) { int ret = -EINVAL; @@ -1681,6 +2144,10 @@ void __init xen_init_IRQ(void) if (ret < 0) xen_evtchn_2l_init(); + xen_cpu_init_eoi(smp_processor_id()); + + register_cpu_notifier(&evtchn_cpu_notifier); + evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()), sizeof(*evtchn_to_irq), GFP_KERNEL); BUG_ON(!evtchn_to_irq); diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index 96a1b8da5371..5e6ff2120132 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -209,12 +209,6 @@ static bool evtchn_fifo_is_pending(unsigned port) return sync_test_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word)); } -static bool evtchn_fifo_test_and_set_mask(unsigned port) -{ - event_word_t *word = event_word_from_port(port); - return sync_test_and_set_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word)); -} - static void evtchn_fifo_mask(unsigned port) { event_word_t *word = event_word_from_port(port); @@ -227,19 +221,25 @@ static bool evtchn_fifo_is_masked(unsigned port) return sync_test_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word)); } /* - * Clear MASKED, spinning if BUSY is set. + * Clear MASKED if not PENDING, spinning if BUSY is set. + * Return true if mask was cleared. */ -static void clear_masked(volatile event_word_t *word) +static bool clear_masked_cond(volatile event_word_t *word) { event_word_t new, old, w; w = *word; do { + if (w & (1 << EVTCHN_FIFO_PENDING)) + return false; + old = w & ~(1 << EVTCHN_FIFO_BUSY); new = old & ~(1 << EVTCHN_FIFO_MASKED); w = sync_cmpxchg(word, old, new); } while (w != old); + + return true; } static void evtchn_fifo_unmask(unsigned port) @@ -248,8 +248,7 @@ static void evtchn_fifo_unmask(unsigned port) BUG_ON(!irqs_disabled()); - clear_masked(word); - if (evtchn_fifo_is_pending(port)) { + if (!clear_masked_cond(word)) { struct evtchn_unmask unmask = { .port = port }; (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); } @@ -270,19 +269,9 @@ static uint32_t clear_linked(volatile event_word_t *word) return w & EVTCHN_FIFO_LINK_MASK; } -static void handle_irq_for_port(unsigned port) -{ - int irq; - - irq = get_evtchn_to_irq(port); - if (irq != -1) - generic_handle_irq(irq); -} - -static void consume_one_event(unsigned cpu, +static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl, struct evtchn_fifo_control_block *control_block, - unsigned priority, unsigned long *ready, - bool drop) + unsigned priority, unsigned long *ready) { struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu); uint32_t head; @@ -315,16 +304,17 @@ static void consume_one_event(unsigned cpu, clear_bit(priority, ready); if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) { - if (unlikely(drop)) + if (unlikely(!ctrl)) pr_warn("Dropping pending event for port %u\n", port); else - handle_irq_for_port(port); + handle_irq_for_port(port, ctrl); } q->head[priority] = head; } -static void __evtchn_fifo_handle_events(unsigned cpu, bool drop) +static void __evtchn_fifo_handle_events(unsigned cpu, + struct evtchn_loop_ctrl *ctrl) { struct evtchn_fifo_control_block *control_block; unsigned long ready; @@ -336,14 +326,15 @@ static void __evtchn_fifo_handle_events(unsigned cpu, bool drop) while (ready) { q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES); - consume_one_event(cpu, control_block, q, &ready, drop); + consume_one_event(cpu, ctrl, control_block, q, &ready); ready |= xchg(&control_block->ready, 0); } } -static void evtchn_fifo_handle_events(unsigned cpu) +static void evtchn_fifo_handle_events(unsigned cpu, + struct evtchn_loop_ctrl *ctrl) { - __evtchn_fifo_handle_events(cpu, false); + __evtchn_fifo_handle_events(cpu, ctrl); } static void evtchn_fifo_resume(void) @@ -381,21 +372,6 @@ static void evtchn_fifo_resume(void) event_array_pages = 0; } -static const struct evtchn_ops evtchn_ops_fifo = { - .max_channels = evtchn_fifo_max_channels, - .nr_channels = evtchn_fifo_nr_channels, - .setup = evtchn_fifo_setup, - .bind_to_cpu = evtchn_fifo_bind_to_cpu, - .clear_pending = evtchn_fifo_clear_pending, - .set_pending = evtchn_fifo_set_pending, - .is_pending = evtchn_fifo_is_pending, - .test_and_set_mask = evtchn_fifo_test_and_set_mask, - .mask = evtchn_fifo_mask, - .unmask = evtchn_fifo_unmask, - .handle_events = evtchn_fifo_handle_events, - .resume = evtchn_fifo_resume, -}; - static int evtchn_fifo_alloc_control_block(unsigned cpu) { void *control_block = NULL; @@ -418,29 +394,33 @@ static int evtchn_fifo_alloc_control_block(unsigned cpu) return ret; } -static int evtchn_fifo_cpu_notification(struct notifier_block *self, - unsigned long action, - void *hcpu) +static int evtchn_fifo_percpu_init(unsigned int cpu) { - int cpu = (long)hcpu; - int ret = 0; - - switch (action) { - case CPU_UP_PREPARE: - if (!per_cpu(cpu_control_block, cpu)) - ret = evtchn_fifo_alloc_control_block(cpu); - break; - case CPU_DEAD: - __evtchn_fifo_handle_events(cpu, true); - break; - default: - break; - } - return ret < 0 ? NOTIFY_BAD : NOTIFY_OK; + if (!per_cpu(cpu_control_block, cpu)) + return evtchn_fifo_alloc_control_block(cpu); + return 0; } -static struct notifier_block evtchn_fifo_cpu_notifier = { - .notifier_call = evtchn_fifo_cpu_notification, +static int evtchn_fifo_percpu_deinit(unsigned int cpu) +{ + __evtchn_fifo_handle_events(cpu, NULL); + return 0; +} + +static const struct evtchn_ops evtchn_ops_fifo = { + .max_channels = evtchn_fifo_max_channels, + .nr_channels = evtchn_fifo_nr_channels, + .setup = evtchn_fifo_setup, + .bind_to_cpu = evtchn_fifo_bind_to_cpu, + .clear_pending = evtchn_fifo_clear_pending, + .set_pending = evtchn_fifo_set_pending, + .is_pending = evtchn_fifo_is_pending, + .mask = evtchn_fifo_mask, + .unmask = evtchn_fifo_unmask, + .handle_events = evtchn_fifo_handle_events, + .resume = evtchn_fifo_resume, + .percpu_init = evtchn_fifo_percpu_init, + .percpu_deinit = evtchn_fifo_percpu_deinit, }; int __init xen_evtchn_fifo_init(void) @@ -456,7 +436,6 @@ int __init xen_evtchn_fifo_init(void) evtchn_ops = &evtchn_ops_fifo; - register_cpu_notifier(&evtchn_fifo_cpu_notifier); out: put_cpu(); return ret; diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index 50c2050a1e32..cc37b711491c 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -32,11 +32,22 @@ enum xen_irq_type { */ struct irq_info { struct list_head list; - int refcnt; - enum xen_irq_type type; /* type */ + struct list_head eoi_list; + short refcnt; + short spurious_cnt; + short type; /* type */ + u8 mask_reason; /* Why is event channel masked */ +#define EVT_MASK_REASON_EXPLICIT 0x01 +#define EVT_MASK_REASON_TEMPORARY 0x02 +#define EVT_MASK_REASON_EOI_PENDING 0x04 + u8 is_active; /* Is event just being handled? */ unsigned irq; unsigned int evtchn; /* event channel */ unsigned short cpu; /* cpu bound */ + unsigned short eoi_cpu; /* EOI must happen on this cpu */ + unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */ + u64 eoi_time; /* Time in jiffies when to EOI. */ + raw_spinlock_t lock; union { unsigned short virq; @@ -55,28 +66,34 @@ struct irq_info { #define PIRQ_SHAREABLE (1 << 1) #define PIRQ_MSI_GROUP (1 << 2) +struct evtchn_loop_ctrl; + struct evtchn_ops { unsigned (*max_channels)(void); unsigned (*nr_channels)(void); int (*setup)(struct irq_info *info); + void (*remove)(evtchn_port_t port, unsigned int cpu); void (*bind_to_cpu)(struct irq_info *info, unsigned cpu); void (*clear_pending)(unsigned port); void (*set_pending)(unsigned port); bool (*is_pending)(unsigned port); - bool (*test_and_set_mask)(unsigned port); void (*mask)(unsigned port); void (*unmask)(unsigned port); - void (*handle_events)(unsigned cpu); + void (*handle_events)(unsigned cpu, struct evtchn_loop_ctrl *ctrl); void (*resume)(void); + + int (*percpu_init)(unsigned int cpu); + int (*percpu_deinit)(unsigned int cpu); }; extern const struct evtchn_ops *evtchn_ops; extern int **evtchn_to_irq; int get_evtchn_to_irq(unsigned int evtchn); +void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl); struct irq_info *info_for_irq(unsigned irq); unsigned cpu_from_irq(unsigned irq); @@ -98,6 +115,13 @@ static inline int xen_evtchn_port_setup(struct irq_info *info) return 0; } +static inline void xen_evtchn_port_remove(evtchn_port_t evtchn, + unsigned int cpu) +{ + if (evtchn_ops->remove) + evtchn_ops->remove(evtchn, cpu); +} + static inline void xen_evtchn_port_bind_to_cpu(struct irq_info *info, unsigned cpu) { @@ -119,11 +143,6 @@ static inline bool test_evtchn(unsigned port) return evtchn_ops->is_pending(port); } -static inline bool test_and_set_mask(unsigned port) -{ - return evtchn_ops->test_and_set_mask(port); -} - static inline void mask_evtchn(unsigned port) { return evtchn_ops->mask(port); @@ -134,9 +153,10 @@ static inline void unmask_evtchn(unsigned port) return evtchn_ops->unmask(port); } -static inline void xen_evtchn_handle_events(unsigned cpu) +static inline void xen_evtchn_handle_events(unsigned cpu, + struct evtchn_loop_ctrl *ctrl) { - return evtchn_ops->handle_events(cpu); + return evtchn_ops->handle_events(cpu, ctrl); } static inline void xen_evtchn_resume(void) diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index f4edd6df3df2..96c3007576b6 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -173,7 +173,6 @@ static irqreturn_t evtchn_interrupt(int irq, void *data) "Interrupt for port %d, but apparently not enabled; per-user %p\n", evtchn->port, u); - disable_irq_nosync(irq); evtchn->enabled = false; spin_lock(&u->ring_prod_lock); @@ -299,7 +298,7 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, evtchn = find_evtchn(u, port); if (evtchn && !evtchn->enabled) { evtchn->enabled = true; - enable_irq(irq_from_evtchn(port)); + xen_irq_lateeoi(irq_from_evtchn(port), 0); } } @@ -399,8 +398,8 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) if (rc < 0) goto err; - rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0, - u->name, evtchn); + rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0, + u->name, evtchn); if (rc < 0) goto err; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 1865bcfa869b..6f077ae0cf31 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -293,36 +293,47 @@ static int map_grant_pages(struct grant_map *map) * to the kernel linear addresses of the struct pages. * These ptes are completely different from the user ptes dealt * with find_grant_ptes. + * Note that GNTMAP_device_map isn't needed here: The + * dev_bus_addr output field gets consumed only from ->map_ops, + * and by not requesting it when mapping we also avoid needing + * to mirror dev_bus_addr into ->unmap_ops (and holding an extra + * reference to the page in the hypervisor). */ + unsigned int flags = (map->flags & ~GNTMAP_device_map) | + GNTMAP_host_map; + for (i = 0; i < map->count; i++) { unsigned long address = (unsigned long) pfn_to_kaddr(page_to_pfn(map->pages[i])); BUG_ON(PageHighMem(map->pages[i])); - gnttab_set_map_op(&map->kmap_ops[i], address, - map->flags | GNTMAP_host_map, + gnttab_set_map_op(&map->kmap_ops[i], address, flags, map->grants[i].ref, map->grants[i].domid); gnttab_set_unmap_op(&map->kunmap_ops[i], address, - map->flags | GNTMAP_host_map, -1); + flags, -1); } } pr_debug("map %d+%d\n", map->index, map->count); err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL, map->pages, map->count); - if (err) - return err; for (i = 0; i < map->count; i++) { - if (map->map_ops[i].status) { + if (map->map_ops[i].status == GNTST_okay) + map->unmap_ops[i].handle = map->map_ops[i].handle; + else if (!err) err = -EINVAL; - continue; - } - map->unmap_ops[i].handle = map->map_ops[i].handle; - if (use_ptemod) - map->kunmap_ops[i].handle = map->kmap_ops[i].handle; + if (map->flags & GNTMAP_device_map) + map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr; + + if (use_ptemod) { + if (map->kmap_ops[i].status == GNTST_okay) + map->kunmap_ops[i].handle = map->kmap_ops[i].handle; + else if (!err) + err = -EINVAL; + } } return err; } diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 47c6df53cabf..e21b82921c33 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -681,10 +681,17 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, wmb(); notify_remote_via_irq(pdev->evtchn_irq); + /* Enable IRQ to signal "request done". */ + xen_pcibk_lateeoi(pdev, 0); + ret = wait_event_timeout(xen_pcibk_aer_wait_queue, !(test_bit(_XEN_PCIB_active, (unsigned long *) &sh_info->flags)), 300*HZ); + /* Enable IRQ for pcifront request if not already active. */ + if (!test_bit(_PDEVF_op_active, &pdev->flags)) + xen_pcibk_lateeoi(pdev, 0); + if (!ret) { if (test_bit(_XEN_PCIB_active, (unsigned long *)&sh_info->flags)) { @@ -698,13 +705,6 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, } clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags); - if (test_bit(_XEN_PCIF_active, - (unsigned long *)&sh_info->flags)) { - dev_dbg(&psdev->dev->dev, - "schedule pci_conf service in " DRV_NAME "\n"); - xen_pcibk_test_and_schedule_op(psdev->pdev); - } - res = (pci_ers_result_t)aer_op->err; return res; } diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index 4d529f3e40df..f44a425d1a5a 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -13,6 +13,7 @@ #include <linux/spinlock.h> #include <linux/workqueue.h> #include <linux/atomic.h> +#include <xen/events.h> #include <xen/interface/io/pciif.h> #define DRV_NAME "xen-pciback" @@ -26,6 +27,8 @@ struct pci_dev_entry { #define PDEVF_op_active (1<<(_PDEVF_op_active)) #define _PCIB_op_pending (1) #define PCIB_op_pending (1<<(_PCIB_op_pending)) +#define _EOI_pending (2) +#define EOI_pending (1<<(_EOI_pending)) struct xen_pcibk_device { void *pci_dev_data; @@ -182,12 +185,17 @@ static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id); void xen_pcibk_do_op(struct work_struct *data); +static inline void xen_pcibk_lateeoi(struct xen_pcibk_device *pdev, + unsigned int eoi_flag) +{ + if (test_and_clear_bit(_EOI_pending, &pdev->flags)) + xen_irq_lateeoi(pdev->evtchn_irq, eoi_flag); +} + int xen_pcibk_xenbus_register(void); void xen_pcibk_xenbus_unregister(void); extern int verbose_request; - -void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev); #endif /* Handles shared IRQs that can to device domain and control domain. */ diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 49c5f0e9600a..232db7fcc523 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -296,26 +296,41 @@ int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, return 0; } #endif + +static inline bool xen_pcibk_test_op_pending(struct xen_pcibk_device *pdev) +{ + return test_bit(_XEN_PCIF_active, + (unsigned long *)&pdev->sh_info->flags) && + !test_and_set_bit(_PDEVF_op_active, &pdev->flags); +} + /* * Now the same evtchn is used for both pcifront conf_read_write request * as well as pcie aer front end ack. We use a new work_queue to schedule * xen_pcibk conf_read_write service for avoiding confict with aer_core * do_recovery job which also use the system default work_queue */ -void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) +static void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) { + bool eoi = true; + /* Check that frontend is requesting an operation and that we are not * already processing a request */ - if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) - && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) { + if (xen_pcibk_test_op_pending(pdev)) { queue_work(xen_pcibk_wq, &pdev->op_work); + eoi = false; } /*_XEN_PCIB_active should have been cleared by pcifront. And also make sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/ if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) && test_bit(_PCIB_op_pending, &pdev->flags)) { wake_up(&xen_pcibk_aer_wait_queue); + eoi = false; } + + /* EOI if there was nothing to do. */ + if (eoi) + xen_pcibk_lateeoi(pdev, XEN_EOI_FLAG_SPURIOUS); } /* Performing the configuration space reads/writes must not be done in atomic @@ -323,10 +338,8 @@ void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) * use of semaphores). This function is intended to be called from a work * queue in process context taking a struct xen_pcibk_device as a parameter */ -void xen_pcibk_do_op(struct work_struct *data) +static void xen_pcibk_do_one_op(struct xen_pcibk_device *pdev) { - struct xen_pcibk_device *pdev = - container_of(data, struct xen_pcibk_device, op_work); struct pci_dev *dev; struct xen_pcibk_dev_data *dev_data = NULL; struct xen_pci_op *op = &pdev->op; @@ -399,16 +412,31 @@ void xen_pcibk_do_op(struct work_struct *data) smp_mb__before_atomic(); /* /after/ clearing PCIF_active */ clear_bit(_PDEVF_op_active, &pdev->flags); smp_mb__after_atomic(); /* /before/ final check for work */ +} - /* Check to see if the driver domain tried to start another request in - * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. - */ - xen_pcibk_test_and_schedule_op(pdev); +void xen_pcibk_do_op(struct work_struct *data) +{ + struct xen_pcibk_device *pdev = + container_of(data, struct xen_pcibk_device, op_work); + + do { + xen_pcibk_do_one_op(pdev); + } while (xen_pcibk_test_op_pending(pdev)); + + xen_pcibk_lateeoi(pdev, 0); } irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id) { struct xen_pcibk_device *pdev = dev_id; + bool eoi; + + /* IRQs might come in before pdev->evtchn_irq is written. */ + if (unlikely(pdev->evtchn_irq != irq)) + pdev->evtchn_irq = irq; + + eoi = test_and_set_bit(_EOI_pending, &pdev->flags); + WARN(eoi, "IRQ while EOI pending\n"); xen_pcibk_test_and_schedule_op(pdev); diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c index c99f8bb1c56c..e6c7509a3d87 100644 --- a/drivers/xen/xen-pciback/vpci.c +++ b/drivers/xen/xen-pciback/vpci.c @@ -68,7 +68,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev, int devid, publish_pci_dev_cb publish_cb) { - int err = 0, slot, func = -1; + int err = 0, slot, func = PCI_FUNC(dev->devfn); struct pci_dev_entry *t, *dev_entry; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; @@ -93,23 +93,26 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, /* * Keep multi-function devices together on the virtual PCI bus, except - * virtual functions. + * that we want to keep virtual functions at func 0 on their own. They + * aren't multi-function devices and hence their presence at func 0 + * may cause guests to not scan the other functions. */ - if (!dev->is_virtfn) { + if (!dev->is_virtfn || func) { for (slot = 0; slot < PCI_SLOT_MAX; slot++) { if (list_empty(&vpci_dev->dev_list[slot])) continue; t = list_entry(list_first(&vpci_dev->dev_list[slot]), struct pci_dev_entry, list); + if (t->dev->is_virtfn && !PCI_FUNC(t->dev->devfn)) + continue; if (match_slot(dev, t->dev)) { pr_info("vpci: %s: assign to virtual slot %d func %d\n", pci_name(dev), slot, - PCI_FUNC(dev->devfn)); + func); list_add_tail(&dev_entry->list, &vpci_dev->dev_list[slot]); - func = PCI_FUNC(dev->devfn); goto unlock; } } @@ -122,7 +125,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, pci_name(dev), slot); list_add_tail(&dev_entry->list, &vpci_dev->dev_list[slot]); - func = dev->is_virtfn ? 0 : PCI_FUNC(dev->devfn); goto unlock; } } diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 4843741e703a..c784a01aa8cb 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -124,7 +124,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, pdev->sh_info = vaddr; - err = bind_interdomain_evtchn_to_irqhandler( + err = bind_interdomain_evtchn_to_irqhandler_lateeoi( pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event, 0, DRV_NAME, pdev); if (err < 0) { @@ -359,7 +359,8 @@ out: return err; } -static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev) +static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev, + enum xenbus_state state) { int err = 0; int num_devs; @@ -373,9 +374,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev) dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n"); mutex_lock(&pdev->dev_lock); - /* Make sure we only reconfigure once */ - if (xenbus_read_driver_state(pdev->xdev->nodename) != - XenbusStateReconfiguring) + if (xenbus_read_driver_state(pdev->xdev->nodename) != state) goto out; err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d", @@ -502,6 +501,10 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev) } } + if (state != XenbusStateReconfiguring) + /* Make sure we only reconfigure once. */ + goto out; + err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured); if (err) { xenbus_dev_fatal(pdev->xdev, err, @@ -527,7 +530,7 @@ static void xen_pcibk_frontend_changed(struct xenbus_device *xdev, break; case XenbusStateReconfiguring: - xen_pcibk_reconfigure(pdev); + xen_pcibk_reconfigure(pdev, XenbusStateReconfiguring); break; case XenbusStateConnected: @@ -666,6 +669,15 @@ static void xen_pcibk_be_watch(struct xenbus_watch *watch, xen_pcibk_setup_backend(pdev); break; + case XenbusStateInitialised: + /* + * We typically move to Initialised when the first device was + * added. Hence subsequent devices getting added may need + * reconfiguring. + */ + xen_pcibk_reconfigure(pdev, XenbusStateInitialised); + break; + default: break; } @@ -691,7 +703,7 @@ static int xen_pcibk_xenbus_probe(struct xenbus_device *dev, /* watch the backend node for backend configuration information */ err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch, - xen_pcibk_be_watch); + NULL, xen_pcibk_be_watch); if (err) goto out; diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 51387d75c7bf..7fb5a2e7fa81 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -91,7 +91,6 @@ struct vscsibk_info { unsigned int irq; struct vscsiif_back_ring ring; - int ring_error; spinlock_t ring_lock; atomic_t nr_unreplied_reqs; @@ -416,12 +415,12 @@ static int scsiback_gnttab_data_map_batch(struct gnttab_map_grant_ref *map, return 0; err = gnttab_map_refs(map, NULL, pg, cnt); - BUG_ON(err); for (i = 0; i < cnt; i++) { if (unlikely(map[i].status != GNTST_okay)) { pr_err("invalid buffer -- could not remap it\n"); map[i].handle = SCSIBACK_INVALID_HANDLE; - err = -ENOMEM; + if (!err) + err = -ENOMEM; } else { get_page(pg[i]); } @@ -698,7 +697,8 @@ static int prepare_pending_reqs(struct vscsibk_info *info, return 0; } -static int scsiback_do_cmd_fn(struct vscsibk_info *info) +static int scsiback_do_cmd_fn(struct vscsibk_info *info, + unsigned int *eoi_flags) { struct vscsiif_back_ring *ring = &info->ring; struct vscsiif_request ring_req; @@ -715,11 +715,12 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info) rc = ring->rsp_prod_pvt; pr_warn("Dom%d provided bogus ring requests (%#x - %#x = %u). Halting ring processing\n", info->domid, rp, rc, rp - rc); - info->ring_error = 1; - return 0; + return -EINVAL; } while ((rc != rp)) { + *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS; + if (RING_REQUEST_CONS_OVERFLOW(ring, rc)) break; pending_req = kmem_cache_alloc(scsiback_cachep, GFP_KERNEL); @@ -782,13 +783,16 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info) static irqreturn_t scsiback_irq_fn(int irq, void *dev_id) { struct vscsibk_info *info = dev_id; + int rc; + unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS; - if (info->ring_error) - return IRQ_HANDLED; - - while (scsiback_do_cmd_fn(info)) + while ((rc = scsiback_do_cmd_fn(info, &eoi_flags)) > 0) cond_resched(); + /* In case of a ring error we keep the event channel masked. */ + if (!rc) + xen_irq_lateeoi(irq, eoi_flags); + return IRQ_HANDLED; } @@ -809,7 +813,7 @@ static int scsiback_init_sring(struct vscsibk_info *info, grant_ref_t ring_ref, sring = (struct vscsiif_sring *)area; BACK_RING_INIT(&info->ring, sring, PAGE_SIZE); - err = bind_interdomain_evtchn_to_irq(info->domid, evtchn); + err = bind_interdomain_evtchn_to_irq_lateeoi(info->domid, evtchn); if (err < 0) goto unmap_page; @@ -1210,7 +1214,6 @@ static int scsiback_probe(struct xenbus_device *dev, info->domid = dev->otherend_id; spin_lock_init(&info->ring_lock); - info->ring_error = 0; atomic_set(&info->nr_unreplied_reqs, 0); init_waitqueue_head(&info->waiting_to_free); info->dev = dev; diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 266f446ba331..8bbd887ca422 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -114,18 +114,22 @@ EXPORT_SYMBOL_GPL(xenbus_strstate); */ int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char **, unsigned int), void (*callback)(struct xenbus_watch *, const char **, unsigned int)) { int err; watch->node = path; + watch->will_handle = will_handle; watch->callback = callback; err = register_xenbus_watch(watch); if (err) { watch->node = NULL; + watch->will_handle = NULL; watch->callback = NULL; xenbus_dev_fatal(dev, err, "adding watch on %s", path); } @@ -152,6 +156,8 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path); */ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char **, unsigned int), void (*callback)(struct xenbus_watch *, const char **, unsigned int), const char *pathfmt, ...) @@ -168,7 +174,7 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch"); return -ENOMEM; } - err = xenbus_watch_path(dev, path, watch, callback); + err = xenbus_watch_path(dev, path, watch, will_handle, callback); if (err) kfree(path); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index c2d447687e33..ba7590d75985 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -137,6 +137,7 @@ static int watch_otherend(struct xenbus_device *dev) container_of(dev->dev.bus, struct xen_bus_type, bus); return xenbus_watch_pathfmt(dev, &dev->otherend_watch, + bus->otherend_will_handle, bus->otherend_changed, "%s/%s", dev->otherend, "state"); } diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h index c9ec7ca1f7ab..2c394c6ba605 100644 --- a/drivers/xen/xenbus/xenbus_probe.h +++ b/drivers/xen/xenbus/xenbus_probe.h @@ -42,6 +42,8 @@ struct xen_bus_type { int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); int (*probe)(struct xen_bus_type *bus, const char *type, const char *dir); + bool (*otherend_will_handle)(struct xenbus_watch *watch, + const char **vec, unsigned int len); void (*otherend_changed)(struct xenbus_watch *watch, const char **vec, unsigned int len); struct bus_type bus; diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index 04f7f85a5edf..597c0b038454 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -181,6 +181,12 @@ static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type, return err; } +static bool frontend_will_handle(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + return watch->nr_pending == 0; +} + static void frontend_changed(struct xenbus_watch *watch, const char **vec, unsigned int len) { @@ -192,6 +198,7 @@ static struct xen_bus_type xenbus_backend = { .levels = 3, /* backend/type/<frontend>/<id> */ .get_bus_id = backend_bus_id, .probe = xenbus_probe_backend, + .otherend_will_handle = frontend_will_handle, .otherend_changed = frontend_changed, .bus = { .name = "xen-backend", diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index ce65591b4168..d98d88fae58a 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -701,6 +701,8 @@ int register_xenbus_watch(struct xenbus_watch *watch) sprintf(token, "%lX", (long)watch); + watch->nr_pending = 0; + down_read(&xs_state.watch_mutex); spin_lock(&watches_lock); @@ -750,12 +752,15 @@ void unregister_xenbus_watch(struct xenbus_watch *watch) /* Cancel pending watch events. */ spin_lock(&watch_events_lock); - list_for_each_entry_safe(msg, tmp, &watch_events, list) { - if (msg->u.watch.handle != watch) - continue; - list_del(&msg->list); - kfree(msg->u.watch.vec); - kfree(msg); + if (watch->nr_pending) { + list_for_each_entry_safe(msg, tmp, &watch_events, list) { + if (msg->u.watch.handle != watch) + continue; + list_del(&msg->list); + kfree(msg->u.watch.vec); + kfree(msg); + } + watch->nr_pending = 0; } spin_unlock(&watch_events_lock); @@ -802,7 +807,6 @@ void xs_suspend_cancel(void) static int xenwatch_thread(void *unused) { - struct list_head *ent; struct xs_stored_msg *msg; for (;;) { @@ -815,13 +819,15 @@ static int xenwatch_thread(void *unused) mutex_lock(&xenwatch_mutex); spin_lock(&watch_events_lock); - ent = watch_events.next; - if (ent != &watch_events) - list_del(ent); + msg = list_first_entry_or_null(&watch_events, + struct xs_stored_msg, list); + if (msg) { + list_del(&msg->list); + msg->u.watch.handle->nr_pending--; + } spin_unlock(&watch_events_lock); - if (ent != &watch_events) { - msg = list_entry(ent, struct xs_stored_msg, list); + if (msg) { msg->u.watch.handle->callback( msg->u.watch.handle, (const char **)msg->u.watch.vec, @@ -903,9 +909,15 @@ static int process_msg(void) spin_lock(&watches_lock); msg->u.watch.handle = find_watch( msg->u.watch.vec[XS_WATCH_TOKEN]); - if (msg->u.watch.handle != NULL) { + if (msg->u.watch.handle != NULL && + (!msg->u.watch.handle->will_handle || + msg->u.watch.handle->will_handle( + msg->u.watch.handle, + (const char **)msg->u.watch.vec, + msg->u.watch.vec_size))) { spin_lock(&watch_events_lock); list_add_tail(&msg->list, &watch_events); + msg->u.watch.handle->nr_pending++; wake_up(&watch_events_waitq); spin_unlock(&watch_events_lock); } else { |