From b4b7ff4b08e691656c9d77c758fc355833128ac0 Mon Sep 17 00:00:00 2001 From: André Fabian Silva Delgado Date: Wed, 20 Jan 2016 14:01:31 -0300 Subject: Linux-libre 4.4-gnu --- drivers/xen/Makefile | 2 - drivers/xen/balloon.c | 341 ++++++++++++++++++++++------------ drivers/xen/biomerge.c | 8 + drivers/xen/cpu_hotplug.c | 14 +- drivers/xen/events/events_base.c | 2 +- drivers/xen/events/events_fifo.c | 25 ++- drivers/xen/evtchn.c | 123 ++++++++++-- drivers/xen/gntdev.c | 2 +- drivers/xen/grant-table.c | 56 +++++- drivers/xen/privcmd.c | 10 +- drivers/xen/swiotlb-xen.c | 43 +++-- drivers/xen/xen-pciback/pciback.h | 1 + drivers/xen/xen-pciback/pciback_ops.c | 75 ++++++-- drivers/xen/xen-pciback/xenbus.c | 4 +- drivers/xen/xen-scsiback.c | 34 ++-- drivers/xen/xenbus/xenbus_client.c | 128 +++++++++---- drivers/xen/xenbus/xenbus_probe.c | 3 +- drivers/xen/xlate_mmu.c | 124 +++++++++---- 18 files changed, 704 insertions(+), 291 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index e293bc507..aa8a7f71f 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,6 +1,4 @@ -ifeq ($(filter y, $(CONFIG_ARM) $(CONFIG_ARM64)),) obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o -endif obj-$(CONFIG_X86) += fallback.o obj-y += grant-table.o features.o balloon.o manage.o preempt.o obj-y += events/ diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index c79329fcf..12eab503e 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -54,6 +54,8 @@ #include #include #include +#include +#include #include #include @@ -70,16 +72,64 @@ #include #include +static int xen_hotplug_unpopulated; + +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + +static int zero; +static int one = 1; + +static struct ctl_table balloon_table[] = { + { + .procname = "hotplug_unpopulated", + .data = &xen_hotplug_unpopulated, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { } +}; + +static struct ctl_table balloon_root[] = { + { + .procname = "balloon", + .mode = 0555, + .child = balloon_table, + }, + { } +}; + +static struct ctl_table xen_root[] = { + { + .procname = "xen", + .mode = 0555, + .child = balloon_root, + }, + { } +}; + +#endif + +/* + * Use one extent per PAGE_SIZE to avoid to break down the page into + * multiple frame. + */ +#define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1) + /* * balloon_process() state: * * BP_DONE: done or nothing to do, + * BP_WAIT: wait to be rescheduled, * BP_EAGAIN: error, go to sleep, * BP_ECANCELED: error, balloon operation canceled. */ enum bp_state { BP_DONE, + BP_WAIT, BP_EAGAIN, BP_ECANCELED }; @@ -91,11 +141,12 @@ struct balloon_stats balloon_stats; EXPORT_SYMBOL_GPL(balloon_stats); /* We increase/decrease in batches which fit in a page */ -static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)]; +static xen_pfn_t frame_list[PAGE_SIZE / sizeof(xen_pfn_t)]; /* List of ballooned pages, threaded through the mem_map array. */ static LIST_HEAD(ballooned_pages); +static DECLARE_WAIT_QUEUE_HEAD(balloon_wq); /* Main work function, always executed in process context. */ static void balloon_process(struct work_struct *work); @@ -124,6 +175,7 @@ static void __balloon_append(struct page *page) list_add(&page->lru, &ballooned_pages); balloon_stats.balloon_low++; } + wake_up(&balloon_wq); } static void balloon_append(struct page *page) @@ -133,17 +185,16 @@ static void balloon_append(struct page *page) } /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ -static struct page *balloon_retrieve(bool prefer_highmem) +static struct page *balloon_retrieve(bool require_lowmem) { struct page *page; if (list_empty(&ballooned_pages)) return NULL; - if (prefer_highmem) - page = list_entry(ballooned_pages.prev, struct page, lru); - else - page = list_entry(ballooned_pages.next, struct page, lru); + page = list_entry(ballooned_pages.next, struct page, lru); + if (require_lowmem && PageHighMem(page)) + return NULL; list_del(&page->lru); if (PageHighMem(page)) @@ -166,6 +217,9 @@ static struct page *balloon_next_page(struct page *page) static enum bp_state update_schedule(enum bp_state state) { + if (state == BP_WAIT) + return BP_WAIT; + if (state == BP_ECANCELED) return BP_ECANCELED; @@ -193,43 +247,75 @@ static enum bp_state update_schedule(enum bp_state state) } #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG -static long current_credit(void) +static struct resource *additional_memory_resource(phys_addr_t size) { - return balloon_stats.target_pages - balloon_stats.current_pages - - balloon_stats.hotplug_pages; + struct resource *res; + int ret; + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return NULL; + + res->name = "System RAM"; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + ret = allocate_resource(&iomem_resource, res, + size, 0, -1, + PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); + if (ret < 0) { + pr_err("Cannot allocate new System RAM resource\n"); + kfree(res); + return NULL; + } + + return res; } -static bool balloon_is_inflated(void) +static void release_memory_resource(struct resource *resource) { - if (balloon_stats.balloon_low || balloon_stats.balloon_high || - balloon_stats.balloon_hotplug) - return true; - else - return false; -} + if (!resource) + return; -/* - * reserve_additional_memory() adds memory region of size >= credit above - * max_pfn. New region is section aligned and size is modified to be multiple - * of section size. Those features allow optimal use of address space and - * establish proper alignment when this function is called first time after - * boot (last section not fully populated at boot time contains unused memory - * pages with PG_reserved bit not set; online_pages_range() does not allow page - * onlining in whole range if first onlined page does not have PG_reserved - * bit set). Real size of added memory is established at page onlining stage. - */ + /* + * No need to reset region to identity mapped since we now + * know that no I/O can be in this region + */ + release_resource(resource); + kfree(resource); +} -static enum bp_state reserve_additional_memory(long credit) +static enum bp_state reserve_additional_memory(void) { + long credit; + struct resource *resource; int nid, rc; - u64 hotplug_start_paddr; - unsigned long balloon_hotplug = credit; + unsigned long balloon_hotplug; + + credit = balloon_stats.target_pages + balloon_stats.target_unpopulated + - balloon_stats.total_pages; + + /* + * Already hotplugged enough pages? Wait for them to be + * onlined. + */ + if (credit <= 0) + return BP_WAIT; - hotplug_start_paddr = PFN_PHYS(SECTION_ALIGN_UP(max_pfn)); - balloon_hotplug = round_up(balloon_hotplug, PAGES_PER_SECTION); - nid = memory_add_physaddr_to_nid(hotplug_start_paddr); + balloon_hotplug = round_up(credit, PAGES_PER_SECTION); + + resource = additional_memory_resource(balloon_hotplug * PAGE_SIZE); + if (!resource) + goto err; + + nid = memory_add_physaddr_to_nid(resource->start); #ifdef CONFIG_XEN_HAVE_PVMMU + /* + * We don't support PV MMU when Linux and Xen is using + * different page granularity. + */ + BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE); + /* * add_memory() will build page tables for the new memory so * the p2m must contain invalid entries so the correct @@ -242,29 +328,28 @@ static enum bp_state reserve_additional_memory(long credit) if (!xen_feature(XENFEAT_auto_translated_physmap)) { unsigned long pfn, i; - pfn = PFN_DOWN(hotplug_start_paddr); + pfn = PFN_DOWN(resource->start); for (i = 0; i < balloon_hotplug; i++) { if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) { pr_warn("set_phys_to_machine() failed, no memory added\n"); - return BP_ECANCELED; + goto err; } } } #endif - rc = add_memory(nid, hotplug_start_paddr, balloon_hotplug << PAGE_SHIFT); - + rc = add_memory_resource(nid, resource); if (rc) { pr_warn("Cannot add additional memory (%i)\n", rc); - return BP_ECANCELED; + goto err; } - balloon_hotplug -= credit; + balloon_stats.total_pages += balloon_hotplug; - balloon_stats.hotplug_pages += credit; - balloon_stats.balloon_hotplug = balloon_hotplug; - - return BP_DONE; + return BP_WAIT; + err: + release_memory_resource(resource); + return BP_ECANCELED; } static void xen_online_page(struct page *page) @@ -275,11 +360,6 @@ static void xen_online_page(struct page *page) __balloon_append(page); - if (balloon_stats.hotplug_pages) - --balloon_stats.hotplug_pages; - else - --balloon_stats.balloon_hotplug; - mutex_unlock(&balloon_mutex); } @@ -296,53 +376,34 @@ static struct notifier_block xen_memory_nb = { .priority = 0 }; #else -static long current_credit(void) +static enum bp_state reserve_additional_memory(void) { - unsigned long target = balloon_stats.target_pages; - - target = min(target, - balloon_stats.current_pages + - balloon_stats.balloon_low + - balloon_stats.balloon_high); - - return target - balloon_stats.current_pages; + balloon_stats.target_pages = balloon_stats.current_pages; + return BP_ECANCELED; } +#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ -static bool balloon_is_inflated(void) +static long current_credit(void) { - if (balloon_stats.balloon_low || balloon_stats.balloon_high) - return true; - else - return false; + return balloon_stats.target_pages - balloon_stats.current_pages; } -static enum bp_state reserve_additional_memory(long credit) +static bool balloon_is_inflated(void) { - balloon_stats.target_pages = balloon_stats.current_pages; - return BP_DONE; + return balloon_stats.balloon_low || balloon_stats.balloon_high; } -#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ static enum bp_state increase_reservation(unsigned long nr_pages) { int rc; - unsigned long pfn, i; + unsigned long i; struct page *page; struct xen_memory_reservation reservation = { .address_bits = 0, - .extent_order = 0, + .extent_order = EXTENT_ORDER, .domid = DOMID_SELF }; -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG - if (!balloon_stats.balloon_low && !balloon_stats.balloon_high) { - nr_pages = min(nr_pages, balloon_stats.balloon_hotplug); - balloon_stats.hotplug_pages += nr_pages; - balloon_stats.balloon_hotplug -= nr_pages; - return BP_DONE; - } -#endif - if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); @@ -352,7 +413,11 @@ static enum bp_state increase_reservation(unsigned long nr_pages) nr_pages = i; break; } - frame_list[i] = page_to_pfn(page); + + /* XENMEM_populate_physmap requires a PFN based on Xen + * granularity. + */ + frame_list[i] = page_to_xen_pfn(page); page = balloon_next_page(page); } @@ -366,10 +431,16 @@ static enum bp_state increase_reservation(unsigned long nr_pages) page = balloon_retrieve(false); BUG_ON(page == NULL); - pfn = page_to_pfn(page); - #ifdef CONFIG_XEN_HAVE_PVMMU + /* + * We don't support PV MMU when Linux and Xen is using + * different page granularity. + */ + BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE); + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + unsigned long pfn = page_to_pfn(page); + set_phys_to_machine(pfn, frame_list[i]); /* Link back into the page tables if not highmem. */ @@ -396,23 +467,15 @@ static enum bp_state increase_reservation(unsigned long nr_pages) static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) { enum bp_state state = BP_DONE; - unsigned long pfn, i; - struct page *page; + unsigned long i; + struct page *page, *tmp; int ret; struct xen_memory_reservation reservation = { .address_bits = 0, - .extent_order = 0, + .extent_order = EXTENT_ORDER, .domid = DOMID_SELF }; - -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG - if (balloon_stats.hotplug_pages) { - nr_pages = min(nr_pages, balloon_stats.hotplug_pages); - balloon_stats.hotplug_pages -= nr_pages; - balloon_stats.balloon_hotplug += nr_pages; - return BP_DONE; - } -#endif + LIST_HEAD(pages); if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); @@ -425,8 +488,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) break; } scrub_page(page); - - frame_list[i] = page_to_pfn(page); + list_add(&page->lru, &pages); } /* @@ -438,14 +500,25 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) */ kmap_flush_unused(); - /* Update direct mapping, invalidate P2M, and add to balloon. */ - for (i = 0; i < nr_pages; i++) { - pfn = frame_list[i]; - frame_list[i] = pfn_to_gfn(pfn); - page = pfn_to_page(pfn); + /* + * Setup the frame, update direct mapping, invalidate P2M, + * and add to balloon. + */ + i = 0; + list_for_each_entry_safe(page, tmp, &pages, lru) { + /* XENMEM_decrease_reservation requires a GFN */ + frame_list[i++] = xen_page_to_gfn(page); #ifdef CONFIG_XEN_HAVE_PVMMU + /* + * We don't support PV MMU when Linux and Xen is using + * different page granularity. + */ + BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE); + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + unsigned long pfn = page_to_pfn(page); + if (!PageHighMem(page)) { ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), @@ -455,6 +528,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); } #endif + list_del(&page->lru); balloon_append(page); } @@ -492,7 +566,7 @@ static void balloon_process(struct work_struct *work) if (balloon_is_inflated()) state = increase_reservation(credit); else - state = reserve_additional_memory(credit); + state = reserve_additional_memory(); } if (credit < 0) @@ -520,41 +594,71 @@ void balloon_set_new_target(unsigned long target) } EXPORT_SYMBOL_GPL(balloon_set_new_target); +static int add_ballooned_pages(int nr_pages) +{ + enum bp_state st; + + if (xen_hotplug_unpopulated) { + st = reserve_additional_memory(); + if (st != BP_ECANCELED) { + mutex_unlock(&balloon_mutex); + wait_event(balloon_wq, + !list_empty(&ballooned_pages)); + mutex_lock(&balloon_mutex); + return 0; + } + } + + st = decrease_reservation(nr_pages, GFP_USER); + if (st != BP_DONE) + return -ENOMEM; + + return 0; +} + /** * alloc_xenballooned_pages - get pages that have been ballooned out * @nr_pages: Number of pages to get * @pages: pages returned - * @highmem: allow highmem pages * @return 0 on success, error otherwise */ -int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem) +int alloc_xenballooned_pages(int nr_pages, struct page **pages) { int pgno = 0; struct page *page; + int ret; + mutex_lock(&balloon_mutex); + + balloon_stats.target_unpopulated += nr_pages; + while (pgno < nr_pages) { - page = balloon_retrieve(highmem); - if (page && (highmem || !PageHighMem(page))) { + page = balloon_retrieve(true); + if (page) { pages[pgno++] = page; +#ifdef CONFIG_XEN_HAVE_PVMMU + /* + * We don't support PV MMU when Linux and Xen is using + * different page granularity. + */ + BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE); + + ret = xen_alloc_p2m_entry(page_to_pfn(page)); + if (ret < 0) + goto out_undo; +#endif } else { - enum bp_state st; - if (page) - balloon_append(page); - st = decrease_reservation(nr_pages - pgno, - highmem ? GFP_HIGHUSER : GFP_USER); - if (st != BP_DONE) + ret = add_ballooned_pages(nr_pages - pgno); + if (ret < 0) goto out_undo; } } mutex_unlock(&balloon_mutex); return 0; out_undo: - while (pgno) - balloon_append(pages[--pgno]); - /* Free the memory back to the kernel soon */ - schedule_delayed_work(&balloon_worker, 0); mutex_unlock(&balloon_mutex); - return -ENOMEM; + free_xenballooned_pages(pgno, pages); + return ret; } EXPORT_SYMBOL(alloc_xenballooned_pages); @@ -574,6 +678,8 @@ void free_xenballooned_pages(int nr_pages, struct page **pages) balloon_append(pages[i]); } + balloon_stats.target_unpopulated -= nr_pages; + /* The balloon may be too large now. Shrink it if needed. */ if (current_credit()) schedule_delayed_work(&balloon_worker, 0); @@ -602,6 +708,8 @@ static void __init balloon_add_region(unsigned long start_pfn, don't subtract from it. */ __balloon_append(page); } + + balloon_stats.total_pages += extra_pfn_end - start_pfn; } static int __init balloon_init(void) @@ -619,6 +727,7 @@ static int __init balloon_init(void) balloon_stats.target_pages = balloon_stats.current_pages; balloon_stats.balloon_low = 0; balloon_stats.balloon_high = 0; + balloon_stats.total_pages = balloon_stats.current_pages; balloon_stats.schedule_delay = 1; balloon_stats.max_schedule_delay = 32; @@ -626,11 +735,9 @@ static int __init balloon_init(void) balloon_stats.max_retry_count = RETRY_UNLIMITED; #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG - balloon_stats.hotplug_pages = 0; - balloon_stats.balloon_hotplug = 0; - set_online_page_callback(&xen_online_page); register_memory_notifier(&xen_memory_nb); + register_sysctl_table(xen_root); #endif /* diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c index 8ae2fc90e..4da69dbf7 100644 --- a/drivers/xen/biomerge.c +++ b/drivers/xen/biomerge.c @@ -6,10 +6,18 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, const struct bio_vec *vec2) { +#if XEN_PAGE_SIZE == PAGE_SIZE unsigned long bfn1 = pfn_to_bfn(page_to_pfn(vec1->bv_page)); unsigned long bfn2 = pfn_to_bfn(page_to_pfn(vec2->bv_page)); return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) && ((bfn1 == bfn2) || ((bfn1+1) == bfn2)); +#else + /* + * XXX: Add support for merging bio_vec when using different page + * size in Xen and Linux. + */ + return 0; +#endif } EXPORT_SYMBOL(xen_biovec_phys_mergeable); diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c index cc6513a17..5676aefdf 100644 --- a/drivers/xen/cpu_hotplug.c +++ b/drivers/xen/cpu_hotplug.c @@ -11,15 +11,20 @@ static void enable_hotplug_cpu(int cpu) { if (!cpu_present(cpu)) - arch_register_cpu(cpu); + xen_arch_register_cpu(cpu); set_cpu_present(cpu, true); } static void disable_hotplug_cpu(int cpu) { + if (cpu_online(cpu)) { + lock_device_hotplug(); + device_offline(get_cpu_device(cpu)); + unlock_device_hotplug(); + } if (cpu_present(cpu)) - arch_unregister_cpu(cpu); + xen_arch_unregister_cpu(cpu); set_cpu_present(cpu, false); } @@ -55,7 +60,6 @@ static void vcpu_hotplug(unsigned int cpu) enable_hotplug_cpu(cpu); break; case 0: - (void)cpu_down(cpu); disable_hotplug_cpu(cpu); break; default: @@ -102,7 +106,11 @@ static int __init setup_vcpu_hotplug_event(void) static struct notifier_block xsn_cpu = { .notifier_call = setup_cpu_watcher }; +#ifdef CONFIG_X86 if (!xen_pv_domain()) +#else + if (!xen_domain()) +#endif return -ENODEV; register_xenstore_notifier(&xsn_cpu); diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index fb2362399..524c22146 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -41,11 +41,11 @@ #include #include #include -#include #endif #include #include #include +#include #include #include diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index 1d4baf56c..96a1b8da5 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -54,7 +54,7 @@ #include "events_internal.h" -#define EVENT_WORDS_PER_PAGE (PAGE_SIZE / sizeof(event_word_t)) +#define EVENT_WORDS_PER_PAGE (XEN_PAGE_SIZE / sizeof(event_word_t)) #define MAX_EVENT_ARRAY_PAGES (EVTCHN_FIFO_NR_CHANNELS / EVENT_WORDS_PER_PAGE) struct evtchn_fifo_queue { @@ -281,7 +281,8 @@ static void handle_irq_for_port(unsigned port) static void consume_one_event(unsigned cpu, struct evtchn_fifo_control_block *control_block, - unsigned priority, unsigned long *ready) + unsigned priority, unsigned long *ready, + bool drop) { struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu); uint32_t head; @@ -313,13 +314,17 @@ static void consume_one_event(unsigned cpu, if (head == 0) clear_bit(priority, ready); - if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) - handle_irq_for_port(port); + if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) { + if (unlikely(drop)) + pr_warn("Dropping pending event for port %u\n", port); + else + handle_irq_for_port(port); + } q->head[priority] = head; } -static void evtchn_fifo_handle_events(unsigned cpu) +static void __evtchn_fifo_handle_events(unsigned cpu, bool drop) { struct evtchn_fifo_control_block *control_block; unsigned long ready; @@ -331,11 +336,16 @@ static void evtchn_fifo_handle_events(unsigned cpu) while (ready) { q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES); - consume_one_event(cpu, control_block, q, &ready); + consume_one_event(cpu, control_block, q, &ready, drop); ready |= xchg(&control_block->ready, 0); } } +static void evtchn_fifo_handle_events(unsigned cpu) +{ + __evtchn_fifo_handle_events(cpu, false); +} + static void evtchn_fifo_resume(void) { unsigned cpu; @@ -420,6 +430,9 @@ static int evtchn_fifo_cpu_notification(struct notifier_block *self, if (!per_cpu(cpu_control_block, cpu)) ret = evtchn_fifo_alloc_control_block(cpu); break; + case CPU_DEAD: + __evtchn_fifo_handle_events(cpu, true); + break; default: break; } diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 00f40f051..38272ad24 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -49,6 +49,8 @@ #include #include #include +#include +#include #include #include @@ -58,10 +60,10 @@ struct per_user_data { struct mutex bind_mutex; /* serialize bind/unbind operations */ struct rb_root evtchns; + unsigned int nr_evtchns; /* Notification ring, accessed via /dev/xen/evtchn. */ -#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) -#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) + unsigned int ring_size; evtchn_port_t *ring; unsigned int ring_cons, ring_prod, ring_overflow; struct mutex ring_cons_mutex; /* protect against concurrent readers */ @@ -80,10 +82,41 @@ struct user_evtchn { bool enabled; }; +static evtchn_port_t *evtchn_alloc_ring(unsigned int size) +{ + evtchn_port_t *ring; + size_t s = size * sizeof(*ring); + + ring = kmalloc(s, GFP_KERNEL); + if (!ring) + ring = vmalloc(s); + + return ring; +} + +static void evtchn_free_ring(evtchn_port_t *ring) +{ + kvfree(ring); +} + +static unsigned int evtchn_ring_offset(struct per_user_data *u, + unsigned int idx) +{ + return idx & (u->ring_size - 1); +} + +static evtchn_port_t *evtchn_ring_entry(struct per_user_data *u, + unsigned int idx) +{ + return u->ring + evtchn_ring_offset(u, idx); +} + static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) { struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL; + u->nr_evtchns++; + while (*new) { struct user_evtchn *this; @@ -107,6 +140,7 @@ static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) { + u->nr_evtchns--; rb_erase(&evtchn->node, &u->evtchns); kfree(evtchn); } @@ -144,8 +178,8 @@ static irqreturn_t evtchn_interrupt(int irq, void *data) spin_lock(&u->ring_prod_lock); - if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { - u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port; + if ((u->ring_prod - u->ring_cons) < u->ring_size) { + *evtchn_ring_entry(u, u->ring_prod) = evtchn->port; wmb(); /* Ensure ring contents visible */ if (u->ring_cons == u->ring_prod++) { wake_up_interruptible(&u->evtchn_wait); @@ -200,10 +234,10 @@ static ssize_t evtchn_read(struct file *file, char __user *buf, } /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ - if (((c ^ p) & EVTCHN_RING_SIZE) != 0) { - bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * + if (((c ^ p) & u->ring_size) != 0) { + bytes1 = (u->ring_size - evtchn_ring_offset(u, c)) * sizeof(evtchn_port_t); - bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t); + bytes2 = evtchn_ring_offset(u, p) * sizeof(evtchn_port_t); } else { bytes1 = (p - c) * sizeof(evtchn_port_t); bytes2 = 0; @@ -219,7 +253,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf, rc = -EFAULT; rmb(); /* Ensure that we see the port before we copy it. */ - if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) || + if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) || ((bytes2 != 0) && copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) goto unlock_out; @@ -278,6 +312,66 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, return rc; } +static int evtchn_resize_ring(struct per_user_data *u) +{ + unsigned int new_size; + evtchn_port_t *new_ring, *old_ring; + unsigned int p, c; + + /* + * Ensure the ring is large enough to capture all possible + * events. i.e., one free slot for each bound event. + */ + if (u->nr_evtchns <= u->ring_size) + return 0; + + if (u->ring_size == 0) + new_size = 64; + else + new_size = 2 * u->ring_size; + + new_ring = evtchn_alloc_ring(new_size); + if (!new_ring) + return -ENOMEM; + + old_ring = u->ring; + + /* + * Access to the ring contents is serialized by either the + * prod /or/ cons lock so take both when resizing. + */ + mutex_lock(&u->ring_cons_mutex); + spin_lock_irq(&u->ring_prod_lock); + + /* + * Copy the old ring contents to the new ring. + * + * If the ring contents crosses the end of the current ring, + * it needs to be copied in two chunks. + * + * +---------+ +------------------+ + * |34567 12| -> | 1234567 | + * +-----p-c-+ +------------------+ + */ + p = evtchn_ring_offset(u, u->ring_prod); + c = evtchn_ring_offset(u, u->ring_cons); + if (p < c) { + memcpy(new_ring + c, u->ring + c, (u->ring_size - c) * sizeof(*u->ring)); + memcpy(new_ring + u->ring_size, u->ring, p * sizeof(*u->ring)); + } else + memcpy(new_ring + c, u->ring + c, (p - c) * sizeof(*u->ring)); + + u->ring = new_ring; + u->ring_size = new_size; + + spin_unlock_irq(&u->ring_prod_lock); + mutex_unlock(&u->ring_cons_mutex); + + evtchn_free_ring(old_ring); + + return 0; +} + static int evtchn_bind_to_user(struct per_user_data *u, int port) { struct user_evtchn *evtchn; @@ -305,6 +399,10 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) if (rc < 0) goto err; + rc = evtchn_resize_ring(u); + if (rc < 0) + goto err; + rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0, u->name, evtchn); if (rc < 0) @@ -503,13 +601,6 @@ static int evtchn_open(struct inode *inode, struct file *filp) init_waitqueue_head(&u->evtchn_wait); - u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL); - if (u->ring == NULL) { - kfree(u->name); - kfree(u); - return -ENOMEM; - } - mutex_init(&u->bind_mutex); mutex_init(&u->ring_cons_mutex); spin_lock_init(&u->ring_prod_lock); @@ -532,7 +623,7 @@ static int evtchn_release(struct inode *inode, struct file *filp) evtchn_unbind_from_user(u, evtchn); } - free_page((unsigned long)u->ring); + evtchn_free_ring(u->ring); kfree(u->name); kfree(u); diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 2ea0b3b2a..1be5dd048 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -804,7 +804,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) vma->vm_ops = &gntdev_vmops; - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_IO; if (use_ptemod) vma->vm_flags |= VM_DONTCOPY; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 62f591f87..c49f79ed5 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -642,7 +642,7 @@ int gnttab_setup_auto_xlat_frames(phys_addr_t addr) if (xen_auto_xlat_grant_frames.count) return -EINVAL; - vaddr = xen_remap(addr, PAGE_SIZE * max_nr_gframes); + vaddr = xen_remap(addr, XEN_PAGE_SIZE * max_nr_gframes); if (vaddr == NULL) { pr_warn("Failed to ioremap gnttab share frames (addr=%pa)!\n", &addr); @@ -654,7 +654,7 @@ int gnttab_setup_auto_xlat_frames(phys_addr_t addr) return -ENOMEM; } for (i = 0; i < max_nr_gframes; i++) - pfn[i] = PFN_DOWN(addr) + i; + pfn[i] = XEN_PFN_DOWN(addr) + i; xen_auto_xlat_grant_frames.vaddr = vaddr; xen_auto_xlat_grant_frames.pfn = pfn; @@ -687,7 +687,7 @@ int gnttab_alloc_pages(int nr_pages, struct page **pages) int i; int ret; - ret = alloc_xenballooned_pages(nr_pages, pages, false); + ret = alloc_xenballooned_pages(nr_pages, pages); if (ret < 0) return ret; @@ -776,6 +776,54 @@ void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count) } EXPORT_SYMBOL_GPL(gnttab_batch_copy); +void gnttab_foreach_grant_in_range(struct page *page, + unsigned int offset, + unsigned int len, + xen_grant_fn_t fn, + void *data) +{ + unsigned int goffset; + unsigned int glen; + unsigned long xen_pfn; + + len = min_t(unsigned int, PAGE_SIZE - offset, len); + goffset = xen_offset_in_page(offset); + + xen_pfn = page_to_xen_pfn(page) + XEN_PFN_DOWN(offset); + + while (len) { + glen = min_t(unsigned int, XEN_PAGE_SIZE - goffset, len); + fn(pfn_to_gfn(xen_pfn), goffset, glen, data); + + goffset = 0; + xen_pfn++; + len -= glen; + } +} +EXPORT_SYMBOL_GPL(gnttab_foreach_grant_in_range); + +void gnttab_foreach_grant(struct page **pages, + unsigned int nr_grefs, + xen_grant_fn_t fn, + void *data) +{ + unsigned int goffset = 0; + unsigned long xen_pfn = 0; + unsigned int i; + + for (i = 0; i < nr_grefs; i++) { + if ((i % XEN_PFN_PER_PAGE) == 0) { + xen_pfn = page_to_xen_pfn(pages[i / XEN_PFN_PER_PAGE]); + goffset = 0; + } + + fn(pfn_to_gfn(xen_pfn), goffset, XEN_PAGE_SIZE, data); + + goffset += XEN_PAGE_SIZE; + xen_pfn++; + } +} + int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count) @@ -978,7 +1026,7 @@ static void gnttab_request_version(void) { /* Only version 1 is used, which will always be available. */ grant_table_version = 1; - grefs_per_grant_frame = PAGE_SIZE / sizeof(struct grant_entry_v1); + grefs_per_grant_frame = XEN_PAGE_SIZE / sizeof(struct grant_entry_v1); gnttab_interface = &gnttab_v1_ops; pr_info("Grant tables using version %d layout\n", grant_table_version); diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 5e9adac92..df2e6f783 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -401,7 +401,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) if (pages == NULL) return -ENOMEM; - rc = alloc_xenballooned_pages(numpgs, pages, 0); + rc = alloc_xenballooned_pages(numpgs, pages); if (rc != 0) { pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__, numpgs, rc); @@ -446,7 +446,7 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) return -EINVAL; } - nr_pages = m.num; + nr_pages = DIV_ROUND_UP(m.num, XEN_PFN_PER_PAGE); if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) return -EINVAL; @@ -494,7 +494,7 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) goto out_unlock; } if (xen_feature(XENFEAT_auto_translated_physmap)) { - ret = alloc_empty_pages(vma, m.num); + ret = alloc_empty_pages(vma, nr_pages); if (ret < 0) goto out_unlock; } else @@ -518,6 +518,7 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) state.global_error = 0; state.version = version; + BUILD_BUG_ON(((PAGE_SIZE / sizeof(xen_pfn_t)) % XEN_PFN_PER_PAGE) != 0); /* mmap_batch_fn guarantees ret == 0 */ BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t), &pagelist, mmap_batch_fn, &state)); @@ -582,12 +583,13 @@ static void privcmd_close(struct vm_area_struct *vma) { struct page **pages = vma->vm_private_data; int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int numgfns = (vma->vm_end - vma->vm_start) >> XEN_PAGE_SHIFT; int rc; if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages) return; - rc = xen_unmap_domain_gfn_range(vma, numpgs, pages); + rc = xen_unmap_domain_gfn_range(vma, numgfns, pages); if (rc == 0) free_xenballooned_pages(numpgs, pages); else diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 79bc4933b..7399782c0 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -76,27 +76,27 @@ static unsigned long xen_io_tlb_nslabs; static u64 start_dma_addr; /* - * Both of these functions should avoid PFN_PHYS because phys_addr_t + * Both of these functions should avoid XEN_PFN_PHYS because phys_addr_t * can be 32bit when dma_addr_t is 64bit leading to a loss in * information if the shift is done before casting to 64bit. */ static inline dma_addr_t xen_phys_to_bus(phys_addr_t paddr) { - unsigned long bfn = pfn_to_bfn(PFN_DOWN(paddr)); - dma_addr_t dma = (dma_addr_t)bfn << PAGE_SHIFT; + unsigned long bfn = pfn_to_bfn(XEN_PFN_DOWN(paddr)); + dma_addr_t dma = (dma_addr_t)bfn << XEN_PAGE_SHIFT; - dma |= paddr & ~PAGE_MASK; + dma |= paddr & ~XEN_PAGE_MASK; return dma; } static inline phys_addr_t xen_bus_to_phys(dma_addr_t baddr) { - unsigned long pfn = bfn_to_pfn(PFN_DOWN(baddr)); - dma_addr_t dma = (dma_addr_t)pfn << PAGE_SHIFT; + unsigned long xen_pfn = bfn_to_pfn(XEN_PFN_DOWN(baddr)); + dma_addr_t dma = (dma_addr_t)xen_pfn << XEN_PAGE_SHIFT; phys_addr_t paddr = dma; - paddr |= baddr & ~PAGE_MASK; + paddr |= baddr & ~XEN_PAGE_MASK; return paddr; } @@ -106,7 +106,7 @@ static inline dma_addr_t xen_virt_to_bus(void *address) return xen_phys_to_bus(virt_to_phys(address)); } -static int check_pages_physically_contiguous(unsigned long pfn, +static int check_pages_physically_contiguous(unsigned long xen_pfn, unsigned int offset, size_t length) { @@ -114,11 +114,11 @@ static int check_pages_physically_contiguous(unsigned long pfn, int i; int nr_pages; - next_bfn = pfn_to_bfn(pfn); - nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT; + next_bfn = pfn_to_bfn(xen_pfn); + nr_pages = (offset + length + XEN_PAGE_SIZE-1) >> XEN_PAGE_SHIFT; for (i = 1; i < nr_pages; i++) { - if (pfn_to_bfn(++pfn) != ++next_bfn) + if (pfn_to_bfn(++xen_pfn) != ++next_bfn) return 0; } return 1; @@ -126,28 +126,27 @@ static int check_pages_physically_contiguous(unsigned long pfn, static inline int range_straddles_page_boundary(phys_addr_t p, size_t size) { - unsigned long pfn = PFN_DOWN(p); - unsigned int offset = p & ~PAGE_MASK; + unsigned long xen_pfn = XEN_PFN_DOWN(p); + unsigned int offset = p & ~XEN_PAGE_MASK; - if (offset + size <= PAGE_SIZE) + if (offset + size <= XEN_PAGE_SIZE) return 0; - if (check_pages_physically_contiguous(pfn, offset, size)) + if (check_pages_physically_contiguous(xen_pfn, offset, size)) return 0; return 1; } static int is_xen_swiotlb_buffer(dma_addr_t dma_addr) { - unsigned long bfn = PFN_DOWN(dma_addr); - unsigned long pfn = bfn_to_local_pfn(bfn); - phys_addr_t paddr; + unsigned long bfn = XEN_PFN_DOWN(dma_addr); + unsigned long xen_pfn = bfn_to_local_pfn(bfn); + phys_addr_t paddr = XEN_PFN_PHYS(xen_pfn); /* If the address is outside our domain, it CAN * have the same virtual address as another address * in our domain. Therefore _only_ check address within our domain. */ - if (pfn_valid(pfn)) { - paddr = PFN_PHYS(pfn); + if (pfn_valid(PFN_DOWN(paddr))) { return paddr >= virt_to_phys(xen_io_tlb_start) && paddr < virt_to_phys(xen_io_tlb_end); } @@ -392,7 +391,7 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, */ if (dma_capable(dev, dev_addr, size) && !range_straddles_page_boundary(phys, size) && - !xen_arch_need_swiotlb(dev, PFN_DOWN(phys), PFN_DOWN(dev_addr)) && + !xen_arch_need_swiotlb(dev, phys, dev_addr) && !swiotlb_force) { /* we are not interested in the dma_addr returned by * xen_dma_map_page, only in the potential cache flushes executed @@ -551,7 +550,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, dma_addr_t dev_addr = xen_phys_to_bus(paddr); if (swiotlb_force || - xen_arch_need_swiotlb(hwdev, PFN_DOWN(paddr), PFN_DOWN(dev_addr)) || + xen_arch_need_swiotlb(hwdev, paddr, dev_addr) || !dma_capable(hwdev, dev_addr, sg->length) || range_straddles_page_boundary(paddr, sg->length)) { phys_addr_t map = swiotlb_tbl_map_single(hwdev, diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index 58e38d586..4d529f3e4 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -37,6 +37,7 @@ struct xen_pcibk_device { struct xen_pci_sharedinfo *sh_info; unsigned long flags; struct work_struct op_work; + struct xen_pci_op op; }; struct xen_pcibk_dev_data { diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index c4a0666de..73dafdc49 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -70,6 +70,13 @@ static void xen_pcibk_control_isr(struct pci_dev *dev, int reset) enable ? "enable" : "disable"); if (enable) { + /* + * The MSI or MSI-X should not have an IRQ handler. Otherwise + * if the guest terminates we BUG_ON in free_msi_irqs. + */ + if (dev->msi_enabled || dev->msix_enabled) + goto out; + rc = request_irq(dev_data->irq, xen_pcibk_guest_interrupt, IRQF_SHARED, dev_data->irq_name, dev); @@ -144,7 +151,12 @@ int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev, if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev)); - status = pci_enable_msi(dev); + if (dev->msi_enabled) + status = -EALREADY; + else if (dev->msix_enabled) + status = -ENXIO; + else + status = pci_enable_msi(dev); if (status) { pr_warn_ratelimited("%s: error enabling MSI for guest %u: err %d\n", @@ -173,20 +185,23 @@ static int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev, struct pci_dev *dev, struct xen_pci_op *op) { - struct xen_pcibk_dev_data *dev_data; - if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n", pci_name(dev)); - pci_disable_msi(dev); + if (dev->msi_enabled) { + struct xen_pcibk_dev_data *dev_data; + + pci_disable_msi(dev); + + dev_data = pci_get_drvdata(dev); + if (dev_data) + dev_data->ack_intr = 1; + } op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), op->value); - dev_data = pci_get_drvdata(dev); - if (dev_data) - dev_data->ack_intr = 1; return 0; } @@ -197,13 +212,26 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, struct xen_pcibk_dev_data *dev_data; int i, result; struct msix_entry *entries; + u16 cmd; if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n", pci_name(dev)); + if (op->value > SH_INFO_MAX_VEC) return -EINVAL; + if (dev->msix_enabled) + return -EALREADY; + + /* + * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able + * to access the BARs where the MSI-X entries reside. + */ + pci_read_config_word(dev, PCI_COMMAND, &cmd); + if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY)) + return -ENXIO; + entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); if (entries == NULL) return -ENOMEM; @@ -245,23 +273,27 @@ static int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, struct pci_dev *dev, struct xen_pci_op *op) { - struct xen_pcibk_dev_data *dev_data; if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n", pci_name(dev)); - pci_disable_msix(dev); + if (dev->msix_enabled) { + struct xen_pcibk_dev_data *dev_data; + + pci_disable_msix(dev); + + dev_data = pci_get_drvdata(dev); + if (dev_data) + dev_data->ack_intr = 1; + } /* * SR-IOV devices (which don't have any legacy IRQ) have * an undefined IRQ value of zero. */ op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; if (unlikely(verbose_request)) - printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev), - op->value); - dev_data = pci_get_drvdata(dev); - if (dev_data) - dev_data->ack_intr = 1; + printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", + pci_name(dev), op->value); return 0; } #endif @@ -298,9 +330,11 @@ void xen_pcibk_do_op(struct work_struct *data) container_of(data, struct xen_pcibk_device, op_work); struct pci_dev *dev; struct xen_pcibk_dev_data *dev_data = NULL; - struct xen_pci_op *op = &pdev->sh_info->op; + struct xen_pci_op *op = &pdev->op; int test_intx = 0; + *op = pdev->sh_info->op; + barrier(); dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn); if (dev == NULL) @@ -342,6 +376,17 @@ void xen_pcibk_do_op(struct work_struct *data) if ((dev_data->enable_intx != test_intx)) xen_pcibk_control_isr(dev, 0 /* no reset */); } + pdev->sh_info->op.err = op->err; + pdev->sh_info->op.value = op->value; +#ifdef CONFIG_PCI_MSI + if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) { + unsigned int i; + + for (i = 0; i < op->value; i++) + pdev->sh_info->op.msix_entries[i].vector = + op->msix_entries[i].vector; + } +#endif /* Tell the driver domain that we're done. */ wmb(); clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 98bc345f2..4843741e7 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -44,7 +44,6 @@ static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev) dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev); pdev->xdev = xdev; - dev_set_drvdata(&xdev->dev, pdev); mutex_init(&pdev->dev_lock); @@ -58,6 +57,9 @@ static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev) kfree(pdev); pdev = NULL; } + + dev_set_drvdata(&xdev->dev, pdev); + out: return pdev; } diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 9eeefd7ca..ad4eb1024 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -53,7 +53,6 @@ #include #include -#include #include @@ -727,7 +726,7 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info) if (!pending_req) return 1; - ring_req = *RING_GET_REQUEST(ring, rc); + RING_COPY_REQUEST(ring, rc, &ring_req); ring->req_cons = ++rc; err = prepare_pending_reqs(info, &ring_req, pending_req); @@ -1438,9 +1437,10 @@ static void scsiback_aborted_task(struct se_cmd *se_cmd) { } -static ssize_t scsiback_tpg_param_show_alias(struct se_portal_group *se_tpg, +static ssize_t scsiback_tpg_param_alias_show(struct config_item *item, char *page) { + struct se_portal_group *se_tpg = param_to_tpg(item); struct scsiback_tpg *tpg = container_of(se_tpg, struct scsiback_tpg, se_tpg); ssize_t rb; @@ -1452,9 +1452,10 @@ static ssize_t scsiback_tpg_param_show_alias(struct se_portal_group *se_tpg, return rb; } -static ssize_t scsiback_tpg_param_store_alias(struct se_portal_group *se_tpg, +static ssize_t scsiback_tpg_param_alias_store(struct config_item *item, const char *page, size_t count) { + struct se_portal_group *se_tpg = param_to_tpg(item); struct scsiback_tpg *tpg = container_of(se_tpg, struct scsiback_tpg, se_tpg); int len; @@ -1474,10 +1475,10 @@ static ssize_t scsiback_tpg_param_store_alias(struct se_portal_group *se_tpg, return count; } -TF_TPG_PARAM_ATTR(scsiback, alias, S_IRUGO | S_IWUSR); +CONFIGFS_ATTR(scsiback_tpg_param_, alias); static struct configfs_attribute *scsiback_param_attrs[] = { - &scsiback_tpg_param_alias.attr, + &scsiback_tpg_param_attr_alias, NULL, }; @@ -1585,9 +1586,9 @@ static int scsiback_drop_nexus(struct scsiback_tpg *tpg) return 0; } -static ssize_t scsiback_tpg_show_nexus(struct se_portal_group *se_tpg, - char *page) +static ssize_t scsiback_tpg_nexus_show(struct config_item *item, char *page) { + struct se_portal_group *se_tpg = to_tpg(item); struct scsiback_tpg *tpg = container_of(se_tpg, struct scsiback_tpg, se_tpg); struct scsiback_nexus *tv_nexus; @@ -1606,10 +1607,10 @@ static ssize_t scsiback_tpg_show_nexus(struct se_portal_group *se_tpg, return ret; } -static ssize_t scsiback_tpg_store_nexus(struct se_portal_group *se_tpg, - const char *page, - size_t count) +static ssize_t scsiback_tpg_nexus_store(struct config_item *item, + const char *page, size_t count) { + struct se_portal_group *se_tpg = to_tpg(item); struct scsiback_tpg *tpg = container_of(se_tpg, struct scsiback_tpg, se_tpg); struct scsiback_tport *tport_wwn = tpg->tport; @@ -1681,26 +1682,25 @@ check_newline: return count; } -TF_TPG_BASE_ATTR(scsiback, nexus, S_IRUGO | S_IWUSR); +CONFIGFS_ATTR(scsiback_tpg_, nexus); static struct configfs_attribute *scsiback_tpg_attrs[] = { - &scsiback_tpg_nexus.attr, + &scsiback_tpg_attr_nexus, NULL, }; static ssize_t -scsiback_wwn_show_attr_version(struct target_fabric_configfs *tf, - char *page) +scsiback_wwn_version_show(struct config_item *item, char *page) { return sprintf(page, "xen-pvscsi fabric module %s on %s/%s on " UTS_RELEASE"\n", VSCSI_VERSION, utsname()->sysname, utsname()->machine); } -TF_WWN_ATTR_RO(scsiback, version); +CONFIGFS_ATTR_RO(scsiback_wwn_, version); static struct configfs_attribute *scsiback_wwn_attrs[] = { - &scsiback_wwn_version.attr, + &scsiback_wwn_attr_version, NULL, }; diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 2ba09c119..056da6ee1 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -49,6 +49,10 @@ #include "xenbus_probe.h" +#define XENBUS_PAGES(_grants) (DIV_ROUND_UP(_grants, XEN_PFN_PER_PAGE)) + +#define XENBUS_MAX_RING_PAGES (XENBUS_PAGES(XENBUS_MAX_RING_GRANTS)) + struct xenbus_map_node { struct list_head next; union { @@ -57,10 +61,11 @@ struct xenbus_map_node { } pv; struct { struct page *pages[XENBUS_MAX_RING_PAGES]; + unsigned long addrs[XENBUS_MAX_RING_GRANTS]; void *addr; } hvm; }; - grant_handle_t handles[XENBUS_MAX_RING_PAGES]; + grant_handle_t handles[XENBUS_MAX_RING_GRANTS]; unsigned int nr_handles; }; @@ -388,7 +393,7 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, } grefs[i] = err; - vaddr = vaddr + PAGE_SIZE; + vaddr = vaddr + XEN_PAGE_SIZE; } return 0; @@ -479,12 +484,12 @@ static int __xenbus_map_ring(struct xenbus_device *dev, unsigned int flags, bool *leaked) { - struct gnttab_map_grant_ref map[XENBUS_MAX_RING_PAGES]; - struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES]; + struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS]; + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS]; int i, j; int err = GNTST_okay; - if (nr_grefs > XENBUS_MAX_RING_PAGES) + if (nr_grefs > XENBUS_MAX_RING_GRANTS) return -EINVAL; for (i = 0; i < nr_grefs; i++) { @@ -540,22 +545,22 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, { struct xenbus_map_node *node; struct vm_struct *area; - pte_t *ptes[XENBUS_MAX_RING_PAGES]; - phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES]; + pte_t *ptes[XENBUS_MAX_RING_GRANTS]; + phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS]; int err = GNTST_okay; int i; bool leaked; *vaddr = NULL; - if (nr_grefs > XENBUS_MAX_RING_PAGES) + if (nr_grefs > XENBUS_MAX_RING_GRANTS) return -EINVAL; node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return -ENOMEM; - area = alloc_vm_area(PAGE_SIZE * nr_grefs, ptes); + area = alloc_vm_area(XEN_PAGE_SIZE * nr_grefs, ptes); if (!area) { kfree(node); return -ENOMEM; @@ -591,21 +596,44 @@ failed: return err; } +struct map_ring_valloc_hvm +{ + unsigned int idx; + + /* Why do we need two arrays? See comment of __xenbus_map_ring */ + phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS]; + unsigned long addrs[XENBUS_MAX_RING_GRANTS]; +}; + +static void xenbus_map_ring_setup_grant_hvm(unsigned long gfn, + unsigned int goffset, + unsigned int len, + void *data) +{ + struct map_ring_valloc_hvm *info = data; + unsigned long vaddr = (unsigned long)gfn_to_virt(gfn); + + info->phys_addrs[info->idx] = vaddr; + info->addrs[info->idx] = vaddr; + + info->idx++; +} + static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, grant_ref_t *gnt_ref, unsigned int nr_grefs, void **vaddr) { struct xenbus_map_node *node; - int i; int err; void *addr; bool leaked = false; - /* Why do we need two arrays? See comment of __xenbus_map_ring */ - phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES]; - unsigned long addrs[XENBUS_MAX_RING_PAGES]; + struct map_ring_valloc_hvm info = { + .idx = 0, + }; + unsigned int nr_pages = XENBUS_PAGES(nr_grefs); - if (nr_grefs > XENBUS_MAX_RING_PAGES) + if (nr_grefs > XENBUS_MAX_RING_GRANTS) return -EINVAL; *vaddr = NULL; @@ -614,25 +642,22 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, if (!node) return -ENOMEM; - err = alloc_xenballooned_pages(nr_grefs, node->hvm.pages, - false /* lowmem */); + err = alloc_xenballooned_pages(nr_pages, node->hvm.pages); if (err) goto out_err; - for (i = 0; i < nr_grefs; i++) { - unsigned long pfn = page_to_pfn(node->hvm.pages[i]); - phys_addrs[i] = (unsigned long)pfn_to_kaddr(pfn); - addrs[i] = (unsigned long)pfn_to_kaddr(pfn); - } + gnttab_foreach_grant(node->hvm.pages, nr_grefs, + xenbus_map_ring_setup_grant_hvm, + &info); err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles, - phys_addrs, GNTMAP_host_map, &leaked); + info.phys_addrs, GNTMAP_host_map, &leaked); node->nr_handles = nr_grefs; if (err) goto out_free_ballooned_pages; - addr = vmap(node->hvm.pages, nr_grefs, VM_MAP | VM_IOREMAP, + addr = vmap(node->hvm.pages, nr_pages, VM_MAP | VM_IOREMAP, PAGE_KERNEL); if (!addr) { err = -ENOMEM; @@ -650,14 +675,13 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, out_xenbus_unmap_ring: if (!leaked) - xenbus_unmap_ring(dev, node->handles, node->nr_handles, - addrs); + xenbus_unmap_ring(dev, node->handles, nr_grefs, info.addrs); else pr_alert("leaking %p size %u page(s)", - addr, nr_grefs); + addr, nr_pages); out_free_ballooned_pages: if (!leaked) - free_xenballooned_pages(nr_grefs, node->hvm.pages); + free_xenballooned_pages(nr_pages, node->hvm.pages); out_err: kfree(node); return err; @@ -687,10 +711,10 @@ int xenbus_map_ring(struct xenbus_device *dev, grant_ref_t *gnt_refs, unsigned int nr_grefs, grant_handle_t *handles, unsigned long *vaddrs, bool *leaked) { - phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES]; + phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS]; int i; - if (nr_grefs > XENBUS_MAX_RING_PAGES) + if (nr_grefs > XENBUS_MAX_RING_GRANTS) return -EINVAL; for (i = 0; i < nr_grefs; i++) @@ -723,7 +747,7 @@ EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) { struct xenbus_map_node *node; - struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES]; + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS]; unsigned int level; int i; bool leaked = false; @@ -750,7 +774,7 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) unsigned long addr; memset(&unmap[i], 0, sizeof(unmap[i])); - addr = (unsigned long)vaddr + (PAGE_SIZE * i); + addr = (unsigned long)vaddr + (XEN_PAGE_SIZE * i); unmap[i].host_addr = arbitrary_virt_to_machine( lookup_address(addr, &level)).maddr; unmap[i].dev_bus_addr = 0; @@ -783,13 +807,33 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) return err; } +struct unmap_ring_vfree_hvm +{ + unsigned int idx; + unsigned long addrs[XENBUS_MAX_RING_GRANTS]; +}; + +static void xenbus_unmap_ring_setup_grant_hvm(unsigned long gfn, + unsigned int goffset, + unsigned int len, + void *data) +{ + struct unmap_ring_vfree_hvm *info = data; + + info->addrs[info->idx] = (unsigned long)gfn_to_virt(gfn); + + info->idx++; +} + static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) { int rv; struct xenbus_map_node *node; void *addr; - unsigned long addrs[XENBUS_MAX_RING_PAGES]; - int i; + struct unmap_ring_vfree_hvm info = { + .idx = 0, + }; + unsigned int nr_pages; spin_lock(&xenbus_valloc_lock); list_for_each_entry(node, &xenbus_valloc_pages, next) { @@ -809,18 +853,20 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) return GNTST_bad_virt_addr; } - for (i = 0; i < node->nr_handles; i++) - addrs[i] = (unsigned long)pfn_to_kaddr(page_to_pfn(node->hvm.pages[i])); + nr_pages = XENBUS_PAGES(node->nr_handles); + + gnttab_foreach_grant(node->hvm.pages, node->nr_handles, + xenbus_unmap_ring_setup_grant_hvm, + &info); rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles, - addrs); + info.addrs); if (!rv) { vunmap(vaddr); - free_xenballooned_pages(node->nr_handles, node->hvm.pages); + free_xenballooned_pages(nr_pages, node->hvm.pages); } else - WARN(1, "Leaking %p, size %u page(s)\n", vaddr, - node->nr_handles); + WARN(1, "Leaking %p, size %u page(s)\n", vaddr, nr_pages); kfree(node); return rv; @@ -841,11 +887,11 @@ int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t *handles, unsigned int nr_handles, unsigned long *vaddrs) { - struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES]; + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS]; int i; int err; - if (nr_handles > XENBUS_MAX_RING_PAGES) + if (nr_handles > XENBUS_MAX_RING_GRANTS) return -EINVAL; for (i = 0; i < nr_handles; i++) diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 3cbe0556d..33a31cfef 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -802,7 +802,8 @@ static int __init xenbus_init(void) goto out_error; xen_store_gfn = (unsigned long)v; xen_store_interface = - xen_remap(xen_store_gfn << PAGE_SHIFT, PAGE_SIZE); + xen_remap(xen_store_gfn << XEN_PAGE_SHIFT, + XEN_PAGE_SIZE); break; default: pr_warn("Xenstore state unknown\n"); diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c index cff23872c..5063c5e79 100644 --- a/drivers/xen/xlate_mmu.c +++ b/drivers/xen/xlate_mmu.c @@ -38,31 +38,28 @@ #include #include -/* map fgfn of domid to lpfn in the current domain */ -static int map_foreign_page(unsigned long lpfn, unsigned long fgfn, - unsigned int domid) -{ - int rc; - struct xen_add_to_physmap_range xatp = { - .domid = DOMID_SELF, - .foreign_domid = domid, - .size = 1, - .space = XENMAPSPACE_gmfn_foreign, - }; - xen_ulong_t idx = fgfn; - xen_pfn_t gpfn = lpfn; - int err = 0; +typedef void (*xen_gfn_fn_t)(unsigned long gfn, void *data); - set_xen_guest_handle(xatp.idxs, &idx); - set_xen_guest_handle(xatp.gpfns, &gpfn); - set_xen_guest_handle(xatp.errs, &err); +/* Break down the pages in 4KB chunk and call fn for each gfn */ +static void xen_for_each_gfn(struct page **pages, unsigned nr_gfn, + xen_gfn_fn_t fn, void *data) +{ + unsigned long xen_pfn = 0; + struct page *page; + int i; - rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); - return rc < 0 ? rc : err; + for (i = 0; i < nr_gfn; i++) { + if ((i % XEN_PFN_PER_PAGE) == 0) { + page = pages[i / XEN_PFN_PER_PAGE]; + xen_pfn = page_to_xen_pfn(page); + } + fn(pfn_to_gfn(xen_pfn++), data); + } } struct remap_data { xen_pfn_t *fgfn; /* foreign domain's gfn */ + int nr_fgfn; /* Number of foreign gfn left to map */ pgprot_t prot; domid_t domid; struct vm_area_struct *vma; @@ -71,24 +68,71 @@ struct remap_data { struct xen_remap_gfn_info *info; int *err_ptr; int mapped; + + /* Hypercall parameters */ + int h_errs[XEN_PFN_PER_PAGE]; + xen_ulong_t h_idxs[XEN_PFN_PER_PAGE]; + xen_pfn_t h_gpfns[XEN_PFN_PER_PAGE]; + + int h_iter; /* Iterator */ }; +static void setup_hparams(unsigned long gfn, void *data) +{ + struct remap_data *info = data; + + info->h_idxs[info->h_iter] = *info->fgfn; + info->h_gpfns[info->h_iter] = gfn; + info->h_errs[info->h_iter] = 0; + + info->h_iter++; + info->fgfn++; +} + static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, void *data) { struct remap_data *info = data; struct page *page = info->pages[info->index++]; - unsigned long pfn = page_to_pfn(page); - pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot)); - int rc; + pte_t pte = pte_mkspecial(pfn_pte(page_to_pfn(page), info->prot)); + int rc, nr_gfn; + uint32_t i; + struct xen_add_to_physmap_range xatp = { + .domid = DOMID_SELF, + .foreign_domid = info->domid, + .space = XENMAPSPACE_gmfn_foreign, + }; - rc = map_foreign_page(pfn, *info->fgfn, info->domid); - *info->err_ptr++ = rc; - if (!rc) { - set_pte_at(info->vma->vm_mm, addr, ptep, pte); - info->mapped++; + nr_gfn = min_t(typeof(info->nr_fgfn), XEN_PFN_PER_PAGE, info->nr_fgfn); + info->nr_fgfn -= nr_gfn; + + info->h_iter = 0; + xen_for_each_gfn(&page, nr_gfn, setup_hparams, info); + BUG_ON(info->h_iter != nr_gfn); + + set_xen_guest_handle(xatp.idxs, info->h_idxs); + set_xen_guest_handle(xatp.gpfns, info->h_gpfns); + set_xen_guest_handle(xatp.errs, info->h_errs); + xatp.size = nr_gfn; + + rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); + + /* info->err_ptr expect to have one error status per Xen PFN */ + for (i = 0; i < nr_gfn; i++) { + int err = (rc < 0) ? rc : info->h_errs[i]; + + *(info->err_ptr++) = err; + if (!err) + info->mapped++; } - info->fgfn++; + + /* + * Note: The hypercall will return 0 in most of the case if even if + * all the fgmfn are not mapped. We still have to update the pte + * as the userspace may decide to continue. + */ + if (!rc) + set_pte_at(info->vma->vm_mm, addr, ptep, pte); return 0; } @@ -102,13 +146,14 @@ int xen_xlate_remap_gfn_array(struct vm_area_struct *vma, { int err; struct remap_data data; - unsigned long range = nr << PAGE_SHIFT; + unsigned long range = DIV_ROUND_UP(nr, XEN_PFN_PER_PAGE) << PAGE_SHIFT; /* Kept here for the purpose of making sure code doesn't break x86 PVOPS */ BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); data.fgfn = gfn; + data.nr_fgfn = nr; data.prot = prot; data.domid = domid; data.vma = vma; @@ -123,21 +168,20 @@ int xen_xlate_remap_gfn_array(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_array); -int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, - int nr, struct page **pages) +static void unmap_gfn(unsigned long gfn, void *data) { - int i; + struct xen_remove_from_physmap xrp; - for (i = 0; i < nr; i++) { - struct xen_remove_from_physmap xrp; - unsigned long pfn; + xrp.domid = DOMID_SELF; + xrp.gpfn = gfn; + (void)HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); +} - pfn = page_to_pfn(pages[i]); +int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, + int nr, struct page **pages) +{ + xen_for_each_gfn(pages, nr, unmap_gfn, NULL); - xrp.domid = DOMID_SELF; - xrp.gpfn = pfn; - (void)HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); - } return 0; } EXPORT_SYMBOL_GPL(xen_xlate_unmap_gfn_range); -- cgit v1.2.3-54-g00ecf