Diffstat (limited to 'drivers/hv')
 drivers/hv/channel_mgmt.c |  58
 drivers/hv/connection.c   |   1
 drivers/hv/hv_balloon.c   |   5
 drivers/hv/hv_kvp.c       |  31
 drivers/hv/hyperv_vmbus.h |  23
 drivers/hv/ring_buffer.c  |  95
 drivers/hv/vmbus_drv.c    | 150
 7 files changed, 218 insertions(+), 145 deletions(-)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 38b682bab..b6c1211b4 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -597,27 +597,55 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
 
 static void vmbus_wait_for_unload(void)
 {
-	int cpu = smp_processor_id();
-	void *page_addr = hv_context.synic_message_page[cpu];
-	struct hv_message *msg = (struct hv_message *)page_addr +
-				  VMBUS_MESSAGE_SINT;
+	int cpu;
+	void *page_addr;
+	struct hv_message *msg;
 	struct vmbus_channel_message_header *hdr;
-	bool unloaded = false;
+	u32 message_type;
 
+	/*
+	 * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
+	 * used for initial contact or to CPU0 depending on host version. When
+	 * we're crashing on a different CPU let's hope that IRQ handler on
+	 * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
+	 * functional and vmbus_unload_response() will complete
+	 * vmbus_connection.unload_event. If not, the last thing we can do is
+	 * read message pages for all CPUs directly.
+	 */
 	while (1) {
-		if (READ_ONCE(msg->header.message_type) == HVMSG_NONE) {
-			mdelay(10);
-			continue;
-		}
+		if (completion_done(&vmbus_connection.unload_event))
+			break;
 
-		hdr = (struct vmbus_channel_message_header *)msg->u.payload;
-		if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
-			unloaded = true;
+		for_each_online_cpu(cpu) {
+			page_addr = hv_context.synic_message_page[cpu];
+			msg = (struct hv_message *)page_addr +
+				VMBUS_MESSAGE_SINT;
 
-		vmbus_signal_eom(msg);
+			message_type = READ_ONCE(msg->header.message_type);
+			if (message_type == HVMSG_NONE)
+				continue;
 
-		if (unloaded)
-			break;
+			hdr = (struct vmbus_channel_message_header *)
+				msg->u.payload;
+
+			if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
+				complete(&vmbus_connection.unload_event);
+
+			vmbus_signal_eom(msg, message_type);
+		}
+
+		mdelay(10);
+	}
+
+	/*
+	 * We're crashing and already got the UNLOAD_RESPONSE, cleanup all
+	 * maybe-pending messages on all CPUs to be able to receive new
+	 * messages after we reconnect.
+	 */
+	for_each_online_cpu(cpu) {
+		page_addr = hv_context.synic_message_page[cpu];
+		msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
+		msg->header.message_type = HVMSG_NONE;
+	}
 }
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index d02f1373d..fcf8a02dc 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -495,3 +495,4 @@ void vmbus_set_event(struct vmbus_channel *channel)
 
 	hv_do_hypercall(HVCALL_SIGNAL_EVENT, channel->sig_event, NULL);
 }
+EXPORT_SYMBOL_GPL(vmbus_set_event);
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index b853b4b08..df35fb7ed 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -714,7 +714,7 @@ static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
 		 * If the pfn range we are dealing with is not in the current
 		 * "hot add block", move on.
 		 */
-		if ((start_pfn >= has->end_pfn))
+		if (start_pfn < has->start_pfn || start_pfn >= has->end_pfn)
 			continue;
 		/*
 		 * If the current hot add-request extends beyond
@@ -768,7 +768,7 @@ static unsigned long handle_pg_range(unsigned long pg_start,
 		 * If the pfn range we are dealing with is not in the current
 		 * "hot add block", move on.
 		 */
-		if ((start_pfn >= has->end_pfn))
+		if (start_pfn < has->start_pfn || start_pfn >= has->end_pfn)
 			continue;
 
 		old_covered_state = has->covered_end_pfn;
@@ -1400,6 +1400,7 @@ static void balloon_onchannelcallback(void *context)
 			 * This is a normal hot-add request specifying
 			 * hot-add memory.
 			 */
+			dm->host_specified_ha_region = false;
 			ha_pg_range = &ha_msg->range;
 			dm->ha_wrk.ha_page_range = *ha_pg_range;
 			dm->ha_wrk.ha_region_range.page_range = 0;
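The hv_balloon change above fixes region matching when memory is added in non-sorted order: the old test compared start_pfn only against each region's upper bound, which is safe only while the regions are walked in ascending order. The corrected test requires the pfn to sit inside the half-open interval [start_pfn, end_pfn). A minimal stand-alone sketch of that predicate (hypothetical struct and helper names, not the kernel's):

    #include <stdbool.h>

    struct ha_region_sketch {
            unsigned long start_pfn;        /* first pfn covered by the region */
            unsigned long end_pfn;          /* one past the last covered pfn */
    };

    /* A pfn belongs to the region only within [start_pfn, end_pfn). */
    static bool ha_region_covers(const struct ha_region_sketch *has,
                                 unsigned long start_pfn)
    {
            return start_pfn >= has->start_pfn && start_pfn < has->end_pfn;
    }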
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index 9b9b370fe..cb1a9160a 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -78,9 +78,11 @@ static void kvp_send_key(struct work_struct *dummy);
 
 static void kvp_respond_to_host(struct hv_kvp_msg *msg, int error);
 static void kvp_timeout_func(struct work_struct *dummy);
+static void kvp_host_handshake_func(struct work_struct *dummy);
 static void kvp_register(int);
 
 static DECLARE_DELAYED_WORK(kvp_timeout_work, kvp_timeout_func);
+static DECLARE_DELAYED_WORK(kvp_host_handshake_work, kvp_host_handshake_func);
 static DECLARE_WORK(kvp_sendkey_work, kvp_send_key);
 
 static const char kvp_devname[] = "vmbus/hv_kvp";
@@ -130,6 +132,11 @@ static void kvp_timeout_func(struct work_struct *dummy)
 	hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
 }
 
+static void kvp_host_handshake_func(struct work_struct *dummy)
+{
+	hv_poll_channel(kvp_transaction.recv_channel, hv_kvp_onchannelcallback);
+}
+
 static int kvp_handle_handshake(struct hv_kvp_msg *msg)
 {
 	switch (msg->kvp_hdr.operation) {
@@ -154,6 +161,12 @@ static int kvp_handle_handshake(struct hv_kvp_msg *msg)
 	pr_debug("KVP: userspace daemon ver. %d registered\n",
 		 KVP_OP_REGISTER);
 	kvp_register(dm_reg_value);
+
+	/*
+	 * If we're still negotiating with the host cancel the timeout
+	 * work to not poll the channel twice.
+	 */
+	cancel_delayed_work_sync(&kvp_host_handshake_work);
 	hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
 
 	return 0;
@@ -594,7 +607,22 @@ void hv_kvp_onchannelcallback(void *context)
 	struct icmsg_negotiate *negop = NULL;
 	int util_fw_version;
 	int kvp_srv_version;
+	static enum {NEGO_NOT_STARTED,
+		     NEGO_IN_PROGRESS,
+		     NEGO_FINISHED} host_negotiatied = NEGO_NOT_STARTED;
 
+	if (host_negotiatied == NEGO_NOT_STARTED &&
+	    kvp_transaction.state < HVUTIL_READY) {
+		/*
+		 * If userspace daemon is not connected and host is asking
+		 * us to negotiate we need to delay to not lose messages.
+		 * This is important for Failover IP setting.
+		 */
+		host_negotiatied = NEGO_IN_PROGRESS;
+		schedule_delayed_work(&kvp_host_handshake_work,
+				      HV_UTIL_NEGO_TIMEOUT * HZ);
+		return;
+	}
 	if (kvp_transaction.state > HVUTIL_READY)
 		return;
@@ -672,6 +700,8 @@ void hv_kvp_onchannelcallback(void *context)
 			vmbus_sendpacket(channel, recv_buffer,
 					 recvlen, requestid,
 					 VM_PKT_DATA_INBAND, 0);
+
+			host_negotiatied = NEGO_FINISHED;
 		}
 
 }
@@ -708,6 +738,7 @@ hv_kvp_init(struct hv_util_service *srv)
 void hv_kvp_deinit(void)
 {
 	kvp_transaction.state = HVUTIL_DEVICE_DYING;
+	cancel_delayed_work_sync(&kvp_host_handshake_work);
 	cancel_delayed_work_sync(&kvp_timeout_work);
 	cancel_work_sync(&kvp_sendkey_work);
 	hvutil_transport_destroy(hvt);
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 12321b93a..718b5c72f 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -36,6 +36,11 @@
 #define HV_UTIL_TIMEOUT 30
 
 /*
+ * Timeout for guest-host handshake for services.
+ */
+#define HV_UTIL_NEGO_TIMEOUT 60
+
+/*
  * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
  * is set by CPUID(HVCPUID_VERSION_FEATURES).
  */
@@ -620,9 +625,21 @@ extern struct vmbus_channel_message_table_entry
 	channel_message_table[CHANNELMSG_COUNT];
 
 /* Free the message slot and signal end-of-message if required */
-static inline void vmbus_signal_eom(struct hv_message *msg)
+static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
 {
-	msg->header.message_type = HVMSG_NONE;
+	/*
+	 * On crash we're reading some other CPU's message page and we need
+	 * to be careful: this other CPU may already had cleared the header
+	 * and the host may already had delivered some other message there.
+	 * In case we blindly write msg->header.message_type we're going
+	 * to lose it. We can still lose a message of the same type but
+	 * we count on the fact that there can only be one
+	 * CHANNELMSG_UNLOAD_RESPONSE and we don't care about other messages
+	 * on crash.
+	 */
+	if (cmpxchg(&msg->header.message_type, old_msg_type,
+		    HVMSG_NONE) != old_msg_type)
+		return;
 
 	/*
 	 * Make sure the write to MessageType (ie set to
@@ -667,8 +684,6 @@ void vmbus_disconnect(void);
 
 int vmbus_post_msg(void *buffer, size_t buflen);
 
-void vmbus_set_event(struct vmbus_channel *channel);
-
 void vmbus_on_event(unsigned long data);
 void vmbus_on_msg_dpc(unsigned long data);
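The reworked vmbus_signal_eom() above is what makes the crash-path scan in vmbus_wait_for_unload() safe: the slot is freed with cmpxchg(), so the clear happens only if the header still holds the message type sampled earlier, and a message the host delivered in the meantime (or a slot another CPU already recycled) is left untouched. A user-space sketch of the same claim-by-compare-and-swap idea, using GCC atomic builtins rather than the kernel's cmpxchg():

    #include <stdbool.h>
    #include <stdint.h>

    #define MSG_SLOT_EMPTY 0u               /* stand-in for HVMSG_NONE */

    /*
     * Clear the slot only if it still holds the type we sampled; if it
     * changed under us, someone else's message lives there now.
     */
    static bool eom_try_clear(uint32_t *slot, uint32_t sampled_type)
    {
            uint32_t expected = sampled_type;

            return __atomic_compare_exchange_n(slot, &expected,
                                               MSG_SLOT_EMPTY, false,
                                               __ATOMIC_SEQ_CST,
                                               __ATOMIC_SEQ_CST);
    }

As the added comment concedes, a second message of the same type can still be lost, but only one CHANNELMSG_UNLOAD_RESPONSE ever arrives and nothing else matters on the crash path.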
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index a40a73a7b..fe586bf74 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -33,25 +33,21 @@
 void hv_begin_read(struct hv_ring_buffer_info *rbi)
 {
 	rbi->ring_buffer->interrupt_mask = 1;
-	mb();
+	virt_mb();
 }
 
 u32 hv_end_read(struct hv_ring_buffer_info *rbi)
 {
-	u32 read;
-	u32 write;
 
 	rbi->ring_buffer->interrupt_mask = 0;
-	mb();
+	virt_mb();
 
 	/*
 	 * Now check to see if the ring buffer is still empty.
 	 * If it is not, we raced and we need to process new
 	 * incoming messages.
 	 */
-	hv_get_ringbuffer_availbytes(rbi, &read, &write);
-
-	return read;
+	return hv_get_bytes_to_read(rbi);
 }
 
 /*
@@ -72,69 +68,17 @@ u32 hv_end_read(struct hv_ring_buffer_info *rbi)
 
 static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi)
 {
-	mb();
-	if (rbi->ring_buffer->interrupt_mask)
+	virt_mb();
+	if (READ_ONCE(rbi->ring_buffer->interrupt_mask))
 		return false;
 
 	/* check interrupt_mask before read_index */
-	rmb();
+	virt_rmb();
 	/*
 	 * This is the only case we need to signal when the
 	 * ring transitions from being empty to non-empty.
 	 */
-	if (old_write == rbi->ring_buffer->read_index)
-		return true;
-
-	return false;
-}
-
-/*
- * To optimize the flow management on the send-side,
- * when the sender is blocked because of lack of
- * sufficient space in the ring buffer, potential the
- * consumer of the ring buffer can signal the producer.
- * This is controlled by the following parameters:
- *
- * 1. pending_send_sz: This is the size in bytes that the
- *    producer is trying to send.
- * 2. The feature bit feat_pending_send_sz set to indicate if
- *    the consumer of the ring will signal when the ring
- *    state transitions from being full to a state where
- *    there is room for the producer to send the pending packet.
- */
-
-static bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi)
-{
-	u32 cur_write_sz;
-	u32 r_size;
-	u32 write_loc;
-	u32 read_loc = rbi->ring_buffer->read_index;
-	u32 pending_sz;
-
-	/*
-	 * Issue a full memory barrier before making the signaling decision.
-	 * Here is the reason for having this barrier:
-	 * If the reading of the pend_sz (in this function)
-	 * were to be reordered and read before we commit the new read
-	 * index (in the calling function) we could
-	 * have a problem. If the host were to set the pending_sz after we
-	 * have sampled pending_sz and go to sleep before we commit the
-	 * read index, we could miss sending the interrupt. Issue a full
-	 * memory barrier to address this.
-	 */
-	mb();
-
-	pending_sz = rbi->ring_buffer->pending_send_sz;
-	write_loc = rbi->ring_buffer->write_index;
-	/* If the other end is not blocked on write don't bother. */
-	if (pending_sz == 0)
-		return false;
-
-	r_size = rbi->ring_datasize;
-	cur_write_sz = write_loc >= read_loc ? r_size - (write_loc - read_loc) :
-			read_loc - write_loc;
-
-	if (cur_write_sz >= pending_sz)
+	if (old_write == READ_ONCE(rbi->ring_buffer->read_index))
 		return true;
 
 	return false;
 }
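The signaling logic above follows a publish-barrier-recheck discipline on both sides: write your own state (the interrupt mask, or a new write_index), issue a full barrier, then read the peer's state. The mb()-to-virt_mb() swap keeps that ordering while using SMP-strength barriers, which suffice between a guest and a host sharing the same coherent memory. A user-space sketch of hv_end_read()'s unmask-then-recheck step with C11 atomics (the fence stands in for virt_mb()):

    #include <stdatomic.h>
    #include <stdint.h>

    struct rx_sketch {
            _Atomic uint32_t interrupt_mask; /* 1: peer must not signal us */
            _Atomic uint32_t avail_bytes;    /* stand-in for the fill level */
    };

    /*
     * Returns non-zero if data slipped in while we were masked; the
     * caller must then loop again instead of going idle.
     */
    static uint32_t end_read_sketch(struct rx_sketch *rx)
    {
            atomic_store_explicit(&rx->interrupt_mask, 0,
                                  memory_order_relaxed);

            /* Order the unmask before re-checking for pending data. */
            atomic_thread_fence(memory_order_seq_cst);

            return atomic_load_explicit(&rx->avail_bytes,
                                        memory_order_relaxed);
    }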
@@ -188,17 +132,9 @@ hv_set_next_read_location(struct hv_ring_buffer_info *ring_info,
 		    u32 next_read_location)
 {
 	ring_info->ring_buffer->read_index = next_read_location;
+	ring_info->priv_read_index = next_read_location;
 }
 
-
-/* Get the start of the ring buffer. */
-static inline void *
-hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info)
-{
-	return (void *)ring_info->ring_buffer->buffer;
-}
-
-
 /* Get the size of the ring buffer. */
 static inline u32
 hv_get_ring_buffersize(struct hv_ring_buffer_info *ring_info)
@@ -332,7 +268,6 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
 {
 	int i = 0;
 	u32 bytes_avail_towrite;
-	u32 bytes_avail_toread;
 	u32 totalbytes_towrite = 0;
 
 	u32 next_write_location;
@@ -348,9 +283,7 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
 	if (lock)
 		spin_lock_irqsave(&outring_info->ring_lock, flags);
 
-	hv_get_ringbuffer_availbytes(outring_info,
-				     &bytes_avail_toread,
-				     &bytes_avail_towrite);
+	bytes_avail_towrite = hv_get_bytes_to_write(outring_info);
 
 	/*
 	 * If there is only room for the packet, assume it is full.
@@ -384,7 +317,7 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
 					     sizeof(u64));
 
 	/* Issue a full memory barrier before updating the write index */
-	mb();
+	virt_mb();
 
 	/* Now, update the write location */
 	hv_set_next_write_location(outring_info, next_write_location);
@@ -401,7 +334,6 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
 		   void *buffer, u32 buflen, u32 *buffer_actual_len,
 		   u64 *requestid, bool *signal, bool raw)
 {
-	u32 bytes_avail_towrite;
 	u32 bytes_avail_toread;
 	u32 next_read_location = 0;
 	u64 prev_indices = 0;
@@ -417,10 +349,7 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
 	*buffer_actual_len = 0;
 	*requestid = 0;
 
-	hv_get_ringbuffer_availbytes(inring_info,
-				     &bytes_avail_toread,
-				     &bytes_avail_towrite);
-
+	bytes_avail_toread = hv_get_bytes_to_read(inring_info);
 	/* Make sure there is something to read */
 	if (bytes_avail_toread < sizeof(desc)) {
 		/*
@@ -464,7 +393,7 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
 	 * the writer may start writing to the read area once the read index
 	 * is updated.
 	 */
-	mb();
+	virt_mb();
 
 	/* Update the read index */
 	hv_set_next_read_location(inring_info, next_read_location);
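The hv_get_bytes_to_read()/hv_get_bytes_to_write() helpers that replace hv_get_ringbuffer_availbytes() here are defined outside this patch; both reduce to wrap-around arithmetic on the two ring indexes, the same computation the deleted hv_need_to_signal_on_read() performed inline for cur_write_sz. A sketch of that arithmetic, assuming a ring of datasize bytes:

    #include <stdint.h>

    struct ring_sketch {
            uint32_t read_index;    /* next byte the consumer will read */
            uint32_t write_index;   /* next byte the producer will write */
            uint32_t datasize;      /* ring capacity in bytes */
    };

    static uint32_t bytes_to_read(const struct ring_sketch *r)
    {
            if (r->write_index >= r->read_index)
                    return r->write_index - r->read_index;
            return r->datasize - (r->read_index - r->write_index);
    }

    static uint32_t bytes_to_write(const struct ring_sketch *r)
    {
            /*
             * Free space is the complement of used space. The writer
             * additionally refuses to fill the ring completely (the
             * "assume it is full" check in hv_ringbuffer_write() above),
             * so read_index == write_index always means empty, never full.
             */
            return r->datasize - bytes_to_read(r);
    }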
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 64713ff47..952f20fdc 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -41,6 +41,7 @@
 #include <linux/ptrace.h>
 #include <linux/screen_info.h>
 #include <linux/kdebug.h>
+#include <linux/efi.h>
 #include "hyperv_vmbus.h"
 
 static struct acpi_device   *hv_acpi_dev;
@@ -101,7 +102,10 @@ static struct notifier_block hyperv_panic_block = {
 	.notifier_call = hyperv_panic_event,
 };
 
+static const char *fb_mmio_name = "fb_range";
+static struct resource *fb_mmio;
 struct resource *hyperv_mmio;
+DEFINE_SEMAPHORE(hyperv_mmio_lock);
 
 static int vmbus_exists(void)
 {
@@ -708,7 +712,7 @@ static void hv_process_timer_expiration(struct hv_message *msg, int cpu)
 	if (dev->event_handler)
 		dev->event_handler(dev);
 
-	vmbus_signal_eom(msg);
+	vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
 }
 
 void vmbus_on_msg_dpc(unsigned long data)
@@ -720,8 +724,9 @@ void vmbus_on_msg_dpc(unsigned long data)
 	struct vmbus_channel_message_header *hdr;
 	struct vmbus_channel_message_table_entry *entry;
 	struct onmessage_work_context *ctx;
+	u32 message_type = msg->header.message_type;
 
-	if (msg->header.message_type == HVMSG_NONE)
+	if (message_type == HVMSG_NONE)
 		/* no msg */
 		return;
 
@@ -746,7 +751,7 @@ void vmbus_on_msg_dpc(unsigned long data)
 		entry->message_handler(hdr);
 
 msg_handled:
-	vmbus_signal_eom(msg);
+	vmbus_signal_eom(msg, message_type);
 }
 
 static void vmbus_isr(void)
@@ -1048,7 +1053,6 @@ static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx)
 	new_res->end = end;
 
 	/*
-	 * Stick ranges from higher in address space at the front of the list.
 	 * If two ranges are adjacent, merge them.
 	 */
 	do {
@@ -1069,7 +1073,7 @@ static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx)
 			break;
 		}
 
-		if ((*old_res)->end < new_res->start) {
+		if ((*old_res)->start > new_res->end) {
 			new_res->sibling = *old_res;
 			if (prev_res)
 				(*prev_res)->sibling = new_res;
@@ -1091,6 +1095,12 @@ static int vmbus_acpi_remove(struct acpi_device *device)
 	struct resource *next_res;
 
 	if (hyperv_mmio) {
+		if (fb_mmio) {
+			__release_region(hyperv_mmio, fb_mmio->start,
+					 resource_size(fb_mmio));
+			fb_mmio = NULL;
+		}
+
 		for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) {
 			next_res = cur_res->sibling;
 			kfree(cur_res);
@@ -1100,6 +1110,30 @@ static int vmbus_acpi_remove(struct acpi_device *device)
 	return 0;
 }
 
+static void vmbus_reserve_fb(void)
+{
+	int size;
+	/*
+	 * Make a claim for the frame buffer in the resource tree under the
+	 * first node, which will be the one below 4GB. The length seems to
+	 * be underreported, particularly in a Generation 1 VM. So start out
+	 * reserving a larger area and make it smaller until it succeeds.
+	 */
+
+	if (screen_info.lfb_base) {
+		if (efi_enabled(EFI_BOOT))
+			size = max_t(__u32, screen_info.lfb_size, 0x800000);
+		else
+			size = max_t(__u32, screen_info.lfb_size, 0x4000000);
+
+		for (; !fb_mmio && (size >= 0x100000); size >>= 1) {
+			fb_mmio = __request_region(hyperv_mmio,
+						   screen_info.lfb_base, size,
+						   fb_mmio_name, 0);
+		}
+	}
+}
+
 /**
  * vmbus_allocate_mmio() - Pick a memory-mapped I/O range.
  * @new:		If successful, supplied a pointer to the
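vmbus_reserve_fb() above copes with firmware that underreports the framebuffer length: it starts from a generous claim (at least 8 MiB on an EFI/Generation 2 VM, 64 MiB otherwise) and halves the size until __request_region() succeeds or the attempt drops below 1 MiB. The shape of that loop in isolation, with a hypothetical try_reserve() standing in for __request_region():

    #include <stdbool.h>
    #include <stddef.h>

    /* Hypothetical: claims [base, base + size) and reports success. */
    extern bool try_reserve(unsigned long base, size_t size);

    /* Returns the size actually reserved, or 0 if nothing fit. */
    static size_t reserve_shrinking(unsigned long base, size_t want)
    {
            size_t size;

            for (size = want; size >= 0x100000; size >>= 1)
                    if (try_reserve(base, size))
                            return size;

            return 0;
    }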
 * @new:		If successful, supplied a pointer to the
@@ -1128,11 +1162,33 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
 			resource_size_t size, resource_size_t align,
 			bool fb_overlap_ok)
 {
-	struct resource *iter;
-	resource_size_t range_min, range_max, start, local_min, local_max;
+	struct resource *iter, *shadow;
+	resource_size_t range_min, range_max, start;
 	const char *dev_n = dev_name(&device_obj->device);
-	u32 fb_end = screen_info.lfb_base + (screen_info.lfb_size << 1);
-	int i;
+	int retval;
+
+	retval = -ENXIO;
+	down(&hyperv_mmio_lock);
+
+	/*
+	 * If overlaps with frame buffers are allowed, then first attempt to
+	 * make the allocation from within the reserved region. Because it
+	 * is already reserved, no shadow allocation is necessary.
+	 */
+	if (fb_overlap_ok && fb_mmio && !(min > fb_mmio->end) &&
+	    !(max < fb_mmio->start)) {
+
+		range_min = fb_mmio->start;
+		range_max = fb_mmio->end;
+		start = (range_min + align - 1) & ~(align - 1);
+		for (; start + size - 1 <= range_max; start += align) {
+			*new = request_mem_region_exclusive(start, size, dev_n);
+			if (*new) {
+				retval = 0;
+				goto exit;
+			}
+		}
+	}
 
 	for (iter = hyperv_mmio; iter; iter = iter->sibling) {
 		if ((iter->start >= max) || (iter->end <= min))
@@ -1140,46 +1196,56 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
 
 		range_min = iter->start;
 		range_max = iter->end;
-
-		/* If this range overlaps the frame buffer, split it into
-		   two tries. */
-		for (i = 0; i < 2; i++) {
-			local_min = range_min;
-			local_max = range_max;
-			if (fb_overlap_ok || (range_min >= fb_end) ||
-			    (range_max <= screen_info.lfb_base)) {
-				i++;
-			} else {
-				if ((range_min <= screen_info.lfb_base) &&
-				    (range_max >= screen_info.lfb_base)) {
-					/*
-					 * The frame buffer is in this window,
-					 * so trim this into the part that
-					 * preceeds the frame buffer.
-					 */
-					local_max = screen_info.lfb_base - 1;
-					range_min = fb_end;
-				} else {
-					range_min = fb_end;
-					continue;
-				}
+		start = (range_min + align - 1) & ~(align - 1);
+		for (; start + size - 1 <= range_max; start += align) {
+			shadow = __request_region(iter, start, size, NULL,
+						  IORESOURCE_BUSY);
+			if (!shadow)
+				continue;
+
+			*new = request_mem_region_exclusive(start, size, dev_n);
+			if (*new) {
+				shadow->name = (char *)*new;
+				retval = 0;
+				goto exit;
 			}
 
-			start = (local_min + align - 1) & ~(align - 1);
-			for (; start + size - 1 <= local_max; start += align) {
-				*new = request_mem_region_exclusive(start, size,
-								    dev_n);
-				if (*new)
-					return 0;
-			}
+			__release_region(iter, start, size);
 		}
 	}
 
-	return -ENXIO;
+exit:
+	up(&hyperv_mmio_lock);
+	return retval;
 }
 EXPORT_SYMBOL_GPL(vmbus_allocate_mmio);
 
 /**
+ * vmbus_free_mmio() - Free a memory-mapped I/O range.
+ * @start:		Base address of region to release.
+ * @size:		Size of the range to be allocated
+ *
+ * This function releases anything requested by
+ * vmbus_mmio_allocate().
+ */
+void vmbus_free_mmio(resource_size_t start, resource_size_t size)
+{
+	struct resource *iter;
+
+	down(&hyperv_mmio_lock);
+	for (iter = hyperv_mmio; iter; iter = iter->sibling) {
+		if ((iter->start >= start + size) || (iter->end <= start))
+			continue;
+
+		__release_region(iter, start, size);
+	}
+	release_mem_region(start, size);
+	up(&hyperv_mmio_lock);
+
+}
+EXPORT_SYMBOL_GPL(vmbus_free_mmio);
+
+/**
 * vmbus_cpu_number_to_vp_number() - Map CPU to VP.
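Both search loops in the rewritten vmbus_allocate_mmio() compute their first candidate with (range_min + align - 1) & ~(align - 1), the usual round-up idiom, valid when align is a power of two. A tiny worked example:

    #include <assert.h>
    #include <stdint.h>

    /* Round addr up to the next multiple of align (a power of two). */
    static uint64_t align_up(uint64_t addr, uint64_t align)
    {
            return (addr + align - 1) & ~(align - 1);
    }

    int main(void)
    {
            assert(align_up(0xf8001234, 0x1000) == 0xf8002000);
            assert(align_up(0xf8000000, 0x1000) == 0xf8000000);
            return 0;
    }

The shadow __request_region() into the hyperv_mmio tracking tree is the bookkeeping that lets vmbus_free_mmio() later find and release the claim from both resource trees under the same semaphore.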
 * @cpu_number: CPU number in Linux terms
 *
@@ -1219,8 +1285,10 @@ static int vmbus_acpi_add(struct acpi_device *device)
 		if (ACPI_FAILURE(result))
 			continue;
 
-		if (hyperv_mmio)
+		if (hyperv_mmio) {
+			vmbus_reserve_fb();
 			break;
+		}
 	}
 	ret_val = 0;
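Taken together, vmbus_allocate_mmio() and the new vmbus_free_mmio() give vmbus child drivers a symmetric claim/release pair serialized by hyperv_mmio_lock. A hedged usage sketch from a hypothetical child driver (the allocator also takes min/max search bounds, the parameters referenced as min and max in its body above; 0 and -1 open up the whole address space):

    static struct resource *example_mmio;   /* hypothetical driver state */

    /* Claim 1 MiB of MMIO, 1 MiB aligned, framebuffer overlap allowed. */
    static int example_claim(struct hv_device *hdev)
    {
            return vmbus_allocate_mmio(&example_mmio, hdev, 0, -1,
                                       0x100000, 0x100000, true);
    }

    /* Matching teardown. */
    static void example_release(void)
    {
            vmbus_free_mmio(example_mmio->start, resource_size(example_mmio));
    }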