From 8d91c1e411f55d7ea91b1183a2e9f8088fb4d5be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Fabian=20Silva=20Delgado?= Date: Tue, 15 Dec 2015 14:52:16 -0300 Subject: Linux-libre 4.3.2-gnu --- drivers/hv/channel.c | 4 +- drivers/hv/channel_mgmt.c | 51 ++++++- drivers/hv/hv.c | 152 +++++++++++++++----- drivers/hv/hv_balloon.c | 26 +++- drivers/hv/hv_fcopy.c | 21 +-- drivers/hv/hv_kvp.c | 3 + drivers/hv/hyperv_vmbus.h | 16 ++- drivers/hv/ring_buffer.c | 14 +- drivers/hv/vmbus_drv.c | 353 +++++++++++++++++++++++++++++++++++++--------- 9 files changed, 504 insertions(+), 136 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 603ce97e9..c4dcab048 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -601,6 +601,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, u64 aligned_data = 0; int ret; bool signal = false; + int num_vecs = ((bufferlen != 0) ? 3 : 1); /* Setup the descriptor */ @@ -618,7 +619,8 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); + ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs, + &signal); /* * Signalling the host is conditional on many factors: diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 4506a6623..652afd11a 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -204,6 +204,8 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) spin_lock_irqsave(&vmbus_connection.channel_lock, flags); list_del(&channel->listentry); spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); + + primary_channel = channel; } else { primary_channel = channel->primary_channel; spin_lock_irqsave(&primary_channel->lock, flags); @@ -211,6 +213,14 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) primary_channel->num_sc--; spin_unlock_irqrestore(&primary_channel->lock, flags); } + + /* + * We need to free the bit for init_vp_index() to work in the case + * of sub-channel, when we reload drivers like hv_netvsc. + */ + cpumask_clear_cpu(channel->target_cpu, + &primary_channel->alloced_cpus_in_node); + free_channel(channel); } @@ -347,6 +357,7 @@ enum { IDE = 0, SCSI, NIC, + ND_NIC, MAX_PERF_CHN, }; @@ -391,6 +402,7 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui struct vmbus_channel *primary = channel->primary_channel; int next_node; struct cpumask available_mask; + struct cpumask *alloced_mask; for (i = IDE; i < MAX_PERF_CHN; i++) { if (!memcmp(type_guid->b, hp_devs[i].guid, @@ -408,7 +420,6 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui * channel, bind it to cpu 0. */ channel->numa_node = 0; - cpumask_set_cpu(0, &channel->alloced_cpus_in_node); channel->target_cpu = 0; channel->target_vp = hv_context.vp_index[0]; return; @@ -433,21 +444,45 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui channel->numa_node = next_node; primary = channel; } + alloced_mask = &hv_context.hv_numa_map[primary->numa_node]; - if (cpumask_weight(&primary->alloced_cpus_in_node) == + if (cpumask_weight(alloced_mask) == cpumask_weight(cpumask_of_node(primary->numa_node))) { /* * We have cycled through all the CPUs in the node; * reset the alloced map. 
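 		 * The map is the per-node entry of hv_context.hv_numa_map[],
 		 * shared by every channel bound to this NUMA node.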
*/ - cpumask_clear(&primary->alloced_cpus_in_node); + cpumask_clear(alloced_mask); } - cpumask_xor(&available_mask, &primary->alloced_cpus_in_node, + cpumask_xor(&available_mask, alloced_mask, cpumask_of_node(primary->numa_node)); - cur_cpu = cpumask_next(-1, &available_mask); - cpumask_set_cpu(cur_cpu, &primary->alloced_cpus_in_node); + cur_cpu = -1; + while (true) { + cur_cpu = cpumask_next(cur_cpu, &available_mask); + if (cur_cpu >= nr_cpu_ids) { + cur_cpu = -1; + cpumask_copy(&available_mask, + cpumask_of_node(primary->numa_node)); + continue; + } + + /* + * NOTE: in the case of sub-channel, we clear the sub-channel + * related bit(s) in primary->alloced_cpus_in_node in + * hv_process_channel_removal(), so when we reload drivers + * like hv_netvsc in SMP guest, here we're able to re-allocate + * bit from primary->alloced_cpus_in_node. + */ + if (!cpumask_test_cpu(cur_cpu, + &primary->alloced_cpus_in_node)) { + cpumask_set_cpu(cur_cpu, + &primary->alloced_cpus_in_node); + cpumask_set_cpu(cur_cpu, alloced_mask); + break; + } + } channel->target_cpu = cur_cpu; channel->target_vp = hv_context.vp_index[cur_cpu]; @@ -469,6 +504,10 @@ void vmbus_initiate_unload(void) { struct vmbus_channel_message_header hdr; + /* Pre-Win2012R2 hosts don't support reconnect */ + if (vmbus_proto_version < VERSION_WIN8_1) + return; + init_completion(&vmbus_connection.unload_event); memset(&hdr, 0, sizeof(struct vmbus_channel_message_header)); hdr.msgtype = CHANNELMSG_UNLOAD; diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index d3943bcee..6341be873 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -93,11 +93,14 @@ static int query_hypervisor_info(void) */ static u64 do_hypercall(u64 control, void *input, void *output) { -#ifdef CONFIG_X86_64 - u64 hv_status = 0; u64 input_address = (input) ? virt_to_phys(input) : 0; u64 output_address = (output) ? virt_to_phys(output) : 0; void *hypercall_page = hv_context.hypercall_page; +#ifdef CONFIG_X86_64 + u64 hv_status = 0; + + if (!hypercall_page) + return (u64)ULLONG_MAX; __asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8"); __asm__ __volatile__("call *%3" : "=a" (hv_status) : @@ -112,13 +115,13 @@ static u64 do_hypercall(u64 control, void *input, void *output) u32 control_lo = control & 0xFFFFFFFF; u32 hv_status_hi = 1; u32 hv_status_lo = 1; - u64 input_address = (input) ? virt_to_phys(input) : 0; u32 input_address_hi = input_address >> 32; u32 input_address_lo = input_address & 0xFFFFFFFF; - u64 output_address = (output) ? virt_to_phys(output) : 0; u32 output_address_hi = output_address >> 32; u32 output_address_lo = output_address & 0xFFFFFFFF; - void *hypercall_page = hv_context.hypercall_page; + + if (!hypercall_page) + return (u64)ULLONG_MAX; __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi), "=a"(hv_status_lo) : "d" (control_hi), @@ -130,6 +133,56 @@ static u64 do_hypercall(u64 control, void *input, void *output) #endif /* !x86_64 */ } +#ifdef CONFIG_X86_64 +static cycle_t read_hv_clock_tsc(struct clocksource *arg) +{ + cycle_t current_tick; + struct ms_hyperv_tsc_page *tsc_pg = hv_context.tsc_page; + + if (tsc_pg->tsc_sequence != -1) { + /* + * Use the tsc page to compute the value. 
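+		 * The host bumps tsc_sequence whenever it rewrites
+		 * tsc_scale or tsc_offset; the loop below re-reads the
+		 * sequence after computing and retries on a mismatch,
+		 * giving a tear-free, seqcount-style read.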
+ */ + + while (1) { + cycle_t tmp; + u32 sequence = tsc_pg->tsc_sequence; + u64 cur_tsc; + u64 scale = tsc_pg->tsc_scale; + s64 offset = tsc_pg->tsc_offset; + + rdtscll(cur_tsc); + /* current_tick = ((cur_tsc *scale) >> 64) + offset */ + asm("mulq %3" + : "=d" (current_tick), "=a" (tmp) + : "a" (cur_tsc), "r" (scale)); + + current_tick += offset; + if (tsc_pg->tsc_sequence == sequence) + return current_tick; + + if (tsc_pg->tsc_sequence != -1) + continue; + /* + * Fallback using MSR method. + */ + break; + } + } + rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); + return current_tick; +} + +static struct clocksource hyperv_cs_tsc = { + .name = "hyperv_clocksource_tsc_page", + .rating = 425, + .read = read_hv_clock_tsc, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; +#endif + + /* * hv_init - Main initialization routine. * @@ -139,7 +192,9 @@ int hv_init(void) { int max_leaf; union hv_x64_msr_hypercall_contents hypercall_msr; + union hv_x64_msr_hypercall_contents tsc_msr; void *virtaddr = NULL; + void *va_tsc = NULL; memset(hv_context.synic_event_page, 0, sizeof(void *) * NR_CPUS); memset(hv_context.synic_message_page, 0, @@ -183,6 +238,22 @@ int hv_init(void) hv_context.hypercall_page = virtaddr; +#ifdef CONFIG_X86_64 + if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) { + va_tsc = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); + if (!va_tsc) + goto cleanup; + hv_context.tsc_page = va_tsc; + + rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); + + tsc_msr.enable = 1; + tsc_msr.guest_physical_address = vmalloc_to_pfn(va_tsc); + + wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); + clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); + } +#endif return 0; cleanup: @@ -216,6 +287,21 @@ void hv_cleanup(void) vfree(hv_context.hypercall_page); hv_context.hypercall_page = NULL; } + +#ifdef CONFIG_X86_64 + /* + * Cleanup the TSC page based CS. 
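+	 * Dropping the rating first lets the kernel re-select another
+	 * clocksource before this one is unregistered.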
+ */ + if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) { + clocksource_change_rating(&hyperv_cs_tsc, 10); + clocksource_unregister(&hyperv_cs_tsc); + + hypercall_msr.as_uint64 = 0; + wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64); + vfree(hv_context.tsc_page); + hv_context.tsc_page = NULL; + } +#endif } /* @@ -271,7 +357,7 @@ static int hv_ce_set_next_event(unsigned long delta, { cycle_t current_tick; - WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT); + WARN_ON(!clockevent_state_oneshot(evt)); rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); current_tick += delta; @@ -279,31 +365,24 @@ static int hv_ce_set_next_event(unsigned long delta, return 0; } -static void hv_ce_setmode(enum clock_event_mode mode, - struct clock_event_device *evt) +static int hv_ce_shutdown(struct clock_event_device *evt) +{ + wrmsrl(HV_X64_MSR_STIMER0_COUNT, 0); + wrmsrl(HV_X64_MSR_STIMER0_CONFIG, 0); + + return 0; +} + +static int hv_ce_set_oneshot(struct clock_event_device *evt) { union hv_timer_config timer_cfg; - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - /* unsupported */ - break; - - case CLOCK_EVT_MODE_ONESHOT: - timer_cfg.enable = 1; - timer_cfg.auto_enable = 1; - timer_cfg.sintx = VMBUS_MESSAGE_SINT; - wrmsrl(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64); - break; - - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - wrmsrl(HV_X64_MSR_STIMER0_COUNT, 0); - wrmsrl(HV_X64_MSR_STIMER0_CONFIG, 0); - break; - case CLOCK_EVT_MODE_RESUME: - break; - } + timer_cfg.enable = 1; + timer_cfg.auto_enable = 1; + timer_cfg.sintx = VMBUS_MESSAGE_SINT; + wrmsrl(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64); + + return 0; } static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu) @@ -318,7 +397,8 @@ static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu) * references to the hv_vmbus module making it impossible to unload. 
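 	 * set_state_shutdown/set_state_oneshot are the per-state
 	 * callbacks that replace the legacy set_mode hook.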
*/ - dev->set_mode = hv_ce_setmode; + dev->set_state_shutdown = hv_ce_shutdown; + dev->set_state_oneshot = hv_ce_set_oneshot; dev->set_next_event = hv_ce_set_next_event; } @@ -329,6 +409,13 @@ int hv_synic_alloc(void) size_t ced_size = sizeof(struct clock_event_device); int cpu; + hv_context.hv_numa_map = kzalloc(sizeof(struct cpumask) * nr_node_ids, + GFP_ATOMIC); + if (hv_context.hv_numa_map == NULL) { + pr_err("Unable to allocate NUMA map\n"); + goto err; + } + for_each_online_cpu(cpu) { hv_context.event_dpc[cpu] = kmalloc(size, GFP_ATOMIC); if (hv_context.event_dpc[cpu] == NULL) { @@ -342,6 +429,7 @@ int hv_synic_alloc(void) pr_err("Unable to allocate clock event device\n"); goto err; } + hv_init_clockevent_device(hv_context.clk_evt[cpu], cpu); hv_context.synic_message_page[cpu] = @@ -390,6 +478,7 @@ void hv_synic_free(void) { int cpu; + kfree(hv_context.hv_numa_map); for_each_online_cpu(cpu) hv_synic_free_cpu(cpu); } @@ -503,8 +592,7 @@ void hv_synic_cleanup(void *arg) /* Turn off clockevent device */ if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE) - hv_ce_setmode(CLOCK_EVT_MODE_SHUTDOWN, - hv_context.clk_evt[cpu]); + hv_ce_shutdown(hv_context.clk_evt[cpu]); rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); @@ -530,6 +618,4 @@ void hv_synic_cleanup(void *arg) rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64); sctrl.enable = 0; wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64); - - hv_synic_free_cpu(cpu); } diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 8a725cd69..b853b4b08 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -62,11 +62,13 @@ enum { DYNMEM_PROTOCOL_VERSION_1 = DYNMEM_MAKE_VERSION(0, 3), DYNMEM_PROTOCOL_VERSION_2 = DYNMEM_MAKE_VERSION(1, 0), + DYNMEM_PROTOCOL_VERSION_3 = DYNMEM_MAKE_VERSION(2, 0), DYNMEM_PROTOCOL_VERSION_WIN7 = DYNMEM_PROTOCOL_VERSION_1, DYNMEM_PROTOCOL_VERSION_WIN8 = DYNMEM_PROTOCOL_VERSION_2, + DYNMEM_PROTOCOL_VERSION_WIN10 = DYNMEM_PROTOCOL_VERSION_3, - DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN8 + DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN10 }; @@ -1296,13 +1298,25 @@ static void version_resp(struct hv_dynmem_device *dm, if (dm->next_version == 0) goto version_error; - dm->next_version = 0; memset(&version_req, 0, sizeof(struct dm_version_request)); version_req.hdr.type = DM_VERSION_REQUEST; version_req.hdr.size = sizeof(struct dm_version_request); version_req.hdr.trans_id = atomic_inc_return(&trans_id); - version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN7; - version_req.is_last_attempt = 1; + version_req.version.version = dm->next_version; + + /* + * Set the next version to try in case current version fails. + * Win7 protocol ought to be the last one to try. 
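+	 * Negotiation therefore steps down Win10 -> Win8 -> Win7,
+	 * and is_last_attempt is set only for the final Win7 request.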
+ */ + switch (version_req.version.version) { + case DYNMEM_PROTOCOL_VERSION_WIN8: + dm->next_version = DYNMEM_PROTOCOL_VERSION_WIN7; + version_req.is_last_attempt = 0; + break; + default: + dm->next_version = 0; + version_req.is_last_attempt = 1; + } ret = vmbus_sendpacket(dm->dev->channel, &version_req, sizeof(struct dm_version_request), @@ -1442,7 +1456,7 @@ static int balloon_probe(struct hv_device *dev, dm_device.dev = dev; dm_device.state = DM_INITIALIZING; - dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7; + dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8; init_completion(&dm_device.host_event); init_completion(&dm_device.config_event); INIT_LIST_HEAD(&dm_device.ha_region_list); @@ -1474,7 +1488,7 @@ static int balloon_probe(struct hv_device *dev, version_req.hdr.type = DM_VERSION_REQUEST; version_req.hdr.size = sizeof(struct dm_version_request); version_req.hdr.trans_id = atomic_inc_return(&trans_id); - version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN8; + version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN10; version_req.is_last_attempt = 0; ret = vmbus_sendpacket(dev->channel, &version_req, diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index b50dd330c..db4b887b8 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -116,7 +116,7 @@ static int fcopy_handle_handshake(u32 version) static void fcopy_send_data(struct work_struct *dummy) { - struct hv_start_fcopy smsg_out; + struct hv_start_fcopy *smsg_out = NULL; int operation = fcopy_transaction.fcopy_msg->operation; struct hv_start_fcopy *smsg_in; void *out_src; @@ -136,21 +136,24 @@ static void fcopy_send_data(struct work_struct *dummy) switch (operation) { case START_FILE_COPY: out_len = sizeof(struct hv_start_fcopy); - memset(&smsg_out, 0, out_len); - smsg_out.hdr.operation = operation; + smsg_out = kzalloc(sizeof(*smsg_out), GFP_KERNEL); + if (!smsg_out) + return; + + smsg_out->hdr.operation = operation; smsg_in = (struct hv_start_fcopy *)fcopy_transaction.fcopy_msg; utf16s_to_utf8s((wchar_t *)smsg_in->file_name, W_MAX_PATH, UTF16_LITTLE_ENDIAN, - (__u8 *)&smsg_out.file_name, W_MAX_PATH - 1); + (__u8 *)&smsg_out->file_name, W_MAX_PATH - 1); utf16s_to_utf8s((wchar_t *)smsg_in->path_name, W_MAX_PATH, UTF16_LITTLE_ENDIAN, - (__u8 *)&smsg_out.path_name, W_MAX_PATH - 1); + (__u8 *)&smsg_out->path_name, W_MAX_PATH - 1); - smsg_out.copy_flags = smsg_in->copy_flags; - smsg_out.file_size = smsg_in->file_size; - out_src = &smsg_out; + smsg_out->copy_flags = smsg_in->copy_flags; + smsg_out->file_size = smsg_in->file_size; + out_src = smsg_out; break; default: @@ -168,6 +171,8 @@ static void fcopy_send_data(struct work_struct *dummy) fcopy_transaction.state = HVUTIL_READY; } } + kfree(smsg_out); + return; } diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index d85798d59..74c38a9f3 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -353,6 +353,9 @@ kvp_send_key(struct work_struct *dummy) return; message = kzalloc(sizeof(*message), GFP_KERNEL); + if (!message) + return; + message->kvp_hdr.operation = operation; message->kvp_hdr.pool = pool; in_msg = kvp_transaction.kvp_msg; diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index cddc0c9f6..3d70e36c9 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -141,7 +141,7 @@ struct hv_port_info { struct { u32 target_sint; u32 target_vp; - u16 base_flag_bumber; + u16 base_flag_number; u16 flag_count; u32 rsvdz; } event_port_info; @@ -517,6 +517,7 @@ struct hv_context { u64 guestid; void 
*hypercall_page; + void *tsc_page; bool synic_initialized; @@ -551,10 +552,23 @@ struct hv_context { * Support PV clockevent device. */ struct clock_event_device *clk_evt[NR_CPUS]; + /* + * To manage allocations in a NUMA node. + * Array indexed by numa node ID. + */ + struct cpumask *hv_numa_map; }; extern struct hv_context hv_context; +struct ms_hyperv_tsc_page { + volatile u32 tsc_sequence; + u32 reserved1; + volatile u64 tsc_scale; + volatile s64 tsc_offset; + u64 reserved2[509]; +}; + struct hv_ring_buffer_debug_info { u32 current_interrupt_mask; u32 current_read_index; diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 6361d124f..70a1a9a22 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -103,10 +103,9 @@ static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi) * there is room for the producer to send the pending packet. */ -static bool hv_need_to_signal_on_read(u32 old_rd, - struct hv_ring_buffer_info *rbi) +static bool hv_need_to_signal_on_read(u32 prev_write_sz, + struct hv_ring_buffer_info *rbi) { - u32 prev_write_sz; u32 cur_write_sz; u32 r_size; u32 write_loc = rbi->ring_buffer->write_index; @@ -123,10 +122,6 @@ static bool hv_need_to_signal_on_read(u32 old_rd, cur_write_sz = write_loc >= read_loc ? r_size - (write_loc - read_loc) : read_loc - write_loc; - prev_write_sz = write_loc >= old_rd ? r_size - (write_loc - old_rd) : - old_rd - write_loc; - - if ((prev_write_sz < pending_sz) && (cur_write_sz >= pending_sz)) return true; @@ -517,7 +512,6 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, void *buffer, u32 next_read_location = 0; u64 prev_indices = 0; unsigned long flags; - u32 old_read; if (buflen <= 0) return -EINVAL; @@ -528,8 +522,6 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, void *buffer, &bytes_avail_toread, &bytes_avail_towrite); - old_read = bytes_avail_toread; - /* Make sure there is something to read */ if (bytes_avail_toread < buflen) { spin_unlock_irqrestore(&inring_info->ring_lock, flags); @@ -560,7 +552,7 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, void *buffer, spin_unlock_irqrestore(&inring_info->ring_lock, flags); - *signal = hv_need_to_signal_on_read(old_read, inring_info); + *signal = hv_need_to_signal_on_read(bytes_avail_towrite, inring_info); return 0; } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index cf204005e..f19b6f7a4 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -39,6 +39,8 @@ #include #include #include +#include +#include #include "hyperv_vmbus.h" static struct acpi_device *hv_acpi_dev; @@ -48,12 +50,18 @@ static struct completion probe_event; static int irq; -static int hyperv_panic_event(struct notifier_block *nb, - unsigned long event, void *ptr) +static void hyperv_report_panic(struct pt_regs *regs) { - struct pt_regs *regs; + static bool panic_reported; - regs = current_pt_regs(); + /* + * We prefer to report panic on 'die' chain as we have proper + * registers to report, but if we miss it (e.g. on BUG()) we need + * to report it on 'panic'. 
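+	 * The static panic_reported flag ensures the crash MSRs are
+	 * written only once, even when both notifiers fire.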
+ */ + if (panic_reported) + return; + panic_reported = true; wrmsrl(HV_X64_MSR_CRASH_P0, regs->ip); wrmsrl(HV_X64_MSR_CRASH_P1, regs->ax); @@ -65,18 +73,37 @@ static int hyperv_panic_event(struct notifier_block *nb, * Let Hyper-V know there is crash data available */ wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); +} + +static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, + void *args) +{ + struct pt_regs *regs; + + regs = current_pt_regs(); + + hyperv_report_panic(regs); return NOTIFY_DONE; } +static int hyperv_die_event(struct notifier_block *nb, unsigned long val, + void *args) +{ + struct die_args *die = (struct die_args *)args; + struct pt_regs *regs = die->regs; + + hyperv_report_panic(regs); + return NOTIFY_DONE; +} + +static struct notifier_block hyperv_die_block = { + .notifier_call = hyperv_die_event, +}; static struct notifier_block hyperv_panic_block = { .notifier_call = hyperv_panic_event, }; -struct resource hyperv_mmio = { - .name = "hyperv mmio", - .flags = IORESOURCE_MEM, -}; -EXPORT_SYMBOL_GPL(hyperv_mmio); +struct resource *hyperv_mmio; static int vmbus_exists(void) { @@ -414,6 +441,43 @@ static ssize_t in_write_bytes_avail_show(struct device *dev, } static DEVICE_ATTR_RO(in_write_bytes_avail); +static ssize_t channel_vp_mapping_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + struct vmbus_channel *channel = hv_dev->channel, *cur_sc; + unsigned long flags; + int buf_size = PAGE_SIZE, n_written, tot_written; + struct list_head *cur; + + if (!channel) + return -ENODEV; + + tot_written = snprintf(buf, buf_size, "%u:%u\n", + channel->offermsg.child_relid, channel->target_cpu); + + spin_lock_irqsave(&channel->lock, flags); + + list_for_each(cur, &channel->sc_list) { + if (tot_written >= buf_size - 1) + break; + + cur_sc = list_entry(cur, struct vmbus_channel, sc_list); + n_written = scnprintf(buf + tot_written, + buf_size - tot_written, + "%u:%u\n", + cur_sc->offermsg.child_relid, + cur_sc->target_cpu); + tot_written += n_written; + } + + spin_unlock_irqrestore(&channel->lock, flags); + + return tot_written; +} +static DEVICE_ATTR_RO(channel_vp_mapping); + /* Set up per device attributes in /sys/bus/vmbus/devices/ */ static struct attribute *vmbus_attrs[] = { &dev_attr_id.attr, @@ -438,6 +502,7 @@ static struct attribute *vmbus_attrs[] = { &dev_attr_in_write_index.attr, &dev_attr_in_read_bytes_avail.attr, &dev_attr_in_write_bytes_avail.attr, + &dev_attr_channel_vp_mapping.attr, NULL, }; ATTRIBUTE_GROUPS(vmbus); @@ -763,38 +828,6 @@ static void vmbus_isr(void) } } -#ifdef CONFIG_HOTPLUG_CPU -static int hyperv_cpu_disable(void) -{ - return -ENOSYS; -} - -static void hv_cpu_hotplug_quirk(bool vmbus_loaded) -{ - static void *previous_cpu_disable; - - /* - * Offlining a CPU when running on newer hypervisors (WS2012R2, Win8, - * ...) is not supported at this moment as channel interrupts are - * distributed across all of them. - */ - - if ((vmbus_proto_version == VERSION_WS2008) || - (vmbus_proto_version == VERSION_WIN7)) - return; - - if (vmbus_loaded) { - previous_cpu_disable = smp_ops.cpu_disable; - smp_ops.cpu_disable = hyperv_cpu_disable; - pr_notice("CPU offlining is not supported by hypervisor\n"); - } else if (previous_cpu_disable) - smp_ops.cpu_disable = previous_cpu_disable; -} -#else -static void hv_cpu_hotplug_quirk(bool vmbus_loaded) -{ -} -#endif /* * vmbus_bus_init -Main vmbus driver initialization routine. 
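  * Channel interrupts on Win8 and newer hosts are distributed across
  * CPUs, so this path blocks CPU offlining via cpu_hotplug_disable()
  * for VMBus protocol versions above WIN7.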
@@ -836,12 +869,14 @@ static int vmbus_bus_init(int irq) if (ret) goto err_alloc; - hv_cpu_hotplug_quirk(true); + if (vmbus_proto_version > VERSION_WIN7) + cpu_hotplug_disable(); /* * Only register if the crash MSRs are available */ - if (ms_hyperv.features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + register_die_notifier(&hyperv_die_block); atomic_notifier_chain_register(&panic_notifier_list, &hyperv_panic_block); } @@ -863,8 +898,8 @@ err_cleanup: } /** - * __vmbus_child_driver_register - Register a vmbus's driver - * @drv: Pointer to driver structure you want to register + * __vmbus_child_driver_register() - Register a vmbus's driver + * @hv_driver: Pointer to driver structure you want to register * @owner: owner module of the drv * @mod_name: module name string * @@ -896,7 +931,8 @@ EXPORT_SYMBOL_GPL(__vmbus_driver_register); /** * vmbus_driver_unregister() - Unregister a vmbus's driver - * @drv: Pointer to driver structure you want to un-register + * @hv_driver: Pointer to driver structure you want to + * un-register * * Un-register the given driver that was previous registered with a call to * vmbus_driver_register() @@ -982,30 +1018,184 @@ void vmbus_device_unregister(struct hv_device *device_obj) /* - * VMBUS is an acpi enumerated device. Get the the information we + * VMBUS is an acpi enumerated device. Get the information we * need from DSDT. */ - +#define VTPM_BASE_ADDRESS 0xfed40000 static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx) { + resource_size_t start = 0; + resource_size_t end = 0; + struct resource *new_res; + struct resource **old_res = &hyperv_mmio; + struct resource **prev_res = NULL; + switch (res->type) { case ACPI_RESOURCE_TYPE_IRQ: irq = res->data.irq.interrupts[0]; + return AE_OK; + + /* + * "Address" descriptors are for bus windows. Ignore + * "memory" descriptors, which are for registers on + * devices. + */ + case ACPI_RESOURCE_TYPE_ADDRESS32: + start = res->data.address32.address.minimum; + end = res->data.address32.address.maximum; break; case ACPI_RESOURCE_TYPE_ADDRESS64: - hyperv_mmio.start = res->data.address64.address.minimum; - hyperv_mmio.end = res->data.address64.address.maximum; + start = res->data.address64.address.minimum; + end = res->data.address64.address.maximum; break; + + default: + /* Unused resource type */ + return AE_OK; + } + /* + * Ignore ranges that are below 1MB, as they're not + * necessary or useful here. + */ + if (end < 0x100000) + return AE_OK; + + new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC); + if (!new_res) + return AE_NO_MEMORY; + + /* If this range overlaps the virtual TPM, truncate it. 
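+	 * VTPM_BASE_ADDRESS (0xfed40000) is the conventional TPM MMIO
+	 * window; truncating keeps it out of the VMBus MMIO pool.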
*/ + if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS) + end = VTPM_BASE_ADDRESS; + + new_res->name = "hyperv mmio"; + new_res->flags = IORESOURCE_MEM; + new_res->start = start; + new_res->end = end; + + do { + if (!*old_res) { + *old_res = new_res; + break; + } + + if ((*old_res)->end < new_res->start) { + new_res->sibling = *old_res; + if (prev_res) + (*prev_res)->sibling = new_res; + *old_res = new_res; + break; + } + + prev_res = old_res; + old_res = &(*old_res)->sibling; + + } while (1); return AE_OK; } +static int vmbus_acpi_remove(struct acpi_device *device) +{ + struct resource *cur_res; + struct resource *next_res; + + if (hyperv_mmio) { + for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) { + next_res = cur_res->sibling; + kfree(cur_res); + } + } + + return 0; +} + +/** + * vmbus_allocate_mmio() - Pick a memory-mapped I/O range. + * @new: If successful, supplied a pointer to the + * allocated MMIO space. + * @device_obj: Identifies the caller + * @min: Minimum guest physical address of the + * allocation + * @max: Maximum guest physical address + * @size: Size of the range to be allocated + * @align: Alignment of the range to be allocated + * @fb_overlap_ok: Whether this allocation can be allowed + * to overlap the video frame buffer. + * + * This function walks the resources granted to VMBus by the + * _CRS object in the ACPI namespace underneath the parent + * "bridge" whether that's a root PCI bus in the Generation 1 + * case or a Module Device in the Generation 2 case. It then + * attempts to allocate from the global MMIO pool in a way that + * matches the constraints supplied in these parameters and by + * that _CRS. + * + * Return: 0 on success, -errno on failure + */ +int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, + resource_size_t min, resource_size_t max, + resource_size_t size, resource_size_t align, + bool fb_overlap_ok) +{ + struct resource *iter; + resource_size_t range_min, range_max, start, local_min, local_max; + const char *dev_n = dev_name(&device_obj->device); + u32 fb_end = screen_info.lfb_base + (screen_info.lfb_size << 1); + int i; + + for (iter = hyperv_mmio; iter; iter = iter->sibling) { + if ((iter->start >= max) || (iter->end <= min)) + continue; + + range_min = iter->start; + range_max = iter->end; + + /* If this range overlaps the frame buffer, split it into + two tries. */ + for (i = 0; i < 2; i++) { + local_min = range_min; + local_max = range_max; + if (fb_overlap_ok || (range_min >= fb_end) || + (range_max <= screen_info.lfb_base)) { + i++; + } else { + if ((range_min <= screen_info.lfb_base) && + (range_max >= screen_info.lfb_base)) { + /* + * The frame buffer is in this window, + * so trim this into the part that + * preceeds the frame buffer. + */ + local_max = screen_info.lfb_base - 1; + range_min = fb_end; + } else { + range_min = fb_end; + continue; + } + } + + start = (local_min + align - 1) & ~(align - 1); + for (; start + size - 1 <= local_max; start += align) { + *new = request_mem_region_exclusive(start, size, + dev_n); + if (*new) + return 0; + } + } + } + + return -ENXIO; +} +EXPORT_SYMBOL_GPL(vmbus_allocate_mmio); + static int vmbus_acpi_add(struct acpi_device *device) { acpi_status result; int ret_val = -ENODEV; + struct acpi_device *ancestor; hv_acpi_dev = device; @@ -1015,35 +1205,27 @@ static int vmbus_acpi_add(struct acpi_device *device) if (ACPI_FAILURE(result)) goto acpi_walk_err; /* - * The parent of the vmbus acpi device (Gen2 firmware) is the VMOD that - * has the mmio ranges. 
Get that. + * Some ancestor of the vmbus acpi device (Gen1 or Gen2 + * firmware) is the VMOD that has the mmio ranges. Get that. */ - if (device->parent) { - result = acpi_walk_resources(device->parent->handle, - METHOD_NAME__CRS, - vmbus_walk_resources, NULL); + for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) { + result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS, + vmbus_walk_resources, NULL); if (ACPI_FAILURE(result)) - goto acpi_walk_err; - if (hyperv_mmio.start && hyperv_mmio.end) - request_resource(&iomem_resource, &hyperv_mmio); + continue; + if (hyperv_mmio) + break; } ret_val = 0; acpi_walk_err: complete(&probe_event); + if (ret_val) + vmbus_acpi_remove(device); return ret_val; } -static int vmbus_acpi_remove(struct acpi_device *device) -{ - int ret = 0; - - if (hyperv_mmio.start && hyperv_mmio.end) - ret = release_resource(&hyperv_mmio); - return ret; -} - static const struct acpi_device_id vmbus_acpi_device_ids[] = { {"VMBUS", 0}, {"VMBus", 0}, @@ -1060,6 +1242,29 @@ static struct acpi_driver vmbus_acpi_driver = { }, }; +static void hv_kexec_handler(void) +{ + int cpu; + + hv_synic_clockevents_cleanup(); + vmbus_initiate_unload(); + for_each_online_cpu(cpu) + smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1); + hv_cleanup(); +}; + +static void hv_crash_handler(struct pt_regs *regs) +{ + vmbus_initiate_unload(); + /* + * In crash handler we can't schedule synic cleanup for all CPUs, + * doing the cleanup for current CPU only. This should be sufficient + * for kdump. + */ + hv_synic_cleanup(NULL); + hv_cleanup(); +}; + static int __init hv_acpi_init(void) { int ret, t; @@ -1092,6 +1297,9 @@ static int __init hv_acpi_init(void) if (ret) goto cleanup; + hv_setup_kexec_handler(hv_kexec_handler); + hv_setup_crash_handler(hv_crash_handler); + return 0; cleanup: @@ -1104,13 +1312,16 @@ static void __exit vmbus_exit(void) { int cpu; + hv_remove_kexec_handler(); + hv_remove_crash_handler(); vmbus_connection.conn_state = DISCONNECTED; hv_synic_clockevents_cleanup(); vmbus_disconnect(); hv_remove_vmbus_irq(); tasklet_kill(&msg_dpc); vmbus_free_channels(); - if (ms_hyperv.features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + unregister_die_notifier(&hyperv_die_block); atomic_notifier_chain_unregister(&panic_notifier_list, &hyperv_panic_block); } @@ -1120,8 +1331,10 @@ static void __exit vmbus_exit(void) tasklet_kill(hv_context.event_dpc[cpu]); smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1); } + hv_synic_free(); acpi_bus_unregister_driver(&vmbus_acpi_driver); - hv_cpu_hotplug_quirk(false); + if (vmbus_proto_version > VERSION_WIN7) + cpu_hotplug_enable(); } -- cgit v1.2.3