diff options
Diffstat (limited to 'drivers/net/hyperv')
-rw-r--r-- | drivers/net/hyperv/hyperv_net.h | 33 | ||||
-rw-r--r-- | drivers/net/hyperv/netvsc.c | 43 | ||||
-rw-r--r-- | drivers/net/hyperv/netvsc_drv.c | 166 | ||||
-rw-r--r-- | drivers/net/hyperv/rndis_filter.c | 37 |
4 files changed, 258 insertions, 21 deletions
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index dd4544085..5fa98f599 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -162,6 +162,7 @@ struct netvsc_device_info { bool link_state; /* 0 - link up, 1 - link down */ int ring_size; u32 max_num_vrss_chns; + u32 num_chn; }; enum rndis_device_state { @@ -541,6 +542,29 @@ union nvsp_2_message_uber { struct nvsp_2_free_rxbuf free_rxbuf; } __packed; +struct nvsp_4_send_vf_association { + /* 1: allocated, serial number is valid. 0: not allocated */ + u32 allocated; + + /* Serial number of the VF to team with */ + u32 serial; +} __packed; + +enum nvsp_vm_datapath { + NVSP_DATAPATH_SYNTHETIC = 0, + NVSP_DATAPATH_VF, + NVSP_DATAPATH_MAX +}; + +struct nvsp_4_sw_datapath { + u32 active_datapath; /* active data path in VM */ +} __packed; + +union nvsp_4_message_uber { + struct nvsp_4_send_vf_association vf_assoc; + struct nvsp_4_sw_datapath active_dp; +} __packed; + enum nvsp_subchannel_operation { NVSP_SUBCHANNEL_NONE = 0, NVSP_SUBCHANNEL_ALLOCATE, @@ -578,6 +602,7 @@ union nvsp_all_messages { union nvsp_message_init_uber init_msg; union nvsp_1_message_uber v1_msg; union nvsp_2_message_uber v2_msg; + union nvsp_4_message_uber v4_msg; union nvsp_5_message_uber v5_msg; } __packed; @@ -589,6 +614,7 @@ struct nvsp_message { #define NETVSC_MTU 65536 +#define NETVSC_MTU_MIN 68 #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ @@ -670,6 +696,8 @@ struct netvsc_device { u32 send_table[VRSS_SEND_TAB_SIZE]; u32 max_chn; u32 num_chn; + spinlock_t sc_lock; /* Protects num_sc_offered variable */ + u32 num_sc_offered; atomic_t queue_sends[NR_CPUS]; /* Holds rndis device info */ @@ -688,6 +716,11 @@ struct netvsc_device { /* The net device context */ struct net_device_context *nd_ctx; + + /* 1: allocated, serial number is valid. 0: not allocated */ + u32 vf_alloc; + /* Serial number of the VF to team with */ + u32 vf_serial; }; /* NdisInitialize message */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 23126a74f..51e4c0fd0 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -453,13 +453,16 @@ static int negotiate_nvsp_ver(struct hv_device *device, if (nvsp_ver == NVSP_PROTOCOL_VERSION_1) return 0; - /* NVSPv2 only: Send NDIS config */ + /* NVSPv2 or later: Send NDIS config */ memset(init_packet, 0, sizeof(struct nvsp_message)); init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG; init_packet->msg.v2_msg.send_ndis_config.mtu = net_device->ndev->mtu + ETH_HLEN; init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1; + if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) + init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1; + ret = vmbus_sendpacket(device->channel, init_packet, sizeof(struct nvsp_message), (unsigned long)init_packet, @@ -1064,11 +1067,10 @@ static void netvsc_receive(struct netvsc_device *net_device, static void netvsc_send_table(struct hv_device *hdev, - struct vmpacket_descriptor *vmpkt) + struct nvsp_message *nvmsg) { struct netvsc_device *nvscdev; struct net_device *ndev; - struct nvsp_message *nvmsg; int i; u32 count, *tab; @@ -1077,12 +1079,6 @@ static void netvsc_send_table(struct hv_device *hdev, return; ndev = nvscdev->ndev; - nvmsg = (struct nvsp_message *)((unsigned long)vmpkt + - (vmpkt->offset8 << 3)); - - if (nvmsg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE) - return; - count = nvmsg->msg.v5_msg.send_table.count; if (count != VRSS_SEND_TAB_SIZE) { netdev_err(ndev, "Received wrong send-table size:%u\n", count); @@ -1096,6 +1092,28 @@ static void netvsc_send_table(struct hv_device *hdev, nvscdev->send_table[i] = tab[i]; } +static void netvsc_send_vf(struct netvsc_device *nvdev, + struct nvsp_message *nvmsg) +{ + nvdev->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated; + nvdev->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial; +} + +static inline void netvsc_receive_inband(struct hv_device *hdev, + struct netvsc_device *nvdev, + struct nvsp_message *nvmsg) +{ + switch (nvmsg->hdr.msg_type) { + case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE: + netvsc_send_table(hdev, nvmsg); + break; + + case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION: + netvsc_send_vf(nvdev, nvmsg); + break; + } +} + void netvsc_channel_cb(void *context) { int ret; @@ -1108,6 +1126,7 @@ void netvsc_channel_cb(void *context) unsigned char *buffer; int bufferlen = NETVSC_PACKET_SIZE; struct net_device *ndev; + struct nvsp_message *nvmsg; if (channel->primary_channel != NULL) device = channel->primary_channel->device_obj; @@ -1126,6 +1145,8 @@ void netvsc_channel_cb(void *context) if (ret == 0) { if (bytes_recvd > 0) { desc = (struct vmpacket_descriptor *)buffer; + nvmsg = (struct nvsp_message *)((unsigned long) + desc + (desc->offset8 << 3)); switch (desc->type) { case VM_PKT_COMP: netvsc_send_completion(net_device, @@ -1138,7 +1159,9 @@ void netvsc_channel_cb(void *context) break; case VM_PKT_DATA_INBAND: - netvsc_send_table(device, desc); + netvsc_receive_inband(device, + net_device, + nvmsg); break; default: diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 358475ed9..409b48e1e 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -106,7 +106,7 @@ static int netvsc_open(struct net_device *net) return ret; } - netif_tx_start_all_queues(net); + netif_tx_wake_all_queues(net); nvdev = hv_get_drvdata(device_obj); rdev = nvdev->extension; @@ -120,15 +120,56 @@ static int netvsc_close(struct net_device *net) { struct net_device_context *net_device_ctx = netdev_priv(net); struct hv_device *device_obj = net_device_ctx->device_ctx; + struct netvsc_device *nvdev = hv_get_drvdata(device_obj); int ret; + u32 aread, awrite, i, msec = 10, retry = 0, retry_max = 20; + struct vmbus_channel *chn; netif_tx_disable(net); /* Make sure netvsc_set_multicast_list doesn't re-enable filter! */ cancel_work_sync(&net_device_ctx->work); ret = rndis_filter_close(device_obj); - if (ret != 0) + if (ret != 0) { netdev_err(net, "unable to close device (ret %d).\n", ret); + return ret; + } + + /* Ensure pending bytes in ring are read */ + while (true) { + aread = 0; + for (i = 0; i < nvdev->num_chn; i++) { + chn = nvdev->chn_table[i]; + if (!chn) + continue; + + hv_get_ringbuffer_availbytes(&chn->inbound, &aread, + &awrite); + + if (aread) + break; + + hv_get_ringbuffer_availbytes(&chn->outbound, &aread, + &awrite); + + if (aread) + break; + } + + retry++; + if (retry > retry_max || aread == 0) + break; + + msleep(msec); + + if (msec < 1000) + msec *= 2; + } + + if (aread) { + netdev_err(net, "Ring buffer not empty after closing rndis\n"); + ret = -ETIMEDOUT; + } return ret; } @@ -198,7 +239,7 @@ static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb) struct flow_keys flow; int data_len; - if (!skb_flow_dissect_flow_keys(skb, &flow) || + if (!skb_flow_dissect_flow_keys(skb, &flow, 0) || !(flow.basic.n_proto == htons(ETH_P_IP) || flow.basic.n_proto == htons(ETH_P_IPV6))) return false; @@ -729,6 +770,104 @@ static void netvsc_get_channels(struct net_device *net, } } +static int netvsc_set_channels(struct net_device *net, + struct ethtool_channels *channels) +{ + struct net_device_context *net_device_ctx = netdev_priv(net); + struct hv_device *dev = net_device_ctx->device_ctx; + struct netvsc_device *nvdev = hv_get_drvdata(dev); + struct netvsc_device_info device_info; + u32 num_chn; + u32 max_chn; + int ret = 0; + bool recovering = false; + + if (!nvdev || nvdev->destroy) + return -ENODEV; + + num_chn = nvdev->num_chn; + max_chn = min_t(u32, nvdev->max_chn, num_online_cpus()); + + if (nvdev->nvsp_version < NVSP_PROTOCOL_VERSION_5) { + pr_info("vRSS unsupported before NVSP Version 5\n"); + return -EINVAL; + } + + /* We do not support rx, tx, or other */ + if (!channels || + channels->rx_count || + channels->tx_count || + channels->other_count || + (channels->combined_count < 1)) + return -EINVAL; + + if (channels->combined_count > max_chn) { + pr_info("combined channels too high, using %d\n", max_chn); + channels->combined_count = max_chn; + } + + ret = netvsc_close(net); + if (ret) + goto out; + + do_set: + nvdev->start_remove = true; + rndis_filter_device_remove(dev); + + nvdev->num_chn = channels->combined_count; + + net_device_ctx->device_ctx = dev; + hv_set_drvdata(dev, net); + + memset(&device_info, 0, sizeof(device_info)); + device_info.num_chn = nvdev->num_chn; /* passed to RNDIS */ + device_info.ring_size = ring_size; + device_info.max_num_vrss_chns = max_num_vrss_chns; + + ret = rndis_filter_device_add(dev, &device_info); + if (ret) { + if (recovering) { + netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); + return ret; + } + goto recover; + } + + nvdev = hv_get_drvdata(dev); + + ret = netif_set_real_num_tx_queues(net, nvdev->num_chn); + if (ret) { + if (recovering) { + netdev_err(net, "could not set tx queue count (ret %d)\n", ret); + return ret; + } + goto recover; + } + + ret = netif_set_real_num_rx_queues(net, nvdev->num_chn); + if (ret) { + if (recovering) { + netdev_err(net, "could not set rx queue count (ret %d)\n", ret); + return ret; + } + goto recover; + } + + out: + netvsc_open(net); + + return ret; + + recover: + /* If the above failed, we attempt to recover through the same + * process but with the original number of channels. + */ + netdev_err(net, "could not set channels, recovering\n"); + recovering = true; + channels->combined_count = num_chn; + goto do_set; +} + static int netvsc_change_mtu(struct net_device *ndev, int mtu) { struct net_device_context *ndevctx = netdev_priv(ndev); @@ -736,6 +875,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) struct netvsc_device *nvdev = hv_get_drvdata(hdev); struct netvsc_device_info device_info; int limit = ETH_DATA_LEN; + int ret = 0; if (nvdev == NULL || nvdev->destroy) return -ENODEV; @@ -743,25 +883,31 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) limit = NETVSC_MTU - ETH_HLEN; - /* Hyper-V hosts don't support MTU < ETH_DATA_LEN (1500) */ - if (mtu < ETH_DATA_LEN || mtu > limit) + if (mtu < NETVSC_MTU_MIN || mtu > limit) return -EINVAL; + ret = netvsc_close(ndev); + if (ret) + goto out; + nvdev->start_remove = true; - cancel_work_sync(&ndevctx->work); - netif_tx_disable(ndev); rndis_filter_device_remove(hdev); ndev->mtu = mtu; ndevctx->device_ctx = hdev; hv_set_drvdata(hdev, ndev); + + memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; + device_info.num_chn = nvdev->num_chn; device_info.max_num_vrss_chns = max_num_vrss_chns; rndis_filter_device_add(hdev, &device_info); - netif_tx_wake_all_queues(ndev); - return 0; +out: + netvsc_open(ndev); + + return ret; } static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net, @@ -844,6 +990,7 @@ static const struct ethtool_ops ethtool_ops = { .get_drvinfo = netvsc_get_drvinfo, .get_link = ethtool_op_get_link, .get_channels = netvsc_get_channels, + .set_channels = netvsc_set_channels, }; static const struct net_device_ops device_ops = { @@ -977,6 +1124,7 @@ static int netvsc_probe(struct hv_device *dev, net->needed_headroom = max_needed_headroom; /* Notify the netvsc driver of the new device */ + memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; device_info.max_num_vrss_chns = max_num_vrss_chns; ret = rndis_filter_device_add(dev, &device_info); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 236aeb76e..5931a799a 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -984,9 +984,16 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) struct netvsc_device *nvscdev; u16 chn_index = new_sc->offermsg.offer.sub_channel_index; int ret; + unsigned long flags; nvscdev = hv_get_drvdata(new_sc->primary_channel->device_obj); + spin_lock_irqsave(&nvscdev->sc_lock, flags); + nvscdev->num_sc_offered--; + spin_unlock_irqrestore(&nvscdev->sc_lock, flags); + if (nvscdev->num_sc_offered == 0) + complete(&nvscdev->channel_init_wait); + if (chn_index >= nvscdev->num_chn) return; @@ -1015,8 +1022,10 @@ int rndis_filter_device_add(struct hv_device *dev, u32 rsscap_size = sizeof(struct ndis_recv_scale_cap); u32 mtu, size; u32 num_rss_qs; + u32 sc_delta; const struct cpumask *node_cpu_mask; u32 num_possible_rss_qs; + unsigned long flags; rndis_device = get_rndis_device(); if (!rndis_device) @@ -1039,6 +1048,8 @@ int rndis_filter_device_add(struct hv_device *dev, net_device->max_chn = 1; net_device->num_chn = 1; + spin_lock_init(&net_device->sc_lock); + net_device->extension = rndis_device; rndis_device->net_dev = net_device; @@ -1054,7 +1065,7 @@ int rndis_filter_device_add(struct hv_device *dev, ret = rndis_filter_query_device(rndis_device, RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE, &mtu, &size); - if (ret == 0 && size == sizeof(u32)) + if (ret == 0 && size == sizeof(u32) && mtu < net_device->ndev->mtu) net_device->ndev->mtu = mtu; /* Get the mac address */ @@ -1114,7 +1125,15 @@ int rndis_filter_device_add(struct hv_device *dev, */ node_cpu_mask = cpumask_of_node(cpu_to_node(dev->channel->target_cpu)); num_possible_rss_qs = cpumask_weight(node_cpu_mask); - net_device->num_chn = min(num_possible_rss_qs, num_rss_qs); + + /* We will use the given number of channels if available. */ + if (device_info->num_chn && device_info->num_chn < net_device->max_chn) + net_device->num_chn = device_info->num_chn; + else + net_device->num_chn = min(num_possible_rss_qs, num_rss_qs); + + num_rss_qs = net_device->num_chn - 1; + net_device->num_sc_offered = num_rss_qs; if (net_device->num_chn == 1) goto out; @@ -1157,11 +1176,25 @@ int rndis_filter_device_add(struct hv_device *dev, ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn); + /* + * Wait for the host to send us the sub-channel offers. + */ + spin_lock_irqsave(&net_device->sc_lock, flags); + sc_delta = num_rss_qs - (net_device->num_chn - 1); + net_device->num_sc_offered -= sc_delta; + spin_unlock_irqrestore(&net_device->sc_lock, flags); + + while (net_device->num_sc_offered != 0) { + t = wait_for_completion_timeout(&net_device->channel_init_wait, 10*HZ); + if (t == 0) + WARN(1, "Netvsc: Waiting for sub-channel processing"); + } out: if (ret) { net_device->max_chn = 1; net_device->num_chn = 1; } + return 0; /* return 0 because primary channel can be used alone */ err_dev_remv: |