diff options
Diffstat (limited to 'drivers/staging/rdma/hfi1')
30 files changed, 719 insertions, 486 deletions
diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index aa58e597d..e48981994 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -1295,7 +1295,7 @@ static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr, static u64 dev_access_u32_csr(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { - struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + struct hfi1_devdata *dd = context; if (vl != CNTR_INVALID_VL) return 0; @@ -1305,7 +1305,7 @@ static u64 dev_access_u32_csr(const struct cntr_entry *entry, static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { - struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + struct hfi1_devdata *dd = context; u64 val = 0; u64 csr = entry->csr; @@ -1326,7 +1326,7 @@ static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context, static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { - struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + struct hfi1_devdata *dd = context; u32 csr = entry->csr; int ret = 0; @@ -1350,7 +1350,7 @@ static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context, static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { - struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; + struct hfi1_pportdata *ppd = context; if (vl != CNTR_INVALID_VL) return 0; @@ -1360,7 +1360,7 @@ static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context, static u64 port_access_u64_csr(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { - struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; + struct hfi1_pportdata *ppd = context; u64 val; u64 csr = entry->csr; @@ -1400,7 +1400,7 @@ static inline u64 read_write_sw(struct hfi1_devdata *dd, u64 *cntr, int mode, static u64 
access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { - struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; + struct hfi1_pportdata *ppd = context; if (vl != CNTR_INVALID_VL) return 0; @@ -1410,7 +1410,7 @@ static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context, static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { - struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; + struct hfi1_pportdata *ppd = context; if (vl != CNTR_INVALID_VL) return 0; @@ -1420,7 +1420,7 @@ static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context, static u64 access_sw_xmit_discards(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { - struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; + struct hfi1_pportdata *ppd = context; if (vl != CNTR_INVALID_VL) return 0; @@ -1431,7 +1431,7 @@ static u64 access_sw_xmit_discards(const struct cntr_entry *entry, static u64 access_xmit_constraint_errs(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { - struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; + struct hfi1_pportdata *ppd = context; if (vl != CNTR_INVALID_VL) return 0; @@ -1443,7 +1443,7 @@ static u64 access_xmit_constraint_errs(const struct cntr_entry *entry, static u64 access_rcv_constraint_errs(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { - struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; + struct hfi1_pportdata *ppd = context; if (vl != CNTR_INVALID_VL) return 0; @@ -1491,7 +1491,7 @@ static u64 read_write_cpu(struct hfi1_devdata *dd, u64 *z_val, static u64 access_sw_cpu_intr(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { - struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + struct hfi1_devdata *dd = context; return read_write_cpu(dd, &dd->z_int_counter, dd->int_counter, 
vl, mode, data); @@ -1500,7 +1500,7 @@ static u64 access_sw_cpu_intr(const struct cntr_entry *entry, static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { - struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + struct hfi1_devdata *dd = context; return read_write_cpu(dd, &dd->z_rcv_limit, dd->rcv_limit, vl, mode, data); @@ -1509,7 +1509,7 @@ static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry, static u64 access_sw_pio_wait(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { - struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + struct hfi1_devdata *dd = context; return dd->verbs_dev.n_piowait; } @@ -1517,7 +1517,7 @@ static u64 access_sw_pio_wait(const struct cntr_entry *entry, static u64 access_sw_vtx_wait(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { - struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + struct hfi1_devdata *dd = context; return dd->verbs_dev.n_txwait; } @@ -1525,11 +1525,19 @@ static u64 access_sw_vtx_wait(const struct cntr_entry *entry, static u64 access_sw_kmem_wait(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { - struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + struct hfi1_devdata *dd = context; return dd->verbs_dev.n_kmem_wait; } +static u64 access_sw_send_schedule(const struct cntr_entry *entry, + void *context, int vl, int mode, u64 data) +{ + struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + + return dd->verbs_dev.n_send_schedule; +} + #define def_access_sw_cpu(cntr) \ static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry, \ void *context, int vl, int mode, u64 data) \ @@ -1720,6 +1728,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { access_sw_pio_wait), [C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL, access_sw_kmem_wait), +[C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL, + access_sw_send_schedule), 
}; static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = { @@ -2215,9 +2225,7 @@ static void update_rcverr_timer(unsigned long opaque) static int init_rcverr(struct hfi1_devdata *dd) { - init_timer(&dd->rcverr_timer); - dd->rcverr_timer.function = update_rcverr_timer; - dd->rcverr_timer.data = (unsigned long) dd; + setup_timer(&dd->rcverr_timer, update_rcverr_timer, (unsigned long)dd); /* Assume the hardware counter has been reset */ dd->rcv_ovfl_cnt = 0; return mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME); @@ -4416,7 +4424,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) rcd = dd->rcd[source]; if (rcd) { if (source < dd->first_user_ctxt) - rcd->do_interrupt(rcd); + rcd->do_interrupt(rcd, 0); else handle_user_interrupt(rcd); return; /* OK */ @@ -4582,23 +4590,106 @@ static irqreturn_t sdma_interrupt(int irq, void *data) } /* - * NOTE: this routine expects to be on its own MSI-X interrupt. If - * multiple receive contexts share the same MSI-X interrupt, then this - * routine must check for who received it. + * Clear the receive interrupt, forcing the write and making sure + * we have data from the chip, pushing everything in front of it + * back to the host. 
+ */ +static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd) +{ + struct hfi1_devdata *dd = rcd->dd; + u32 addr = CCE_INT_CLEAR + (8 * rcd->ireg); + + mmiowb(); /* make sure everything before is written */ + write_csr(dd, addr, rcd->imask); + /* force the above write on the chip and get a value back */ + (void)read_csr(dd, addr); +} + +/* force the receive interrupt */ +static inline void force_recv_intr(struct hfi1_ctxtdata *rcd) +{ + write_csr(rcd->dd, CCE_INT_FORCE + (8 * rcd->ireg), rcd->imask); +} + +/* return non-zero if a packet is present */ +static inline int check_packet_present(struct hfi1_ctxtdata *rcd) +{ + if (!HFI1_CAP_IS_KSET(DMA_RTAIL)) + return (rcd->seq_cnt == + rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd)))); + + /* else is RDMA rtail */ + return (rcd->head != get_rcvhdrtail(rcd)); +} + +/* + * Receive packet IRQ handler. This routine expects to be on its own IRQ. + * This routine will try to handle packets immediately (latency), but if + * it finds too many, it will invoke the thread handler (bandwidth). The + * chip receive interrupt is *not* cleared down until this or the thread (if + * invoked) is finished. The intent is to avoid extra interrupts while we + * are processing packets anyway. */ static irqreturn_t receive_context_interrupt(int irq, void *data) { struct hfi1_ctxtdata *rcd = data; struct hfi1_devdata *dd = rcd->dd; + int disposition; + int present; trace_hfi1_receive_interrupt(dd, rcd->ctxt); this_cpu_inc(*dd->int_counter); - /* clear the interrupt */ - write_csr(rcd->dd, CCE_INT_CLEAR + (8*rcd->ireg), rcd->imask); + /* receive interrupt remains blocked while processing packets */ + disposition = rcd->do_interrupt(rcd, 0); + + /* + * Too many packets were seen while processing packets in this + * IRQ handler. Invoke the handler thread. The receive interrupt + * remains blocked. 
+ */ + if (disposition == RCV_PKT_LIMIT) + return IRQ_WAKE_THREAD; - /* handle the interrupt */ - rcd->do_interrupt(rcd); + /* + * The packet processor detected no more packets. Clear the receive + * interrupt and recheck for a packet that may have arrived + * after the previous check and interrupt clear. If a packet arrived, + * force another interrupt. + */ + clear_recv_intr(rcd); + present = check_packet_present(rcd); + if (present) + force_recv_intr(rcd); + + return IRQ_HANDLED; +} + +/* + * Receive packet thread handler. This expects to be invoked with the + * receive interrupt still blocked. + */ +static irqreturn_t receive_context_thread(int irq, void *data) +{ + struct hfi1_ctxtdata *rcd = data; + int present; + + /* receive interrupt is still blocked from the IRQ handler */ + (void)rcd->do_interrupt(rcd, 1); + + /* + * The packet processor will only return if it detected no more + * packets. Hold IRQs here so we can safely clear the interrupt and + * recheck for a packet that may have arrived after the previous + * check and the interrupt clear. If a packet arrived, force another + * interrupt. 
+ */ + local_irq_disable(); + clear_recv_intr(rcd); + present = check_packet_present(rcd); + if (present) + force_recv_intr(rcd); + local_irq_enable(); return IRQ_HANDLED; } @@ -5407,6 +5498,8 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd) if (ppd->link_speed_enabled & OPA_LINK_SPEED_12_5G) ppd->local_tx_rate |= 1; } + + enable_lane_tx = 0xF; /* enable all four lanes */ ret = write_tx_settings(dd, enable_lane_tx, tx_polarity_inversion, rx_polarity_inversion, ppd->local_tx_rate); if (ret != HCMD_SUCCESS) @@ -5716,8 +5809,7 @@ void init_qsfp(struct hfi1_pportdata *ppd) u64 qsfp_mask; if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB || - ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR || - !HFI1_CAP_IS_KSET(QSFP_ENABLED)) { + ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) { ppd->driver_link_ready = 1; return; } @@ -6205,7 +6297,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) if (do_wait) { /* it can take a while for the link to go down */ - ret = wait_phy_linkstate(dd, PLS_OFFLINE, 5000); + ret = wait_phy_linkstate(dd, PLS_OFFLINE, 10000); if (ret < 0) return ret; } @@ -8156,9 +8248,8 @@ static int init_cntrs(struct hfi1_devdata *dd) struct hfi1_pportdata *ppd; /* set up the stats timer; the add_timer is done at the end */ - init_timer(&dd->synth_stats_timer); - dd->synth_stats_timer.function = update_synth_timer; - dd->synth_stats_timer.data = (unsigned long) dd; + setup_timer(&dd->synth_stats_timer, update_synth_timer, + (unsigned long)dd); /***********************/ /* per device counters */ @@ -8850,6 +8941,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) struct hfi1_msix_entry *me = &dd->msix_entries[i]; const char *err_info; irq_handler_t handler; + irq_handler_t thread = NULL; void *arg; int idx; struct hfi1_ctxtdata *rcd = NULL; @@ -8886,6 +8978,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) rcd->imask = ((u64)1) << ((IS_RCVAVAIL_START+idx) % 64); handler = receive_context_interrupt; + 
thread = receive_context_thread; arg = rcd; snprintf(me->name, sizeof(me->name), DRIVER_NAME"_%d kctxt%d", dd->unit, idx); @@ -8904,7 +8997,8 @@ static int request_msix_irqs(struct hfi1_devdata *dd) /* make sure the name is terminated */ me->name[sizeof(me->name)-1] = 0; - ret = request_irq(me->msix.vector, handler, 0, me->name, arg); + ret = request_threaded_irq(me->msix.vector, handler, thread, 0, + me->name, arg); if (ret) { dd_dev_err(dd, "unable to allocate %s interrupt, vector %d, index %d, err %d\n", @@ -8991,7 +9085,6 @@ static int set_up_interrupts(struct hfi1_devdata *dd) entries = kcalloc(total, sizeof(*entries), GFP_KERNEL); if (!entries) { - dd_dev_err(dd, "cannot allocate msix table\n"); ret = -ENOMEM; goto fail; } @@ -9328,8 +9421,6 @@ static void reset_cce_csrs(struct hfi1_devdata *dd) /* set ASIC CSRs to chip reset defaults */ static void reset_asic_csrs(struct hfi1_devdata *dd) { - static DEFINE_MUTEX(asic_mutex); - static int called; int i; /* @@ -9339,15 +9430,8 @@ static void reset_asic_csrs(struct hfi1_devdata *dd) * a known first load do the reset and blocking all others. */ - /* - * These CSRs should only be reset once - the first one here will - * do the work. Use a mutex so that a non-first caller waits until - * the first is finished before it can proceed. 
- */ - mutex_lock(&asic_mutex); - if (called) - goto done; - called = 1; + if (!(dd->flags & HFI1_DO_INIT_ASIC)) + return; if (dd->icode != ICODE_FPGA_EMULATION) { /* emulation does not have an SBus - leave these alone */ @@ -9367,7 +9451,10 @@ static void reset_asic_csrs(struct hfi1_devdata *dd) for (i = 0; i < ASIC_NUM_SCRATCH; i++) write_csr(dd, ASIC_CFG_SCRATCH + (8 * i), 0); write_csr(dd, ASIC_CFG_MUTEX, 0); /* this will clear it */ + + /* We might want to retain this state across FLR if we ever use it */ write_csr(dd, ASIC_CFG_DRV_STR, 0); + write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0); /* ASIC_STS_THERM read-only */ /* ASIC_CFG_RESET leave alone */ @@ -9414,9 +9501,6 @@ static void reset_asic_csrs(struct hfi1_devdata *dd) /* this also writes a NOP command, clearing paging mode */ write_csr(dd, ASIC_EEP_ADDR_CMD, 0); write_csr(dd, ASIC_EEP_DATA, 0); - -done: - mutex_unlock(&asic_mutex); } /* set MISC CSRs to chip reset defaults */ @@ -9828,6 +9912,7 @@ static void init_chip(struct hfi1_devdata *dd) restore_pci_variables(dd); } + reset_asic_csrs(dd); } else { dd_dev_info(dd, "Resetting CSRs with writes\n"); reset_cce_csrs(dd); @@ -9838,6 +9923,7 @@ static void init_chip(struct hfi1_devdata *dd) } /* clear the DC reset */ write_csr(dd, CCE_DC_CTRL, 0); + /* Set the LED off */ if (is_a0(dd)) setextled(dd, 0); @@ -10333,7 +10419,7 @@ static void asic_should_init(struct hfi1_devdata *dd) } /** - * Allocate an initialize the device structure for the hfi. + * Allocate and initialize the device structure for the hfi. * @dev: the pci_dev for hfi1_ib device * @ent: pci_device_id struct for this dev * @@ -10489,6 +10575,12 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, else if (dd->rcv_intr_timeout_csr == 0 && rcv_intr_timeout) dd->rcv_intr_timeout_csr = 1; + /* needs to be done before we look for the peer device */ + read_guid(dd); + + /* should this device init the ASIC block? 
*/ + asic_should_init(dd); + /* obtain chip sizes, reset chip CSRs */ init_chip(dd); @@ -10497,11 +10589,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_cleanup; - /* needs to be done before we look for the peer device */ - read_guid(dd); - - asic_should_init(dd); - /* read in firmware */ ret = hfi1_firmware_init(dd); if (ret) @@ -10716,6 +10803,7 @@ static int thermal_init(struct hfi1_devdata *dd) acquire_hw_mutex(dd); dd_dev_info(dd, "Initializing thermal sensor\n"); + /* Thermal Sensor Initialization */ /* Step 1: Reset the Thermal SBus Receiver */ ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0, diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index f89a432c7..ebf9041a1 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -609,6 +609,7 @@ static inline void write_uctxt_csr(struct hfi1_devdata *dd, int ctxt, u64 create_pbc(struct hfi1_pportdata *ppd, u64, int, u32, u32); /* firmware.c */ +#define SBUS_MASTER_BROADCAST 0xfd #define NUM_PCIE_SERDES 16 /* number of PCIe serdes on the SBus */ extern const u8 pcie_serdes_broadcast[]; extern const u8 pcie_pcs_addrs[2][NUM_PCIE_SERDES]; @@ -786,6 +787,7 @@ enum { C_SW_VTX_WAIT, C_SW_PIO_WAIT, C_SW_KMEM_WAIT, + C_SW_SEND_SCHED, DEV_CNTR_LAST /* Must be kept last */ }; diff --git a/drivers/staging/rdma/hfi1/common.h b/drivers/staging/rdma/hfi1/common.h index 5f2293729..5e203239c 100644 --- a/drivers/staging/rdma/hfi1/common.h +++ b/drivers/staging/rdma/hfi1/common.h @@ -147,7 +147,6 @@ HFI1_CAP_USE_SDMA_HEAD | \ HFI1_CAP_EXTENDED_PSN | \ HFI1_CAP_PRINT_UNIMPL | \ - HFI1_CAP_QSFP_ENABLED | \ HFI1_CAP_NO_INTEGRITY | \ HFI1_CAP_PKEY_CHECK) << \ HFI1_CAP_USER_SHIFT) @@ -163,7 +162,6 @@ HFI1_CAP_SDMA | \ HFI1_CAP_PRINT_UNIMPL | \ HFI1_CAP_STATIC_RATE_CTRL | \ - HFI1_CAP_QSFP_ENABLED | \ HFI1_CAP_PKEY_CHECK | \ HFI1_CAP_MULTI_PKT_EGR | \ HFI1_CAP_EXTENDED_PSN | \ @@ -206,7 +204,7 @@ * to the driver itself, not the software 
interfaces it supports. */ #ifndef HFI1_DRIVER_VERSION_BASE -#define HFI1_DRIVER_VERSION_BASE "0.9-248" +#define HFI1_DRIVER_VERSION_BASE "0.9-294" #endif /* create the final driver version string */ diff --git a/drivers/staging/rdma/hfi1/device.c b/drivers/staging/rdma/hfi1/device.c index bc26a5392..58472e5ac 100644 --- a/drivers/staging/rdma/hfi1/device.c +++ b/drivers/staging/rdma/hfi1/device.c @@ -124,7 +124,7 @@ static char *hfi1_devnode(struct device *dev, umode_t *mode) } static const char *hfi1_class_name_user = "hfi1_user"; -const char *class_name_user(void) +static const char *class_name_user(void) { return hfi1_class_name_user; } diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c index 3e8d5ac4c..88414d720 100644 --- a/drivers/staging/rdma/hfi1/diag.c +++ b/drivers/staging/rdma/hfi1/diag.c @@ -607,7 +607,7 @@ static int hfi1_snoop_add(struct hfi1_devdata *dd, const char *name) static struct hfi1_devdata *hfi1_dd_from_sc_inode(struct inode *in) { int unit = iminor(in) - HFI1_SNOOP_CAPTURE_BASE; - struct hfi1_devdata *dd = NULL; + struct hfi1_devdata *dd; dd = hfi1_lookup(unit); return dd; @@ -1159,9 +1159,8 @@ static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) filter_cmd.opcode, filter_cmd.length, filter_cmd.value_ptr); - filter_value = kzalloc( - filter_cmd.length * sizeof(u8), - GFP_KERNEL); + filter_value = kcalloc(filter_cmd.length, sizeof(u8), + GFP_KERNEL); if (!filter_value) { pr_alert("Not enough memory\n"); ret = -ENOMEM; @@ -1478,7 +1477,7 @@ static struct snoop_packet *allocate_snoop_packet(u32 hdr_len, u32 md_len) { - struct snoop_packet *packet = NULL; + struct snoop_packet *packet; packet = kzalloc(sizeof(struct snoop_packet) + hdr_len + data_len + md_len, diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index c0a59001e..ce69141b5 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -302,6 +302,7 @@ static void 
rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, qp_num = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK; if (lid < HFI1_MULTICAST_LID_BASE) { struct hfi1_qp *qp; + unsigned long flags; rcu_read_lock(); qp = hfi1_lookup_qpn(ibp, qp_num); @@ -314,7 +315,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, * Handle only RC QPs - for other QP types drop error * packet. */ - spin_lock(&qp->r_lock); + spin_lock_irqsave(&qp->r_lock, flags); /* Check for valid receive state. */ if (!(ib_hfi1_state_ops[qp->state] & @@ -335,7 +336,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, break; } - spin_unlock(&qp->r_lock); + spin_unlock_irqrestore(&qp->r_lock, flags); rcu_read_unlock(); } /* Unicast QP */ } /* Valid packet with TIDErr */ @@ -426,8 +427,7 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd, packet->rcd = rcd; packet->updegr = 0; packet->etail = -1; - packet->rhf_addr = (__le32 *) rcd->rcvhdrq + rcd->head + - rcd->dd->rhf_offset; + packet->rhf_addr = get_rhf_addr(rcd); packet->rhf = rhf_to_cpu(packet->rhf_addr); packet->rhqoff = rcd->head; packet->numpkt = 0; @@ -618,10 +618,7 @@ next: } #endif /* CONFIG_PRESCAN_RXQ */ -#define RCV_PKT_OK 0x0 -#define RCV_PKT_MAX 0x1 - -static inline int process_rcv_packet(struct hfi1_packet *packet) +static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) { int ret = RCV_PKT_OK; @@ -663,9 +660,13 @@ static inline int process_rcv_packet(struct hfi1_packet *packet) if (packet->rhqoff >= packet->maxcnt) packet->rhqoff = 0; - if (packet->numpkt == MAX_PKT_RECV) { - ret = RCV_PKT_MAX; - this_cpu_inc(*packet->rcd->dd->rcv_limit); + if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0)) { + if (thread) { + cond_resched(); + } else { + ret = RCV_PKT_LIMIT; + this_cpu_inc(*packet->rcd->dd->rcv_limit); + } } packet->rhf_addr = (__le32 *) packet->rcd->rcvhdrq + packet->rhqoff + @@ -742,57 +743,63 @@ static inline void 
process_rcv_qp_work(struct hfi1_packet *packet) /* * Handle receive interrupts when using the no dma rtail option. */ -void handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd) +int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread) { u32 seq; - int last = 0; + int last = RCV_PKT_OK; struct hfi1_packet packet; init_packet(rcd, &packet); seq = rhf_rcv_seq(packet.rhf); - if (seq != rcd->seq_cnt) + if (seq != rcd->seq_cnt) { + last = RCV_PKT_DONE; goto bail; + } prescan_rxq(&packet); - while (!last) { - last = process_rcv_packet(&packet); + while (last == RCV_PKT_OK) { + last = process_rcv_packet(&packet, thread); seq = rhf_rcv_seq(packet.rhf); if (++rcd->seq_cnt > 13) rcd->seq_cnt = 1; if (seq != rcd->seq_cnt) - last = 1; + last = RCV_PKT_DONE; process_rcv_update(last, &packet); } process_rcv_qp_work(&packet); bail: finish_packet(&packet); + return last; } -void handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd) +int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread) { u32 hdrqtail; - int last = 0; + int last = RCV_PKT_OK; struct hfi1_packet packet; init_packet(rcd, &packet); hdrqtail = get_rcvhdrtail(rcd); - if (packet.rhqoff == hdrqtail) + if (packet.rhqoff == hdrqtail) { + last = RCV_PKT_DONE; goto bail; + } smp_rmb(); /* prevent speculative reads of dma'ed hdrq */ prescan_rxq(&packet); - while (!last) { - last = process_rcv_packet(&packet); + while (last == RCV_PKT_OK) { + last = process_rcv_packet(&packet, thread); + hdrqtail = get_rcvhdrtail(rcd); if (packet.rhqoff == hdrqtail) - last = 1; + last = RCV_PKT_DONE; process_rcv_update(last, &packet); } process_rcv_qp_work(&packet); bail: finish_packet(&packet); - + return last; } static inline void set_all_nodma_rtail(struct hfi1_devdata *dd) @@ -820,12 +827,11 @@ static inline void set_all_dma_rtail(struct hfi1_devdata *dd) * Called from interrupt handler for errors or receive interrupt. * This is the slow path interrupt handler. 
*/ -void handle_receive_interrupt(struct hfi1_ctxtdata *rcd) +int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) { - struct hfi1_devdata *dd = rcd->dd; u32 hdrqtail; - int last = 0, needset = 1; + int last = RCV_PKT_OK, needset = 1; struct hfi1_packet packet; init_packet(rcd, &packet); @@ -833,19 +839,23 @@ void handle_receive_interrupt(struct hfi1_ctxtdata *rcd) if (!HFI1_CAP_IS_KSET(DMA_RTAIL)) { u32 seq = rhf_rcv_seq(packet.rhf); - if (seq != rcd->seq_cnt) + if (seq != rcd->seq_cnt) { + last = RCV_PKT_DONE; goto bail; + } hdrqtail = 0; } else { hdrqtail = get_rcvhdrtail(rcd); - if (packet.rhqoff == hdrqtail) + if (packet.rhqoff == hdrqtail) { + last = RCV_PKT_DONE; goto bail; + } smp_rmb(); /* prevent speculative reads of dma'ed hdrq */ } prescan_rxq(&packet); - while (!last) { + while (last == RCV_PKT_OK) { if (unlikely(dd->do_drop && atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) == DROP_PACKET_ON)) { @@ -859,7 +869,7 @@ void handle_receive_interrupt(struct hfi1_ctxtdata *rcd) packet.rhf = rhf_to_cpu(packet.rhf_addr); } else { - last = process_rcv_packet(&packet); + last = process_rcv_packet(&packet, thread); } if (!HFI1_CAP_IS_KSET(DMA_RTAIL)) { @@ -868,7 +878,7 @@ void handle_receive_interrupt(struct hfi1_ctxtdata *rcd) if (++rcd->seq_cnt > 13) rcd->seq_cnt = 1; if (seq != rcd->seq_cnt) - last = 1; + last = RCV_PKT_DONE; if (needset) { dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n"); @@ -877,7 +887,7 @@ void handle_receive_interrupt(struct hfi1_ctxtdata *rcd) } } else { if (packet.rhqoff == hdrqtail) - last = 1; + last = RCV_PKT_DONE; if (needset) { dd_dev_info(dd, "Switching to DMA_RTAIL\n"); @@ -897,6 +907,7 @@ bail: * if no packets were processed. 
*/ finish_packet(&packet); + return last; } /* @@ -1062,9 +1073,9 @@ void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int val) */ if (atomic_inc_return(&ppd->led_override_timer_active) == 1) { /* Need to start timer */ - init_timer(&ppd->led_override_timer); - ppd->led_override_timer.function = run_led_override; - ppd->led_override_timer.data = (unsigned long) ppd; + setup_timer(&ppd->led_override_timer, run_led_override, + (unsigned long)ppd); + ppd->led_override_timer.expires = jiffies + 1; add_timer(&ppd->led_override_timer); } else { diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index 72d38500d..aae9826ec 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -168,7 +168,7 @@ enum mmap_types { HFI1_MMAP_TOKEN_SET(TYPE, type) | \ HFI1_MMAP_TOKEN_SET(CTXT, ctxt) | \ HFI1_MMAP_TOKEN_SET(SUBCTXT, subctxt) | \ - HFI1_MMAP_TOKEN_SET(OFFSET, ((unsigned long)addr & ~PAGE_MASK))) + HFI1_MMAP_TOKEN_SET(OFFSET, (offset_in_page(addr)))) #define EXP_TID_SET(field, value) \ (((value) & EXP_TID_TID##field##_MASK) << \ @@ -508,7 +508,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) case PIO_BUFS_SOP: memaddr = ((dd->physaddr + TXE_PIO_SEND) + /* chip pio base */ - (uctxt->sc->hw_context * (1 << 16))) + + (uctxt->sc->hw_context * BIT(16))) + /* 64K PIO space / ctxt */ (type == PIO_BUFS_SOP ? (TXE_PIO_SIZE / 2) : 0); /* sop? */ @@ -607,9 +607,9 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) * Use the page where this context's flags are. User level * knows where it's own bitmap is within the page. 
*/ - memaddr = ((unsigned long)dd->events + - ((uctxt->ctxt - dd->first_user_ctxt) * - HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK; + memaddr = (unsigned long)(dd->events + + ((uctxt->ctxt - dd->first_user_ctxt) * + HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK; memlen = PAGE_SIZE; /* * v3.7 removes VM_RESERVED but the effect is kept by @@ -948,6 +948,7 @@ static int find_shared_ctxt(struct file *fp, /* Skip ctxt if it doesn't match the requested one */ if (memcmp(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)) || + uctxt->jkey != generate_jkey(current_uid()) || uctxt->subctxt_id != uinfo->subctxt_id || uctxt->subctxt_cnt != uinfo->subctxt_cnt) continue; @@ -1335,9 +1336,9 @@ static int get_base_info(struct file *fp, void __user *ubase, __u32 len) */ binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt, subctxt_fp(fp), 0); - offset = ((((uctxt->ctxt - dd->first_user_ctxt) * + offset = offset_in_page((((uctxt->ctxt - dd->first_user_ctxt) * HFI1_MAX_SHARED_CTXTS) + subctxt_fp(fp)) * - sizeof(*dd->events)) & ~PAGE_MASK; + sizeof(*dd->events)); binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt, subctxt_fp(fp), offset); @@ -1573,7 +1574,7 @@ static int exp_tid_setup(struct file *fp, struct hfi1_tid_info *tinfo) vaddr = tinfo->vaddr; - if (vaddr & ~PAGE_MASK) { + if (offset_in_page(vaddr)) { ret = -EINVAL; goto bail; } @@ -2066,6 +2067,7 @@ static const struct file_operations ui_file_ops = { .open = ui_open, .release = ui_release, }; + #define UI_OFFSET 192 /* device minor offset for UI devices */ static int create_ui = 1; diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c index 9a02432d3..ff4feb4fc 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/staging/rdma/hfi1/firmware.c @@ -924,9 +924,6 @@ static int load_8051_firmware(struct hfi1_devdata *dd, return 0; } -/* SBus Master broadcast address */ -#define SBUS_MASTER_BROADCAST 0xfd - /* * Write the SBus request register * @@ -1239,34 +1236,20 @@ int load_firmware(struct 
hfi1_devdata *dd) { int ret; - if (fw_sbus_load || fw_fabric_serdes_load) { + if (fw_fabric_serdes_load) { ret = acquire_hw_mutex(dd); if (ret) return ret; set_sbus_fast_mode(dd); - /* - * The SBus contains part of the fabric firmware and so must - * also be downloaded. - */ - if (fw_sbus_load) { - turn_off_spicos(dd, SPICO_SBUS); - ret = load_sbus_firmware(dd, &fw_sbus); - if (ret) - goto clear; - } + set_serdes_broadcast(dd, all_fabric_serdes_broadcast, + fabric_serdes_broadcast[dd->hfi1_id], + fabric_serdes_addrs[dd->hfi1_id], + NUM_FABRIC_SERDES); + turn_off_spicos(dd, SPICO_FABRIC); + ret = load_fabric_serdes_firmware(dd, &fw_fabric); - if (fw_fabric_serdes_load) { - set_serdes_broadcast(dd, all_fabric_serdes_broadcast, - fabric_serdes_broadcast[dd->hfi1_id], - fabric_serdes_addrs[dd->hfi1_id], - NUM_FABRIC_SERDES); - turn_off_spicos(dd, SPICO_FABRIC); - ret = load_fabric_serdes_firmware(dd, &fw_fabric); - } - -clear: clear_sbus_fast_mode(dd); release_hw_mutex(dd); if (ret) @@ -1585,7 +1568,7 @@ int load_pcie_firmware(struct hfi1_devdata *dd) /* both firmware loads below use the SBus */ set_sbus_fast_mode(dd); - if (fw_sbus_load) { + if (fw_sbus_load && (dd->flags & HFI1_DO_INIT_ASIC)) { turn_off_spicos(dd, SPICO_SBUS); ret = load_sbus_firmware(dd, &fw_sbus); if (ret) @@ -1614,6 +1597,10 @@ done: */ void read_guid(struct hfi1_devdata *dd) { + /* Take the DC out of reset to get a valid GUID value */ + write_csr(dd, CCE_DC_CTRL, 0); + (void) read_csr(dd, CCE_DC_CTRL); + dd->base_guid = read_csr(dd, DC_DC8051_CFG_LOCAL_GUID); dd_dev_info(dd, "GUID %llx", (unsigned long long)dd->base_guid); diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 8ca171bf3..190f7a2f6 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -262,7 +262,7 @@ struct hfi1_ctxtdata { pid_t pid; pid_t subpid[HFI1_MAX_SHARED_CTXTS]; /* same size as task_struct .comm[], command that opened context */ - char comm[16]; + char 
comm[TASK_COMM_LEN]; /* so file ops can get at unit */ struct hfi1_devdata *dd; /* so functions that need physical port can get it easily */ @@ -313,7 +313,7 @@ struct hfi1_ctxtdata { * be valid. Worst case is we process an extra interrupt and up to 64 * packets with the wrong interrupt handler. */ - void (*do_interrupt)(struct hfi1_ctxtdata *rcd); + int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded); }; /* @@ -1130,9 +1130,21 @@ void hfi1_init_pportdata(struct pci_dev *, struct hfi1_pportdata *, struct hfi1_devdata *, u8, u8); void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *); -void handle_receive_interrupt(struct hfi1_ctxtdata *); -void handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd); -void handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd); +int handle_receive_interrupt(struct hfi1_ctxtdata *, int); +int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int); +int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int); + +/* receive packet handler dispositions */ +#define RCV_PKT_OK 0x0 /* keep going */ +#define RCV_PKT_LIMIT 0x1 /* stop, hit limit, start thread */ +#define RCV_PKT_DONE 0x2 /* stop, no more packets detected */ + +/* calculate the current RHF address */ +static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd) +{ + return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->dd->rhf_offset; +} + int hfi1_reset_device(int); /* return the driver's idea of the logical OPA port state */ diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index a877eda8c..8666f3ad2 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -134,11 +134,8 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) dd->assigned_node_id = local_node_id; dd->rcd = kcalloc(dd->num_rcv_contexts, sizeof(*dd->rcd), GFP_KERNEL); - if (!dd->rcd) { - dd_dev_err(dd, - "Unable to allocate receive context array, failing\n"); + if (!dd->rcd) goto nomem; - } /* 
create one or more kernel contexts */ for (i = 0; i < dd->first_user_ctxt; ++i) { @@ -293,12 +290,14 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt) * The resulting value will be rounded down to the closest * multiple of dd->rcv_entries.group_size. */ - rcd->egrbufs.buffers = kzalloc(sizeof(*rcd->egrbufs.buffers) * - rcd->egrbufs.count, GFP_KERNEL); + rcd->egrbufs.buffers = kcalloc(rcd->egrbufs.count, + sizeof(*rcd->egrbufs.buffers), + GFP_KERNEL); if (!rcd->egrbufs.buffers) goto bail; - rcd->egrbufs.rcvtids = kzalloc(sizeof(*rcd->egrbufs.rcvtids) * - rcd->egrbufs.count, GFP_KERNEL); + rcd->egrbufs.rcvtids = kcalloc(rcd->egrbufs.count, + sizeof(*rcd->egrbufs.rcvtids), + GFP_KERNEL); if (!rcd->egrbufs.rcvtids) goto bail; rcd->egrbufs.size = eager_buffer_size; @@ -318,12 +317,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt) if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */ rcd->opstats = kzalloc(sizeof(*rcd->opstats), GFP_KERNEL); - if (!rcd->opstats) { - dd_dev_err(dd, - "ctxt%u: Unable to allocate per ctxt stats buffer\n", - rcd->ctxt); + if (!rcd->opstats) goto bail; - } } } return rcd; @@ -418,6 +413,7 @@ static enum hrtimer_restart cca_timer_fn(struct hrtimer *t) int sl; u16 ccti, ccti_timer, ccti_min; struct cc_state *cc_state; + unsigned long flags; cca_timer = container_of(t, struct cca_timer, hrtimer); ppd = cca_timer->ppd; @@ -441,7 +437,7 @@ static enum hrtimer_restart cca_timer_fn(struct hrtimer *t) ccti_min = cc_state->cong_setting.entries[sl].ccti_min; ccti_timer = cc_state->cong_setting.entries[sl].ccti_timer; - spin_lock(&ppd->cca_timer_lock); + spin_lock_irqsave(&ppd->cca_timer_lock, flags); ccti = cca_timer->ccti; @@ -450,7 +446,7 @@ static enum hrtimer_restart cca_timer_fn(struct hrtimer *t) set_link_ipg(ppd); } - spin_unlock(&ppd->cca_timer_lock); + spin_unlock_irqrestore(&ppd->cca_timer_lock, flags); rcu_read_unlock(); @@ -1050,8 +1046,8 @@ struct hfi1_devdata 
*hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) if (!hfi1_cpulist_count) { u32 count = num_online_cpus(); - hfi1_cpulist = kzalloc(BITS_TO_LONGS(count) * - sizeof(long), GFP_KERNEL); + hfi1_cpulist = kcalloc(BITS_TO_LONGS(count), sizeof(long), + GFP_KERNEL); if (hfi1_cpulist) hfi1_cpulist_count = count; else @@ -1564,7 +1560,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) * heavy filesystem activity makes these fail, and we can * use compound pages. */ - gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; + gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP; /* * The minimum size of the eager buffers is a groups of MTU-sized diff --git a/drivers/staging/rdma/hfi1/keys.c b/drivers/staging/rdma/hfi1/keys.c index f6eff177a..cb4e6087d 100644 --- a/drivers/staging/rdma/hfi1/keys.c +++ b/drivers/staging/rdma/hfi1/keys.c @@ -354,58 +354,3 @@ bail: rcu_read_unlock(); return 0; } - -/* - * Initialize the memory region specified by the work request. - */ -int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_send_wr *wr) -{ - struct hfi1_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; - struct hfi1_pd *pd = to_ipd(qp->ibqp.pd); - struct hfi1_mregion *mr; - u32 rkey = wr->wr.fast_reg.rkey; - unsigned i, n, m; - int ret = -EINVAL; - unsigned long flags; - u64 *page_list; - size_t ps; - - spin_lock_irqsave(&rkt->lock, flags); - if (pd->user || rkey == 0) - goto bail; - - mr = rcu_dereference_protected( - rkt->table[(rkey >> (32 - hfi1_lkey_table_size))], - lockdep_is_held(&rkt->lock)); - if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd)) - goto bail; - - if (wr->wr.fast_reg.page_list_len > mr->max_segs) - goto bail; - - ps = 1UL << wr->wr.fast_reg.page_shift; - if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len) - goto bail; - - mr->user_base = wr->wr.fast_reg.iova_start; - mr->iova = wr->wr.fast_reg.iova_start; - mr->lkey = rkey; - mr->length = wr->wr.fast_reg.length; - mr->access_flags = wr->wr.fast_reg.access_flags; - page_list = 
wr->wr.fast_reg.page_list->page_list; - m = 0; - n = 0; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { - mr->map[m]->segs[n].vaddr = (void *) page_list[i]; - mr->map[m]->segs[n].length = ps; - if (++n == HFI1_SEGSZ) { - m++; - n = 0; - } - } - - ret = 0; -bail: - spin_unlock_irqrestore(&rkt->lock, flags); - return ret; -} diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index b2c1b72d3..32f703736 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -1438,7 +1438,7 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data) { - u64 *val = (u64 *)data; + u64 *val = data; *val++ = read_csr(dd, SEND_SC2VLT0); *val++ = read_csr(dd, SEND_SC2VLT1); @@ -1457,7 +1457,7 @@ static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data) static void filter_sc2vlt(void *data) { int i; - u8 *pd = (u8 *)data; + u8 *pd = data; for (i = 0; i < OPA_MAX_SCS; i++) { if (i == 15) @@ -1469,7 +1469,7 @@ static void filter_sc2vlt(void *data) static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data) { - u64 *val = (u64 *)data; + u64 *val = data; filter_sc2vlt(data); @@ -1478,7 +1478,7 @@ static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data) write_csr(dd, SEND_SC2VLT2, *val++); write_csr(dd, SEND_SC2VLT3, *val++); write_seqlock_irq(&dd->sc2vl_lock); - memcpy(dd->sc2vl, (u64 *)data, sizeof(dd->sc2vl)); + memcpy(dd->sc2vl, data, sizeof(dd->sc2vl)); write_sequnlock_irq(&dd->sc2vl_lock); return 0; } @@ -1488,7 +1488,7 @@ static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, u32 *resp_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); - u8 *p = (u8 *)data; + u8 *p = data; size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */ unsigned i; @@ -1511,7 +1511,7 @@ static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, u32 *resp_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); - 
u8 *p = (u8 *)data; + u8 *p = data; int i; if (am) { @@ -1530,7 +1530,7 @@ static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data, u32 *resp_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); - u8 *p = (u8 *)data; + u8 *p = data; size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */ unsigned i; @@ -1553,7 +1553,7 @@ static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data, u32 *resp_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); - u8 *p = (u8 *)data; + u8 *p = data; int i; if (am) { @@ -3257,7 +3257,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, return reply((struct ib_mad_hdr *)smp); } - spin_lock(&ppd->cc_log_lock); + spin_lock_irq(&ppd->cc_log_lock); cong_log->log_type = OPA_CC_LOG_TYPE_HFI; cong_log->congestion_flags = 0; @@ -3300,7 +3300,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, sizeof(ppd->threshold_cong_event_map)); ppd->threshold_event_counter = 0; - spin_unlock(&ppd->cc_log_lock); + spin_unlock_irq(&ppd->cc_log_lock); if (resp_len) *resp_len += sizeof(struct opa_hfi1_cong_log); diff --git a/drivers/staging/rdma/hfi1/mr.c b/drivers/staging/rdma/hfi1/mr.c index bd64e4f98..568f185a0 100644 --- a/drivers/staging/rdma/hfi1/mr.c +++ b/drivers/staging/rdma/hfi1/mr.c @@ -284,20 +284,20 @@ struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, m = 0; n = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - void *vaddr; - - vaddr = page_address(sg_page(sg)); - if (!vaddr) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - mr->mr.map[m]->segs[n].vaddr = vaddr; - mr->mr.map[m]->segs[n].length = umem->page_size; - n++; - if (n == HFI1_SEGSZ) { - m++; - n = 0; - } + void *vaddr; + + vaddr = page_address(sg_page(sg)); + if (!vaddr) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + mr->mr.map[m]->segs[n].vaddr = vaddr; + mr->mr.map[m]->segs[n].length = umem->page_size; + n++; + if (n == HFI1_SEGSZ) { + m++; + n = 0; + } } ret = &mr->ibmr; @@ -344,9 
+344,10 @@ out: /* * Allocate a memory region usable with the - * IB_WR_FAST_REG_MR send work request. + * IB_WR_REG_MR send work request. * * Return the memory region on success, otherwise return an errno. + * FIXME: IB_WR_REG_MR is not supported */ struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, @@ -364,36 +365,6 @@ struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd, return &mr->ibmr; } -struct ib_fast_reg_page_list * -hfi1_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len) -{ - unsigned size = page_list_len * sizeof(u64); - struct ib_fast_reg_page_list *pl; - - if (size > PAGE_SIZE) - return ERR_PTR(-EINVAL); - - pl = kzalloc(sizeof(*pl), GFP_KERNEL); - if (!pl) - return ERR_PTR(-ENOMEM); - - pl->page_list = kzalloc(size, GFP_KERNEL); - if (!pl->page_list) - goto err_free; - - return pl; - -err_free: - kfree(pl); - return ERR_PTR(-ENOMEM); -} - -void hfi1_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl) -{ - kfree(pl->page_list); - kfree(pl); -} - /** * hfi1_alloc_fmr - allocate a fast memory region * @pd: the protection domain for this memory region diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index ac5653c0f..a95604445 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -946,9 +946,21 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd) __func__); } +retry: + + if (therm) { + /* + * toggle SPICO_ENABLE to get back to the state + * just after the firmware load + */ + sbus_request(dd, SBUS_MASTER_BROADCAST, 0x01, + WRITE_SBUS_RECEIVER, 0x00000040); + sbus_request(dd, SBUS_MASTER_BROADCAST, 0x01, + WRITE_SBUS_RECEIVER, 0x00000140); + } + /* step 3: download SBus Master firmware */ /* step 4: download PCIe Gen3 SerDes firmware */ -retry: dd_dev_info(dd, "%s: downloading firmware\n", __func__); ret = load_pcie_firmware(dd); if (ret) @@ -1187,6 +1199,7 @@ retry: /* clear the DC reset */ write_csr(dd, CCE_DC_CTRL, 0); + /* Set the LED off */ if 
(is_a0(dd)) setextled(dd, 0); diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c index 9991814a8..e5c32db4b 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/staging/rdma/hfi1/pio.c @@ -435,7 +435,6 @@ int init_send_contexts(struct hfi1_devdata *dd) sizeof(struct send_context_info), GFP_KERNEL); if (!dd->send_contexts || !dd->hw_to_sw) { - dd_dev_err(dd, "Unable to allocate send context arrays\n"); kfree(dd->hw_to_sw); kfree(dd->send_contexts); free_credit_return(dd); @@ -684,10 +683,8 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, return NULL; sc = kzalloc_node(sizeof(struct send_context), GFP_KERNEL, numa); - if (!sc) { - dd_dev_err(dd, "Cannot allocate send context structure\n"); + if (!sc) return NULL; - } spin_lock_irqsave(&dd->sc_lock, flags); ret = sc_hw_alloc(dd, type, &sw_index, &hw_context); @@ -813,8 +810,6 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, sc->sr = kzalloc_node(sizeof(union pio_shadow_ring) * sc->sr_size, GFP_KERNEL, numa); if (!sc->sr) { - dd_dev_err(dd, - "Cannot allocate send context shadow ring structure\n"); sc_free(sc); return NULL; } @@ -927,10 +922,12 @@ void sc_disable(struct send_context *sc) static void sc_wait_for_packet_egress(struct send_context *sc, int pause) { struct hfi1_devdata *dd = sc->dd; - u64 reg; + u64 reg = 0; + u64 reg_prev; u32 loop = 0; while (1) { + reg_prev = reg; reg = read_csr(dd, sc->hw_context * 8 + SEND_EGRESS_CTXT_STATUS); /* done if egress is stopped */ @@ -939,11 +936,17 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause) reg = packet_occupancy(reg); if (reg == 0) break; - if (loop > 100) { + /* counter is reset if occupancy count changes */ + if (reg != reg_prev) + loop = 0; + if (loop > 500) { + /* timed out - bounce the link */ dd_dev_err(dd, - "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u\n", + "%s: context %u(%u) timeout waiting for packets to egress, remaining 
count %u, bouncing link\n", __func__, sc->sw_index, sc->hw_context, (u32)reg); + queue_work(dd->pport->hfi1_wq, + &dd->pport->link_bounce_work); break; } loop++; diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index df1fa56ea..f8c361669 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -422,7 +422,7 @@ static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends) if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); + atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount); if (++qp->s_last >= qp->s_size) qp->s_last = 0; } diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index 6b505859b..b9c157599 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -52,6 +52,7 @@ #include <linux/hash.h> #include "verbs.h" +#include "sdma.h" #define QPN_MAX (1 << 24) #define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) @@ -117,6 +118,20 @@ static inline struct hfi1_qp *hfi1_lookup_qpn(struct hfi1_ibport *ibp, } /** + * clear_ahg - reset ahg status in qp + * @qp - qp pointer + */ +static inline void clear_ahg(struct hfi1_qp *qp) +{ + qp->s_hdr->ahgcount = 0; + qp->s_flags &= ~(HFI1_S_AHG_VALID | HFI1_S_AHG_CLEAR); + if (qp->s_sde && qp->s_ahgidx >= 0) + sdma_ahg_free(qp->s_sde, qp->s_ahgidx); + qp->s_ahgidx = -1; + qp->s_sde = NULL; +} + +/** * hfi1_error_qp - put a QP into the error state * @qp: the QP to put into the error state * @err: the receive completion error to signal if a RWQE is active diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c index 313893615..ffdb1d787 100644 --- a/drivers/staging/rdma/hfi1/qsfp.c +++ b/drivers/staging/rdma/hfi1/qsfp.c @@ -403,16 +403,11 @@ static const char *pwr_codes = "1.5W2.0W2.5W3.5W"; int qsfp_mod_present(struct hfi1_pportdata *ppd) { - if (HFI1_CAP_IS_KSET(QSFP_ENABLED)) { - struct hfi1_devdata 
*dd = ppd->dd; - u64 reg; + struct hfi1_devdata *dd = ppd->dd; + u64 reg; - reg = read_csr(dd, - dd->hfi1_id ? ASIC_QSFP2_IN : ASIC_QSFP1_IN); - return !(reg & QSFP_HFI0_MODPRST_N); - } - /* always return cable present */ - return 1; + reg = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_IN : ASIC_QSFP1_IN); + return !(reg & QSFP_HFI0_MODPRST_N); } /* diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 632dd5ba7..5fc93bb31 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -404,9 +404,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp) goto bail; } ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / sizeof(u32); wqe->lpsn = wqe->psn; @@ -455,9 +455,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp) wqe->lpsn = qp->s_next_psn++; } ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); qp->s_state = OP(RDMA_READ_REQUEST); hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); @@ -488,21 +488,21 @@ int hfi1_make_rc_req(struct hfi1_qp *qp) if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.swap); + wqe->atomic_wr.swap); ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); + wqe->atomic_wr.compare_add); } else { qp->s_state = OP(FETCH_ADD); ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); + wqe->atomic_wr.compare_add); ohdr->u.atomic_eth.compare_data = 0; } ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( - wqe->wr.wr.atomic.remote_addr >> 32); + wqe->atomic_wr.remote_addr >> 
32); ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( - wqe->wr.wr.atomic.remote_addr); + wqe->atomic_wr.remote_addr); ohdr->u.atomic_eth.rkey = cpu_to_be32( - wqe->wr.wr.atomic.rkey); + wqe->atomic_wr.rkey); hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); ss = NULL; len = 0; @@ -629,9 +629,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp) */ len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu; ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len); + cpu_to_be64(wqe->rdma_wr.remote_addr + len); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len); qp->s_state = OP(RDMA_READ_REQUEST); hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); @@ -697,6 +697,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct hfi1_qp *qp, struct pio_buf *pbuf; struct hfi1_ib_header hdr; struct hfi1_other_headers *ohdr; + unsigned long flags; /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ if (qp->s_flags & HFI1_S_RESP_PENDING) @@ -771,7 +772,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct hfi1_qp *qp, queue_ack: this_cpu_inc(*ibp->rc_qacks); - spin_lock(&qp->s_lock); + spin_lock_irqsave(&qp->s_lock, flags); qp->s_flags |= HFI1_S_ACK_PENDING | HFI1_S_RESP_PENDING; qp->s_nak_state = qp->r_nak_state; qp->s_ack_psn = qp->r_ack_psn; @@ -780,7 +781,7 @@ queue_ack: /* Schedule the send tasklet. */ hfi1_schedule_send(qp); - spin_unlock(&qp->s_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); } /** @@ -926,6 +927,7 @@ static void rc_timeout(unsigned long arg) ibp->n_rc_timeouts++; qp->s_flags &= ~HFI1_S_TIMER; del_timer(&qp->s_timer); + trace_hfi1_rc_timeout(qp, qp->s_last_psn + 1); restart_rc(qp, qp->s_last_psn + 1, 1); hfi1_schedule_send(qp); } @@ -1152,7 +1154,7 @@ static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp, * * This is called from rc_rcv_resp() to process an incoming RC ACK * for the given QP. 
- * Called at interrupt level with the QP s_lock held. + * May be called at interrupt level, with the QP s_lock held. * Returns 1 if OK, 0 if current operation should be aborted (NAK). */ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode, @@ -1441,6 +1443,8 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, spin_lock_irqsave(&qp->s_lock, flags); + trace_hfi1_rc_ack(qp, psn); + /* Ignore invalid responses. */ if (cmp_psn(psn, qp->s_next_psn) >= 0) goto ack_done; @@ -1629,6 +1633,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, u8 i, prev; int old_req; + trace_hfi1_rc_rcv_error(qp, psn); if (diff > 0) { /* * Packet sequence error. @@ -1835,11 +1840,12 @@ static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn, u32 rqpn, u8 svc_type) { struct opa_hfi1_cong_log_event_internal *cc_event; + unsigned long flags; if (sl >= OPA_MAX_SLS) return; - spin_lock(&ppd->cc_log_lock); + spin_lock_irqsave(&ppd->cc_log_lock, flags); ppd->threshold_cong_event_map[sl/8] |= 1 << (sl % 8); ppd->threshold_event_counter++; @@ -1855,7 +1861,7 @@ static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, /* keep timestamp in units of 1.024 usec */ cc_event->timestamp = ktime_to_ns(ktime_get()) / 1024; - spin_unlock(&ppd->cc_log_lock); + spin_unlock_irqrestore(&ppd->cc_log_lock, flags); } void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, @@ -1865,6 +1871,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, u16 ccti, ccti_incr, ccti_timer, ccti_limit; u8 trigger_threshold; struct cc_state *cc_state; + unsigned long flags; if (sl >= OPA_MAX_SLS) return; @@ -1887,7 +1894,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, trigger_threshold = cc_state->cong_setting.entries[sl].trigger_threshold; - spin_lock(&ppd->cca_timer_lock); + spin_lock_irqsave(&ppd->cca_timer_lock, flags); if (cca_timer->ccti < ccti_limit) { if (cca_timer->ccti 
+ ccti_incr <= ccti_limit) @@ -1897,7 +1904,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, set_link_ipg(ppd); } - spin_unlock(&ppd->cca_timer_lock); + spin_unlock_irqrestore(&ppd->cca_timer_lock, flags); ccti = cca_timer->ccti; @@ -1924,7 +1931,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, * * This is called from qp_rcv() to process an incoming RC packet * for the given QP. - * Called at interrupt level. + * May be called at interrupt level. */ void hfi1_rc_rcv(struct hfi1_packet *packet) { @@ -2383,7 +2390,7 @@ void hfi1_rc_hdrerr( struct hfi1_other_headers *ohdr; struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); int diff; - u8 opcode; + u32 opcode; u32 psn; /* Check for GRH */ diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index a4115288d..49bc9fd7a 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -481,8 +481,8 @@ again: if (wqe->length == 0) break; if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, wqe->length, - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, + wqe->rdma_wr.remote_addr, + wqe->rdma_wr.rkey, IB_ACCESS_REMOTE_WRITE))) goto acc_err; qp->r_sge.sg_list = NULL; @@ -494,8 +494,8 @@ again: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) goto inv_err; if (unlikely(!hfi1_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, + wqe->rdma_wr.remote_addr, + wqe->rdma_wr.rkey, IB_ACCESS_REMOTE_READ))) goto acc_err; release = 0; @@ -512,18 +512,18 @@ again: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) goto inv_err; if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), - wqe->wr.wr.atomic.remote_addr, - wqe->wr.wr.atomic.rkey, + wqe->atomic_wr.remote_addr, + wqe->atomic_wr.rkey, IB_ACCESS_REMOTE_ATOMIC))) goto acc_err; /* Perform atomic OP and save result. 
*/ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; - sdata = wqe->wr.wr.atomic.compare_add; + sdata = wqe->atomic_wr.compare_add; *(u64 *) sqp->s_sge.sge.vaddr = (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? (u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, - sdata, wqe->wr.wr.atomic.swap); + sdata, wqe->atomic_wr.swap); hfi1_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; goto send_comp; @@ -695,19 +695,6 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, return sizeof(struct ib_grh) / sizeof(u32); } -/* - * free_ahg - clear ahg from QP - */ -void clear_ahg(struct hfi1_qp *qp) -{ - qp->s_hdr->ahgcount = 0; - qp->s_flags &= ~(HFI1_S_AHG_VALID | HFI1_S_AHG_CLEAR); - if (qp->s_sde) - sdma_ahg_free(qp->s_sde, qp->s_ahgidx); - qp->s_ahgidx = -1; - qp->s_sde = NULL; -} - #define BTH2_OFFSET (offsetof(struct hfi1_pio_header, hdr.u.oth.bth[2]) / 4) /** @@ -833,6 +820,9 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr, ohdr->bth[2] = cpu_to_be32(bth2); } +/* when sending, force a reschedule every one of these periods */ +#define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */ + /** * hfi1_do_send - perform a send on a QP * @work: contains a pointer to the QP @@ -849,6 +839,7 @@ void hfi1_do_send(struct work_struct *work) struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); int (*make_req)(struct hfi1_qp *qp); unsigned long flags; + unsigned long timeout; if ((qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) && @@ -877,6 +868,7 @@ void hfi1_do_send(struct work_struct *work) spin_unlock_irqrestore(&qp->s_lock, flags); + timeout = jiffies + SEND_RESCHED_TIMEOUT; do { /* Check for a constructed packet to be sent. */ if (qp->s_hdrwords != 0) { @@ -890,6 +882,13 @@ void hfi1_do_send(struct work_struct *work) /* Record that s_hdr is empty. 
*/ qp->s_hdrwords = 0; } + + /* allow other tasks to run */ + if (unlikely(time_after(jiffies, timeout))) { + cond_resched(); + ppd->dd->verbs_dev.n_send_schedule++; + timeout = jiffies + SEND_RESCHED_TIMEOUT; + } } while (make_req(qp)); } @@ -913,7 +912,7 @@ void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe, if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); + atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount); /* See ch. 11.2.4.1 and 10.7.3.1 */ if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) || diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index aecd1a747..2a1da2189 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -55,6 +55,7 @@ #include <linux/bitops.h> #include <linux/timer.h> #include <linux/vmalloc.h> +#include <linux/highmem.h> #include "hfi.h" #include "common.h" @@ -64,7 +65,8 @@ #include "trace.h" /* must be a power of 2 >= 64 <= 32768 */ -#define SDMA_DESCQ_CNT 1024 +#define SDMA_DESCQ_CNT 2048 +#define SDMA_DESC_INTR 64 #define INVALID_TAIL 0xffff static uint sdma_descq_cnt = SDMA_DESCQ_CNT; @@ -79,6 +81,10 @@ uint mod_num_sdma; module_param_named(num_sdma, mod_num_sdma, uint, S_IRUGO); MODULE_PARM_DESC(num_sdma, "Set max number SDMA engines to use"); +static uint sdma_desct_intr = SDMA_DESC_INTR; +module_param_named(desct_intr, sdma_desct_intr, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(desct_intr, "Number of SDMA descriptor before interrupt"); + #define SDMA_WAIT_BATCH_SIZE 20 /* max wait time for a SDMA engine to indicate it has halted */ #define SDMA_ERR_HALT_TIMEOUT 10 /* ms */ @@ -303,17 +309,26 @@ static void sdma_wait_for_packet_egress(struct sdma_engine *sde, u64 off = 8 * sde->this_idx; struct hfi1_devdata *dd = sde->dd; int lcnt = 0; + u64 reg_prev; + u64 reg = 0; while (1) { - u64 reg = read_csr(dd, off + SEND_EGRESS_SEND_DMA_STATUS); + reg_prev = reg; + reg = 
read_csr(dd, off + SEND_EGRESS_SEND_DMA_STATUS); reg &= SDMA_EGRESS_PACKET_OCCUPANCY_SMASK; reg >>= SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT; if (reg == 0) break; - if (lcnt++ > 100) { - dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u\n", + /* counter is reset if occupancy count changes */ + if (reg != reg_prev) + lcnt = 0; + if (lcnt++ > 500) { + /* timed out - bounce the link */ + dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n", __func__, sde->this_idx, (u32)reg); + queue_work(dd->pport->hfi1_wq, + &dd->pport->link_bounce_work); break; } udelay(1); @@ -369,16 +384,17 @@ static void sdma_flush(struct sdma_engine *sde) { struct sdma_txreq *txp, *txp_next; LIST_HEAD(flushlist); + unsigned long flags; /* flush from head to tail */ sdma_flush_descq(sde); - spin_lock(&sde->flushlist_lock); + spin_lock_irqsave(&sde->flushlist_lock, flags); /* copy flush list */ list_for_each_entry_safe(txp, txp_next, &sde->flushlist, list) { list_del_init(&txp->list); list_add_tail(&txp->list, &flushlist); } - spin_unlock(&sde->flushlist_lock); + spin_unlock_irqrestore(&sde->flushlist_lock, flags); /* flush from flush list */ list_for_each_entry_safe(txp, txp_next, &flushlist, list) { int drained = 0; @@ -741,6 +757,7 @@ u16 sdma_get_descq_cnt(void) return SDMA_DESCQ_CNT; return count; } + /** * sdma_select_engine_vl() - select sdma engine * @dd: devdata @@ -966,10 +983,7 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) sde->descq = NULL; sde->descq_phys = 0; } - if (is_vmalloc_addr(sde->tx_ring)) - vfree(sde->tx_ring); - else - kfree(sde->tx_ring); + kvfree(sde->tx_ring); sde->tx_ring = NULL; } spin_lock_irq(&dd->sde_map_lock); @@ -1038,6 +1052,9 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) return -ENOMEM; idle_cnt = ns_to_cclock(dd, idle_cnt); + if (!sdma_desct_intr) + sdma_desct_intr = SDMA_DESC_INTR; + /* Allocate memory for SendDMA descriptor FIFOs */ for 
(this_idx = 0; this_idx < num_engines; ++this_idx) { sde = &dd->per_sdma[this_idx]; @@ -1096,10 +1113,8 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) sde->progress_check_head = 0; - init_timer(&sde->err_progress_check_timer); - sde->err_progress_check_timer.function = - sdma_err_progress_check; - sde->err_progress_check_timer.data = (unsigned long)sde; + setup_timer(&sde->err_progress_check_timer, + sdma_err_progress_check, (unsigned long)sde); sde->descq = dma_zalloc_coherent( &dd->pcidev->dev, @@ -1540,7 +1555,7 @@ void sdma_engine_interrupt(struct sdma_engine *sde, u64 status) { trace_hfi1_sdma_engine_interrupt(sde, status); write_seqlock(&sde->head_lock); - sdma_set_desc_cnt(sde, sde->descq_cnt / 2); + sdma_set_desc_cnt(sde, sdma_desct_intr); sdma_make_progress(sde, status); write_sequnlock(&sde->head_lock); } @@ -2699,27 +2714,134 @@ static void __sdma_process_event(struct sdma_engine *sde, * of descriptors in the sdma_txreq is exhausted. * * The code will bump the allocation up to the max - * of MAX_DESC (64) descriptors. There doesn't seem - * much point in an interim step. + * of MAX_DESC (64) descriptors. There doesn't seem + * much point in an interim step. The last descriptor + * is reserved for coalesce buffer in order to support + * cases where input packet has >MAX_DESC iovecs. 
* */ -int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) +static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) { int i; + /* Handle last descriptor */ + if (unlikely((tx->num_desc == (MAX_DESC - 1)))) { + /* if tlen is 0, it is for padding, release last descriptor */ + if (!tx->tlen) { + tx->desc_limit = MAX_DESC; + } else if (!tx->coalesce_buf) { + /* allocate coalesce buffer with space for padding */ + tx->coalesce_buf = kmalloc(tx->tlen + sizeof(u32), + GFP_ATOMIC); + if (!tx->coalesce_buf) + return -ENOMEM; + + tx->coalesce_idx = 0; + } + return 0; + } + + if (unlikely(tx->num_desc == MAX_DESC)) + return -ENOMEM; + tx->descp = kmalloc_array( MAX_DESC, sizeof(struct sdma_desc), GFP_ATOMIC); if (!tx->descp) return -ENOMEM; - tx->desc_limit = MAX_DESC; + + /* reserve last descriptor for coalescing */ + tx->desc_limit = MAX_DESC - 1; /* copy ones already built */ for (i = 0; i < tx->num_desc; i++) tx->descp[i] = tx->descs[i]; return 0; } +/* + * ext_coal_sdma_tx_descs() - extend or coalesce sdma tx descriptors + * + * This is called once the initial nominal allocation of descriptors + * in the sdma_txreq is exhausted. + * + * This function calls _extend_sdma_tx_descs to extend or allocate + * coalesce buffer. If there is an allocated coalesce buffer, it will + * copy the input packet data into the coalesce buffer. It also adds + * coalesce buffer descriptor once the whole packet is received. 
+ * + * Return: + * <0 - error + * 0 - coalescing, don't populate descriptor + * 1 - continue with populating descriptor + */ +int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, + int type, void *kvaddr, struct page *page, + unsigned long offset, u16 len) +{ + int pad_len, rval; + dma_addr_t addr; + + rval = _extend_sdma_tx_descs(dd, tx); + if (rval) { + sdma_txclean(dd, tx); + return rval; + } + + /* If coalesce buffer is allocated, copy data into it */ + if (tx->coalesce_buf) { + if (type == SDMA_MAP_NONE) { + sdma_txclean(dd, tx); + return -EINVAL; + } + + if (type == SDMA_MAP_PAGE) { + kvaddr = kmap(page); + kvaddr += offset; + } else if (WARN_ON(!kvaddr)) { + sdma_txclean(dd, tx); + return -EINVAL; + } + + memcpy(tx->coalesce_buf + tx->coalesce_idx, kvaddr, len); + tx->coalesce_idx += len; + if (type == SDMA_MAP_PAGE) + kunmap(page); + + /* If there is more data, return */ + if (tx->tlen - tx->coalesce_idx) + return 0; + + /* Whole packet is received; add any padding */ + pad_len = tx->packet_len & (sizeof(u32) - 1); + if (pad_len) { + pad_len = sizeof(u32) - pad_len; + memset(tx->coalesce_buf + tx->coalesce_idx, 0, pad_len); + /* padding is taken care of for coalescing case */ + tx->packet_len += pad_len; + tx->tlen += pad_len; + } + + /* dma map the coalesce buffer */ + addr = dma_map_single(&dd->pcidev->dev, + tx->coalesce_buf, + tx->tlen, + DMA_TO_DEVICE); + + if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) { + sdma_txclean(dd, tx); + return -ENOSPC; + } + + /* Add descriptor for coalesce buffer */ + tx->desc_limit = MAX_DESC; + return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx, + addr, tx->tlen); + } + + return 1; +} + /* Update sdes when the lmc changes */ void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid) { @@ -2745,13 +2867,15 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) { int rval = 0; + tx->num_desc++; if ((unlikely(tx->num_desc == tx->desc_limit))) { rval = 
_extend_sdma_tx_descs(dd, tx); - if (rval) + if (rval) { + sdma_txclean(dd, tx); return rval; + } } - /* finish the one just added */ - tx->num_desc++; + /* finish the one just added */ make_tx_sdma_desc( tx, SDMA_MAP_NONE, diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h index 496086903..cc22d2ee2 100644 --- a/drivers/staging/rdma/hfi1/sdma.h +++ b/drivers/staging/rdma/hfi1/sdma.h @@ -352,6 +352,8 @@ struct sdma_txreq { /* private: */ void *coalesce_buf; /* private: */ + u16 coalesce_idx; + /* private: */ struct iowait *wait; /* private: */ callback_t complete; @@ -735,7 +737,9 @@ static inline void make_tx_sdma_desc( } /* helper to extend txreq */ -int _extend_sdma_tx_descs(struct hfi1_devdata *, struct sdma_txreq *); +int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, + int type, void *kvaddr, struct page *page, + unsigned long offset, u16 len); int _pad_sdma_tx_descs(struct hfi1_devdata *, struct sdma_txreq *); void sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *); @@ -762,11 +766,6 @@ static inline int _sdma_txadd_daddr( { int rval = 0; - if ((unlikely(tx->num_desc == tx->desc_limit))) { - rval = _extend_sdma_tx_descs(dd, tx); - if (rval) - return rval; - } make_tx_sdma_desc( tx, type, @@ -798,9 +797,7 @@ static inline int _sdma_txadd_daddr( * * Return: * 0 - success, -ENOSPC - mapping fail, -ENOMEM - couldn't - * extend descriptor array or couldn't allocate coalesce - * buffer. 
- * + * extend/coalesce descriptor array */ static inline int sdma_txadd_page( struct hfi1_devdata *dd, @@ -809,17 +806,28 @@ static inline int sdma_txadd_page( unsigned long offset, u16 len) { - dma_addr_t addr = - dma_map_page( - &dd->pcidev->dev, - page, - offset, - len, - DMA_TO_DEVICE); + dma_addr_t addr; + int rval; + + if ((unlikely(tx->num_desc == tx->desc_limit))) { + rval = ext_coal_sdma_tx_descs(dd, tx, SDMA_MAP_PAGE, + NULL, page, offset, len); + if (rval <= 0) + return rval; + } + + addr = dma_map_page( + &dd->pcidev->dev, + page, + offset, + len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) { sdma_txclean(dd, tx); return -ENOSPC; } + return _sdma_txadd_daddr( dd, SDMA_MAP_PAGE, tx, addr, len); } @@ -846,6 +854,15 @@ static inline int sdma_txadd_daddr( dma_addr_t addr, u16 len) { + int rval; + + if ((unlikely(tx->num_desc == tx->desc_limit))) { + rval = ext_coal_sdma_tx_descs(dd, tx, SDMA_MAP_NONE, + NULL, NULL, 0, 0); + if (rval <= 0) + return rval; + } + return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, tx, addr, len); } @@ -862,7 +879,7 @@ static inline int sdma_txadd_daddr( * The mapping/unmapping of the kvaddr and len is automatically handled. 
* * Return: - * 0 - success, -ENOSPC - mapping fail, -ENOMEM - couldn't extend + * 0 - success, -ENOSPC - mapping fail, -ENOMEM - couldn't extend/coalesce * descriptor array */ static inline int sdma_txadd_kvaddr( @@ -871,16 +888,27 @@ static inline int sdma_txadd_kvaddr( void *kvaddr, u16 len) { - dma_addr_t addr = - dma_map_single( - &dd->pcidev->dev, - kvaddr, - len, - DMA_TO_DEVICE); + dma_addr_t addr; + int rval; + + if ((unlikely(tx->num_desc == tx->desc_limit))) { + rval = ext_coal_sdma_tx_descs(dd, tx, SDMA_MAP_SINGLE, + kvaddr, NULL, 0, len); + if (rval <= 0) + return rval; + } + + addr = dma_map_single( + &dd->pcidev->dev, + kvaddr, + len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) { sdma_txclean(dd, tx); return -ENOSPC; } + return _sdma_txadd_daddr( dd, SDMA_MAP_SINGLE, tx, addr, len); } diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/staging/rdma/hfi1/sysfs.c index b78c72861..1dd6727dd 100644 --- a/drivers/staging/rdma/hfi1/sysfs.c +++ b/drivers/staging/rdma/hfi1/sysfs.c @@ -555,7 +555,7 @@ static ssize_t show_tempsense(struct device *device, container_of(device, struct hfi1_ibdev, ibdev.dev); struct hfi1_devdata *dd = dd_from_dev(dev); struct hfi1_temp temp; - int ret = -ENXIO; + int ret; ret = hfi1_tempsense_rd(dd, &temp); if (!ret) { diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/staging/rdma/hfi1/trace.c index 70ad7b9fc..f55b75194 100644 --- a/drivers/staging/rdma/hfi1/trace.c +++ b/drivers/staging/rdma/hfi1/trace.c @@ -126,13 +126,13 @@ const char *parse_everbs_hdrs( case OP(RC, ACKNOWLEDGE): trace_seq_printf(p, AETH_PRN, be32_to_cpu(eh->aeth) >> 24, - be32_to_cpu(eh->aeth) & HFI1_QPN_MASK); + be32_to_cpu(eh->aeth) & HFI1_MSN_MASK); break; /* aeth + atomicacketh */ case OP(RC, ATOMIC_ACKNOWLEDGE): trace_seq_printf(p, AETH_PRN " " ATOMICACKETH_PRN, (be32_to_cpu(eh->at.aeth) >> 24) & 0xff, - be32_to_cpu(eh->at.aeth) & HFI1_QPN_MASK, + be32_to_cpu(eh->at.aeth) & HFI1_MSN_MASK, (unsigned long 
long)ib_u64_get(eh->at.atomic_ack_eth)); break; /* atomiceth */ diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/staging/rdma/hfi1/trace.h index d7851c0a0..57430295c 100644 --- a/drivers/staging/rdma/hfi1/trace.h +++ b/drivers/staging/rdma/hfi1/trace.h @@ -1252,37 +1252,61 @@ TRACE_EVENT(hfi1_sdma_state, #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_rc -DECLARE_EVENT_CLASS(hfi1_sdma_rc, +DECLARE_EVENT_CLASS(hfi1_rc_template, TP_PROTO(struct hfi1_qp *qp, u32 psn), TP_ARGS(qp, psn), TP_STRUCT__entry( DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) __field(u32, qpn) - __field(u32, flags) + __field(u32, s_flags) __field(u32, psn) - __field(u32, sending_psn) - __field(u32, sending_hpsn) + __field(u32, s_psn) + __field(u32, s_next_psn) + __field(u32, s_sending_psn) + __field(u32, s_sending_hpsn) + __field(u32, r_psn) ), TP_fast_assign( DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) __entry->qpn = qp->ibqp.qp_num; - __entry->flags = qp->s_flags; + __entry->s_flags = qp->s_flags; __entry->psn = psn; - __entry->sending_psn = qp->s_sending_psn; - __entry->sending_hpsn = qp->s_sending_hpsn; + __entry->s_psn = qp->s_psn; + __entry->s_next_psn = qp->s_next_psn; + __entry->s_sending_psn = qp->s_sending_psn; + __entry->s_sending_hpsn = qp->s_sending_hpsn; + __entry->r_psn = qp->r_psn; ), TP_printk( - "[%s] qpn 0x%x flags 0x%x psn 0x%x sending_psn 0x%x sending_hpsn 0x%x", + "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x", __get_str(dev), __entry->qpn, - __entry->flags, + __entry->s_flags, __entry->psn, - __entry->sending_psn, - __entry->sending_psn + __entry->s_psn, + __entry->s_next_psn, + __entry->s_sending_psn, + __entry->s_sending_hpsn, + __entry->r_psn ) ); -DEFINE_EVENT(hfi1_sdma_rc, hfi1_rc_sendcomplete, +DEFINE_EVENT(hfi1_rc_template, hfi1_rc_sendcomplete, + TP_PROTO(struct hfi1_qp *qp, u32 psn), + TP_ARGS(qp, psn) +); + +DEFINE_EVENT(hfi1_rc_template, hfi1_rc_ack, + TP_PROTO(struct hfi1_qp *qp, u32 psn), 
+ TP_ARGS(qp, psn) +); + +DEFINE_EVENT(hfi1_rc_template, hfi1_rc_timeout, + TP_PROTO(struct hfi1_qp *qp, u32 psn), + TP_ARGS(qp, psn) +); + +DEFINE_EVENT(hfi1_rc_template, hfi1_rc_rcv_error, TP_PROTO(struct hfi1_qp *qp, u32 psn), TP_ARGS(qp, psn) ); diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index b536f3977..6095039c4 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -147,9 +147,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp) case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / 4; if (len > pmtu) { diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index d40d1a1e1..5a9c784be 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -80,7 +80,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe) rcu_read_lock(); - qp = hfi1_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn); + qp = hfi1_lookup_qpn(ibp, swqe->ud_wr.remote_qpn); if (!qp) { ibp->n_pkt_drops++; rcu_read_unlock(); @@ -98,7 +98,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe) goto drop; } - ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr; + ah_attr = &to_iah(swqe->ud_wr.ah)->attr; ppd = ppd_from_ibp(ibp); if (qp->ibqp.qp_num > 1) { @@ -128,8 +128,8 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe) if (qp->ibqp.qp_num) { u32 qkey; - qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ? - sqp->qkey : swqe->wr.wr.ud.remote_qkey; + qkey = (int)swqe->ud_wr.remote_qkey < 0 ? 
+ sqp->qkey : swqe->ud_wr.remote_qkey; if (unlikely(qkey != qp->qkey)) { u16 lid; @@ -234,7 +234,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe) if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) { if (sqp->ibqp.qp_type == IB_QPT_GSI || sqp->ibqp.qp_type == IB_QPT_SMI) - wc.pkey_index = swqe->wr.wr.ud.pkey_index; + wc.pkey_index = swqe->ud_wr.pkey_index; else wc.pkey_index = sqp->s_pkey_index; } else { @@ -309,7 +309,7 @@ int hfi1_make_ud_req(struct hfi1_qp *qp) /* Construct the header. */ ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); - ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; + ah_attr = &to_iah(wqe->ud_wr.ah)->attr; if (ah_attr->dlid < HFI1_MULTICAST_LID_BASE || ah_attr->dlid == HFI1_PERMISSIVE_LID) { lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); @@ -401,18 +401,18 @@ int hfi1_make_ud_req(struct hfi1_qp *qp) bth0 |= IB_BTH_SOLICITED; bth0 |= extra_bytes << 20; if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) - bth0 |= hfi1_get_pkey(ibp, wqe->wr.wr.ud.pkey_index); + bth0 |= hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index); else bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index); ohdr->bth[0] = cpu_to_be32(bth0); - ohdr->bth[1] = cpu_to_be32(wqe->wr.wr.ud.remote_qpn); + ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn); ohdr->bth[2] = cpu_to_be32(mask_psn(qp->s_next_psn++)); /* * Qkeys with the high order bit set mean use the * qkey from the QP context instead of the WR (see 10.2.5). */ - ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ? - qp->qkey : wqe->wr.wr.ud.remote_qkey); + ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ? 
+ qp->qkey : wqe->ud_wr.remote_qkey); ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); /* disarm any ahg */ qp->s_hdr->ahgcount = 0; diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index 55526613a..36c838dcf 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -146,7 +146,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1)) /* Last packet in the request */ -#define USER_SDMA_TXREQ_FLAGS_LAST_PKT (1 << 0) +#define TXREQ_FLAGS_REQ_LAST_PKT (1 << 0) +#define TXREQ_FLAGS_IOVEC_LAST_PKT (1 << 0) #define SDMA_REQ_IN_USE 0 #define SDMA_REQ_FOR_THREAD 1 @@ -249,13 +250,22 @@ struct user_sdma_request { unsigned long flags; }; +/* + * A single txreq could span up to 3 physical pages when the MTU + * is sufficiently large (> 4K). Each of the IOV pointers also + * needs its own set of flags so the vector has been handled + * independently of each other. 
+ */ struct user_sdma_txreq { /* Packet header for the txreq */ struct hfi1_pkt_header hdr; struct sdma_txreq txreq; struct user_sdma_request *req; - struct user_sdma_iovec *iovec1; - struct user_sdma_iovec *iovec2; + struct { + struct user_sdma_iovec *vec; + u8 flags; + } iovecs[3]; + int idx; u16 flags; unsigned busycount; u64 seqnum; @@ -294,21 +304,6 @@ static int defer_packet_queue( unsigned seq); static void activate_packet_queue(struct iowait *, int); -static inline int iovec_may_free(struct user_sdma_iovec *iovec, - void (*free)(struct user_sdma_iovec *)) -{ - if (ACCESS_ONCE(iovec->offset) == iovec->iov.iov_len) { - free(iovec); - return 1; - } - return 0; -} - -static inline void iovec_set_complete(struct user_sdma_iovec *iovec) -{ - iovec->offset = iovec->iov.iov_len; -} - static int defer_packet_queue( struct sdma_engine *sde, struct iowait *wait, @@ -378,20 +373,14 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) dd = uctxt->dd; pq = kzalloc(sizeof(*pq), GFP_KERNEL); - if (!pq) { - dd_dev_err(dd, - "[%u:%u] Failed to allocate SDMA request struct\n", - uctxt->ctxt, subctxt_fp(fp)); + if (!pq) goto pq_nomem; - } + memsize = sizeof(*pq->reqs) * hfi1_sdma_comp_ring_size; pq->reqs = kmalloc(memsize, GFP_KERNEL); - if (!pq->reqs) { - dd_dev_err(dd, - "[%u:%u] Failed to allocate SDMA request queue (%u)\n", - uctxt->ctxt, subctxt_fp(fp), memsize); + if (!pq->reqs) goto pq_reqs_nomem; - } + INIT_LIST_HEAD(&pq->list); pq->dd = dd; pq->ctxt = uctxt->ctxt; @@ -417,22 +406,15 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) } user_sdma_pkt_fp(fp) = pq; cq = kzalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) { - dd_dev_err(dd, - "[%u:%u] Failed to allocate SDMA completion queue\n", - uctxt->ctxt, subctxt_fp(fp)); + if (!cq) goto cq_nomem; - } memsize = ALIGN(sizeof(*cq->comps) * hfi1_sdma_comp_ring_size, PAGE_SIZE); cq->comps = vmalloc_user(memsize); - if (!cq->comps) { - dd_dev_err(dd, - "[%u:%u] Failed to 
allocate SDMA completion queue entries\n", - uctxt->ctxt, subctxt_fp(fp)); + if (!cq->comps) goto cq_comps_nomem; - } + cq->nentries = hfi1_sdma_comp_ring_size; user_sdma_comp_fp(fp) = cq; @@ -486,8 +468,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) } kfree(pq->reqs); } - if (pq->txreq_cache) - kmem_cache_destroy(pq->txreq_cache); + kmem_cache_destroy(pq->txreq_cache); kfree(pq); fd->pq = NULL; } @@ -839,11 +820,11 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) tx->flags = 0; tx->req = req; tx->busycount = 0; - tx->iovec1 = NULL; - tx->iovec2 = NULL; + tx->idx = -1; + memset(tx->iovecs, 0, sizeof(tx->iovecs)); if (req->seqnum == req->info.npkts - 1) - tx->flags |= USER_SDMA_TXREQ_FLAGS_LAST_PKT; + tx->flags |= TXREQ_FLAGS_REQ_LAST_PKT; /* * Calculate the payload size - this is min of the fragment @@ -872,7 +853,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) goto free_tx; } - tx->iovec1 = iovec; + tx->iovecs[++tx->idx].vec = iovec; datalen = compute_data_length(req, tx); if (!datalen) { SDMA_DBG(req, @@ -962,10 +943,17 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) iovec->pages[pageidx], offset, len); if (ret) { + int i; + dd_dev_err(pq->dd, "SDMA txreq add page failed %d\n", ret); - iovec_set_complete(iovec); + /* Mark all assigned vectors as complete so they + * are unpinned in the callback. 
*/ + for (i = tx->idx; i >= 0; i--) { + tx->iovecs[i].flags |= + TXREQ_FLAGS_IOVEC_LAST_PKT; + } goto free_txreq; } iov_offset += len; @@ -973,8 +961,11 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) data_sent += len; if (unlikely(queued < datalen && pageidx == iovec->npages && - req->iov_idx < req->data_iovs - 1)) { + req->iov_idx < req->data_iovs - 1 && + tx->idx < ARRAY_SIZE(tx->iovecs))) { iovec->offset += iov_offset; + tx->iovecs[tx->idx].flags |= + TXREQ_FLAGS_IOVEC_LAST_PKT; iovec = &req->iovs[++req->iov_idx]; if (!iovec->pages) { ret = pin_vector_pages(req, iovec); @@ -982,8 +973,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) goto free_txreq; } iov_offset = 0; - tx->iovec2 = iovec; - + tx->iovecs[++tx->idx].vec = iovec; } } /* @@ -995,11 +985,15 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) req->tidoffset += datalen; req->sent += data_sent; if (req->data_len) { - if (tx->iovec1 && !tx->iovec2) - tx->iovec1->offset += iov_offset; - else if (tx->iovec2) - tx->iovec2->offset += iov_offset; + tx->iovecs[tx->idx].vec->offset += iov_offset; + /* If we've reached the end of the io vector, mark it + * so the callback can unpin the pages and free it. 
*/ + if (tx->iovecs[tx->idx].vec->offset == + tx->iovecs[tx->idx].vec->iov.iov_len) + tx->iovecs[tx->idx].flags |= + TXREQ_FLAGS_IOVEC_LAST_PKT; } + /* * It is important to increment this here as it is used to * generate the BTH.PSN and, therefore, can't be bulk-updated @@ -1051,8 +1045,8 @@ static int pin_vector_pages(struct user_sdma_request *req, unsigned pinned; iovec->npages = num_user_pages(&iovec->iov); - iovec->pages = kzalloc(sizeof(*iovec->pages) * - iovec->npages, GFP_KERNEL); + iovec->pages = kcalloc(iovec->npages, sizeof(*iovec->pages), + GFP_KERNEL); if (!iovec->pages) { SDMA_DBG(req, "Failed page array alloc"); ret = -ENOMEM; @@ -1228,7 +1222,7 @@ static int set_txreq_header(struct user_sdma_request *req, req->seqnum)); /* Set ACK request on last packet */ - if (unlikely(tx->flags & USER_SDMA_TXREQ_FLAGS_LAST_PKT)) + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) hdr->bth[2] |= cpu_to_be32(1UL<<31); /* Set the new offset */ @@ -1260,7 +1254,7 @@ static int set_txreq_header(struct user_sdma_request *req, KDETH_SET(hdr->kdeth.ver_tid_offset, TID, EXP_TID_GET(tidval, IDX)); /* Clear KDETH.SH only on the last packet */ - if (unlikely(tx->flags & USER_SDMA_TXREQ_FLAGS_LAST_PKT)) + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0); /* * Set the KDETH.OFFSET and KDETH.OM based on size of @@ -1304,7 +1298,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, /* BTH.PSN and BTH.A */ val32 = (be32_to_cpu(hdr->bth[2]) + req->seqnum) & (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 
0x7fffffff : 0xffffff); - if (unlikely(tx->flags & USER_SDMA_TXREQ_FLAGS_LAST_PKT)) + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) val32 |= 1UL << 31; AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16)); AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff)); @@ -1345,7 +1339,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) | (EXP_TID_GET(tidval, IDX) & 0x3ff)); /* Clear KDETH.SH on last packet */ - if (unlikely(tx->flags & USER_SDMA_TXREQ_FLAGS_LAST_PKT)) { + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) { val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset, INTR) >> 16); val &= cpu_to_le16(~(1U << 13)); @@ -1372,10 +1366,16 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status, if (unlikely(!req || !pq)) return; - if (tx->iovec1) - iovec_may_free(tx->iovec1, unpin_vector_pages); - if (tx->iovec2) - iovec_may_free(tx->iovec2, unpin_vector_pages); + /* If we have any io vectors associated with this txreq, + * check whether they need to be 'freed'. */ + if (tx->idx != -1) { + int i; + + for (i = tx->idx; i >= 0; i--) { + if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT) + unpin_vector_pages(tx->iovecs[i].vec); + } + } tx_seqnum = tx->seqnum; kmem_cache_free(pq->txreq_cache, tx); diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 41bb59eb0..9beb0aa87 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -129,6 +129,9 @@ static void verbs_sdma_complete( int status, int drained); +/* Length of buffer to create verbs txreq cache name */ +#define TXREQ_NAME_LEN 24 + /* * Note that it is OK to post send work requests in the SQE and ERR * states; hfi1_do_send() will process them and generate error @@ -380,9 +383,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr) * undefined operations. 
* Make sure buffer is large enough to hold the result for atomics. */ - if (wr->opcode == IB_WR_FAST_REG_MR) { - return -EINVAL; - } else if (qp->ibqp.qp_type == IB_QPT_UC) { + if (qp->ibqp.qp_type == IB_QPT_UC) { if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) return -EINVAL; } else if (qp->ibqp.qp_type != IB_QPT_RC) { @@ -391,7 +392,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr) wr->opcode != IB_WR_SEND_WITH_IMM) return -EINVAL; /* Check UD destination address PD */ - if (qp->ibqp.pd != wr->wr.ud.ah->pd) + if (qp->ibqp.pd != ud_wr(wr)->ah->pd) return -EINVAL; } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) return -EINVAL; @@ -412,7 +413,21 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr) rkt = &to_idev(qp->ibqp.device)->lk_table; pd = to_ipd(qp->ibqp.pd); wqe = get_swqe_ptr(qp, qp->s_head); - wqe->wr = *wr; + + + if (qp->ibqp.qp_type != IB_QPT_UC && + qp->ibqp.qp_type != IB_QPT_RC) + memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); + else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wr->opcode == IB_WR_RDMA_WRITE || + wr->opcode == IB_WR_RDMA_READ) + memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr)); + else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr)); + else + memcpy(&wqe->wr, wr, sizeof(wqe->wr)); + wqe->length = 0; j = 0; if (wr->num_sge) { @@ -438,7 +453,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr) if (wqe->length > 0x80000000U) goto bail_inval_free; } else { - struct hfi1_ah *ah = to_iah(wr->wr.ud.ah); + struct hfi1_ah *ah = to_iah(ud_wr(wr)->ah); atomic_inc(&ah->refcount); } @@ -597,6 +612,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) u32 tlen = packet->tlen; struct hfi1_pportdata *ppd = rcd->ppd; struct hfi1_ibport *ibp = &ppd->ibport_data; + unsigned long flags; u32 qp_num; int lnh; u8 opcode; @@ -639,10 +655,10 @@ void hfi1_ib_rcv(struct 
hfi1_packet *packet) goto drop; list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; - spin_lock(&packet->qp->r_lock); + spin_lock_irqsave(&packet->qp->r_lock, flags); if (likely((qp_ok(opcode, packet)))) opcode_handler_tbl[opcode](packet); - spin_unlock(&packet->qp->r_lock); + spin_unlock_irqrestore(&packet->qp->r_lock, flags); } /* * Notify hfi1_multicast_detach() if it is waiting for us @@ -657,10 +673,10 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) rcu_read_unlock(); goto drop; } - spin_lock(&packet->qp->r_lock); + spin_lock_irqsave(&packet->qp->r_lock, flags); if (likely((qp_ok(opcode, packet)))) opcode_handler_tbl[opcode](packet); - spin_unlock(&packet->qp->r_lock); + spin_unlock_irqrestore(&packet->qp->r_lock, flags); rcu_read_unlock(); } return; @@ -1199,6 +1215,7 @@ pio_bail: } return 0; } + /* * egress_pkey_matches_entry - return 1 if the pkey matches ent (ent * being an entry from the ingress partition key table), return 0 @@ -1884,7 +1901,7 @@ static void init_ibport(struct hfi1_pportdata *ppd) static void verbs_txreq_kmem_cache_ctor(void *obj) { - struct verbs_txreq *tx = (struct verbs_txreq *)obj; + struct verbs_txreq *tx = obj; memset(tx, 0, sizeof(*tx)); } @@ -1903,6 +1920,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) int ret; size_t lcpysz = IB_DEVICE_NAME_MAX; u16 descq_cnt; + char buf[TXREQ_NAME_LEN]; ret = hfi1_qp_init(dev); if (ret) @@ -1956,8 +1974,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) descq_cnt = sdma_get_descq_cnt(); + snprintf(buf, sizeof(buf), "hfi1_%u_vtxreq_cache", dd->unit); /* SLAB_HWCACHE_ALIGN for AHG */ - dev->verbs_txreq_cache = kmem_cache_create("hfi1_vtxreq_cache", + dev->verbs_txreq_cache = kmem_cache_create(buf, sizeof(struct verbs_txreq), 0, SLAB_HWCACHE_ALIGN, verbs_txreq_kmem_cache_ctor); @@ -2048,8 +2067,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->reg_user_mr = hfi1_reg_user_mr; ibdev->dereg_mr = hfi1_dereg_mr; ibdev->alloc_mr = hfi1_alloc_mr; - 
ibdev->alloc_fast_reg_page_list = hfi1_alloc_fast_reg_page_list; - ibdev->free_fast_reg_page_list = hfi1_free_fast_reg_page_list; ibdev->alloc_fmr = hfi1_alloc_fmr; ibdev->map_phys_fmr = hfi1_map_phys_fmr; ibdev->unmap_fmr = hfi1_unmap_fmr; diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index ed903a93b..041ad07ee 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -348,7 +348,12 @@ struct hfi1_mr { * in qp->s_max_sge. */ struct hfi1_swqe { - struct ib_send_wr wr; /* don't use wr.sg_list */ + union { + struct ib_send_wr wr; /* don't use wr.sg_list */ + struct ib_rdma_wr rdma_wr; + struct ib_atomic_wr atomic_wr; + struct ib_ud_wr ud_wr; + }; u32 psn; /* first packet sequence number */ u32 lpsn; /* last packet sequence number */ u32 ssn; /* send sequence number */ @@ -754,6 +759,7 @@ struct hfi1_ibdev { u64 n_piowait; u64 n_txwait; u64 n_kmem_wait; + u64 n_send_schedule; u32 n_pds_allocated; /* number of PDs allocated for device */ spinlock_t n_pds_lock; @@ -1020,13 +1026,6 @@ struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_entries); -struct ib_fast_reg_page_list *hfi1_alloc_fast_reg_page_list( - struct ib_device *ibdev, int page_list_len); - -void hfi1_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl); - -int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_send_wr *wr); - struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr); @@ -1078,8 +1077,6 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, struct ib_global_route *grh, u32 hwords, u32 nwords); -void clear_ahg(struct hfi1_qp *qp); - void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr, u32 bth0, u32 bth2, int middle); |