diff options
Diffstat (limited to 'drivers/infiniband/hw/ipath')
37 files changed, 0 insertions, 30491 deletions
diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig deleted file mode 100644 index 8fe54ff00..000000000 --- a/drivers/infiniband/hw/ipath/Kconfig +++ /dev/null @@ -1,14 +0,0 @@ -config INFINIBAND_IPATH - tristate "QLogic HTX HCA support" - depends on 64BIT && NET && HT_IRQ - ---help--- - This is a driver for the obsolete QLogic Hyper-Transport - IB host channel adapter (model QHT7140), - including InfiniBand verbs support. This driver allows these - devices to be used with both kernel upper level protocols such - as IP-over-InfiniBand as well as with userspace applications - (in conjunction with InfiniBand userspace access). - For QLogic PCIe QLE based cards, use the QIB driver instead. - - If you have this hardware you will need to boot with PAT disabled - on your x86-64 systems, use the nopat kernel parameter. diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile deleted file mode 100644 index 4496f2820..000000000 --- a/drivers/infiniband/hw/ipath/Makefile +++ /dev/null @@ -1,37 +0,0 @@ -ccflags-y := -DIPATH_IDSTR='"QLogic kernel.org driver"' \ - -DIPATH_KERN_TYPE=0 - -obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o - -ib_ipath-y := \ - ipath_cq.o \ - ipath_diag.o \ - ipath_dma.o \ - ipath_driver.o \ - ipath_eeprom.o \ - ipath_file_ops.o \ - ipath_fs.o \ - ipath_init_chip.o \ - ipath_intr.o \ - ipath_keys.o \ - ipath_mad.o \ - ipath_mmap.o \ - ipath_mr.o \ - ipath_qp.o \ - ipath_rc.o \ - ipath_ruc.o \ - ipath_sdma.o \ - ipath_srq.o \ - ipath_stats.o \ - ipath_sysfs.o \ - ipath_uc.o \ - ipath_ud.o \ - ipath_user_pages.o \ - ipath_user_sdma.o \ - ipath_verbs_mcast.o \ - ipath_verbs.o - -ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o - -ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o -ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h deleted file mode 100644 index 28cfe97cf..000000000 --- 
a/drivers/infiniband/hw/ipath/ipath_common.h +++ /dev/null @@ -1,851 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _IPATH_COMMON_H -#define _IPATH_COMMON_H - -/* - * This file contains defines, structures, etc. that are used - * to communicate between kernel and user code. - */ - - -/* This is the IEEE-assigned OUI for QLogic Inc. InfiniPath */ -#define IPATH_SRC_OUI_1 0x00 -#define IPATH_SRC_OUI_2 0x11 -#define IPATH_SRC_OUI_3 0x75 - -/* version of protocol header (known to chip also). 
In the long run, - * we should be able to generate and accept a range of version numbers; - * for now we only accept one, and it's compiled in. - */ -#define IPS_PROTO_VERSION 2 - -/* - * These are compile time constants that you may want to enable or disable - * if you are trying to debug problems with code or performance. - * IPATH_VERBOSE_TRACING define as 1 if you want additional tracing in - * fastpath code - * IPATH_TRACE_REGWRITES define as 1 if you want register writes to be - * traced in fastpath code - * _IPATH_TRACING define as 0 if you want to remove all tracing in a - * compilation unit - * _IPATH_DEBUGGING define as 0 if you want to remove debug prints - */ - -/* - * The value in the BTH QP field that InfiniPath uses to differentiate - * an infinipath protocol IB packet vs standard IB transport - */ -#define IPATH_KD_QP 0x656b79 - -/* - * valid states passed to ipath_set_linkstate() user call - */ -#define IPATH_IB_LINKDOWN 0 -#define IPATH_IB_LINKARM 1 -#define IPATH_IB_LINKACTIVE 2 -#define IPATH_IB_LINKDOWN_ONLY 3 -#define IPATH_IB_LINKDOWN_SLEEP 4 -#define IPATH_IB_LINKDOWN_DISABLE 5 -#define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */ -#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */ -#define IPATH_IB_LINK_NO_HRTBT 8 /* disable Heartbeat, e.g. for loopback */ -#define IPATH_IB_LINK_HRTBT 9 /* enable heartbeat, normal, non-loopback */ - -/* - * These 3 values (SDR and DDR may be ORed for auto-speed - * negotiation) are used for the 3rd argument to path_f_set_ib_cfg - * with cmd IPATH_IB_CFG_SPD_ENB, by direct calls or via sysfs. They - * are also the possible values for ipath_link_speed_enabled and active - * The values were chosen to match values used within the IB spec. - */ -#define IPATH_IB_SDR 1 -#define IPATH_IB_DDR 2 - -/* - * stats maintained by the driver. For now, at least, this is global - * to all minor devices.
- */ -struct infinipath_stats { - /* number of interrupts taken */ - __u64 sps_ints; - /* number of interrupts for errors */ - __u64 sps_errints; - /* number of errors from chip (not incl. packet errors or CRC) */ - __u64 sps_errs; - /* number of packet errors from chip other than CRC */ - __u64 sps_pkterrs; - /* number of packets with CRC errors (ICRC and VCRC) */ - __u64 sps_crcerrs; - /* number of hardware errors reported (parity, etc.) */ - __u64 sps_hwerrs; - /* number of times IB link changed state unexpectedly */ - __u64 sps_iblink; - __u64 sps_unused; /* was fastrcvint, no longer implemented */ - /* number of kernel (port0) packets received */ - __u64 sps_port0pkts; - /* number of "ethernet" packets sent by driver */ - __u64 sps_ether_spkts; - /* number of "ethernet" packets received by driver */ - __u64 sps_ether_rpkts; - /* number of SMA packets sent by driver. Obsolete. */ - __u64 sps_sma_spkts; - /* number of SMA packets received by driver. Obsolete. */ - __u64 sps_sma_rpkts; - /* number of times all ports rcvhdrq was full and packet dropped */ - __u64 sps_hdrqfull; - /* number of times all ports egrtid was full and packet dropped */ - __u64 sps_etidfull; - /* - * number of times we tried to send from driver, but no pio buffers - * avail - */ - __u64 sps_nopiobufs; - /* number of ports currently open */ - __u64 sps_ports; - /* list of pkeys (other than default) accepted (0 means not set) */ - __u16 sps_pkeys[4]; - __u16 sps_unused16[4]; /* available; maintaining compatible layout */ - /* number of user ports per chip (not IB ports) */ - __u32 sps_nports; - /* not our interrupt, or already handled */ - __u32 sps_nullintr; - /* max number of packets handled per receive call */ - __u32 sps_maxpkts_call; - /* avg number of packets handled per receive call */ - __u32 sps_avgpkts_call; - /* total number of pages locked */ - __u64 sps_pagelocks; - /* total number of pages unlocked */ - __u64 sps_pageunlocks; - /* - * Number of packets dropped in kernel other 
than errors (ether - * packets if ipath not configured, etc.) - */ - __u64 sps_krdrops; - __u64 sps_txeparity; /* PIO buffer parity error, recovered */ - /* pad for future growth */ - __u64 __sps_pad[45]; -}; - -/* - * These are the status bits readable (in ascii form, 64bit value) - * from the "status" sysfs file. - */ -#define IPATH_STATUS_INITTED 0x1 /* basic initialization done */ -#define IPATH_STATUS_DISABLED 0x2 /* hardware disabled */ -/* Device has been disabled via admin request */ -#define IPATH_STATUS_ADMIN_DISABLED 0x4 -/* Chip has been found and initted */ -#define IPATH_STATUS_CHIP_PRESENT 0x20 -/* IB link is at ACTIVE, usable for data traffic */ -#define IPATH_STATUS_IB_READY 0x40 -/* link is configured, LID, MTU, etc. have been set */ -#define IPATH_STATUS_IB_CONF 0x80 -/* no link established, probably no cable */ -#define IPATH_STATUS_IB_NOCABLE 0x100 -/* A Fatal hardware error has occurred. */ -#define IPATH_STATUS_HWERROR 0x200 - -/* - * The list of usermode accessible registers. Also see Reg_* later in file. - */ -typedef enum _ipath_ureg { - /* (RO) DMA RcvHdr to be used next. */ - ur_rcvhdrtail = 0, - /* (RW) RcvHdr entry to be processed next by host. */ - ur_rcvhdrhead = 1, - /* (RO) Index of next Eager index to use. */ - ur_rcvegrindextail = 2, - /* (RW) Eager TID to be processed next */ - ur_rcvegrindexhead = 3, - /* For internal use only; max register number. */ - _IPATH_UregMax -} ipath_ureg; - -/* bit values for spi_runtime_flags */ -#define IPATH_RUNTIME_HT 0x1 -#define IPATH_RUNTIME_PCIE 0x2 -#define IPATH_RUNTIME_FORCE_WC_ORDER 0x4 -#define IPATH_RUNTIME_RCVHDR_COPY 0x8 -#define IPATH_RUNTIME_MASTER 0x10 -#define IPATH_RUNTIME_NODMA_RTAIL 0x80 -#define IPATH_RUNTIME_SDMA 0x200 -#define IPATH_RUNTIME_FORCE_PIOAVAIL 0x400 -#define IPATH_RUNTIME_PIO_REGSWAPPED 0x800 - -/* - * This structure is returned by ipath_userinit() immediately after - * open to get implementation-specific info, and info specific to this - * instance. 
- * - * This struct must have explicit pad fields where type sizes - * may result in different alignments between 32 and 64 bit - * programs, since the 64 bit kernel requires the user code - * to have matching offsets - */ -struct ipath_base_info { - /* version of hardware, for feature checking. */ - __u32 spi_hw_version; - /* version of software, for feature checking. */ - __u32 spi_sw_version; - /* InfiniPath port assigned, goes into sent packets */ - __u16 spi_port; - __u16 spi_subport; - /* - * IB MTU, packets IB data must be less than this. - * The MTU is in bytes, and will be a multiple of 4 bytes. - */ - __u32 spi_mtu; - /* - * Size of a PIO buffer. Any given packet's total size must be less - * than this (in words). Included is the starting control word, so - * if 513 is returned, then total pkt size is 512 words or less. - */ - __u32 spi_piosize; - /* size of the TID cache in infinipath, in entries */ - __u32 spi_tidcnt; - /* size of the TID Eager list in infinipath, in entries */ - __u32 spi_tidegrcnt; - /* size of a single receive header queue entry in words. */ - __u32 spi_rcvhdrent_size; - /* - * Count of receive header queue entries allocated. - * This may be less than the spu_rcvhdrcnt passed in!. - */ - __u32 spi_rcvhdr_cnt; - - /* per-chip and other runtime features bitmap (IPATH_RUNTIME_*) */ - __u32 spi_runtime_flags; - - /* address where receive buffer queue is mapped into */ - __u64 spi_rcvhdr_base; - - /* user program. */ - - /* base address of eager TID receive buffers. */ - __u64 spi_rcv_egrbufs; - - /* Allocated by initialization code, not by protocol. */ - - /* - * Size of each TID buffer in host memory, starting at - * spi_rcv_egrbufs. The buffers are virtually contiguous. - */ - __u32 spi_rcv_egrbufsize; - /* - * The special QP (queue pair) value that identifies an infinipath - * protocol packet from standard IB packets. More, probably much - * more, to be added.
- */ - __u32 spi_qpair; - - /* - * User register base for init code, not to be used directly by - * protocol or applications. - */ - __u64 __spi_uregbase; - /* - * Maximum buffer size in bytes that can be used in a single TID - * entry (assuming the buffer is aligned to this boundary). This is - * the minimum of what the hardware and software support Guaranteed - * to be a power of 2. - */ - __u32 spi_tid_maxsize; - /* - * alignment of each pio send buffer (byte count - * to add to spi_piobufbase to get to second buffer) - */ - __u32 spi_pioalign; - /* - * The index of the first pio buffer available to this process; - * needed to do lookup in spi_pioavailaddr; not added to - * spi_piobufbase. - */ - __u32 spi_pioindex; - /* number of buffers mapped for this process */ - __u32 spi_piocnt; - - /* - * Base address of writeonly pio buffers for this process. - * Each buffer has spi_piosize words, and is aligned on spi_pioalign - * boundaries. spi_piocnt buffers are mapped from this address - */ - __u64 spi_piobufbase; - - /* - * Base address of readonly memory copy of the pioavail registers. - * There are 2 bits for each buffer. - */ - __u64 spi_pioavailaddr; - - /* - * Address where driver updates a copy of the interface and driver - * status (IPATH_STATUS_*) as a 64 bit value. It's followed by a - * string indicating hardware error, if there was one. - */ - __u64 spi_status; - - /* number of chip ports available to user processes */ - __u32 spi_nports; - /* unit number of chip we are using */ - __u32 spi_unit; - /* num bufs in each contiguous set */ - __u32 spi_rcv_egrperchunk; - /* size in bytes of each contiguous set */ - __u32 spi_rcv_egrchunksize; - /* total size of mmap to cover full rcvegrbuffers */ - __u32 spi_rcv_egrbuftotlen; - __u32 spi_filler_for_align; - /* address of readonly memory copy of the rcvhdrq tail register. 
*/ - __u64 spi_rcvhdr_tailaddr; - - /* shared memory pages for subports if port is shared */ - __u64 spi_subport_uregbase; - __u64 spi_subport_rcvegrbuf; - __u64 spi_subport_rcvhdr_base; - - /* shared memory page for hardware port if it is shared */ - __u64 spi_port_uregbase; - __u64 spi_port_rcvegrbuf; - __u64 spi_port_rcvhdr_base; - __u64 spi_port_rcvhdr_tailaddr; - -} __attribute__ ((aligned(8))); - - -/* - * This version number is given to the driver by the user code during - * initialization in the spu_userversion field of ipath_user_info, so - * the driver can check for compatibility with user code. - * - * The major version changes when data structures - * change in an incompatible way. The driver must be the same or higher - * for initialization to succeed. In some cases, a higher version - * driver will not interoperate with older software, and initialization - * will return an error. - */ -#define IPATH_USER_SWMAJOR 1 - -/* - * Minor version differences are always compatible - * within a major version, however if user software is larger - * than driver software, some new features and/or structure fields - * may not be implemented; the user code must deal with this if it - * cares, or it must abort after initialization reports the difference. - */ -#define IPATH_USER_SWMINOR 6 - -#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR) - -#define IPATH_KERN_TYPE 0 - -/* - * Similarly, this is the kernel version going back to the user. It's - * slightly different, in that we want to tell if the driver was built as - * part of a QLogic release, or from the driver from openfabrics.org, - * kernel.org, or a standard distribution, for support reasons. - * The high bit is 0 for non-QLogic and 1 for QLogic-built/supplied. - * - * It's returned by the driver to the user code during initialization in the - * spi_sw_version field of ipath_base_info, so the user code can in turn - * check for compatibility with the kernel.
-*/ -#define IPATH_KERN_SWVERSION ((IPATH_KERN_TYPE<<31) | IPATH_USER_SWVERSION) - -/* - * This structure is passed to ipath_userinit() to tell the driver where - * user code buffers are, sizes, etc. The offsets and sizes of the - * fields must remain unchanged, for binary compatibility. It can - * be extended, if userversion is changed so user code can tell, if needed - */ -struct ipath_user_info { - /* - * version of user software, to detect compatibility issues. - * Should be set to IPATH_USER_SWVERSION. - */ - __u32 spu_userversion; - - /* desired number of receive header queue entries */ - __u32 spu_rcvhdrcnt; - - /* size of struct base_info to write to */ - __u32 spu_base_info_size; - - /* - * number of words in KD protocol header - * This tells InfiniPath how many words to copy to rcvhdrq. If 0, - * kernel uses a default. Once set, attempts to set any other value - * are an error (EAGAIN) until driver is reloaded. - */ - __u32 spu_rcvhdrsize; - - /* - * If two or more processes wish to share a port, each process - * must set the spu_subport_cnt and spu_subport_id to the same - * values. The only restriction on the spu_subport_id is that - * it be unique for a given node. - */ - __u16 spu_subport_cnt; - __u16 spu_subport_id; - - __u32 spu_unused; /* kept for compatible layout */ - - /* - * address of struct base_info to write to - */ - __u64 spu_base_info; - -} __attribute__ ((aligned(8))); - -/* User commands. 
*/ - -#define IPATH_CMD_MIN 16 - -#define __IPATH_CMD_USER_INIT 16 /* old set up userspace (for old user code) */ -#define IPATH_CMD_PORT_INFO 17 /* find out what resources we got */ -#define IPATH_CMD_RECV_CTRL 18 /* control receipt of packets */ -#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */ -#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */ -#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */ -#define __IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes (for old user code) */ -#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */ -#define IPATH_CMD_USER_INIT 24 /* set up userspace */ -#define IPATH_CMD_UNUSED_1 25 -#define IPATH_CMD_UNUSED_2 26 -#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */ -#define IPATH_CMD_POLL_TYPE 28 /* set the kind of polling we want */ -#define IPATH_CMD_ARMLAUNCH_CTRL 29 /* armlaunch detection control */ -/* 30 is unused */ -#define IPATH_CMD_SDMA_INFLIGHT 31 /* sdma inflight counter request */ -#define IPATH_CMD_SDMA_COMPLETE 32 /* sdma completion counter request */ - -/* - * Poll types - */ -#define IPATH_POLL_TYPE_URGENT 0x01 -#define IPATH_POLL_TYPE_OVERFLOW 0x02 - -struct ipath_port_info { - __u32 num_active; /* number of active units */ - __u32 unit; /* unit (chip) assigned to caller */ - __u16 port; /* port on unit assigned to caller */ - __u16 subport; /* subport on unit assigned to caller */ - __u16 num_ports; /* number of ports available on unit */ - __u16 num_subports; /* number of subports opened on port */ -}; - -struct ipath_tid_info { - __u32 tidcnt; - /* make structure same size in 32 and 64 bit */ - __u32 tid__unused; - /* virtual address of first page in transfer */ - __u64 tidvaddr; - /* pointer (same size 32/64 bit) to __u16 tid array */ - __u64 tidlist; - - /* - * pointer (same size 32/64 bit) to bitmap of TIDs used - * for this call; checked for being large enough at open - */ - __u64 tidmap; -}; - -struct ipath_cmd { - __u32 type; /* 
command type */ - union { - struct ipath_tid_info tid_info; - struct ipath_user_info user_info; - - /* - * address in userspace where we should put the sdma - * inflight counter - */ - __u64 sdma_inflight; - /* - * address in userspace where we should put the sdma - * completion counter - */ - __u64 sdma_complete; - /* address in userspace of struct ipath_port_info to - write result to */ - __u64 port_info; - /* enable/disable receipt of packets */ - __u32 recv_ctrl; - /* enable/disable armlaunch errors (non-zero to enable) */ - __u32 armlaunch_ctrl; - /* partition key to set */ - __u16 part_key; - /* user address of __u32 bitmask of active slaves */ - __u64 slave_mask_addr; - /* type of polling we want */ - __u16 poll_type; - } cmd; -}; - -struct ipath_iovec { - /* Pointer to data, but same size 32 and 64 bit */ - __u64 iov_base; - - /* - * Length of data; don't need 64 bits, but want - * ipath_sendpkt to remain same size as before 32 bit changes, so... - */ - __u64 iov_len; -}; - -/* - * Describes a single packet for send. Each packet can have one or more - * buffers, but the total length (exclusive of IB headers) must be less - * than the MTU, and if using the PIO method, entire packet length, - * including IB headers, must be less than the ipath_piosize value (words). - * Use of this necessitates including sys/uio.h - */ -struct __ipath_sendpkt { - __u32 sps_flags; /* flags for packet (TBD) */ - __u32 sps_cnt; /* number of entries to use in sps_iov */ - /* array of iov's describing packet. 
TEMPORARY */ - struct ipath_iovec sps_iov[4]; -}; - -/* - * diagnostics can send a packet by "writing" one of the following - * two structs to diag data special file - * The first is the legacy version for backward compatibility - */ -struct ipath_diag_pkt { - __u32 unit; - __u64 data; - __u32 len; -}; - -/* The second diag_pkt struct is the expanded version that allows - * more control over the packet, specifically, by allowing a custom - * pbc (+ static rate) qword, so that special modes and deliberate - * changes to CRCs can be used. The elements were also re-ordered - * for better alignment and to avoid padding issues. - */ -struct ipath_diag_xpkt { - __u64 data; - __u64 pbc_wd; - __u32 unit; - __u32 len; -}; - -/* - * Data layout in I2C flash (for GUID, etc.) - * All fields are little-endian binary unless otherwise stated - */ -#define IPATH_FLASH_VERSION 2 -struct ipath_flash { - /* flash layout version (IPATH_FLASH_VERSION) */ - __u8 if_fversion; - /* checksum protecting if_length bytes */ - __u8 if_csum; - /* - * valid length (in use, protected by if_csum), including - * if_fversion and if_csum themselves) - */ - __u8 if_length; - /* the GUID, in network order */ - __u8 if_guid[8]; - /* number of GUIDs to use, starting from if_guid */ - __u8 if_numguid; - /* the (last 10 characters of) board serial number, in ASCII */ - char if_serial[12]; - /* board mfg date (YYYYMMDD ASCII) */ - char if_mfgdate[8]; - /* last board rework/test date (YYYYMMDD ASCII) */ - char if_testdate[8]; - /* logging of error counts, TBD */ - __u8 if_errcntp[4]; - /* powered on hours, updated at driver unload */ - __u8 if_powerhour[2]; - /* ASCII free-form comment field */ - char if_comment[32]; - /* Backwards compatible prefix for longer QLogic Serial Numbers */ - char if_sprefix[4]; - /* 82 bytes used, min flash size is 128 bytes */ - __u8 if_future[46]; -}; - -/* - * These are the counters implemented in the chip, and are listed in order. 
- * The InterCaps naming is taken straight from the chip spec. - */ -struct infinipath_counters { - __u64 LBIntCnt; - __u64 LBFlowStallCnt; - __u64 TxSDmaDescCnt; /* was Reserved1 */ - __u64 TxUnsupVLErrCnt; - __u64 TxDataPktCnt; - __u64 TxFlowPktCnt; - __u64 TxDwordCnt; - __u64 TxLenErrCnt; - __u64 TxMaxMinLenErrCnt; - __u64 TxUnderrunCnt; - __u64 TxFlowStallCnt; - __u64 TxDroppedPktCnt; - __u64 RxDroppedPktCnt; - __u64 RxDataPktCnt; - __u64 RxFlowPktCnt; - __u64 RxDwordCnt; - __u64 RxLenErrCnt; - __u64 RxMaxMinLenErrCnt; - __u64 RxICRCErrCnt; - __u64 RxVCRCErrCnt; - __u64 RxFlowCtrlErrCnt; - __u64 RxBadFormatCnt; - __u64 RxLinkProblemCnt; - __u64 RxEBPCnt; - __u64 RxLPCRCErrCnt; - __u64 RxBufOvflCnt; - __u64 RxTIDFullErrCnt; - __u64 RxTIDValidErrCnt; - __u64 RxPKeyMismatchCnt; - __u64 RxP0HdrEgrOvflCnt; - __u64 RxP1HdrEgrOvflCnt; - __u64 RxP2HdrEgrOvflCnt; - __u64 RxP3HdrEgrOvflCnt; - __u64 RxP4HdrEgrOvflCnt; - __u64 RxP5HdrEgrOvflCnt; - __u64 RxP6HdrEgrOvflCnt; - __u64 RxP7HdrEgrOvflCnt; - __u64 RxP8HdrEgrOvflCnt; - __u64 RxP9HdrEgrOvflCnt; /* was Reserved6 */ - __u64 RxP10HdrEgrOvflCnt; /* was Reserved7 */ - __u64 RxP11HdrEgrOvflCnt; /* new for IBA7220 */ - __u64 RxP12HdrEgrOvflCnt; /* new for IBA7220 */ - __u64 RxP13HdrEgrOvflCnt; /* new for IBA7220 */ - __u64 RxP14HdrEgrOvflCnt; /* new for IBA7220 */ - __u64 RxP15HdrEgrOvflCnt; /* new for IBA7220 */ - __u64 RxP16HdrEgrOvflCnt; /* new for IBA7220 */ - __u64 IBStatusChangeCnt; - __u64 IBLinkErrRecoveryCnt; - __u64 IBLinkDownedCnt; - __u64 IBSymbolErrCnt; - /* The following are new for IBA7220 */ - __u64 RxVL15DroppedPktCnt; - __u64 RxOtherLocalPhyErrCnt; - __u64 PcieRetryBufDiagQwordCnt; - __u64 ExcessBufferOvflCnt; - __u64 LocalLinkIntegrityErrCnt; - __u64 RxVlErrCnt; - __u64 RxDlidFltrCnt; -}; - -/* - * The next set of defines are for packet headers, and chip register - * and memory bits that are visible to and/or used by user-mode software - * The other bits that are used only by the driver or diags are in - 
* ipath_registers.h - */ - -/* RcvHdrFlags bits */ -#define INFINIPATH_RHF_LENGTH_MASK 0x7FF -#define INFINIPATH_RHF_LENGTH_SHIFT 0 -#define INFINIPATH_RHF_RCVTYPE_MASK 0x7 -#define INFINIPATH_RHF_RCVTYPE_SHIFT 11 -#define INFINIPATH_RHF_EGRINDEX_MASK 0xFFF -#define INFINIPATH_RHF_EGRINDEX_SHIFT 16 -#define INFINIPATH_RHF_SEQ_MASK 0xF -#define INFINIPATH_RHF_SEQ_SHIFT 0 -#define INFINIPATH_RHF_HDRQ_OFFSET_MASK 0x7FF -#define INFINIPATH_RHF_HDRQ_OFFSET_SHIFT 4 -#define INFINIPATH_RHF_H_ICRCERR 0x80000000 -#define INFINIPATH_RHF_H_VCRCERR 0x40000000 -#define INFINIPATH_RHF_H_PARITYERR 0x20000000 -#define INFINIPATH_RHF_H_LENERR 0x10000000 -#define INFINIPATH_RHF_H_MTUERR 0x08000000 -#define INFINIPATH_RHF_H_IHDRERR 0x04000000 -#define INFINIPATH_RHF_H_TIDERR 0x02000000 -#define INFINIPATH_RHF_H_MKERR 0x01000000 -#define INFINIPATH_RHF_H_IBERR 0x00800000 -#define INFINIPATH_RHF_H_ERR_MASK 0xFF800000 -#define INFINIPATH_RHF_L_USE_EGR 0x80000000 -#define INFINIPATH_RHF_L_SWA 0x00008000 -#define INFINIPATH_RHF_L_SWB 0x00004000 - -/* infinipath header fields */ -#define INFINIPATH_I_VERS_MASK 0xF -#define INFINIPATH_I_VERS_SHIFT 28 -#define INFINIPATH_I_PORT_MASK 0xF -#define INFINIPATH_I_PORT_SHIFT 24 -#define INFINIPATH_I_TID_MASK 0x7FF -#define INFINIPATH_I_TID_SHIFT 13 -#define INFINIPATH_I_OFFSET_MASK 0x1FFF -#define INFINIPATH_I_OFFSET_SHIFT 0 - -/* K_PktFlags bits */ -#define INFINIPATH_KPF_INTR 0x1 -#define INFINIPATH_KPF_SUBPORT_MASK 0x3 -#define INFINIPATH_KPF_SUBPORT_SHIFT 1 - -#define INFINIPATH_MAX_SUBPORT 4 - -/* SendPIO per-buffer control */ -#define INFINIPATH_SP_TEST 0x40 -#define INFINIPATH_SP_TESTEBP 0x20 -#define INFINIPATH_SP_TRIGGER_SHIFT 15 - -/* SendPIOAvail bits */ -#define INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1 -#define INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 0 - -/* infinipath header format */ -struct ipath_header { - /* - * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset - - * 14 bits before ECO change ~28 Dec 03. 
After that, Vers 4, - * Port 4, TID 11, offset 13. - */ - __le32 ver_port_tid_offset; - __le16 chksum; - __le16 pkt_flags; -}; - -/* infinipath user message header format. - * This structure contains the first 4 fields common to all protocols - * that employ infinipath. - */ -struct ipath_message_header { - __be16 lrh[4]; - __be32 bth[3]; - /* fields below this point are in host byte order */ - struct ipath_header iph; - __u8 sub_opcode; -}; - -/* infinipath ethernet header format */ -struct ether_header { - __be16 lrh[4]; - __be32 bth[3]; - struct ipath_header iph; - __u8 sub_opcode; - __u8 cmd; - __be16 lid; - __u16 mac[3]; - __u8 frag_num; - __u8 seq_num; - __le32 len; - /* MUST be of word size due to PIO write requirements */ - __le32 csum; - __le16 csum_offset; - __le16 flags; - __u16 first_2_bytes; - __u8 unused[2]; /* currently unused */ -}; - - -/* IB - LRH header consts */ -#define IPATH_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */ -#define IPATH_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */ - -/* misc. */ -#define SIZE_OF_CRC 1 - -#define IPATH_DEFAULT_P_KEY 0xFFFF -#define IPATH_PERMISSIVE_LID 0xFFFF -#define IPATH_AETH_CREDIT_SHIFT 24 -#define IPATH_AETH_CREDIT_MASK 0x1F -#define IPATH_AETH_CREDIT_INVAL 0x1F -#define IPATH_PSN_MASK 0xFFFFFF -#define IPATH_MSN_MASK 0xFFFFFF -#define IPATH_QPN_MASK 0xFFFFFF -#define IPATH_MULTICAST_LID_BASE 0xC000 -#define IPATH_EAGER_TID_ID INFINIPATH_I_TID_MASK -#define IPATH_MULTICAST_QPN 0xFFFFFF - -/* Receive Header Queue: receive type (from infinipath) */ -#define RCVHQ_RCV_TYPE_EXPECTED 0 -#define RCVHQ_RCV_TYPE_EAGER 1 -#define RCVHQ_RCV_TYPE_NON_KD 2 -#define RCVHQ_RCV_TYPE_ERROR 3 - - -/* sub OpCodes - ith4x */ -#define IPATH_ITH4X_OPCODE_ENCAP 0x81 -#define IPATH_ITH4X_OPCODE_LID_ARP 0x82 - -#define IPATH_HEADER_QUEUE_WORDS 9 - -/* functions for extracting fields from rcvhdrq entries for the driver. 
- */ -static inline __u32 ipath_hdrget_err_flags(const __le32 * rbuf) -{ - return __le32_to_cpu(rbuf[1]) & INFINIPATH_RHF_H_ERR_MASK; -} - -static inline __u32 ipath_hdrget_rcv_type(const __le32 * rbuf) -{ - return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_RCVTYPE_SHIFT) - & INFINIPATH_RHF_RCVTYPE_MASK; -} - -static inline __u32 ipath_hdrget_length_in_bytes(const __le32 * rbuf) -{ - return ((__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_LENGTH_SHIFT) - & INFINIPATH_RHF_LENGTH_MASK) << 2; -} - -static inline __u32 ipath_hdrget_index(const __le32 * rbuf) -{ - return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_EGRINDEX_SHIFT) - & INFINIPATH_RHF_EGRINDEX_MASK; -} - -static inline __u32 ipath_hdrget_seq(const __le32 *rbuf) -{ - return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_SEQ_SHIFT) - & INFINIPATH_RHF_SEQ_MASK; -} - -static inline __u32 ipath_hdrget_offset(const __le32 *rbuf) -{ - return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_HDRQ_OFFSET_SHIFT) - & INFINIPATH_RHF_HDRQ_OFFSET_MASK; -} - -static inline __u32 ipath_hdrget_use_egr_buf(const __le32 *rbuf) -{ - return __le32_to_cpu(rbuf[0]) & INFINIPATH_RHF_L_USE_EGR; -} - -static inline __u32 ipath_hdrget_ipath_ver(__le32 hdrword) -{ - return (__le32_to_cpu(hdrword) >> INFINIPATH_I_VERS_SHIFT) - & INFINIPATH_I_VERS_MASK; -} - -#endif /* _IPATH_COMMON_H */ diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c deleted file mode 100644 index e9dd9112e..000000000 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ /dev/null @@ -1,483 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_verbs.h"
-
-/**
- * ipath_cq_enter - add a new entry to the completion queue
- * @cq: completion queue
- * @entry: work completion entry to add
- * @solicited: true if @entry is a solicited entry
- *
- * Copies @entry into the slot at the queue head and advances the head.
- * If advancing would make head equal tail the queue is full: nothing is
- * stored and an IB_EVENT_CQ_ERR event is delivered to the CQ's event
- * handler, if one is set.  When the armed notification type matches,
- * notification is disarmed and the completion tasklet is scheduled.
- *
- * This may be called with qp->s_lock held.
- */
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
-{
-	struct ipath_cq_wc *wc;
-	unsigned long flags;
-	u32 head;
-	u32 next;
-
-	spin_lock_irqsave(&cq->lock, flags);
-
-	/*
-	 * Note that the head pointer might be writable by user processes.
-	 * Take care to verify it is a sane value.
-	 */
-	wc = cq->queue;
-	head = wc->head;
-	if (head >= (unsigned) cq->ibcq.cqe) {
-		head = cq->ibcq.cqe;	/* clamp to the last slot ... */
-		next = 0;		/* ... after which the index wraps */
-	} else
-		next = head + 1;
-	if (unlikely(next == wc->tail)) {	/* queue full */
-		spin_unlock_irqrestore(&cq->lock, flags);	/* handler runs unlocked */
-		if (cq->ibcq.event_handler) {
-			struct ib_event ev;	/* only the three fields below are set */
-
-			ev.device = cq->ibcq.device;
-			ev.element.cq = &cq->ibcq;
-			ev.event = IB_EVENT_CQ_ERR;
-			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
-		}
-		return;
-	}
-	if (cq->ip) {	/* cq->ip set: entries use the uqueue layout, copied field by field */
-		wc->uqueue[head].wr_id = entry->wr_id;
-		wc->uqueue[head].status = entry->status;
-		wc->uqueue[head].opcode = entry->opcode;
-		wc->uqueue[head].vendor_err = entry->vendor_err;
-		wc->uqueue[head].byte_len = entry->byte_len;
-		wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data;
-		wc->uqueue[head].qp_num = entry->qp->qp_num;
-		wc->uqueue[head].src_qp = entry->src_qp;
-		wc->uqueue[head].wc_flags = entry->wc_flags;
-		wc->uqueue[head].pkey_index = entry->pkey_index;
-		wc->uqueue[head].slid = entry->slid;
-		wc->uqueue[head].sl = entry->sl;
-		wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
-		wc->uqueue[head].port_num = entry->port_num;
-		/* Make sure entry is written before the head index. */
-		smp_wmb();
-	} else
-		wc->kqueue[head] = *entry;	/* kernel queue: plain struct copy */
-	wc->head = next;
-
-	if (cq->notify == IB_CQ_NEXT_COMP ||
-	    (cq->notify == IB_CQ_SOLICITED && solicited)) {
-		cq->notify = IB_CQ_NONE;	/* disarm until re-armed */
-		cq->triggered++;		/* lets send_complete() detect new work */
-		/*
-		 * This will cause send_complete() to be called in
-		 * another thread.
-		 */
-		tasklet_hi_schedule(&cq->comptask);
-	}
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-	if (entry->status != IB_WC_SUCCESS)
-		to_idev(cq->ibcq.device)->n_wqe_errs++;	/* updated without cq->lock held */
-}
-
-/**
- * ipath_poll_cq - poll for work completion entries
- * @ibcq: the completion queue to poll
- * @num_entries: the maximum number of entries to return
- * @entry: pointer to array where work completions are placed
- *
- * Returns the number of completion entries polled.
- *
- * This may be called from interrupt context.
Also called by ib_poll_cq() - * in the generic verbs code. - */ -int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) -{ - struct ipath_cq *cq = to_icq(ibcq); - struct ipath_cq_wc *wc; - unsigned long flags; - int npolled; - u32 tail; - - /* The kernel can only poll a kernel completion queue */ - if (cq->ip) { - npolled = -EINVAL; - goto bail; - } - - spin_lock_irqsave(&cq->lock, flags); - - wc = cq->queue; - tail = wc->tail; - if (tail > (u32) cq->ibcq.cqe) - tail = (u32) cq->ibcq.cqe; - for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { - if (tail == wc->head) - break; - /* The kernel doesn't need a RMB since it has the lock. */ - *entry = wc->kqueue[tail]; - if (tail >= cq->ibcq.cqe) - tail = 0; - else - tail++; - } - wc->tail = tail; - - spin_unlock_irqrestore(&cq->lock, flags); - -bail: - return npolled; -} - -static void send_complete(unsigned long data) -{ - struct ipath_cq *cq = (struct ipath_cq *)data; - - /* - * The completion handler will most likely rearm the notification - * and poll for all pending entries. If a new completion entry - * is added while we are in this routine, tasklet_hi_schedule() - * won't call us again until we return so we check triggered to - * see if we need to call the handler again. - */ - for (;;) { - u8 triggered = cq->triggered; - - cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); - - if (cq->triggered == triggered) - return; - } -} - -/** - * ipath_create_cq - create a completion queue - * @ibdev: the device this completion queue is attached to - * @attr: creation attributes - * @context: unused by the InfiniPath driver - * @udata: unused by the InfiniPath driver - * - * Returns a pointer to the completion queue or negative errno values - * for failure. - * - * Called by ib_create_cq() in the generic verbs code. 
- */
-struct ib_cq *ipath_create_cq(struct ib_device *ibdev,
-			      const struct ib_cq_init_attr *attr,
-			      struct ib_ucontext *context,
-			      struct ib_udata *udata)
-{
-	int entries = attr->cqe;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_cq *cq;
-	struct ipath_cq_wc *wc;
-	struct ib_cq *ret;
-	u32 sz;
-
-	if (attr->flags)	/* no creation flags are accepted */
-		return ERR_PTR(-EINVAL);
-
-	if (entries < 1 || entries > ib_ipath_max_cqes) {
-		ret = ERR_PTR(-EINVAL);
-		goto done;
-	}
-
-	/* Allocate the completion queue structure. */
-	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
-	if (!cq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto done;
-	}
-
-	/*
-	 * Allocate the completion queue entries and head/tail pointers.
-	 * This is allocated separately so that it can be resized and
-	 * also mapped into user space.
-	 * We need to use vmalloc() in order to support mmap and large
-	 * numbers of entries.
-	 */
-	sz = sizeof(*wc);
-	if (udata && udata->outlen >= sizeof(__u64))
-		sz += sizeof(struct ib_uverbs_wc) * (entries + 1);	/* user-visible entry layout */
-	else
-		sz += sizeof(struct ib_wc) * (entries + 1);	/* kernel entry layout */
-	wc = vmalloc_user(sz);
-	if (!wc) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_cq;
-	}
-
-	/*
-	 * Return the address of the WC as the offset to mmap.
-	 * See ipath_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		int err;
-
-		cq->ip = ipath_create_mmap_info(dev, sz, context, wc);
-		if (!cq->ip) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_wc;
-		}
-
-		err = ib_copy_to_udata(udata, &cq->ip->offset,
-				       sizeof(cq->ip->offset));
-		if (err) {
-			ret = ERR_PTR(err);
-			goto bail_ip;
-		}
-	} else
-		cq->ip = NULL;	/* kernel-only queue; also makes kfree(cq->ip) below safe */
-
-	spin_lock(&dev->n_cqs_lock);
-	if (dev->n_cqs_allocated == ib_ipath_max_cqs) {	/* per-device CQ limit reached */
-		spin_unlock(&dev->n_cqs_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_cqs_allocated++;
-	spin_unlock(&dev->n_cqs_lock);
-
-	if (cq->ip) {	/* queue the mapping info on the device's pending_mmaps list */
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	/*
-	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
-	 * The number of entries should be >= the number requested or return
-	 * an error.
-	 */
-	cq->ibcq.cqe = entries;
-	cq->notify = IB_CQ_NONE;
-	cq->triggered = 0;
-	spin_lock_init(&cq->lock);
-	tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
-	wc->head = 0;
-	wc->tail = 0;
-	cq->queue = wc;
-
-	ret = &cq->ibcq;
-
-	goto done;
-
-bail_ip:	/* error unwind: each label releases one more resource */
-	kfree(cq->ip);
-bail_wc:
-	vfree(wc);
-bail_cq:
-	kfree(cq);
-done:
-	return ret;
-}
-
-/**
- * ipath_destroy_cq - destroy a completion queue
- * @ibcq: the completion queue to destroy.
- *
- * Returns 0 for success.
- *
- * Called by ib_destroy_cq() in the generic verbs code.
- */
-int ipath_destroy_cq(struct ib_cq *ibcq)
-{
-	struct ipath_ibdev *dev = to_idev(ibcq->device);
-	struct ipath_cq *cq = to_icq(ibcq);
-
-	tasklet_kill(&cq->comptask);
-	spin_lock(&dev->n_cqs_lock);
-	dev->n_cqs_allocated--;
-	spin_unlock(&dev->n_cqs_lock);
-	if (cq->ip)	/* mmap info present: drop our reference instead of freeing the queue here */
-		kref_put(&cq->ip->ref, ipath_release_mmap_info);
-	else
-		vfree(cq->queue);
-	kfree(cq);
-
-	return 0;
-}
-
-/**
- * ipath_req_notify_cq - change the notification type for a completion queue
- * @ibcq: the completion queue
- * @notify_flags: the type of notification to request
- *
- * Returns 1 if IB_CQ_REPORT_MISSED_EVENTS was requested and the queue
- * is not empty (head != tail), otherwise 0.
- *
- * This may be called from interrupt context.  Also called by
- * ib_req_notify_cq() in the generic verbs code.
- */
-int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
-{
-	struct ipath_cq *cq = to_icq(ibcq);
-	unsigned long flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&cq->lock, flags);
-	/*
-	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
-	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
-	 */
-	if (cq->notify != IB_CQ_NEXT_COMP)
-		cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
-
-	if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
-	    cq->queue->head != cq->queue->tail)
-		ret = 1;
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-	return ret;
-}
-
-/**
- * ipath_resize_cq - change the size of the CQ
- * @ibcq: the completion queue
- * @cqe: the new number of entries; must be between 1 and
- *       ib_ipath_max_cqes and not smaller than the number of entries
- *       currently queued
- * @udata: user data; when its output buffer can hold a __u64 the new
- *         mmap offset is copied back through it
- *
- * Allocates a new queue, copies the pending entries to its start under
- * cq->lock, switches the CQ over and frees the old queue.
- *
- * Returns 0 for success, -EINVAL for a bad @cqe, -ENOMEM if the new
- * queue cannot be allocated, or the error from ib_copy_to_udata().
- */
-int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
-{
-	struct ipath_cq *cq = to_icq(ibcq);
-	struct ipath_cq_wc *old_wc;
-	struct ipath_cq_wc *wc;
-	u32 head, tail, n;
-	int ret;
-	u32 sz;
-
-	if (cqe < 1 || cqe > ib_ipath_max_cqes) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/*
-	 * Need to use vmalloc() if we want to support large #s of entries.
-	 */
-	sz = sizeof(*wc);
-	if (udata && udata->outlen >= sizeof(__u64))
-		sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
-	else
-		sz += sizeof(struct ib_wc) * (cqe + 1);
-	wc = vmalloc_user(sz);
-	if (!wc) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	/* Check that we can write the offset to mmap. */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		__u64 offset = 0;	/* placeholder; the real offset is copied after the switch */
-
-		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
-		if (ret)
-			goto bail_free;
-	}
-
-	spin_lock_irq(&cq->lock);
-	/*
-	 * Make sure head and tail are sane since they
-	 * might be user writable.
-	 */
-	old_wc = cq->queue;
-	head = old_wc->head;
-	if (head > (u32) cq->ibcq.cqe)
-		head = (u32) cq->ibcq.cqe;
-	tail = old_wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
-	if (head < tail)	/* pending entries wrap around the end of the old ring */
-		n = cq->ibcq.cqe + 1 + head - tail;
-	else
-		n = head - tail;
-	if (unlikely((u32)cqe < n)) {	/* new size cannot hold what is already queued */
-		ret = -EINVAL;
-		goto bail_unlock;
-	}
-	for (n = 0; tail != head; n++) {	/* compact pending entries to the start of the new ring */
-		if (cq->ip)
-			wc->uqueue[n] = old_wc->uqueue[tail];
-		else
-			wc->kqueue[n] = old_wc->kqueue[tail];
-		if (tail == (u32) cq->ibcq.cqe)
-			tail = 0;
-		else
-			tail++;
-	}
-	cq->ibcq.cqe = cqe;
-	wc->head = n;
-	wc->tail = 0;
-	cq->queue = wc;
-	spin_unlock_irq(&cq->lock);
-
-	vfree(old_wc);
-
-	if (cq->ip) {
-		struct ipath_ibdev *dev = to_idev(ibcq->device);
-		struct ipath_mmap_info *ip = cq->ip;
-
-		ipath_update_mmap_info(dev, ip, sz, wc);
-
-		/*
-		 * Return the offset to mmap.
-		 * See ipath_mmap() for details.
-		 */
-		if (udata && udata->outlen >= sizeof(__u64)) {
-			ret = ib_copy_to_udata(udata, &ip->offset,
-					       sizeof(ip->offset));
-			if (ret)
-				goto bail;	/* NOTE(review): the queue has already been switched at this point */
-		}
-
-		spin_lock_irq(&dev->pending_lock);
-		if (list_empty(&ip->pending_mmaps))
-			list_add(&ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = 0;
-	goto bail;
-
-bail_unlock:
-	spin_unlock_irq(&cq->lock);
-bail_free:
-	vfree(wc);
-bail:
-	return ret;
-}
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
deleted file mode 100644
index 65926cd35..000000000
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _IPATH_DEBUG_H -#define _IPATH_DEBUG_H - -#ifndef _IPATH_DEBUGGING /* debugging enabled or not */ -#define _IPATH_DEBUGGING 1 -#endif - -#if _IPATH_DEBUGGING - -/* - * Mask values for debugging. The scheme allows us to compile out any - * of the debug tracing stuff, and if compiled in, to enable or disable - * dynamically. This can be set at modprobe time also: - * modprobe infinipath.ko infinipath_debug=7 - */ - -#define __IPATH_INFO 0x1 /* generic low verbosity stuff */ -#define __IPATH_DBG 0x2 /* generic debug */ -#define __IPATH_TRSAMPLE 0x8 /* generate trace buffer sample entries */ -/* leave some low verbosity spots open */ -#define __IPATH_VERBDBG 0x40 /* very verbose debug */ -#define __IPATH_PKTDBG 0x80 /* print packet data */ -/* print process startup (init)/exit messages */ -#define __IPATH_PROCDBG 0x100 -/* print mmap/fault stuff, not using VDBG any more */ -#define __IPATH_MMDBG 0x200 -#define __IPATH_ERRPKTDBG 0x400 -#define __IPATH_USER_SEND 0x1000 /* use user mode send */ -#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */ -#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */ -#define __IPATH_IPATHDBG 0x10000 /* Ethernet (IPATH) gen debug */ -#define __IPATH_IPATHWARN 0x20000 /* Ethernet (IPATH) warnings */ -#define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors */ -#define __IPATH_IPATHPD 0x80000 /* Ethernet (IPATH) packet dump */ -#define __IPATH_IPATHTABLE 0x100000 /* Ethernet (IPATH) table dump */ -#define __IPATH_LINKVERBDBG 0x200000 /* very verbose linkchange debug */ - -#else /* _IPATH_DEBUGGING */ - -/* - * define all of these even with debugging off, for the few places that do - * if(infinipath_debug & _IPATH_xyzzy), but in a 
way that will make the - * compiler eliminate the code - */ - -#define __IPATH_INFO 0x0 /* generic low verbosity stuff */ -#define __IPATH_DBG 0x0 /* generic debug */ -#define __IPATH_TRSAMPLE 0x0 /* generate trace buffer sample entries */ -#define __IPATH_VERBDBG 0x0 /* very verbose debug */ -#define __IPATH_PKTDBG 0x0 /* print packet data */ -#define __IPATH_PROCDBG 0x0 /* process startup (init)/exit messages */ -/* print mmap/fault stuff, not using VDBG any more */ -#define __IPATH_MMDBG 0x0 -#define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */ -#define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) table dump on */ -#define __IPATH_IPATHWARN 0x0 /* Ethernet (IPATH) warnings on */ -#define __IPATH_IPATHERR 0x0 /* Ethernet (IPATH) errors on */ -#define __IPATH_IPATHPD 0x0 /* Ethernet (IPATH) packet dump on */ -#define __IPATH_IPATHTABLE 0x0 /* Ethernet (IPATH) packet dump on */ -#define __IPATH_LINKVERBDBG 0x0 /* very verbose linkchange debug */ - -#endif /* _IPATH_DEBUGGING */ - -#define __IPATH_VERBOSEDBG __IPATH_VERBDBG - -#endif /* _IPATH_DEBUG_H */ diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c deleted file mode 100644 index 45802e973..000000000 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ /dev/null @@ -1,551 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * This file contains support for diagnostic functions. It is accessed by - * opening the ipath_diag device, normally minor number 129. Diagnostic use - * of the InfiniPath chip may render the chip or board unusable until the - * driver is unloaded, or in some cases, until the system is rebooted. - * - * Accesses to the chip through this interface are not similar to going - * through the /sys/bus/pci resource mmap interface. 
- */
-
-#include <linux/io.h>
-#include <linux/pci.h>
-#include <linux/vmalloc.h>
-#include <linux/fs.h>
-#include <linux/export.h>
-#include <asm/uaccess.h>
-
-#include "ipath_kernel.h"
-#include "ipath_common.h"
-
-int ipath_diag_inuse;	/* 0: closed; -2: just opened; -1: after first read; 1: full access */
-static int diag_set_link;	/* set once ipath_diagpkt_write() has tried to bring the link active */
-
-static int ipath_diag_open(struct inode *in, struct file *fp);
-static int ipath_diag_release(struct inode *in, struct file *fp);
-static ssize_t ipath_diag_read(struct file *fp, char __user *data,
-			       size_t count, loff_t *off);
-static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
-				size_t count, loff_t *off);
-
-static const struct file_operations diag_file_ops = {
-	.owner = THIS_MODULE,
-	.write = ipath_diag_write,
-	.read = ipath_diag_read,
-	.open = ipath_diag_open,
-	.release = ipath_diag_release,
-	.llseek = default_llseek,
-};
-
-static ssize_t ipath_diagpkt_write(struct file *fp,
-				   const char __user *data,
-				   size_t count, loff_t *off);
-
-static const struct file_operations diagpkt_file_ops = {
-	.owner = THIS_MODULE,
-	.write = ipath_diagpkt_write,
-	.llseek = noop_llseek,
-};
-
-static atomic_t diagpkt_count = ATOMIC_INIT(0);	/* number of ipath_diag_add() calls not yet removed */
-static struct cdev *diagpkt_cdev;
-static struct device *diagpkt_dev;
-
-int ipath_diag_add(struct ipath_devdata *dd)
-{
-	char name[16];
-	int ret = 0;
-
-	if (atomic_inc_return(&diagpkt_count) == 1) {	/* first unit creates the shared ipath_diagpkt device */
-		ret = ipath_cdev_init(IPATH_DIAGPKT_MINOR,
-				      "ipath_diagpkt", &diagpkt_file_ops,
-				      &diagpkt_cdev, &diagpkt_dev);
-
-		if (ret) {
-			ipath_dev_err(dd, "Couldn't create ipath_diagpkt "
-				      "device: %d", ret);
-			goto done;	/* NOTE(review): diagpkt_count stays incremented; presumably balanced by ipath_diag_remove() - confirm against callers */
-		}
-	}
-
-	snprintf(name, sizeof(name), "ipath_diag%d", dd->ipath_unit);
-
-	ret = ipath_cdev_init(IPATH_DIAG_MINOR_BASE + dd->ipath_unit, name,
-			      &diag_file_ops, &dd->diag_cdev,
-			      &dd->diag_dev);	/* per-unit ipath_diagN device */
-	if (ret)
-		ipath_dev_err(dd, "Couldn't create %s device: %d",
-			      name, ret);
-
-done:
-	return ret;
-}
-
-void ipath_diag_remove(struct ipath_devdata *dd)
-{
-	if (atomic_dec_and_test(&diagpkt_count))	/* last unit tears down the shared device */
-		ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_dev);
-
-	ipath_cdev_cleanup(&dd->diag_cdev, &dd->diag_dev);
-}
-
-/**
- * ipath_read_umem64 - read a 64-bit quantity from the chip into user space
- * @dd: the infinipath device
- * @uaddr: the location to store the data in user memory
- * @caddr: the source chip address (full pointer, not offset)
- * @count: number of bytes to copy; only whole 64-bit words are
- *         transferred (count / sizeof(u64))
- *
- * This function also localizes all chip memory accesses.
- * The copy should be written such that we read full cacheline packets
- * from the chip.  This is usually used for a single qword
- *
- * Returns 0 on success, -EINVAL if the range is not inside
- * [ipath_kregbase, ipath_kregend], -EFAULT if the user copy fails.
- *
- * NOTE:  This assumes the chip address is 64-bit aligned.
- */
-static int ipath_read_umem64(struct ipath_devdata *dd, void __user *uaddr,
-			     const void __iomem *caddr, size_t count)
-{
-	const u64 __iomem *reg_addr = caddr;
-	const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
-	int ret;
-
-	/* not very efficient, but it works for now */
-	if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	while (reg_addr < reg_end) {
-		u64 data = readq(reg_addr);	/* one register read per qword */
-		if (copy_to_user(uaddr, &data, sizeof(u64))) {
-			ret = -EFAULT;
-			goto bail;
-		}
-		reg_addr++;
-		uaddr += sizeof(u64);
-	}
-	ret = 0;
-bail:
-	return ret;
-}
-
-/**
- * ipath_write_umem64 - write a 64-bit quantity to the chip from user space
- * @dd: the infinipath device
- * @caddr: the destination chip address (full pointer, not offset)
- * @uaddr: the source of the data in user memory
- * @count: the number of bytes to copy; only whole 64-bit words are
- *         transferred (count / sizeof(u64))
- *
- * This is usually used for a single qword
- * NOTE:  This assumes the chip address is 64-bit aligned.
- */
-
-static int ipath_write_umem64(struct ipath_devdata *dd, void __iomem *caddr,
-			      const void __user *uaddr, size_t count)
-{
-	u64 __iomem *dst = caddr;
-	const u64 __iomem *dst_end = dst + (count / sizeof(u64));
-
-	/* Refuse any range that is not inside the mapped register window. */
-	if (dst < dd->ipath_kregbase || dst_end > dd->ipath_kregend)
-		return -EINVAL;
-
-	/* not very efficient, but it works for now */
-	for (; dst < dst_end; dst++, uaddr += sizeof(u64)) {
-		u64 qword;
-
-		if (copy_from_user(&qword, uaddr, sizeof(qword)))
-			return -EFAULT;
-		writeq(qword, dst);
-	}
-
-	return 0;
-}
-
-/**
- * ipath_read_umem32 - read a 32-bit quantity from the chip into user space
- * @dd: the infinipath device
- * @uaddr: the location to store the data in user memory
- * @caddr: the source chip address (full pointer, not offset)
- * @count: number of bytes to copy
- *
- * read 32 bit values, not 64 bit; for memories that only
- * support 32 bit reads; usually a single dword.
- */
-static int ipath_read_umem32(struct ipath_devdata *dd, void __user *uaddr,
-			     const void __iomem *caddr, size_t count)
-{
-	const u32 __iomem *src = caddr;
-	const u32 __iomem *src_end = src + (count / sizeof(u32));
-
-	/* Refuse any range that is not inside the mapped register window. */
-	if (src < (u32 __iomem *) dd->ipath_kregbase ||
-	    src_end > (u32 __iomem *) dd->ipath_kregend)
-		return -EINVAL;
-
-	/* not very efficient, but it works for now */
-	for (; src < src_end; src++, uaddr += sizeof(u32)) {
-		u32 dword = readl(src);
-
-		if (copy_to_user(uaddr, &dword, sizeof(dword)))
-			return -EFAULT;
-	}
-
-	return 0;
-}
-
-/**
- * ipath_write_umem32 - write a 32-bit quantity to the chip from user space
- * @dd: the infinipath device
- * @caddr: the destination chip address (full pointer, not offset)
- * @uaddr: the source of the data in user memory
- * @count: number of bytes to copy
- *
- * write 32 bit values, not 64 bit; for memories that only
- * support 32 bit write; usually a single dword.
- */ - -static int ipath_write_umem32(struct ipath_devdata *dd, void __iomem *caddr, - const void __user *uaddr, size_t count) -{ - u32 __iomem *reg_addr = caddr; - const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32)); - int ret; - - if (reg_addr < (u32 __iomem *) dd->ipath_kregbase || - reg_end > (u32 __iomem *) dd->ipath_kregend) { - ret = -EINVAL; - goto bail; - } - while (reg_addr < reg_end) { - u32 data; - if (copy_from_user(&data, uaddr, sizeof(data))) { - ret = -EFAULT; - goto bail; - } - writel(data, reg_addr); - - reg_addr++; - uaddr += sizeof(u32); - } - ret = 0; -bail: - return ret; -} - -static int ipath_diag_open(struct inode *in, struct file *fp) -{ - int unit = iminor(in) - IPATH_DIAG_MINOR_BASE; - struct ipath_devdata *dd; - int ret; - - mutex_lock(&ipath_mutex); - - if (ipath_diag_inuse) { - ret = -EBUSY; - goto bail; - } - - dd = ipath_lookup(unit); - - if (dd == NULL || !(dd->ipath_flags & IPATH_PRESENT) || - !dd->ipath_kregbase) { - ret = -ENODEV; - goto bail; - } - - fp->private_data = dd; - ipath_diag_inuse = -2; - diag_set_link = 0; - ret = 0; - - /* Only expose a way to reset the device if we - make it into diag mode. 
*/ - ipath_expose_reset(&dd->pcidev->dev); - -bail: - mutex_unlock(&ipath_mutex); - - return ret; -} - -/** - * ipath_diagpkt_write - write an IB packet - * @fp: the diag data device file pointer - * @data: ipath_diag_pkt structure saying where to get the packet - * @count: size of data to write - * @off: unused by this code - */ -static ssize_t ipath_diagpkt_write(struct file *fp, - const char __user *data, - size_t count, loff_t *off) -{ - u32 __iomem *piobuf; - u32 plen, pbufn, maxlen_reserve; - struct ipath_diag_pkt odp; - struct ipath_diag_xpkt dp; - u32 *tmpbuf = NULL; - struct ipath_devdata *dd; - ssize_t ret = 0; - u64 val; - u32 l_state, lt_state; /* LinkState, LinkTrainingState */ - - - if (count == sizeof(dp)) { - if (copy_from_user(&dp, data, sizeof(dp))) { - ret = -EFAULT; - goto bail; - } - } else if (count == sizeof(odp)) { - if (copy_from_user(&odp, data, sizeof(odp))) { - ret = -EFAULT; - goto bail; - } - dp.len = odp.len; - dp.unit = odp.unit; - dp.data = odp.data; - dp.pbc_wd = 0; - } else { - ret = -EINVAL; - goto bail; - } - - /* send count must be an exact number of dwords */ - if (dp.len & 3) { - ret = -EINVAL; - goto bail; - } - - plen = dp.len >> 2; - - dd = ipath_lookup(dp.unit); - if (!dd || !(dd->ipath_flags & IPATH_PRESENT) || - !dd->ipath_kregbase) { - ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n", - dp.unit); - ret = -ENODEV; - goto bail; - } - - if (ipath_diag_inuse && !diag_set_link && - !(dd->ipath_flags & IPATH_LINKACTIVE)) { - diag_set_link = 1; - ipath_cdbg(VERBOSE, "Trying to set to set link active for " - "diag pkt\n"); - ipath_set_linkstate(dd, IPATH_IB_LINKARM); - ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE); - } - - if (!(dd->ipath_flags & IPATH_INITTED)) { - /* no hardware, freeze, etc. */ - ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit); - ret = -ENODEV; - goto bail; - } - /* - * Want to skip check for l_state if using custom PBC, - * because we might be trying to force an SM packet out. 
- * first-cut, skip _all_ state checking in that case. - */ - val = ipath_ib_state(dd, dd->ipath_lastibcstat); - lt_state = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat); - l_state = ipath_ib_linkstate(dd, dd->ipath_lastibcstat); - if (!dp.pbc_wd && (lt_state != INFINIPATH_IBCS_LT_STATE_LINKUP || - (val != dd->ib_init && val != dd->ib_arm && - val != dd->ib_active))) { - ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n", - dd->ipath_unit, (unsigned long long) val); - ret = -EINVAL; - goto bail; - } - - /* - * need total length before first word written, plus 2 Dwords. One Dword - * is for padding so we get the full user data when not aligned on - * a word boundary. The other Dword is to make sure we have room for the - * ICRC which gets tacked on later. - */ - maxlen_reserve = 2 * sizeof(u32); - if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) { - ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n", - dp.len, dd->ipath_ibmaxlen); - ret = -EINVAL; - goto bail; - } - - plen = sizeof(u32) + dp.len; - - tmpbuf = vmalloc(plen); - if (!tmpbuf) { - dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, " - "failing\n"); - ret = -ENOMEM; - goto bail; - } - - if (copy_from_user(tmpbuf, - (const void __user *) (unsigned long) dp.data, - dp.len)) { - ret = -EFAULT; - goto bail; - } - - plen >>= 2; /* in dwords */ - - piobuf = ipath_getpiobuf(dd, plen, &pbufn); - if (!piobuf) { - ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n", - dd->ipath_unit); - ret = -EBUSY; - goto bail; - } - /* disarm it just to be extra sure */ - ipath_disarm_piobufs(dd, pbufn, 1); - - if (ipath_debug & __IPATH_PKTDBG) - ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n", - dd->ipath_unit, plen - 1, pbufn); - - if (dp.pbc_wd == 0) - dp.pbc_wd = plen; - writeq(dp.pbc_wd, piobuf); - /* - * Copy all by the trigger word, then flush, so it's written - * to chip before trigger word, then write trigger word, then - * flush again, so packet is sent. 
- */ - if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) { - ipath_flush_wc(); - __iowrite32_copy(piobuf + 2, tmpbuf, plen - 1); - ipath_flush_wc(); - __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1); - } else - __iowrite32_copy(piobuf + 2, tmpbuf, plen); - - ipath_flush_wc(); - - ret = sizeof(dp); - -bail: - vfree(tmpbuf); - return ret; -} - -static int ipath_diag_release(struct inode *in, struct file *fp) -{ - mutex_lock(&ipath_mutex); - ipath_diag_inuse = 0; - fp->private_data = NULL; - mutex_unlock(&ipath_mutex); - return 0; -} - -static ssize_t ipath_diag_read(struct file *fp, char __user *data, - size_t count, loff_t *off) -{ - struct ipath_devdata *dd = fp->private_data; - void __iomem *kreg_base; - ssize_t ret; - - kreg_base = dd->ipath_kregbase; - - if (count == 0) - ret = 0; - else if ((count % 4) || (*off % 4)) - /* address or length is not 32-bit aligned, hence invalid */ - ret = -EINVAL; - else if (ipath_diag_inuse < 1 && (*off || count != 8)) - ret = -EINVAL; /* prevent cat /dev/ipath_diag* */ - else if ((count % 8) || (*off % 8)) - /* address or length not 64-bit aligned; do 32-bit reads */ - ret = ipath_read_umem32(dd, data, kreg_base + *off, count); - else - ret = ipath_read_umem64(dd, data, kreg_base + *off, count); - - if (ret >= 0) { - *off += count; - ret = count; - if (ipath_diag_inuse == -2) - ipath_diag_inuse++; - } - - return ret; -} - -static ssize_t ipath_diag_write(struct file *fp, const char __user *data, - size_t count, loff_t *off) -{ - struct ipath_devdata *dd = fp->private_data; - void __iomem *kreg_base; - ssize_t ret; - - kreg_base = dd->ipath_kregbase; - - if (count == 0) - ret = 0; - else if ((count % 4) || (*off % 4)) - /* address or length is not 32-bit aligned, hence invalid */ - ret = -EINVAL; - else if ((ipath_diag_inuse == -1 && (*off || count != 8)) || - ipath_diag_inuse == -2) /* read qw off 0, write qw off 0 */ - ret = -EINVAL; /* before any other write allowed */ - else if ((count % 8) || (*off % 8)) - /* address or length 
not 64-bit aligned; do 32-bit writes */ - ret = ipath_write_umem32(dd, kreg_base + *off, data, count); - else - ret = ipath_write_umem64(dd, kreg_base + *off, data, count); - - if (ret >= 0) { - *off += count; - ret = count; - if (ipath_diag_inuse == -1) - ipath_diag_inuse = 1; /* all read/write OK now */ - } - - return ret; -} diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c deleted file mode 100644 index 123a8c053..000000000 --- a/drivers/infiniband/hw/ipath/ipath_dma.c +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Copyright (c) 2006 QLogic, Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <linux/scatterlist.h> -#include <linux/gfp.h> -#include <rdma/ib_verbs.h> - -#include "ipath_verbs.h" - -#define BAD_DMA_ADDRESS ((u64) 0) - -/* - * The following functions implement driver specific replacements - * for the ib_dma_*() functions. - * - * These functions return kernel virtual addresses instead of - * device bus addresses since the driver uses the CPU to copy - * data instead of using hardware DMA. - */ - -static int ipath_mapping_error(struct ib_device *dev, u64 dma_addr) -{ - return dma_addr == BAD_DMA_ADDRESS; -} - -static u64 ipath_dma_map_single(struct ib_device *dev, - void *cpu_addr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(!valid_dma_direction(direction)); - return (u64) cpu_addr; -} - -static void ipath_dma_unmap_single(struct ib_device *dev, - u64 addr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(!valid_dma_direction(direction)); -} - -static u64 ipath_dma_map_page(struct ib_device *dev, - struct page *page, - unsigned long offset, - size_t size, - enum dma_data_direction direction) -{ - u64 addr; - - BUG_ON(!valid_dma_direction(direction)); - - if (offset + size > PAGE_SIZE) { - addr = BAD_DMA_ADDRESS; - goto done; - } - - addr = (u64) page_address(page); - if (addr) - addr += offset; - /* TODO: handle highmem pages */ - -done: - return addr; -} - -static void ipath_dma_unmap_page(struct ib_device *dev, - u64 addr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(!valid_dma_direction(direction)); -} - -static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction direction) -{ - struct scatterlist *sg; - u64 addr; - int i; - int ret = nents; - - BUG_ON(!valid_dma_direction(direction)); - - for_each_sg(sgl, sg, nents, i) { - addr = (u64) page_address(sg_page(sg)); - /* TODO: handle highmem pages */ - if (!addr) { - ret = 0; - break; - } - sg->dma_address = addr + sg->offset; -#ifdef CONFIG_NEED_SG_DMA_LENGTH - sg->dma_length = 
sg->length; -#endif - } - return ret; -} - -static void ipath_unmap_sg(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction) -{ - BUG_ON(!valid_dma_direction(direction)); -} - -static void ipath_sync_single_for_cpu(struct ib_device *dev, - u64 addr, - size_t size, - enum dma_data_direction dir) -{ -} - -static void ipath_sync_single_for_device(struct ib_device *dev, - u64 addr, - size_t size, - enum dma_data_direction dir) -{ -} - -static void *ipath_dma_alloc_coherent(struct ib_device *dev, size_t size, - u64 *dma_handle, gfp_t flag) -{ - struct page *p; - void *addr = NULL; - - p = alloc_pages(flag, get_order(size)); - if (p) - addr = page_address(p); - if (dma_handle) - *dma_handle = (u64) addr; - return addr; -} - -static void ipath_dma_free_coherent(struct ib_device *dev, size_t size, - void *cpu_addr, u64 dma_handle) -{ - free_pages((unsigned long) cpu_addr, get_order(size)); -} - -struct ib_dma_mapping_ops ipath_dma_mapping_ops = { - .mapping_error = ipath_mapping_error, - .map_single = ipath_dma_map_single, - .unmap_single = ipath_dma_unmap_single, - .map_page = ipath_dma_map_page, - .unmap_page = ipath_dma_unmap_page, - .map_sg = ipath_map_sg, - .unmap_sg = ipath_unmap_sg, - .sync_single_for_cpu = ipath_sync_single_for_cpu, - .sync_single_for_device = ipath_sync_single_for_device, - .alloc_coherent = ipath_dma_alloc_coherent, - .free_coherent = ipath_dma_free_coherent -}; diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c deleted file mode 100644 index 871dbe562..000000000 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ /dev/null @@ -1,2789 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/sched.h> -#include <linux/spinlock.h> -#include <linux/idr.h> -#include <linux/pci.h> -#include <linux/io.h> -#include <linux/delay.h> -#include <linux/netdevice.h> -#include <linux/vmalloc.h> -#include <linux/bitmap.h> -#include <linux/slab.h> -#include <linux/module.h> -#ifdef CONFIG_X86_64 -#include <asm/pat.h> -#endif - -#include "ipath_kernel.h" -#include "ipath_verbs.h" - -static void ipath_update_pio_bufs(struct ipath_devdata *); - -const char *ipath_get_unit_name(int unit) -{ - static char iname[16]; - snprintf(iname, sizeof iname, "infinipath%u", unit); - return iname; -} - -#define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: " -#define PFX IPATH_DRV_NAME ": " - -/* - * The size has to be longer than this string, so we can append - * board/chip information to it in the init code. - */ -const char ib_ipath_version[] = IPATH_IDSTR "\n"; - -static struct idr unit_table; -DEFINE_SPINLOCK(ipath_devs_lock); -LIST_HEAD(ipath_dev_list); - -wait_queue_head_t ipath_state_wait; - -unsigned ipath_debug = __IPATH_INFO; - -module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(debug, "mask for debug prints"); -EXPORT_SYMBOL_GPL(ipath_debug); - -unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */ -module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO); -MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported"); - -static unsigned ipath_hol_timeout_ms = 13000; -module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO); -MODULE_PARM_DESC(hol_timeout_ms, - "duration of user app suspension after link failure"); - -unsigned ipath_linkrecovery = 1; -module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue"); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("QLogic <support@qlogic.com>"); -MODULE_DESCRIPTION("QLogic InfiniPath driver"); - 
-/* - * Table to translate the LINKTRAININGSTATE portion of - * IBCStatus to a human-readable form. - */ -const char *ipath_ibcstatus_str[] = { - "Disabled", - "LinkUp", - "PollActive", - "PollQuiet", - "SleepDelay", - "SleepQuiet", - "LState6", /* unused */ - "LState7", /* unused */ - "CfgDebounce", - "CfgRcvfCfg", - "CfgWaitRmt", - "CfgIdle", - "RecovRetrain", - "CfgTxRevLane", /* unused before IBA7220 */ - "RecovWaitRmt", - "RecovIdle", - /* below were added for IBA7220 */ - "CfgEnhanced", - "CfgTest", - "CfgWaitRmtTest", - "CfgWaitCfgEnhanced", - "SendTS_T", - "SendTstIdles", - "RcvTS_T", - "SendTst_TS1s", - "LTState18", "LTState19", "LTState1A", "LTState1B", - "LTState1C", "LTState1D", "LTState1E", "LTState1F" -}; - -static void ipath_remove_one(struct pci_dev *); -static int ipath_init_one(struct pci_dev *, const struct pci_device_id *); - -/* Only needed for registration, nothing else needs this info */ -#define PCI_VENDOR_ID_PATHSCALE 0x1fc1 -#define PCI_DEVICE_ID_INFINIPATH_HT 0xd - -/* Number of seconds before our card status check... 
*/ -#define STATUS_TIMEOUT 60 - -static const struct pci_device_id ipath_pci_tbl[] = { - { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) }, - { 0, } -}; - -MODULE_DEVICE_TABLE(pci, ipath_pci_tbl); - -static struct pci_driver ipath_driver = { - .name = IPATH_DRV_NAME, - .probe = ipath_init_one, - .remove = ipath_remove_one, - .id_table = ipath_pci_tbl, - .driver = { - .groups = ipath_driver_attr_groups, - }, -}; - -static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev, - u32 *bar0, u32 *bar1) -{ - int ret; - - ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0); - if (ret) - ipath_dev_err(dd, "failed to read bar0 before enable: " - "error %d\n", -ret); - - ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1); - if (ret) - ipath_dev_err(dd, "failed to read bar1 before enable: " - "error %d\n", -ret); - - ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1); -} - -static void ipath_free_devdata(struct pci_dev *pdev, - struct ipath_devdata *dd) -{ - unsigned long flags; - - pci_set_drvdata(pdev, NULL); - - if (dd->ipath_unit != -1) { - spin_lock_irqsave(&ipath_devs_lock, flags); - idr_remove(&unit_table, dd->ipath_unit); - list_del(&dd->ipath_list); - spin_unlock_irqrestore(&ipath_devs_lock, flags); - } - vfree(dd); -} - -static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) -{ - unsigned long flags; - struct ipath_devdata *dd; - int ret; - - dd = vzalloc(sizeof(*dd)); - if (!dd) { - dd = ERR_PTR(-ENOMEM); - goto bail; - } - dd->ipath_unit = -1; - - idr_preload(GFP_KERNEL); - spin_lock_irqsave(&ipath_devs_lock, flags); - - ret = idr_alloc(&unit_table, dd, 0, 0, GFP_NOWAIT); - if (ret < 0) { - printk(KERN_ERR IPATH_DRV_NAME - ": Could not allocate unit ID: error %d\n", -ret); - ipath_free_devdata(pdev, dd); - dd = ERR_PTR(ret); - goto bail_unlock; - } - dd->ipath_unit = ret; - - dd->pcidev = pdev; - pci_set_drvdata(pdev, dd); - - list_add(&dd->ipath_list, &ipath_dev_list); - -bail_unlock: - 
spin_unlock_irqrestore(&ipath_devs_lock, flags); - idr_preload_end(); -bail: - return dd; -} - -static inline struct ipath_devdata *__ipath_lookup(int unit) -{ - return idr_find(&unit_table, unit); -} - -struct ipath_devdata *ipath_lookup(int unit) -{ - struct ipath_devdata *dd; - unsigned long flags; - - spin_lock_irqsave(&ipath_devs_lock, flags); - dd = __ipath_lookup(unit); - spin_unlock_irqrestore(&ipath_devs_lock, flags); - - return dd; -} - -int ipath_count_units(int *npresentp, int *nupp, int *maxportsp) -{ - int nunits, npresent, nup; - struct ipath_devdata *dd; - unsigned long flags; - int maxports; - - nunits = npresent = nup = maxports = 0; - - spin_lock_irqsave(&ipath_devs_lock, flags); - - list_for_each_entry(dd, &ipath_dev_list, ipath_list) { - nunits++; - if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase) - npresent++; - if (dd->ipath_lid && - !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN - | IPATH_LINKUNK))) - nup++; - if (dd->ipath_cfgports > maxports) - maxports = dd->ipath_cfgports; - } - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - - if (npresentp) - *npresentp = npresent; - if (nupp) - *nupp = nup; - if (maxportsp) - *maxportsp = maxports; - - return nunits; -} - -/* - * These next two routines are placeholders in case we don't have per-arch - * code for controlling write combining. If explicit control of write - * combining is not available, performance will probably be awful. - */ - -int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd) -{ - return -EOPNOTSUPP; -} - -void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd) -{ -} - -/* - * Perform a PIO buffer bandwidth write test, to verify proper system - * configuration. Even when all the setup calls work, occasionally - * BIOS or other issues can prevent write combining from working, or - * can cause other bandwidth problems to the chip. 
- * - * This test simply writes the same buffer over and over again, and - * measures close to the peak bandwidth to the chip (not testing - * data bandwidth to the wire). On chips that use an address-based - * trigger to send packets to the wire, this is easy. On chips that - * use a count to trigger, we want to make sure that the packet doesn't - * go out on the wire, or trigger flow control checks. - */ -static void ipath_verify_pioperf(struct ipath_devdata *dd) -{ - u32 pbnum, cnt, lcnt; - u32 __iomem *piobuf; - u32 *addr; - u64 msecs, emsecs; - - piobuf = ipath_getpiobuf(dd, 0, &pbnum); - if (!piobuf) { - dev_info(&dd->pcidev->dev, - "No PIObufs for checking perf, skipping\n"); - return; - } - - /* - * Enough to give us a reasonable test, less than piobuf size, and - * likely multiple of store buffer length. - */ - cnt = 1024; - - addr = vmalloc(cnt); - if (!addr) { - dev_info(&dd->pcidev->dev, - "Couldn't get memory for checking PIO perf," - " skipping\n"); - goto done; - } - - preempt_disable(); /* we want reasonably accurate elapsed time */ - msecs = 1 + jiffies_to_msecs(jiffies); - for (lcnt = 0; lcnt < 10000U; lcnt++) { - /* wait until we cross msec boundary */ - if (jiffies_to_msecs(jiffies) >= msecs) - break; - udelay(1); - } - - ipath_disable_armlaunch(dd); - - /* - * length 0, no dwords actually sent, and mark as VL15 - * on chips where that may matter (due to IB flowcontrol) - */ - if ((dd->ipath_flags & IPATH_HAS_PBC_CNT)) - writeq(1UL << 63, piobuf); - else - writeq(0, piobuf); - ipath_flush_wc(); - - /* - * this is only roughly accurate, since even with preempt we - * still take interrupts that could take a while. 
Running for - * >= 5 msec seems to get us "close enough" to accurate values - */ - msecs = jiffies_to_msecs(jiffies); - for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) { - __iowrite32_copy(piobuf + 64, addr, cnt >> 2); - emsecs = jiffies_to_msecs(jiffies) - msecs; - } - - /* 1 GiB/sec, slightly over IB SDR line rate */ - if (lcnt < (emsecs * 1024U)) - ipath_dev_err(dd, - "Performance problem: bandwidth to PIO buffers is " - "only %u MiB/sec\n", - lcnt / (u32) emsecs); - else - ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n", - lcnt / (u32) emsecs); - - preempt_enable(); - - vfree(addr); - -done: - /* disarm piobuf, so it's available again */ - ipath_disarm_piobufs(dd, pbnum, 1); - ipath_enable_armlaunch(dd); -} - -static void cleanup_device(struct ipath_devdata *dd); - -static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) -{ - int ret, len, j; - struct ipath_devdata *dd; - unsigned long long addr; - u32 bar0 = 0, bar1 = 0; - -#ifdef CONFIG_X86_64 - if (pat_enabled()) { - pr_warn("ipath needs PAT disabled, boot with nopat kernel parameter\n"); - ret = -ENODEV; - goto bail; - } -#endif - - dd = ipath_alloc_devdata(pdev); - if (IS_ERR(dd)) { - ret = PTR_ERR(dd); - printk(KERN_ERR IPATH_DRV_NAME - ": Could not allocate devdata: error %d\n", -ret); - goto bail; - } - - ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit); - - ret = pci_enable_device(pdev); - if (ret) { - /* This can happen iff: - * - * We did a chip reset, and then failed to reprogram the - * BAR, or the chip reset due to an internal error. We then - * unloaded the driver and reloaded it. - * - * Both reset cases set the BAR back to initial state. For - * the latter case, the AER sticky error bit at offset 0x718 - * should be set, but the Linux kernel doesn't yet know - * about that, it appears. If the original BAR was retained - * in the kernel data structures, this may be OK. 
- */ - ipath_dev_err(dd, "enable unit %d failed: error %d\n", - dd->ipath_unit, -ret); - goto bail_devdata; - } - addr = pci_resource_start(pdev, 0); - len = pci_resource_len(pdev, 0); - ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x " - "driver_data %lx\n", addr, len, pdev->irq, ent->vendor, - ent->device, ent->driver_data); - - read_bars(dd, pdev, &bar0, &bar1); - - if (!bar1 && !(bar0 & ~0xf)) { - if (addr) { - dev_info(&pdev->dev, "BAR is 0 (probable RESET), " - "rewriting as %llx\n", addr); - ret = pci_write_config_dword( - pdev, PCI_BASE_ADDRESS_0, addr); - if (ret) { - ipath_dev_err(dd, "rewrite of BAR0 " - "failed: err %d\n", -ret); - goto bail_disable; - } - ret = pci_write_config_dword( - pdev, PCI_BASE_ADDRESS_1, addr >> 32); - if (ret) { - ipath_dev_err(dd, "rewrite of BAR1 " - "failed: err %d\n", -ret); - goto bail_disable; - } - } else { - ipath_dev_err(dd, "BAR is 0 (probable RESET), " - "not usable until reboot\n"); - ret = -ENODEV; - goto bail_disable; - } - } - - ret = pci_request_regions(pdev, IPATH_DRV_NAME); - if (ret) { - dev_info(&pdev->dev, "pci_request_regions unit %u fails: " - "err %d\n", dd->ipath_unit, -ret); - goto bail_disable; - } - - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (ret) { - /* - * if the 64 bit setup fails, try 32 bit. Some systems - * do not setup 64 bit maps on systems with 2GB or less - * memory installed. 
- */ - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (ret) { - dev_info(&pdev->dev, - "Unable to set DMA mask for unit %u: %d\n", - dd->ipath_unit, ret); - goto bail_regions; - } - else { - ipath_dbg("No 64bit DMA mask, used 32 bit mask\n"); - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - if (ret) - dev_info(&pdev->dev, - "Unable to set DMA consistent mask " - "for unit %u: %d\n", - dd->ipath_unit, ret); - - } - } - else { - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (ret) - dev_info(&pdev->dev, - "Unable to set DMA consistent mask " - "for unit %u: %d\n", - dd->ipath_unit, ret); - } - - pci_set_master(pdev); - - /* - * Save BARs to rewrite after device reset. Save all 64 bits of - * BAR, just in case. - */ - dd->ipath_pcibar0 = addr; - dd->ipath_pcibar1 = addr >> 32; - dd->ipath_deviceid = ent->device; /* save for later use */ - dd->ipath_vendorid = ent->vendor; - - /* setup the chip-specific functions, as early as possible. */ - switch (ent->device) { - case PCI_DEVICE_ID_INFINIPATH_HT: - ipath_init_iba6110_funcs(dd); - break; - - default: - ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " - "failing\n", ent->device); - return -ENODEV; - } - - for (j = 0; j < 6; j++) { - if (!pdev->resource[j].start) - continue; - ipath_cdbg(VERBOSE, "BAR %d %pR, len %llx\n", - j, &pdev->resource[j], - (unsigned long long)pci_resource_len(pdev, j)); - } - - if (!addr) { - ipath_dev_err(dd, "No valid address in BAR 0!\n"); - ret = -ENODEV; - goto bail_regions; - } - - dd->ipath_pcirev = pdev->revision; - -#if defined(__powerpc__) - /* There isn't a generic way to specify writethrough mappings */ - dd->ipath_kregbase = __ioremap(addr, len, - (_PAGE_NO_CACHE|_PAGE_WRITETHRU)); -#else - /* XXX: split this properly to enable on PAT */ - dd->ipath_kregbase = ioremap_nocache(addr, len); -#endif - - if (!dd->ipath_kregbase) { - ipath_dbg("Unable to map io addr %llx to kvirt, failing\n", - addr); - ret = -ENOMEM; - goto bail_iounmap; - } 
- dd->ipath_kregend = (u64 __iomem *) - ((void __iomem *)dd->ipath_kregbase + len); - dd->ipath_physaddr = addr; /* used for io_remap, etc. */ - /* for user mmap */ - ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n", - addr, dd->ipath_kregbase); - - if (dd->ipath_f_bus(dd, pdev)) - ipath_dev_err(dd, "Failed to setup config space; " - "continuing anyway\n"); - - /* - * set up our interrupt handler; IRQF_SHARED probably not needed, - * since MSI interrupts shouldn't be shared but won't hurt for now. - * check 0 irq after we return from chip-specific bus setup, since - * that can affect this due to setup - */ - if (!dd->ipath_irq) - ipath_dev_err(dd, "irq is 0, BIOS error? Interrupts won't " - "work\n"); - else { - ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED, - IPATH_DRV_NAME, dd); - if (ret) { - ipath_dev_err(dd, "Couldn't setup irq handler, " - "irq=%d: %d\n", dd->ipath_irq, ret); - goto bail_iounmap; - } - } - - ret = ipath_init_chip(dd, 0); /* do the chip-specific init */ - if (ret) - goto bail_irqsetup; - - ret = ipath_enable_wc(dd); - - if (ret) - ret = 0; - - ipath_verify_pioperf(dd); - - ipath_device_create_group(&pdev->dev, dd); - ipathfs_add_device(dd); - ipath_user_add(dd); - ipath_diag_add(dd); - ipath_register_ib_device(dd); - - goto bail; - -bail_irqsetup: - cleanup_device(dd); - - if (dd->ipath_irq) - dd->ipath_f_free_irq(dd); - - if (dd->ipath_f_cleanup) - dd->ipath_f_cleanup(dd); - -bail_iounmap: - iounmap((volatile void __iomem *) dd->ipath_kregbase); - -bail_regions: - pci_release_regions(pdev); - -bail_disable: - pci_disable_device(pdev); - -bail_devdata: - ipath_free_devdata(pdev, dd); - -bail: - return ret; -} - -static void cleanup_device(struct ipath_devdata *dd) -{ - int port; - struct ipath_portdata **tmp; - unsigned long flags; - - if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { - /* can't do anything more with chip; needs re-init */ - *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT; - if (dd->ipath_kregbase) { 
- /* - * if we haven't already cleaned up before these are - * to ensure any register reads/writes "fail" until - * re-init - */ - dd->ipath_kregbase = NULL; - dd->ipath_uregbase = 0; - dd->ipath_sregbase = 0; - dd->ipath_cregbase = 0; - dd->ipath_kregsize = 0; - } - ipath_disable_wc(dd); - } - - if (dd->ipath_spectriggerhit) - dev_info(&dd->pcidev->dev, "%lu special trigger hits\n", - dd->ipath_spectriggerhit); - - if (dd->ipath_pioavailregs_dma) { - dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, - (void *) dd->ipath_pioavailregs_dma, - dd->ipath_pioavailregs_phys); - dd->ipath_pioavailregs_dma = NULL; - } - if (dd->ipath_dummy_hdrq) { - dma_free_coherent(&dd->pcidev->dev, - dd->ipath_pd[0]->port_rcvhdrq_size, - dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys); - dd->ipath_dummy_hdrq = NULL; - } - - if (dd->ipath_pageshadow) { - struct page **tmpp = dd->ipath_pageshadow; - dma_addr_t *tmpd = dd->ipath_physshadow; - int i, cnt = 0; - - ipath_cdbg(VERBOSE, "Unlocking any expTID pages still " - "locked\n"); - for (port = 0; port < dd->ipath_cfgports; port++) { - int port_tidbase = port * dd->ipath_rcvtidcnt; - int maxtid = port_tidbase + dd->ipath_rcvtidcnt; - for (i = port_tidbase; i < maxtid; i++) { - if (!tmpp[i]) - continue; - pci_unmap_page(dd->pcidev, tmpd[i], - PAGE_SIZE, PCI_DMA_FROMDEVICE); - ipath_release_user_pages(&tmpp[i], 1); - tmpp[i] = NULL; - cnt++; - } - } - if (cnt) { - ipath_stats.sps_pageunlocks += cnt; - ipath_cdbg(VERBOSE, "There were still %u expTID " - "entries locked\n", cnt); - } - if (ipath_stats.sps_pagelocks || - ipath_stats.sps_pageunlocks) - ipath_cdbg(VERBOSE, "%llu pages locked, %llu " - "unlocked via ipath_m{un}lock\n", - (unsigned long long) - ipath_stats.sps_pagelocks, - (unsigned long long) - ipath_stats.sps_pageunlocks); - - ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", - dd->ipath_pageshadow); - tmpp = dd->ipath_pageshadow; - dd->ipath_pageshadow = NULL; - vfree(tmpp); - - dd->ipath_egrtidbase = NULL; - } - - /* - * 
free any resources still in use (usually just kernel ports) - * at unload; we do for portcnt, because that's what we allocate. - * We acquire lock to be really paranoid that ipath_pd isn't being - * accessed from some interrupt-related code (that should not happen, - * but best to be sure). - */ - spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); - tmp = dd->ipath_pd; - dd->ipath_pd = NULL; - spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); - for (port = 0; port < dd->ipath_portcnt; port++) { - struct ipath_portdata *pd = tmp[port]; - tmp[port] = NULL; /* debugging paranoia */ - ipath_free_pddata(dd, pd); - } - kfree(tmp); -} - -static void ipath_remove_one(struct pci_dev *pdev) -{ - struct ipath_devdata *dd = pci_get_drvdata(pdev); - - ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd); - - /* - * disable the IB link early, to be sure no new packets arrive, which - * complicates the shutdown process - */ - ipath_shutdown_device(dd); - - flush_workqueue(ib_wq); - - if (dd->verbs_dev) - ipath_unregister_ib_device(dd->verbs_dev); - - ipath_diag_remove(dd); - ipath_user_remove(dd); - ipathfs_remove_device(dd); - ipath_device_remove_group(&pdev->dev, dd); - - ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, " - "unit %u\n", dd, (u32) dd->ipath_unit); - - cleanup_device(dd); - - /* - * turn off rcv, send, and interrupts for all ports, all drivers - * should also hard reset the chip here? - * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs - * for all versions of the driver, if they were allocated - */ - if (dd->ipath_irq) { - ipath_cdbg(VERBOSE, "unit %u free irq %d\n", - dd->ipath_unit, dd->ipath_irq); - dd->ipath_f_free_irq(dd); - } else - ipath_dbg("irq is 0, not doing free_irq " - "for unit %u\n", dd->ipath_unit); - /* - * we check for NULL here, because it's outside - * the kregbase check, and we need to call it - * after the free_irq. Thus it's possible that - * the function pointers were never initialized. 
- */ - if (dd->ipath_f_cleanup) - /* clean up chip-specific stuff */ - dd->ipath_f_cleanup(dd); - - ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase); - iounmap((volatile void __iomem *) dd->ipath_kregbase); - pci_release_regions(pdev); - ipath_cdbg(VERBOSE, "calling pci_disable_device\n"); - pci_disable_device(pdev); - - ipath_free_devdata(pdev, dd); -} - -/* general driver use */ -DEFINE_MUTEX(ipath_mutex); - -static DEFINE_SPINLOCK(ipath_pioavail_lock); - -/** - * ipath_disarm_piobufs - cancel a range of PIO buffers - * @dd: the infinipath device - * @first: the first PIO buffer to cancel - * @cnt: the number of PIO buffers to cancel - * - * cancel a range of PIO buffers, used when they might be armed, but - * not triggered. Used at init to ensure buffer state, and also user - * process close, in case it died while writing to a PIO buffer - * Also after errors. - */ -void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, - unsigned cnt) -{ - unsigned i, last = first + cnt; - unsigned long flags; - - ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first); - for (i = first; i < last; i++) { - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - /* - * The disarm-related bits are write-only, so it - * is ok to OR them in with our copy of sendctrl - * while we hold the lock. 
- */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl | INFINIPATH_S_DISARM | - (i << INFINIPATH_S_DISARMPIOBUF_SHIFT)); - /* can't disarm bufs back-to-back per iba7220 spec */ - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - } - /* on some older chips, update may not happen after cancel */ - ipath_force_pio_avail_update(dd); -} - -/** - * ipath_wait_linkstate - wait for an IB link state change to occur - * @dd: the infinipath device - * @state: the state to wait for - * @msecs: the number of milliseconds to wait - * - * wait up to msecs milliseconds for IB link state change to occur for - * now, take the easy polling route. Currently used only by - * ipath_set_linkstate. Returns 0 if state reached, otherwise - * -ETIMEDOUT state can have multiple states set, for any of several - * transitions. - */ -int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs) -{ - dd->ipath_state_wanted = state; - wait_event_interruptible_timeout(ipath_state_wait, - (dd->ipath_flags & state), - msecs_to_jiffies(msecs)); - dd->ipath_state_wanted = 0; - - if (!(dd->ipath_flags & state)) { - u64 val; - ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u" - " ms\n", - /* test INIT ahead of DOWN, both can be set */ - (state & IPATH_LINKINIT) ? "INIT" : - ((state & IPATH_LINKDOWN) ? "DOWN" : - ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")), - msecs); - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); - ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n", - (unsigned long long) ipath_read_kreg64( - dd, dd->ipath_kregs->kr_ibcctrl), - (unsigned long long) val, - ipath_ibcstatus_str[val & dd->ibcs_lts_mask]); - } - return (dd->ipath_flags & state) ? 
0 : -ETIMEDOUT; -} - -static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err, - char *buf, size_t blen) -{ - static const struct { - ipath_err_t err; - const char *msg; - } errs[] = { - { INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" }, - { INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" }, - { INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" }, - { INFINIPATH_E_SDMABASE, "SDmaBase" }, - { INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" }, - { INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" }, - { INFINIPATH_E_SDMADWEN, "SDmaDwEn" }, - { INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" }, - { INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" }, - { INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" }, - { INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" }, - { INFINIPATH_E_SDMADISABLED, "SDmaDisabled" }, - }; - int i; - int expected; - size_t bidx = 0; - - for (i = 0; i < ARRAY_SIZE(errs); i++) { - expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 : - test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); - if ((err & errs[i].err) && !expected) - bidx += snprintf(buf + bidx, blen - bidx, - "%s ", errs[i].msg); - } -} - -/* - * Decode the error status into strings, deciding whether to always - * print * it or not depending on "normal packet errors" vs everything - * else. Return 1 if "real" errors, otherwise 0 if only packet - * errors, so caller can decide what to print with the string. - */ -int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen, - ipath_err_t err) -{ - int iserr = 1; - *buf = '\0'; - if (err & INFINIPATH_E_PKTERRS) { - if (!(err & ~INFINIPATH_E_PKTERRS)) - iserr = 0; // if only packet errors. 
- if (ipath_debug & __IPATH_ERRPKTDBG) { - if (err & INFINIPATH_E_REBP) - strlcat(buf, "EBP ", blen); - if (err & INFINIPATH_E_RVCRC) - strlcat(buf, "VCRC ", blen); - if (err & INFINIPATH_E_RICRC) { - strlcat(buf, "CRC ", blen); - // clear for check below, so only once - err &= INFINIPATH_E_RICRC; - } - if (err & INFINIPATH_E_RSHORTPKTLEN) - strlcat(buf, "rshortpktlen ", blen); - if (err & INFINIPATH_E_SDROPPEDDATAPKT) - strlcat(buf, "sdroppeddatapkt ", blen); - if (err & INFINIPATH_E_SPKTLEN) - strlcat(buf, "spktlen ", blen); - } - if ((err & INFINIPATH_E_RICRC) && - !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP))) - strlcat(buf, "CRC ", blen); - if (!iserr) - goto done; - } - if (err & INFINIPATH_E_RHDRLEN) - strlcat(buf, "rhdrlen ", blen); - if (err & INFINIPATH_E_RBADTID) - strlcat(buf, "rbadtid ", blen); - if (err & INFINIPATH_E_RBADVERSION) - strlcat(buf, "rbadversion ", blen); - if (err & INFINIPATH_E_RHDR) - strlcat(buf, "rhdr ", blen); - if (err & INFINIPATH_E_SENDSPECIALTRIGGER) - strlcat(buf, "sendspecialtrigger ", blen); - if (err & INFINIPATH_E_RLONGPKTLEN) - strlcat(buf, "rlongpktlen ", blen); - if (err & INFINIPATH_E_RMAXPKTLEN) - strlcat(buf, "rmaxpktlen ", blen); - if (err & INFINIPATH_E_RMINPKTLEN) - strlcat(buf, "rminpktlen ", blen); - if (err & INFINIPATH_E_SMINPKTLEN) - strlcat(buf, "sminpktlen ", blen); - if (err & INFINIPATH_E_RFORMATERR) - strlcat(buf, "rformaterr ", blen); - if (err & INFINIPATH_E_RUNSUPVL) - strlcat(buf, "runsupvl ", blen); - if (err & INFINIPATH_E_RUNEXPCHAR) - strlcat(buf, "runexpchar ", blen); - if (err & INFINIPATH_E_RIBFLOW) - strlcat(buf, "ribflow ", blen); - if (err & INFINIPATH_E_SUNDERRUN) - strlcat(buf, "sunderrun ", blen); - if (err & INFINIPATH_E_SPIOARMLAUNCH) - strlcat(buf, "spioarmlaunch ", blen); - if (err & INFINIPATH_E_SUNEXPERRPKTNUM) - strlcat(buf, "sunexperrpktnum ", blen); - if (err & INFINIPATH_E_SDROPPEDSMPPKT) - strlcat(buf, "sdroppedsmppkt ", blen); - if (err & INFINIPATH_E_SMAXPKTLEN) - 
strlcat(buf, "smaxpktlen ", blen); - if (err & INFINIPATH_E_SUNSUPVL) - strlcat(buf, "sunsupVL ", blen); - if (err & INFINIPATH_E_INVALIDADDR) - strlcat(buf, "invalidaddr ", blen); - if (err & INFINIPATH_E_RRCVEGRFULL) - strlcat(buf, "rcvegrfull ", blen); - if (err & INFINIPATH_E_RRCVHDRFULL) - strlcat(buf, "rcvhdrfull ", blen); - if (err & INFINIPATH_E_IBSTATUSCHANGED) - strlcat(buf, "ibcstatuschg ", blen); - if (err & INFINIPATH_E_RIBLOSTLINK) - strlcat(buf, "riblostlink ", blen); - if (err & INFINIPATH_E_HARDWARE) - strlcat(buf, "hardware ", blen); - if (err & INFINIPATH_E_RESET) - strlcat(buf, "reset ", blen); - if (err & INFINIPATH_E_SDMAERRS) - decode_sdma_errs(dd, err, buf, blen); - if (err & INFINIPATH_E_INVALIDEEPCMD) - strlcat(buf, "invalideepromcmd ", blen); -done: - return iserr; -} - -/** - * get_rhf_errstring - decode RHF errors - * @err: the err number - * @msg: the output buffer - * @len: the length of the output buffer - * - * only used one place now, may want more later - */ -static void get_rhf_errstring(u32 err, char *msg, size_t len) -{ - /* if no errors, and so don't need to check what's first */ - *msg = '\0'; - - if (err & INFINIPATH_RHF_H_ICRCERR) - strlcat(msg, "icrcerr ", len); - if (err & INFINIPATH_RHF_H_VCRCERR) - strlcat(msg, "vcrcerr ", len); - if (err & INFINIPATH_RHF_H_PARITYERR) - strlcat(msg, "parityerr ", len); - if (err & INFINIPATH_RHF_H_LENERR) - strlcat(msg, "lenerr ", len); - if (err & INFINIPATH_RHF_H_MTUERR) - strlcat(msg, "mtuerr ", len); - if (err & INFINIPATH_RHF_H_IHDRERR) - /* infinipath hdr checksum error */ - strlcat(msg, "ipathhdrerr ", len); - if (err & INFINIPATH_RHF_H_TIDERR) - strlcat(msg, "tiderr ", len); - if (err & INFINIPATH_RHF_H_MKERR) - /* bad port, offset, etc. 
*/ - strlcat(msg, "invalid ipathhdr ", len); - if (err & INFINIPATH_RHF_H_IBERR) - strlcat(msg, "iberr ", len); - if (err & INFINIPATH_RHF_L_SWA) - strlcat(msg, "swA ", len); - if (err & INFINIPATH_RHF_L_SWB) - strlcat(msg, "swB ", len); -} - -/** - * ipath_get_egrbuf - get an eager buffer - * @dd: the infinipath device - * @bufnum: the eager buffer to get - * - * must only be called if ipath_pd[port] is known to be allocated - */ -static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum) -{ - return dd->ipath_port0_skbinfo ? - (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL; -} - -/** - * ipath_alloc_skb - allocate an skb and buffer with possible constraints - * @dd: the infinipath device - * @gfp_mask: the sk_buff SFP mask - */ -struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, - gfp_t gfp_mask) -{ - struct sk_buff *skb; - u32 len; - - /* - * Only fully supported way to handle this is to allocate lots - * extra, align as needed, and then do skb_reserve(). That wastes - * a lot of memory... I'll have to hack this into infinipath_copy - * also. - */ - - /* - * We need 2 extra bytes for ipath_ether data sent in the - * key header. In order to keep everything dword aligned, - * we'll reserve 4 bytes. - */ - len = dd->ipath_ibmaxlen + 4; - - if (dd->ipath_flags & IPATH_4BYTE_TID) { - /* We need a 2KB multiple alignment, and there is no way - * to do it except to allocate extra and then skb_reserve - * enough to bring it up to the right alignment. 
- */ - len += 2047; - } - - skb = __dev_alloc_skb(len, gfp_mask); - if (!skb) { - ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n", - len); - goto bail; - } - - skb_reserve(skb, 4); - - if (dd->ipath_flags & IPATH_4BYTE_TID) { - u32 una = (unsigned long)skb->data & 2047; - if (una) - skb_reserve(skb, 2048 - una); - } - -bail: - return skb; -} - -static void ipath_rcv_hdrerr(struct ipath_devdata *dd, - u32 eflags, - u32 l, - u32 etail, - __le32 *rhf_addr, - struct ipath_message_header *hdr) -{ - char emsg[128]; - - get_rhf_errstring(eflags, emsg, sizeof emsg); - ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u " - "tlen=%x opcode=%x egridx=%x: %s\n", - eflags, l, - ipath_hdrget_rcv_type(rhf_addr), - ipath_hdrget_length_in_bytes(rhf_addr), - be32_to_cpu(hdr->bth[0]) >> 24, - etail, emsg); - - /* Count local link integrity errors. */ - if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) { - u8 n = (dd->ipath_ibcctrl >> - INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; - - if (++dd->ipath_lli_counter > n) { - dd->ipath_lli_counter = 0; - dd->ipath_lli_errors++; - } - } -} - -/* - * ipath_kreceive - receive a packet - * @pd: the infinipath port - * - * called from interrupt handler for errors or receive interrupt - */ -void ipath_kreceive(struct ipath_portdata *pd) -{ - struct ipath_devdata *dd = pd->port_dd; - __le32 *rhf_addr; - void *ebuf; - const u32 rsize = dd->ipath_rcvhdrentsize; /* words */ - const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */ - u32 etail = -1, l, hdrqtail; - struct ipath_message_header *hdr; - u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0; - static u64 totcalls; /* stats, may eventually remove */ - int last; - - l = pd->port_head; - rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset; - if (dd->ipath_flags & IPATH_NODMA_RTAIL) { - u32 seq = ipath_hdrget_seq(rhf_addr); - - if (seq != pd->port_seq_cnt) - goto bail; - hdrqtail = 0; - } else { - hdrqtail 
= ipath_get_rcvhdrtail(pd); - if (l == hdrqtail) - goto bail; - smp_rmb(); - } - -reloop: - for (last = 0, i = 1; !last; i += !last) { - hdr = dd->ipath_f_get_msgheader(dd, rhf_addr); - eflags = ipath_hdrget_err_flags(rhf_addr); - etype = ipath_hdrget_rcv_type(rhf_addr); - /* total length */ - tlen = ipath_hdrget_length_in_bytes(rhf_addr); - ebuf = NULL; - if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ? - ipath_hdrget_use_egr_buf(rhf_addr) : - (etype != RCVHQ_RCV_TYPE_EXPECTED)) { - /* - * It turns out that the chip uses an eager buffer - * for all non-expected packets, whether it "needs" - * one or not. So always get the index, but don't - * set ebuf (so we try to copy data) unless the - * length requires it. - */ - etail = ipath_hdrget_index(rhf_addr); - updegr = 1; - if (tlen > sizeof(*hdr) || - etype == RCVHQ_RCV_TYPE_NON_KD) - ebuf = ipath_get_egrbuf(dd, etail); - } - - /* - * both tiderr and ipathhdrerr are set for all plain IB - * packets; only ipathhdrerr should be set. - */ - - if (etype != RCVHQ_RCV_TYPE_NON_KD && - etype != RCVHQ_RCV_TYPE_ERROR && - ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) != - IPS_PROTO_VERSION) - ipath_cdbg(PKT, "Bad InfiniPath protocol version " - "%x\n", etype); - - if (unlikely(eflags)) - ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr); - else if (etype == RCVHQ_RCV_TYPE_NON_KD) { - ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen); - if (dd->ipath_lli_counter) - dd->ipath_lli_counter--; - } else if (etype == RCVHQ_RCV_TYPE_EAGER) { - u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24; - u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff; - ipath_cdbg(PKT, "typ %x, opcode %x (eager, " - "qp=%x), len %x; ignored\n", - etype, opcode, qp, tlen); - } - else if (etype == RCVHQ_RCV_TYPE_EXPECTED) - ipath_dbg("Bug: Expected TID, opcode %x; ignored\n", - be32_to_cpu(hdr->bth[0]) >> 24); - else { - /* - * error packet, type of error unknown. - * Probably type 3, but we don't know, so don't - * even try to print the opcode, etc. 
- * Usually caused by a "bad packet", that has no - * BTH, when the LRH says it should. - */ - ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf" - " %x, len %x hdrq+%x rhf: %Lx\n", - etail, tlen, l, (unsigned long long) - le64_to_cpu(*(__le64 *) rhf_addr)); - if (ipath_debug & __IPATH_ERRPKTDBG) { - u32 j, *d, dw = rsize-2; - if (rsize > (tlen>>2)) - dw = tlen>>2; - d = (u32 *)hdr; - printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n", - dw); - for (j = 0; j < dw; j++) - printk(KERN_DEBUG "%8x%s", d[j], - (j%8) == 7 ? "\n" : " "); - printk(KERN_DEBUG ".\n"); - } - } - l += rsize; - if (l >= maxcnt) - l = 0; - rhf_addr = (__le32 *) pd->port_rcvhdrq + - l + dd->ipath_rhf_offset; - if (dd->ipath_flags & IPATH_NODMA_RTAIL) { - u32 seq = ipath_hdrget_seq(rhf_addr); - - if (++pd->port_seq_cnt > 13) - pd->port_seq_cnt = 1; - if (seq != pd->port_seq_cnt) - last = 1; - } else if (l == hdrqtail) - last = 1; - /* - * update head regs on last packet, and every 16 packets. - * Reduce bus traffic, while still trying to prevent - * rcvhdrq overflows, for when the queue is nearly full - */ - if (last || !(i & 0xf)) { - u64 lval = l; - - /* request IBA6120 and 7220 interrupt only on last */ - if (last) - lval |= dd->ipath_rhdrhead_intr_off; - ipath_write_ureg(dd, ur_rcvhdrhead, lval, - pd->port_port); - if (updegr) { - ipath_write_ureg(dd, ur_rcvegrindexhead, - etail, pd->port_port); - updegr = 0; - } - } - } - - if (!dd->ipath_rhdrhead_intr_off && !reloop && - !(dd->ipath_flags & IPATH_NODMA_RTAIL)) { - /* IBA6110 workaround; we can have a race clearing chip - * interrupt with another interrupt about to be delivered, - * and can clear it before it is delivered on the GPIO - * workaround. By doing the extra check here for the - * in-memory tail register updating while we were doing - * earlier packets, we "almost" guarantee we have covered - * that case. 
- */ - u32 hqtail = ipath_get_rcvhdrtail(pd); - if (hqtail != hdrqtail) { - hdrqtail = hqtail; - reloop = 1; /* loop 1 extra time at most */ - goto reloop; - } - } - - pkttot += i; - - pd->port_head = l; - - if (pkttot > ipath_stats.sps_maxpkts_call) - ipath_stats.sps_maxpkts_call = pkttot; - ipath_stats.sps_port0pkts += pkttot; - ipath_stats.sps_avgpkts_call = - ipath_stats.sps_port0pkts / ++totcalls; - -bail:; -} - -/** - * ipath_update_pio_bufs - update shadow copy of the PIO availability map - * @dd: the infinipath device - * - * called whenever our local copy indicates we have run out of send buffers - * NOTE: This can be called from interrupt context by some code - * and from non-interrupt context by ipath_getpiobuf(). - */ - -static void ipath_update_pio_bufs(struct ipath_devdata *dd) -{ - unsigned long flags; - int i; - const unsigned piobregs = (unsigned)dd->ipath_pioavregs; - - /* If the generation (check) bits have changed, then we update the - * busy bit for the corresponding PIO buffer. This algorithm will - * modify positions to the value they already have in some cases - * (i.e., no change), but it's faster than changing only the bits - * that have changed. - * - * We would like to do this atomicly, to avoid spinlocks in the - * critical send path, but that's not really possible, given the - * type of changes, and that this routine could be called on - * multiple cpu's simultaneously, so we lock in this routine only, - * to avoid conflicting updates; all we change is the shadow, and - * it's a single 64 bit memory location, so by definition the update - * is atomic in terms of what other cpu's can see in testing the - * bits. The spin_lock overhead isn't too bad, since it only - * happens when all buffers are in use, so only cpu overhead, not - * latency or bandwidth is affected. 
- */ - if (!dd->ipath_pioavailregs_dma) { - ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n"); - return; - } - if (ipath_debug & __IPATH_VERBDBG) { - /* only if packet debug and verbose */ - volatile __le64 *dma = dd->ipath_pioavailregs_dma; - unsigned long *shadow = dd->ipath_pioavailshadow; - - ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, " - "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx " - "s3=%lx\n", - (unsigned long long) le64_to_cpu(dma[0]), - shadow[0], - (unsigned long long) le64_to_cpu(dma[1]), - shadow[1], - (unsigned long long) le64_to_cpu(dma[2]), - shadow[2], - (unsigned long long) le64_to_cpu(dma[3]), - shadow[3]); - if (piobregs > 4) - ipath_cdbg( - PKT, "2nd group, dma4=%llx shad4=%lx, " - "d5=%llx s5=%lx, d6=%llx s6=%lx, " - "d7=%llx s7=%lx\n", - (unsigned long long) le64_to_cpu(dma[4]), - shadow[4], - (unsigned long long) le64_to_cpu(dma[5]), - shadow[5], - (unsigned long long) le64_to_cpu(dma[6]), - shadow[6], - (unsigned long long) le64_to_cpu(dma[7]), - shadow[7]); - } - spin_lock_irqsave(&ipath_pioavail_lock, flags); - for (i = 0; i < piobregs; i++) { - u64 pchbusy, pchg, piov, pnew; - /* - * Chip Errata: bug 6641; even and odd qwords>3 are swapped - */ - if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) - piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]); - else - piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]); - pchg = dd->ipath_pioavailkernel[i] & - ~(dd->ipath_pioavailshadow[i] ^ piov); - pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT; - if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) { - pnew = dd->ipath_pioavailshadow[i] & ~pchbusy; - pnew |= piov & pchbusy; - dd->ipath_pioavailshadow[i] = pnew; - } - } - spin_unlock_irqrestore(&ipath_pioavail_lock, flags); -} - -/* - * used to force update of pioavailshadow if we can't get a pio buffer. - * Needed primarily due to exitting freeze mode after recovering - * from errors. Done lazily, because it's safer (known to not - * be writing pio buffers). 
- */ -static void ipath_reset_availshadow(struct ipath_devdata *dd) -{ - int i, im; - unsigned long flags; - - spin_lock_irqsave(&ipath_pioavail_lock, flags); - for (i = 0; i < dd->ipath_pioavregs; i++) { - u64 val, oldval; - /* deal with 6110 chip bug on high register #s */ - im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ? - i ^ 1 : i; - val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]); - /* - * busy out the buffers not in the kernel avail list, - * without changing the generation bits. - */ - oldval = dd->ipath_pioavailshadow[i]; - dd->ipath_pioavailshadow[i] = val | - ((~dd->ipath_pioavailkernel[i] << - INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) & - 0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */ - if (oldval != dd->ipath_pioavailshadow[i]) - ipath_dbg("shadow[%d] was %Lx, now %lx\n", - i, (unsigned long long) oldval, - dd->ipath_pioavailshadow[i]); - } - spin_unlock_irqrestore(&ipath_pioavail_lock, flags); -} - -/** - * ipath_setrcvhdrsize - set the receive header size - * @dd: the infinipath device - * @rhdrsize: the receive header size - * - * called from user init code, and also layered driver init - */ -int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize) -{ - int ret = 0; - - if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) { - if (dd->ipath_rcvhdrsize != rhdrsize) { - dev_info(&dd->pcidev->dev, - "Error: can't set protocol header " - "size %u, already %u\n", - rhdrsize, dd->ipath_rcvhdrsize); - ret = -EAGAIN; - } else - ipath_cdbg(VERBOSE, "Reuse same protocol header " - "size %u\n", dd->ipath_rcvhdrsize); - } else if (rhdrsize > (dd->ipath_rcvhdrentsize - - (sizeof(u64) / sizeof(u32)))) { - ipath_dbg("Error: can't set protocol header size %u " - "(> max %u)\n", rhdrsize, - dd->ipath_rcvhdrentsize - - (u32) (sizeof(u64) / sizeof(u32))); - ret = -EOVERFLOW; - } else { - dd->ipath_flags |= IPATH_RCVHDRSZ_SET; - dd->ipath_rcvhdrsize = rhdrsize; - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize, - dd->ipath_rcvhdrsize); - 
ipath_cdbg(VERBOSE, "Set protocol header size to %u\n", - dd->ipath_rcvhdrsize); - } - return ret; -} - -/* - * debugging code and stats updates if no pio buffers available. - */ -static noinline void no_pio_bufs(struct ipath_devdata *dd) -{ - unsigned long *shadow = dd->ipath_pioavailshadow; - __le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma; - - dd->ipath_upd_pio_shadow = 1; - - /* - * not atomic, but if we lose a stat count in a while, that's OK - */ - ipath_stats.sps_nopiobufs++; - if (!(++dd->ipath_consec_nopiobuf % 100000)) { - ipath_force_pio_avail_update(dd); /* at start */ - ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: " - "%llx %llx %llx %llx\n" - "ipath shadow: %lx %lx %lx %lx\n", - dd->ipath_consec_nopiobuf, - (unsigned long)get_cycles(), - (unsigned long long) le64_to_cpu(dma[0]), - (unsigned long long) le64_to_cpu(dma[1]), - (unsigned long long) le64_to_cpu(dma[2]), - (unsigned long long) le64_to_cpu(dma[3]), - shadow[0], shadow[1], shadow[2], shadow[3]); - /* - * 4 buffers per byte, 4 registers above, cover rest - * below - */ - if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > - (sizeof(shadow[0]) * 4 * 4)) - ipath_dbg("2nd group: dmacopy: " - "%llx %llx %llx %llx\n" - "ipath shadow: %lx %lx %lx %lx\n", - (unsigned long long)le64_to_cpu(dma[4]), - (unsigned long long)le64_to_cpu(dma[5]), - (unsigned long long)le64_to_cpu(dma[6]), - (unsigned long long)le64_to_cpu(dma[7]), - shadow[4], shadow[5], shadow[6], shadow[7]); - - /* at end, so update likely happened */ - ipath_reset_availshadow(dd); - } -} - -/* - * common code for normal driver pio buffer allocation, and reserved - * allocation. - * - * do appropriate marking as busy, etc. - * returns buffer number if one found (>=0), negative number is error. 
- */ -static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd, - u32 *pbufnum, u32 first, u32 last, u32 firsti) -{ - int i, j, updated = 0; - unsigned piobcnt; - unsigned long flags; - unsigned long *shadow = dd->ipath_pioavailshadow; - u32 __iomem *buf; - - piobcnt = last - first; - if (dd->ipath_upd_pio_shadow) { - /* - * Minor optimization. If we had no buffers on last call, - * start out by doing the update; continue and do scan even - * if no buffers were updated, to be paranoid - */ - ipath_update_pio_bufs(dd); - updated++; - i = first; - } else - i = firsti; -rescan: - /* - * while test_and_set_bit() is atomic, we do that and then the - * change_bit(), and the pair is not. See if this is the cause - * of the remaining armlaunch errors. - */ - spin_lock_irqsave(&ipath_pioavail_lock, flags); - for (j = 0; j < piobcnt; j++, i++) { - if (i >= last) - i = first; - if (__test_and_set_bit((2 * i) + 1, shadow)) - continue; - /* flip generation bit */ - __change_bit(2 * i, shadow); - break; - } - spin_unlock_irqrestore(&ipath_pioavail_lock, flags); - - if (j == piobcnt) { - if (!updated) { - /* - * first time through; shadow exhausted, but may be - * buffers available, try an update and then rescan. - */ - ipath_update_pio_bufs(dd); - updated++; - i = first; - goto rescan; - } else if (updated == 1 && piobcnt <= - ((dd->ipath_sendctrl - >> INFINIPATH_S_UPDTHRESH_SHIFT) & - INFINIPATH_S_UPDTHRESH_MASK)) { - /* - * for chips supporting and using the update - * threshold we need to force an update of the - * in-memory copy if the count is less than the - * thershold, then check one more time. 
- */ - ipath_force_pio_avail_update(dd); - ipath_update_pio_bufs(dd); - updated++; - i = first; - goto rescan; - } - - no_pio_bufs(dd); - buf = NULL; - } else { - if (i < dd->ipath_piobcnt2k) - buf = (u32 __iomem *) (dd->ipath_pio2kbase + - i * dd->ipath_palign); - else - buf = (u32 __iomem *) - (dd->ipath_pio4kbase + - (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign); - if (pbufnum) - *pbufnum = i; - } - - return buf; -} - -/** - * ipath_getpiobuf - find an available pio buffer - * @dd: the infinipath device - * @plen: the size of the PIO buffer needed in 32-bit words - * @pbufnum: the buffer number is placed here - */ -u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum) -{ - u32 __iomem *buf; - u32 pnum, nbufs; - u32 first, lasti; - - if (plen + 1 >= IPATH_SMALLBUF_DWORDS) { - first = dd->ipath_piobcnt2k; - lasti = dd->ipath_lastpioindexl; - } else { - first = 0; - lasti = dd->ipath_lastpioindex; - } - nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; - buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti); - - if (buf) { - /* - * Set next starting place. It's just an optimization, - * it doesn't matter who wins on this, so no locking - */ - if (plen + 1 >= IPATH_SMALLBUF_DWORDS) - dd->ipath_lastpioindexl = pnum + 1; - else - dd->ipath_lastpioindex = pnum + 1; - if (dd->ipath_upd_pio_shadow) - dd->ipath_upd_pio_shadow = 0; - if (dd->ipath_consec_nopiobuf) - dd->ipath_consec_nopiobuf = 0; - ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n", - pnum, (pnum < dd->ipath_piobcnt2k) ? 
2 : 4, buf); - if (pbufnum) - *pbufnum = pnum; - - } - return buf; -} - -/** - * ipath_chg_pioavailkernel - change which send buffers are available for kernel - * @dd: the infinipath device - * @start: the starting send buffer number - * @len: the number of send buffers - * @avail: true if the buffers are available for kernel use, false otherwise - */ -void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start, - unsigned len, int avail) -{ - unsigned long flags; - unsigned end, cnt = 0; - - /* There are two bits per send buffer (busy and generation) */ - start *= 2; - end = start + len * 2; - - spin_lock_irqsave(&ipath_pioavail_lock, flags); - /* Set or clear the busy bit in the shadow. */ - while (start < end) { - if (avail) { - unsigned long dma; - int i, im; - /* - * the BUSY bit will never be set, because we disarm - * the user buffers before we hand them back to the - * kernel. We do have to make sure the generation - * bit is set correctly in shadow, since it could - * have changed many times while allocated to user. - * We can't use the bitmap functions on the full - * dma array because it is always little-endian, so - * we have to flip to host-order first. - * BITS_PER_LONG is slightly wrong, since it's - * always 64 bits per register in chip... - * We only work on 64 bit kernels, so that's OK. - */ - /* deal with 6110 chip bug on high register #s */ - i = start / BITS_PER_LONG; - im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ? 
- i ^ 1 : i; - __clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT - + start, dd->ipath_pioavailshadow); - dma = (unsigned long) le64_to_cpu( - dd->ipath_pioavailregs_dma[im]); - if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT - + start) % BITS_PER_LONG, &dma)) - __set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT - + start, dd->ipath_pioavailshadow); - else - __clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT - + start, dd->ipath_pioavailshadow); - __set_bit(start, dd->ipath_pioavailkernel); - } else { - __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT, - dd->ipath_pioavailshadow); - __clear_bit(start, dd->ipath_pioavailkernel); - } - start += 2; - } - - if (dd->ipath_pioupd_thresh) { - end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); - cnt = bitmap_weight(dd->ipath_pioavailkernel, end); - } - spin_unlock_irqrestore(&ipath_pioavail_lock, flags); - - /* - * When moving buffers from kernel to user, if number assigned to - * the user is less than the pio update threshold, and threshold - * is supported (cnt was computed > 0), drop the update threshold - * so we update at least once per allocated number of buffers. - * In any case, if the kernel buffers are less than the threshold, - * drop the threshold. We don't bother increasing it, having once - * decreased it, since it would typically just cycle back and forth. - * If we don't decrease below buffers in use, we can wait a long - * time for an update, until some other context uses PIO buffers. 
- */ - if (!avail && len < cnt) - cnt = len; - if (cnt < dd->ipath_pioupd_thresh) { - dd->ipath_pioupd_thresh = cnt; - ipath_dbg("Decreased pio update threshold to %u\n", - dd->ipath_pioupd_thresh); - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK - << INFINIPATH_S_UPDTHRESH_SHIFT); - dd->ipath_sendctrl |= dd->ipath_pioupd_thresh - << INFINIPATH_S_UPDTHRESH_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - } -} - -/** - * ipath_create_rcvhdrq - create a receive header queue - * @dd: the infinipath device - * @pd: the port data - * - * this must be contiguous memory (from an i/o perspective), and must be - * DMA'able (which means for some systems, it will go through an IOMMU, - * or be forced into a low address range). - */ -int ipath_create_rcvhdrq(struct ipath_devdata *dd, - struct ipath_portdata *pd) -{ - int ret = 0; - - if (!pd->port_rcvhdrq) { - dma_addr_t phys_hdrqtail; - gfp_t gfp_flags = GFP_USER | __GFP_COMP; - int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * - sizeof(u32), PAGE_SIZE); - - pd->port_rcvhdrq = dma_alloc_coherent( - &dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys, - gfp_flags); - - if (!pd->port_rcvhdrq) { - ipath_dev_err(dd, "attempt to allocate %d bytes " - "for port %u rcvhdrq failed\n", - amt, pd->port_port); - ret = -ENOMEM; - goto bail; - } - - if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) { - pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent( - &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, - GFP_KERNEL); - if (!pd->port_rcvhdrtail_kvaddr) { - ipath_dev_err(dd, "attempt to allocate 1 page " - "for port %u rcvhdrqtailaddr " - "failed\n", pd->port_port); - ret = -ENOMEM; - dma_free_coherent(&dd->pcidev->dev, amt, - pd->port_rcvhdrq, - pd->port_rcvhdrq_phys); - pd->port_rcvhdrq = NULL; - goto bail; - } - pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail; - ipath_cdbg(VERBOSE, "port %d 
hdrtailaddr, %llx " - "physical\n", pd->port_port, - (unsigned long long) phys_hdrqtail); - } - - pd->port_rcvhdrq_size = amt; - - ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu " - "for port %u rcvhdr Q\n", - amt >> PAGE_SHIFT, pd->port_rcvhdrq, - (unsigned long) pd->port_rcvhdrq_phys, - (unsigned long) pd->port_rcvhdrq_size, - pd->port_port); - } - else - ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; " - "hdrtailaddr@%p %llx physical\n", - pd->port_port, pd->port_rcvhdrq, - (unsigned long long) pd->port_rcvhdrq_phys, - pd->port_rcvhdrtail_kvaddr, (unsigned long long) - pd->port_rcvhdrqtailaddr_phys); - - /* clear for security and sanity on each use */ - memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size); - if (pd->port_rcvhdrtail_kvaddr) - memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE); - - /* - * tell chip each time we init it, even if we are re-using previous - * memory (we zero the register at process close) - */ - ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr, - pd->port_port, pd->port_rcvhdrqtailaddr_phys); - ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr, - pd->port_port, pd->port_rcvhdrq_phys); - -bail: - return ret; -} - - -/* - * Flush all sends that might be in the ready to send state, as well as any - * that are in the process of being sent. Used whenever we need to be - * sure the send side is idle. Cleans up all buffer state by canceling - * all pio buffers, and issuing an abort, which cleans up anything in the - * launch fifo. The cancel is superfluous on some chip versions, but - * it's safer to always do it. - * PIOAvail bits are updated by the chip as if normal send had happened. 
- */ -void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) -{ - unsigned long flags; - - if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) { - ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n"); - goto bail; - } - /* - * If we have SDMA, and it's not disabled, we have to kick off the - * abort state machine, provided we aren't already aborting. - * If we are in the process of aborting SDMA (!DISABLED, but ABORTING), - * we skip the rest of this routine. It is already "in progress" - */ - if (dd->ipath_flags & IPATH_HAS_SEND_DMA) { - int skip_cancel; - unsigned long *statp = &dd->ipath_sdma_status; - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - skip_cancel = - test_and_set_bit(IPATH_SDMA_ABORTING, statp) - && !test_bit(IPATH_SDMA_DISABLED, statp); - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - if (skip_cancel) - goto bail; - } - - ipath_dbg("Cancelling all in-progress send buffers\n"); - - /* skip armlaunch errs for a while */ - dd->ipath_lastcancel = jiffies + HZ / 2; - - /* - * The abort bit is auto-clearing. We also don't want pioavail - * update happening during this, and we don't want any other - * sends going out, so turn those off for the duration. We read - * the scratch register to be sure that cancels and the abort - * have taken effect in the chip. 
Otherwise two parts are same - * as ipath_force_pio_avail_update() - */ - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD - | INFINIPATH_S_PIOENABLE); - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl | INFINIPATH_S_ABORT); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - - /* disarm all send buffers */ - ipath_disarm_piobufs(dd, 0, - dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); - - if (dd->ipath_flags & IPATH_HAS_SEND_DMA) - set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status); - - if (restore_sendctrl) { - /* else done by caller later if needed */ - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD | - INFINIPATH_S_PIOENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - /* and again, be sure all have hit the chip */ - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - } - - if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) && - !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) && - test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) { - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - /* only wait so long for intr */ - dd->ipath_sdma_abort_intr_timeout = jiffies + HZ; - dd->ipath_sdma_reset_wait = 200; - if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) - tasklet_hi_schedule(&dd->ipath_sdma_abort_task); - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - } -bail:; -} - -/* - * Force an update of in-memory copy of the pioavail registers, when - * needed for any of a variety of reasons. We read the scratch register - * to make it highly likely that the update will have happened by the - * time we return. If already off (as in cancel_sends above), this - * routine is a nop, on the assumption that the caller will "do the - * right thing". 
- */ -void ipath_force_pio_avail_update(struct ipath_devdata *dd) -{ - unsigned long flags; - - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) { - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - } - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); -} - -static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd, - int linitcmd) -{ - u64 mod_wd; - static const char *what[4] = { - [0] = "NOP", - [INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN", - [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED", - [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE" - }; - - if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) { - /* - * If we are told to disable, note that so link-recovery - * code does not attempt to bring us back up. - */ - preempt_disable(); - dd->ipath_flags |= IPATH_IB_LINK_DISABLED; - preempt_enable(); - } else if (linitcmd) { - /* - * Any other linkinitcmd will lead to LINKDOWN and then - * to INIT (if all is well), so clear flag to let - * link-recovery code attempt to bring us back up. 
- */ - preempt_disable(); - dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED; - preempt_enable(); - } - - mod_wd = (linkcmd << dd->ibcc_lc_shift) | - (linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT); - ipath_cdbg(VERBOSE, - "Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n", - dd->ipath_unit, what[linkcmd], linitcmd, - ipath_ibcstatus_str[ipath_ib_linktrstate(dd, - ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]); - - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl | mod_wd); - /* read from chip so write is flushed */ - (void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); -} - -int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) -{ - u32 lstate; - int ret; - - switch (newstate) { - case IPATH_IB_LINKDOWN_ONLY: - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINKDOWN: - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, - INFINIPATH_IBCC_LINKINITCMD_POLL); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINKDOWN_SLEEP: - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, - INFINIPATH_IBCC_LINKINITCMD_SLEEP); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINKDOWN_DISABLE: - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, - INFINIPATH_IBCC_LINKINITCMD_DISABLE); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINKARM: - if (dd->ipath_flags & IPATH_LINKARMED) { - ret = 0; - goto bail; - } - if (!(dd->ipath_flags & - (IPATH_LINKINIT | IPATH_LINKACTIVE))) { - ret = -EINVAL; - goto bail; - } - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0); - - /* - * Since the port can transition to ACTIVE by receiving - * a non VL 15 packet, wait for either state. 
- */ - lstate = IPATH_LINKARMED | IPATH_LINKACTIVE; - break; - - case IPATH_IB_LINKACTIVE: - if (dd->ipath_flags & IPATH_LINKACTIVE) { - ret = 0; - goto bail; - } - if (!(dd->ipath_flags & IPATH_LINKARMED)) { - ret = -EINVAL; - goto bail; - } - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0); - lstate = IPATH_LINKACTIVE; - break; - - case IPATH_IB_LINK_LOOPBACK: - dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n"); - dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - - /* turn heartbeat off, as it causes loopback to fail */ - dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, - IPATH_IB_HRTBT_OFF); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINK_EXTERNAL: - dev_info(&dd->pcidev->dev, - "Disabling IB local loopback (normal)\n"); - dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, - IPATH_IB_HRTBT_ON); - dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - /* don't wait */ - ret = 0; - goto bail; - - /* - * Heartbeat can be explicitly enabled by the user via - * "hrtbt_enable" "file", and if disabled, trying to enable here - * will have no effect. Implicit changes (heartbeat off when - * loopback on, and vice versa) are included to ease testing. - */ - case IPATH_IB_LINK_HRTBT: - ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, - IPATH_IB_HRTBT_ON); - goto bail; - - case IPATH_IB_LINK_NO_HRTBT: - ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, - IPATH_IB_HRTBT_OFF); - goto bail; - - default: - ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); - ret = -EINVAL; - goto bail; - } - ret = ipath_wait_linkstate(dd, lstate, 2000); - -bail: - return ret; -} - -/** - * ipath_set_mtu - set the MTU - * @dd: the infinipath device - * @arg: the new MTU - * - * we can handle "any" incoming size, the issue here is whether we - * need to restrict our outgoing size. 
For now, we don't do any - * sanity checking on this, and we don't deal with what happens to - * programs that are already running when the size changes. - * NOTE: changing the MTU will usually cause the IBC to go back to - * link INIT state... - */ -int ipath_set_mtu(struct ipath_devdata *dd, u16 arg) -{ - u32 piosize; - int changed = 0; - int ret; - - /* - * mtu is IB data payload max. It's the largest power of 2 less - * than piosize (or even larger, since it only really controls the - * largest we can receive; we can send the max of the mtu and - * piosize). We check that it's one of the valid IB sizes. - */ - if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 && - (arg != 4096 || !ipath_mtu4096)) { - ipath_dbg("Trying to set invalid mtu %u, failing\n", arg); - ret = -EINVAL; - goto bail; - } - if (dd->ipath_ibmtu == arg) { - ret = 0; /* same as current */ - goto bail; - } - - piosize = dd->ipath_ibmaxlen; - dd->ipath_ibmtu = arg; - - if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) { - /* Only if it's not the initial value (or reset to it) */ - if (piosize != dd->ipath_init_ibmaxlen) { - if (arg > piosize && arg <= dd->ipath_init_ibmaxlen) - piosize = dd->ipath_init_ibmaxlen; - dd->ipath_ibmaxlen = piosize; - changed = 1; - } - } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) { - piosize = arg + IPATH_PIO_MAXIBHDR; - ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x " - "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize, - arg); - dd->ipath_ibmaxlen = piosize; - changed = 1; - } - - if (changed) { - u64 ibc = dd->ipath_ibcctrl, ibdw; - /* - * update our housekeeping variables, and set IBC max - * size, same as init code; max IBC is max we allow in - * buffer, less the qword pbc, plus 1 for ICRC, in dwords - */ - dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32); - ibdw = (dd->ipath_ibmaxlen >> 2) + 1; - ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK << - dd->ibcc_mpl_shift); - ibc |= ibdw << dd->ibcc_mpl_shift; - dd->ipath_ibcctrl = ibc; - 
ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - dd->ipath_f_tidtemplate(dd); - } - - ret = 0; - -bail: - return ret; -} - -int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc) -{ - dd->ipath_lid = lid; - dd->ipath_lmc = lmc; - - dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid | - (~((1U << lmc) - 1)) << 16); - - dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid); - - return 0; -} - - -/** - * ipath_write_kreg_port - write a device's per-port 64-bit kernel register - * @dd: the infinipath device - * @regno: the register number to write - * @port: the port containing the register - * @value: the value to write - * - * Registers that vary with the chip implementation constants (port) - * use this routine. - */ -void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno, - unsigned port, u64 value) -{ - u16 where; - - if (port < dd->ipath_portcnt && - (regno == dd->ipath_kregs->kr_rcvhdraddr || - regno == dd->ipath_kregs->kr_rcvhdrtailaddr)) - where = regno + port; - else - where = -1; - - ipath_write_kreg(dd, where, value); -} - -/* - * Following deal with the "obviously simple" task of overriding the state - * of the LEDS, which normally indicate link physical and logical status. - * The complications arise in dealing with different hardware mappings - * and the board-dependent routine being called from interrupts. - * and then there's the requirement to _flash_ them. 
- */ -#define LED_OVER_FREQ_SHIFT 8 -#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT) -/* Below is "non-zero" to force override, but both actual LEDs are off */ -#define LED_OVER_BOTH_OFF (8) - -static void ipath_run_led_override(unsigned long opaque) -{ - struct ipath_devdata *dd = (struct ipath_devdata *)opaque; - int timeoff; - int pidx; - u64 lstate, ltstate, val; - - if (!(dd->ipath_flags & IPATH_INITTED)) - return; - - pidx = dd->ipath_led_override_phase++ & 1; - dd->ipath_led_override = dd->ipath_led_override_vals[pidx]; - timeoff = dd->ipath_led_override_timeoff; - - /* - * below potentially restores the LED values per current status, - * should also possibly setup the traffic-blink register, - * but leave that to per-chip functions. - */ - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); - ltstate = ipath_ib_linktrstate(dd, val); - lstate = ipath_ib_linkstate(dd, val); - - dd->ipath_f_setextled(dd, lstate, ltstate); - mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff); -} - -void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val) -{ - int timeoff, freq; - - if (!(dd->ipath_flags & IPATH_INITTED)) - return; - - /* First check if we are blinking. If not, use 1HZ polling */ - timeoff = HZ; - freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT; - - if (freq) { - /* For blink, set each phase from one nybble of val */ - dd->ipath_led_override_vals[0] = val & 0xF; - dd->ipath_led_override_vals[1] = (val >> 4) & 0xF; - timeoff = (HZ << 4)/freq; - } else { - /* Non-blink set both phases the same. */ - dd->ipath_led_override_vals[0] = val & 0xF; - dd->ipath_led_override_vals[1] = val & 0xF; - } - dd->ipath_led_override_timeoff = timeoff; - - /* - * If the timer has not already been started, do so. Use a "quick" - * timeout so the function will be called soon, to look at our request. 
- */ - if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) { - /* Need to start timer */ - init_timer(&dd->ipath_led_override_timer); - dd->ipath_led_override_timer.function = - ipath_run_led_override; - dd->ipath_led_override_timer.data = (unsigned long) dd; - dd->ipath_led_override_timer.expires = jiffies + 1; - add_timer(&dd->ipath_led_override_timer); - } else - atomic_dec(&dd->ipath_led_override_timer_active); -} - -/** - * ipath_shutdown_device - shut down a device - * @dd: the infinipath device - * - * This is called to make the device quiet when we are about to - * unload the driver, and also when the device is administratively - * disabled. It does not free any data structures. - * Everything it does has to be setup again by ipath_init_chip(dd,1) - */ -void ipath_shutdown_device(struct ipath_devdata *dd) -{ - unsigned long flags; - - ipath_dbg("Shutting down the device\n"); - - ipath_hol_up(dd); /* make sure user processes aren't suspended */ - - dd->ipath_flags |= IPATH_LINKUNK; - dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN | - IPATH_LINKINIT | IPATH_LINKARMED | - IPATH_LINKACTIVE); - *dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF | - IPATH_STATUS_IB_READY); - - /* mask interrupts, but not errors */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); - - dd->ipath_rcvctrl = 0; - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - - if (dd->ipath_flags & IPATH_HAS_SEND_DMA) - teardown_sdma(dd); - - /* - * gracefully stop all sends allowing any in progress to trickle out - * first. - */ - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl = 0; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); - /* flush it */ - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - - /* - * enough for anything that's going to trickle out to have actually - * done so. 
- */ - udelay(5); - - dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */ - - ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE); - ipath_cancel_sends(dd, 0); - - /* - * we are shutting down, so tell components that care. We don't do - * this on just a link state change, much like ethernet, a cable - * unplug, etc. doesn't change driver state - */ - signal_ib_event(dd, IB_EVENT_PORT_ERR); - - /* disable IBC */ - dd->ipath_control &= ~INFINIPATH_C_LINKENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - dd->ipath_control | INFINIPATH_C_FREEZEMODE); - - /* - * clear SerdesEnable and turn the leds off; do this here because - * we are unloading, so don't count on interrupts to move along - * Turn the LEDs off explicitly for the same reason. - */ - dd->ipath_f_quiet_serdes(dd); - - /* stop all the timers that might still be running */ - del_timer_sync(&dd->ipath_hol_timer); - if (dd->ipath_stats_timer_active) { - del_timer_sync(&dd->ipath_stats_timer); - dd->ipath_stats_timer_active = 0; - } - if (dd->ipath_intrchk_timer.data) { - del_timer_sync(&dd->ipath_intrchk_timer); - dd->ipath_intrchk_timer.data = 0; - } - if (atomic_read(&dd->ipath_led_override_timer_active)) { - del_timer_sync(&dd->ipath_led_override_timer); - atomic_set(&dd->ipath_led_override_timer_active, 0); - } - - /* - * clear all interrupts and errors, so that the next time the driver - * is loaded or device is enabled, we know that whatever is set - * happened while we were unloaded - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, - ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED); - ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); - - ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n"); - ipath_update_eeprom_log(dd); -} - -/** - * ipath_free_pddata - free a port's allocated data - * @dd: the infinipath device - * @pd: the portdata structure - * - * free up any allocated data for a port - * This 
should not touch anything that would affect a simultaneous - * re-allocation of port data, because it is called after ipath_mutex - * is released (and can be called from reinit as well). - * It should never change any chip state, or global driver state. - * (The only exception to global state is freeing the port0 port0_skbs.) - */ -void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd) -{ - if (!pd) - return; - - if (pd->port_rcvhdrq) { - ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p " - "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq, - (unsigned long) pd->port_rcvhdrq_size); - dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size, - pd->port_rcvhdrq, pd->port_rcvhdrq_phys); - pd->port_rcvhdrq = NULL; - if (pd->port_rcvhdrtail_kvaddr) { - dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, - pd->port_rcvhdrtail_kvaddr, - pd->port_rcvhdrqtailaddr_phys); - pd->port_rcvhdrtail_kvaddr = NULL; - } - } - if (pd->port_port && pd->port_rcvegrbuf) { - unsigned e; - - for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) { - void *base = pd->port_rcvegrbuf[e]; - size_t size = pd->port_rcvegrbuf_size; - - ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), " - "chunk %u/%u\n", base, - (unsigned long) size, - e, pd->port_rcvegrbuf_chunks); - dma_free_coherent(&dd->pcidev->dev, size, - base, pd->port_rcvegrbuf_phys[e]); - } - kfree(pd->port_rcvegrbuf); - pd->port_rcvegrbuf = NULL; - kfree(pd->port_rcvegrbuf_phys); - pd->port_rcvegrbuf_phys = NULL; - pd->port_rcvegrbuf_chunks = 0; - } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) { - unsigned e; - struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo; - - dd->ipath_port0_skbinfo = NULL; - ipath_cdbg(VERBOSE, "free closed port %d " - "ipath_port0_skbinfo @ %p\n", pd->port_port, - skbinfo); - for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++) - if (skbinfo[e].skb) { - pci_unmap_single(dd->pcidev, skbinfo[e].phys, - dd->ipath_ibmaxlen, - PCI_DMA_FROMDEVICE); - dev_kfree_skb(skbinfo[e].skb); - } - vfree(skbinfo); 
- } - kfree(pd->port_tid_pg_list); - vfree(pd->subport_uregbase); - vfree(pd->subport_rcvegrbuf); - vfree(pd->subport_rcvhdr_base); - kfree(pd); -} - -static int __init infinipath_init(void) -{ - int ret; - - if (ipath_debug & __IPATH_DBG) - printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); - - /* - * These must be called before the driver is registered with - * the PCI subsystem. - */ - idr_init(&unit_table); - - ret = pci_register_driver(&ipath_driver); - if (ret < 0) { - printk(KERN_ERR IPATH_DRV_NAME - ": Unable to register driver: error %d\n", -ret); - goto bail_unit; - } - - ret = ipath_init_ipathfs(); - if (ret < 0) { - printk(KERN_ERR IPATH_DRV_NAME ": Unable to create " - "ipathfs: error %d\n", -ret); - goto bail_pci; - } - - goto bail; - -bail_pci: - pci_unregister_driver(&ipath_driver); - -bail_unit: - idr_destroy(&unit_table); - -bail: - return ret; -} - -static void __exit infinipath_cleanup(void) -{ - ipath_exit_ipathfs(); - - ipath_cdbg(VERBOSE, "Unregistering pci driver\n"); - pci_unregister_driver(&ipath_driver); - - idr_destroy(&unit_table); -} - -/** - * ipath_reset_device - reset the chip if possible - * @unit: the device to reset - * - * Whether or not reset is successful, we attempt to re-initialize the chip - * (that is, much like a driver unload/reload). We clear the INITTED flag - * so that the various entry points will fail until we reinitialize. 
For - * now, we only allow this if no user ports are open that use chip resources - */ -int ipath_reset_device(int unit) -{ - int ret, i; - struct ipath_devdata *dd = ipath_lookup(unit); - unsigned long flags; - - if (!dd) { - ret = -ENODEV; - goto bail; - } - - if (atomic_read(&dd->ipath_led_override_timer_active)) { - /* Need to stop LED timer, _then_ shut off LEDs */ - del_timer_sync(&dd->ipath_led_override_timer); - atomic_set(&dd->ipath_led_override_timer_active, 0); - } - - /* Shut off LEDs after we are sure timer is not running */ - dd->ipath_led_override = LED_OVER_BOTH_OFF; - dd->ipath_f_setextled(dd, 0, 0); - - dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit); - - if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) { - dev_info(&dd->pcidev->dev, "Invalid unit number %u or " - "not initialized or not present\n", unit); - ret = -ENXIO; - goto bail; - } - - spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); - if (dd->ipath_pd) - for (i = 1; i < dd->ipath_cfgports; i++) { - if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) - continue; - spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); - ipath_dbg("unit %u port %d is in use " - "(PID %u cmd %s), can't reset\n", - unit, i, - pid_nr(dd->ipath_pd[i]->port_pid), - dd->ipath_pd[i]->port_comm); - ret = -EBUSY; - goto bail; - } - spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); - - if (dd->ipath_flags & IPATH_HAS_SEND_DMA) - teardown_sdma(dd); - - dd->ipath_flags &= ~IPATH_INITTED; - ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); - ret = dd->ipath_f_reset(dd); - if (ret == 1) { - ipath_dbg("Reinitializing unit %u after reset attempt\n", - unit); - ret = ipath_init_chip(dd, 1); - } else - ret = -EAGAIN; - if (ret) - ipath_dev_err(dd, "Reinitialize unit %u after " - "reset failed with %d\n", unit, ret); - else - dev_info(&dd->pcidev->dev, "Reinitialized unit %u after " - "resetting\n", unit); - -bail: - return ret; -} - -/* - * send a signal to all the processes that have 
the driver open - * through the normal interfaces (i.e., everything other than diags - * interface). Returns number of signalled processes. - */ -static int ipath_signal_procs(struct ipath_devdata *dd, int sig) -{ - int i, sub, any = 0; - struct pid *pid; - unsigned long flags; - - if (!dd->ipath_pd) - return 0; - - spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); - for (i = 1; i < dd->ipath_cfgports; i++) { - if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) - continue; - pid = dd->ipath_pd[i]->port_pid; - if (!pid) - continue; - - dev_info(&dd->pcidev->dev, "context %d in use " - "(PID %u), sending signal %d\n", - i, pid_nr(pid), sig); - kill_pid(pid, sig, 1); - any++; - for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) { - pid = dd->ipath_pd[i]->port_subpid[sub]; - if (!pid) - continue; - dev_info(&dd->pcidev->dev, "sub-context " - "%d:%d in use (PID %u), sending " - "signal %d\n", i, sub, pid_nr(pid), sig); - kill_pid(pid, sig, 1); - any++; - } - } - spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); - return any; -} - -static void ipath_hol_signal_down(struct ipath_devdata *dd) -{ - if (ipath_signal_procs(dd, SIGSTOP)) - ipath_dbg("Stopped some processes\n"); - ipath_cancel_sends(dd, 1); -} - - -static void ipath_hol_signal_up(struct ipath_devdata *dd) -{ - if (ipath_signal_procs(dd, SIGCONT)) - ipath_dbg("Continued some processes\n"); -} - -/* - * link is down, stop any users processes, and flush pending sends - * to prevent HoL blocking, then start the HoL timer that - * periodically continues, then stop procs, so they can detect - * link down if they want, and do something about it. - * Timer may already be running, so use mod_timer, not add_timer. 
- */ -void ipath_hol_down(struct ipath_devdata *dd) -{ - dd->ipath_hol_state = IPATH_HOL_DOWN; - ipath_hol_signal_down(dd); - dd->ipath_hol_next = IPATH_HOL_DOWNCONT; - dd->ipath_hol_timer.expires = jiffies + - msecs_to_jiffies(ipath_hol_timeout_ms); - mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires); -} - -/* - * link is up, continue any user processes, and ensure timer - * is a nop, if running. Let timer keep running, if set; it - * will nop when it sees the link is up - */ -void ipath_hol_up(struct ipath_devdata *dd) -{ - ipath_hol_signal_up(dd); - dd->ipath_hol_state = IPATH_HOL_UP; -} - -/* - * toggle the running/not running state of user proceses - * to prevent HoL blocking on chip resources, but still allow - * user processes to do link down special case handling. - * Should only be called via the timer - */ -void ipath_hol_event(unsigned long opaque) -{ - struct ipath_devdata *dd = (struct ipath_devdata *)opaque; - - if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP - && dd->ipath_hol_state != IPATH_HOL_UP) { - dd->ipath_hol_next = IPATH_HOL_DOWNCONT; - ipath_dbg("Stopping processes\n"); - ipath_hol_signal_down(dd); - } else { /* may do "extra" if also in ipath_hol_up() */ - dd->ipath_hol_next = IPATH_HOL_DOWNSTOP; - ipath_dbg("Continuing processes\n"); - ipath_hol_signal_up(dd); - } - if (dd->ipath_hol_state == IPATH_HOL_UP) - ipath_dbg("link's up, don't resched timer\n"); - else { - dd->ipath_hol_timer.expires = jiffies + - msecs_to_jiffies(ipath_hol_timeout_ms); - mod_timer(&dd->ipath_hol_timer, - dd->ipath_hol_timer.expires); - } -} - -int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv) -{ - u64 val; - - if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK) - return -1; - if (dd->ipath_rx_pol_inv != new_pol_inv) { - dd->ipath_rx_pol_inv = new_pol_inv; - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); - val &= ~(INFINIPATH_XGXS_RX_POL_MASK << - INFINIPATH_XGXS_RX_POL_SHIFT); - val |= ((u64)dd->ipath_rx_pol_inv) << - 
INFINIPATH_XGXS_RX_POL_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); - } - return 0; -} - -/* - * Disable and enable the armlaunch error. Used for PIO bandwidth testing on - * the 7220, which is count-based, rather than trigger-based. Safe for the - * driver check, since it's at init. Not completely safe when used for - * user-mode checking, since some error checking can be lost, but not - * particularly risky, and only has problematic side-effects in the face of - * very buggy user code. There is no reference counting, but that's also - * fine, given the intended use. - */ -void ipath_enable_armlaunch(struct ipath_devdata *dd) -{ - dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH; - ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, - INFINIPATH_E_SPIOARMLAUNCH); - dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH; - ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, - dd->ipath_errormask); -} - -void ipath_disable_armlaunch(struct ipath_devdata *dd) -{ - /* so don't re-enable if already set */ - dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH; - dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH; - ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, - dd->ipath_errormask); -} - -module_init(infinipath_init); -module_exit(infinipath_cleanup); diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c deleted file mode 100644 index fc7181985..000000000 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ /dev/null @@ -1,1183 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/delay.h> -#include <linux/pci.h> -#include <linux/vmalloc.h> - -#include "ipath_kernel.h" - -/* - * InfiniPath I2C driver for a serial eeprom. This is not a generic - * I2C interface. For a start, the device we're using (Atmel AT24C11) - * doesn't work like a regular I2C device. It looks like one - * electrically, but not logically. Normal I2C devices have a single - * 7-bit or 10-bit I2C address that they respond to. Valid 7-bit - * addresses range from 0x03 to 0x77. Addresses 0x00 to 0x02 and 0x78 - * to 0x7F are special reserved addresses (e.g. 0x00 is the "general - * call" address.) The Atmel device, on the other hand, responds to ALL - * 7-bit addresses. It's designed to be the only device on a given I2C - * bus. 
A 7-bit address corresponds to the memory address within the - * Atmel device itself. - * - * Also, the timing requirements mean more than simple software - * bitbanging, with readbacks from chip to ensure timing (simple udelay - * is not enough). - * - * This all means that accessing the device is specialized enough - * that using the standard kernel I2C bitbanging interface would be - * impossible. For example, the core I2C eeprom driver expects to find - * a device at one or more of a limited set of addresses only. It doesn't - * allow writing to an eeprom. It also doesn't provide any means of - * accessing eeprom contents from within the kernel, only via sysfs. - */ - -/* Added functionality for IBA7220-based cards */ -#define IPATH_EEPROM_DEV_V1 0xA0 -#define IPATH_EEPROM_DEV_V2 0xA2 -#define IPATH_TEMP_DEV 0x98 -#define IPATH_BAD_DEV (IPATH_EEPROM_DEV_V2+2) -#define IPATH_NO_DEV (0xFF) - -/* - * The number of I2C chains is proliferating. Table below brings - * some order to the madness. The basic principle is that the - * table is scanned from the top, and a "probe" is made to the - * device probe_dev. If that succeeds, the chain is considered - * to be of that type, and dd->i2c_chain_type is set to the index+1 - * of the entry. - * The +1 is so static initialization can mean "unknown, do probe." 
- */ -static struct i2c_chain_desc { - u8 probe_dev; /* If seen at probe, chain is this type */ - u8 eeprom_dev; /* Dev addr (if any) for EEPROM */ - u8 temp_dev; /* Dev Addr (if any) for Temp-sense */ -} i2c_chains[] = { - { IPATH_BAD_DEV, IPATH_NO_DEV, IPATH_NO_DEV }, /* pre-iba7220 bds */ - { IPATH_EEPROM_DEV_V1, IPATH_EEPROM_DEV_V1, IPATH_TEMP_DEV}, /* V1 */ - { IPATH_EEPROM_DEV_V2, IPATH_EEPROM_DEV_V2, IPATH_TEMP_DEV}, /* V2 */ - { IPATH_NO_DEV } -}; - -enum i2c_type { - i2c_line_scl = 0, - i2c_line_sda -}; - -enum i2c_state { - i2c_line_low = 0, - i2c_line_high -}; - -#define READ_CMD 1 -#define WRITE_CMD 0 - -/** - * i2c_gpio_set - set a GPIO line - * @dd: the infinipath device - * @line: the line to set - * @new_line_state: the state to set - * - * Returns 0 if the line was set to the new state successfully, non-zero - * on error. - */ -static int i2c_gpio_set(struct ipath_devdata *dd, - enum i2c_type line, - enum i2c_state new_line_state) -{ - u64 out_mask, dir_mask, *gpioval; - unsigned long flags = 0; - - gpioval = &dd->ipath_gpio_out; - - if (line == i2c_line_scl) { - dir_mask = dd->ipath_gpio_scl; - out_mask = (1UL << dd->ipath_gpio_scl_num); - } else { - dir_mask = dd->ipath_gpio_sda; - out_mask = (1UL << dd->ipath_gpio_sda_num); - } - - spin_lock_irqsave(&dd->ipath_gpio_lock, flags); - if (new_line_state == i2c_line_high) { - /* tri-state the output rather than force high */ - dd->ipath_extctrl &= ~dir_mask; - } else { - /* config line to be an output */ - dd->ipath_extctrl |= dir_mask; - } - ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl); - - /* set output as well (no real verify) */ - if (new_line_state == i2c_line_high) - *gpioval |= out_mask; - else - *gpioval &= ~out_mask; - - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval); - spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); - - return 0; -} - -/** - * i2c_gpio_get - get a GPIO line state - * @dd: the infinipath device - * @line: the line to get - * 
@curr_statep: where to put the line state - * - * Returns 0 if the line was set to the new state successfully, non-zero - * on error. curr_state is not set on error. - */ -static int i2c_gpio_get(struct ipath_devdata *dd, - enum i2c_type line, - enum i2c_state *curr_statep) -{ - u64 read_val, mask; - int ret; - unsigned long flags = 0; - - /* check args */ - if (curr_statep == NULL) { - ret = 1; - goto bail; - } - - /* config line to be an input */ - if (line == i2c_line_scl) - mask = dd->ipath_gpio_scl; - else - mask = dd->ipath_gpio_sda; - - spin_lock_irqsave(&dd->ipath_gpio_lock, flags); - dd->ipath_extctrl &= ~mask; - ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl); - /* - * Below is very unlikely to reflect true input state if Output - * Enable actually changed. - */ - read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); - spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); - - if (read_val & mask) - *curr_statep = i2c_line_high; - else - *curr_statep = i2c_line_low; - - ret = 0; - -bail: - return ret; -} - -/** - * i2c_wait_for_writes - wait for a write - * @dd: the infinipath device - * - * We use this instead of udelay directly, so we can make sure - * that previous register writes have been flushed all the way - * to the chip. Since we are delaying anyway, the cost doesn't - * hurt, and makes the bit twiddling more regular - */ -static void i2c_wait_for_writes(struct ipath_devdata *dd) -{ - (void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); - rmb(); -} - -static void scl_out(struct ipath_devdata *dd, u8 bit) -{ - udelay(1); - i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low); - - i2c_wait_for_writes(dd); -} - -static void sda_out(struct ipath_devdata *dd, u8 bit) -{ - i2c_gpio_set(dd, i2c_line_sda, bit ? 
i2c_line_high : i2c_line_low); - - i2c_wait_for_writes(dd); -} - -static u8 sda_in(struct ipath_devdata *dd, int wait) -{ - enum i2c_state bit; - - if (i2c_gpio_get(dd, i2c_line_sda, &bit)) - ipath_dbg("get bit failed!\n"); - - if (wait) - i2c_wait_for_writes(dd); - - return bit == i2c_line_high ? 1U : 0; -} - -/** - * i2c_ackrcv - see if ack following write is true - * @dd: the infinipath device - */ -static int i2c_ackrcv(struct ipath_devdata *dd) -{ - u8 ack_received; - - /* AT ENTRY SCL = LOW */ - /* change direction, ignore data */ - ack_received = sda_in(dd, 1); - scl_out(dd, i2c_line_high); - ack_received = sda_in(dd, 1) == 0; - scl_out(dd, i2c_line_low); - return ack_received; -} - -/** - * rd_byte - read a byte, leaving ACK, STOP, etc up to caller - * @dd: the infinipath device - * - * Returns byte shifted out of device - */ -static int rd_byte(struct ipath_devdata *dd) -{ - int bit_cntr, data; - - data = 0; - - for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) { - data <<= 1; - scl_out(dd, i2c_line_high); - data |= sda_in(dd, 0); - scl_out(dd, i2c_line_low); - } - return data; -} - -/** - * wr_byte - write a byte, one bit at a time - * @dd: the infinipath device - * @data: the byte to write - * - * Returns 0 if we got the following ack, otherwise 1 - */ -static int wr_byte(struct ipath_devdata *dd, u8 data) -{ - int bit_cntr; - u8 bit; - - for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) { - bit = (data >> bit_cntr) & 1; - sda_out(dd, bit); - scl_out(dd, i2c_line_high); - scl_out(dd, i2c_line_low); - } - return (!i2c_ackrcv(dd)) ? 
1 : 0; -} - -static void send_ack(struct ipath_devdata *dd) -{ - sda_out(dd, i2c_line_low); - scl_out(dd, i2c_line_high); - scl_out(dd, i2c_line_low); - sda_out(dd, i2c_line_high); -} - -/** - * i2c_startcmd - transmit the start condition, followed by address/cmd - * @dd: the infinipath device - * @offset_dir: direction byte - * - * (both clock/data high, clock high, data low while clock is high) - */ -static int i2c_startcmd(struct ipath_devdata *dd, u8 offset_dir) -{ - int res; - - /* issue start sequence */ - sda_out(dd, i2c_line_high); - scl_out(dd, i2c_line_high); - sda_out(dd, i2c_line_low); - scl_out(dd, i2c_line_low); - - /* issue length and direction byte */ - res = wr_byte(dd, offset_dir); - - if (res) - ipath_cdbg(VERBOSE, "No ack to complete start\n"); - - return res; -} - -/** - * stop_cmd - transmit the stop condition - * @dd: the infinipath device - * - * (both clock/data low, clock high, data high while clock is high) - */ -static void stop_cmd(struct ipath_devdata *dd) -{ - scl_out(dd, i2c_line_low); - sda_out(dd, i2c_line_low); - scl_out(dd, i2c_line_high); - sda_out(dd, i2c_line_high); - udelay(2); -} - -/** - * eeprom_reset - reset I2C communication - * @dd: the infinipath device - */ - -static int eeprom_reset(struct ipath_devdata *dd) -{ - int clock_cycles_left = 9; - u64 *gpioval = &dd->ipath_gpio_out; - int ret; - unsigned long flags; - - spin_lock_irqsave(&dd->ipath_gpio_lock, flags); - /* Make sure shadows are consistent */ - dd->ipath_extctrl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl); - *gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out); - spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); - - ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg " - "is %llx\n", (unsigned long long) *gpioval); - - /* - * This is to get the i2c into a known state, by first going low, - * then tristate sda (and then tristate scl as first thing - * in loop) - */ - scl_out(dd, i2c_line_low); - sda_out(dd, i2c_line_high); - - 
/* Clock up to 9 cycles looking for SDA hi, then issue START and STOP */ - while (clock_cycles_left--) { - scl_out(dd, i2c_line_high); - - /* SDA seen high, issue START by dropping it while SCL high */ - if (sda_in(dd, 0)) { - sda_out(dd, i2c_line_low); - scl_out(dd, i2c_line_low); - /* ATMEL spec says must be followed by STOP. */ - scl_out(dd, i2c_line_high); - sda_out(dd, i2c_line_high); - ret = 0; - goto bail; - } - - scl_out(dd, i2c_line_low); - } - - ret = 1; - -bail: - return ret; -} - -/* - * Probe for I2C device at specified address. Returns 0 for "success" - * to match rest of this file. - * Leave bus in "reasonable" state for further commands. - */ -static int i2c_probe(struct ipath_devdata *dd, int devaddr) -{ - int ret = 0; - - ret = eeprom_reset(dd); - if (ret) { - ipath_dev_err(dd, "Failed reset probing device 0x%02X\n", - devaddr); - return ret; - } - /* - * Reset no longer leaves bus in start condition, so normal - * i2c_startcmd() will do. - */ - ret = i2c_startcmd(dd, devaddr | READ_CMD); - if (ret) - ipath_cdbg(VERBOSE, "Failed startcmd for device 0x%02X\n", - devaddr); - else { - /* - * Device did respond. Complete a single-byte read, because some - * devices apparently cannot handle STOP immediately after they - * ACK the start-cmd. - */ - int data; - data = rd_byte(dd); - stop_cmd(dd); - ipath_cdbg(VERBOSE, "Response from device 0x%02X\n", devaddr); - } - return ret; -} - -/* - * Returns the "i2c type". This is a pointer to a struct that describes - * the I2C chain on this board. To minimize impact on struct ipath_devdata, - * the (small integer) index into the table is actually memoized, rather - * then the pointer. - * Memoization is because the type is determined on the first call per chip. - * An alternative would be to move type determination to early - * init code. 
- */ -static struct i2c_chain_desc *ipath_i2c_type(struct ipath_devdata *dd) -{ - int idx; - - /* Get memoized index, from previous successful probes */ - idx = dd->ipath_i2c_chain_type - 1; - if (idx >= 0 && idx < (ARRAY_SIZE(i2c_chains) - 1)) - goto done; - - idx = 0; - while (i2c_chains[idx].probe_dev != IPATH_NO_DEV) { - /* if probe succeeds, this is type */ - if (!i2c_probe(dd, i2c_chains[idx].probe_dev)) - break; - ++idx; - } - - /* - * Old EEPROM (first entry) may require a reset after probe, - * rather than being able to "start" after "stop" - */ - if (idx == 0) - eeprom_reset(dd); - - if (i2c_chains[idx].probe_dev == IPATH_NO_DEV) - idx = -1; - else - dd->ipath_i2c_chain_type = idx + 1; -done: - return (idx >= 0) ? i2c_chains + idx : NULL; -} - -static int ipath_eeprom_internal_read(struct ipath_devdata *dd, - u8 eeprom_offset, void *buffer, int len) -{ - int ret; - struct i2c_chain_desc *icd; - u8 *bp = buffer; - - ret = 1; - icd = ipath_i2c_type(dd); - if (!icd) - goto bail; - - if (icd->eeprom_dev == IPATH_NO_DEV) { - /* legacy not-really-I2C */ - ipath_cdbg(VERBOSE, "Start command only address\n"); - eeprom_offset = (eeprom_offset << 1) | READ_CMD; - ret = i2c_startcmd(dd, eeprom_offset); - } else { - /* Actual I2C */ - ipath_cdbg(VERBOSE, "Start command uses devaddr\n"); - if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) { - ipath_dbg("Failed EEPROM startcmd\n"); - stop_cmd(dd); - ret = 1; - goto bail; - } - ret = wr_byte(dd, eeprom_offset); - stop_cmd(dd); - if (ret) { - ipath_dev_err(dd, "Failed to write EEPROM address\n"); - ret = 1; - goto bail; - } - ret = i2c_startcmd(dd, icd->eeprom_dev | READ_CMD); - } - if (ret) { - ipath_dbg("Failed startcmd for dev %02X\n", icd->eeprom_dev); - stop_cmd(dd); - ret = 1; - goto bail; - } - - /* - * eeprom keeps clocking data out as long as we ack, automatically - * incrementing the address. 
- */ - while (len-- > 0) { - /* get and store data */ - *bp++ = rd_byte(dd); - /* send ack if not the last byte */ - if (len) - send_ack(dd); - } - - stop_cmd(dd); - - ret = 0; - -bail: - return ret; -} - -static int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset, - const void *buffer, int len) -{ - int sub_len; - const u8 *bp = buffer; - int max_wait_time, i; - int ret; - struct i2c_chain_desc *icd; - - ret = 1; - icd = ipath_i2c_type(dd); - if (!icd) - goto bail; - - while (len > 0) { - if (icd->eeprom_dev == IPATH_NO_DEV) { - if (i2c_startcmd(dd, - (eeprom_offset << 1) | WRITE_CMD)) { - ipath_dbg("Failed to start cmd offset %u\n", - eeprom_offset); - goto failed_write; - } - } else { - /* Real I2C */ - if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) { - ipath_dbg("Failed EEPROM startcmd\n"); - goto failed_write; - } - ret = wr_byte(dd, eeprom_offset); - if (ret) { - ipath_dev_err(dd, "Failed to write EEPROM " - "address\n"); - goto failed_write; - } - } - - sub_len = min(len, 4); - eeprom_offset += sub_len; - len -= sub_len; - - for (i = 0; i < sub_len; i++) { - if (wr_byte(dd, *bp++)) { - ipath_dbg("no ack after byte %u/%u (%u " - "total remain)\n", i, sub_len, - len + sub_len - i); - goto failed_write; - } - } - - stop_cmd(dd); - - /* - * wait for write complete by waiting for a successful - * read (the chip replies with a zero after the write - * cmd completes, and before it writes to the eeprom. - * The startcmd for the read will fail the ack until - * the writes have completed. We do this inline to avoid - * the debug prints that are in the real read routine - * if the startcmd fails. - * We also use the proper device address, so it doesn't matter - * whether we have real eeprom_dev. legacy likes any address. 
- */ - max_wait_time = 100; - while (i2c_startcmd(dd, icd->eeprom_dev | READ_CMD)) { - stop_cmd(dd); - if (!--max_wait_time) { - ipath_dbg("Did not get successful read to " - "complete write\n"); - goto failed_write; - } - } - /* now read (and ignore) the resulting byte */ - rd_byte(dd); - stop_cmd(dd); - } - - ret = 0; - goto bail; - -failed_write: - stop_cmd(dd); - ret = 1; - -bail: - return ret; -} - -/** - * ipath_eeprom_read - receives bytes from the eeprom via I2C - * @dd: the infinipath device - * @eeprom_offset: address to read from - * @buffer: where to store result - * @len: number of bytes to receive - */ -int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset, - void *buff, int len) -{ - int ret; - - ret = mutex_lock_interruptible(&dd->ipath_eep_lock); - if (!ret) { - ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len); - mutex_unlock(&dd->ipath_eep_lock); - } - - return ret; -} - -/** - * ipath_eeprom_write - writes data to the eeprom via I2C - * @dd: the infinipath device - * @eeprom_offset: where to place data - * @buffer: data to write - * @len: number of bytes to write - */ -int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset, - const void *buff, int len) -{ - int ret; - - ret = mutex_lock_interruptible(&dd->ipath_eep_lock); - if (!ret) { - ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len); - mutex_unlock(&dd->ipath_eep_lock); - } - - return ret; -} - -static u8 flash_csum(struct ipath_flash *ifp, int adjust) -{ - u8 *ip = (u8 *) ifp; - u8 csum = 0, len; - - /* - * Limit length checksummed to max length of actual data. - * Checksum of erased eeprom will still be bad, but we avoid - * reading past the end of the buffer we were passed. 
- */ - len = ifp->if_length; - if (len > sizeof(struct ipath_flash)) - len = sizeof(struct ipath_flash); - while (len--) - csum += *ip++; - csum -= ifp->if_csum; - csum = ~csum; - if (adjust) - ifp->if_csum = csum; - - return csum; -} - -/** - * ipath_get_guid - get the GUID from the i2c device - * @dd: the infinipath device - * - * We have the capability to use the ipath_nguid field, and get - * the guid from the first chip's flash, to use for all of them. - */ -void ipath_get_eeprom_info(struct ipath_devdata *dd) -{ - void *buf; - struct ipath_flash *ifp; - __be64 guid; - int len, eep_stat; - u8 csum, *bguid; - int t = dd->ipath_unit; - struct ipath_devdata *dd0 = ipath_lookup(0); - - if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) { - u8 oguid; - dd->ipath_guid = dd0->ipath_guid; - bguid = (u8 *) & dd->ipath_guid; - - oguid = bguid[7]; - bguid[7] += t; - if (oguid > bguid[7]) { - if (bguid[6] == 0xff) { - if (bguid[5] == 0xff) { - ipath_dev_err( - dd, - "Can't set %s GUID from " - "base, wraps to OUI!\n", - ipath_get_unit_name(t)); - dd->ipath_guid = 0; - goto bail; - } - bguid[5]++; - } - bguid[6]++; - } - dd->ipath_nguid = 1; - - ipath_dbg("nguid %u, so adding %u to device 0 guid, " - "for %llx\n", - dd0->ipath_nguid, t, - (unsigned long long) be64_to_cpu(dd->ipath_guid)); - goto bail; - } - - /* - * read full flash, not just currently used part, since it may have - * been written with a newer definition - * */ - len = sizeof(struct ipath_flash); - buf = vmalloc(len); - if (!buf) { - ipath_dev_err(dd, "Couldn't allocate memory to read %u " - "bytes from eeprom for GUID\n", len); - goto bail; - } - - mutex_lock(&dd->ipath_eep_lock); - eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len); - mutex_unlock(&dd->ipath_eep_lock); - - if (eep_stat) { - ipath_dev_err(dd, "Failed reading GUID from eeprom\n"); - goto done; - } - ifp = (struct ipath_flash *)buf; - - csum = flash_csum(ifp, 0); - if (csum != ifp->if_csum) { - dev_info(&dd->pcidev->dev, "Bad I2C 
flash checksum: " - "0x%x, not 0x%x\n", csum, ifp->if_csum); - goto done; - } - if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) || - *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) { - ipath_dev_err(dd, "Invalid GUID %llx from flash; " - "ignoring\n", - *(unsigned long long *) ifp->if_guid); - /* don't allow GUID if all 0 or all 1's */ - goto done; - } - - /* complain, but allow it */ - if (*(u64 *) ifp->if_guid == 0x100007511000000ULL) - dev_info(&dd->pcidev->dev, "Warning, GUID %llx is " - "default, probably not correct!\n", - *(unsigned long long *) ifp->if_guid); - - bguid = ifp->if_guid; - if (!bguid[0] && !bguid[1] && !bguid[2]) { - /* original incorrect GUID format in flash; fix in - * core copy, by shifting up 2 octets; don't need to - * change top octet, since both it and shifted are - * 0.. */ - bguid[1] = bguid[3]; - bguid[2] = bguid[4]; - bguid[3] = bguid[4] = 0; - guid = *(__be64 *) ifp->if_guid; - ipath_cdbg(VERBOSE, "Old GUID format in flash, top 3 zero, " - "shifting 2 octets\n"); - } else - guid = *(__be64 *) ifp->if_guid; - dd->ipath_guid = guid; - dd->ipath_nguid = ifp->if_numguid; - /* - * Things are slightly complicated by the desire to transparently - * support both the Pathscale 10-digit serial number and the QLogic - * 13-character version. - */ - if ((ifp->if_fversion > 1) && ifp->if_sprefix[0] - && ((u8 *)ifp->if_sprefix)[0] != 0xFF) { - /* This board has a Serial-prefix, which is stored - * elsewhere for backward-compatibility. 
- */ - char *snp = dd->ipath_serial; - memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix); - snp[sizeof ifp->if_sprefix] = '\0'; - len = strlen(snp); - snp += len; - len = (sizeof dd->ipath_serial) - len; - if (len > sizeof ifp->if_serial) { - len = sizeof ifp->if_serial; - } - memcpy(snp, ifp->if_serial, len); - } else - memcpy(dd->ipath_serial, ifp->if_serial, - sizeof ifp->if_serial); - if (!strstr(ifp->if_comment, "Tested successfully")) - ipath_dev_err(dd, "Board SN %s did not pass functional " - "test: %s\n", dd->ipath_serial, - ifp->if_comment); - - ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", - (unsigned long long) be64_to_cpu(dd->ipath_guid)); - - memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT); - /* - * Power-on (actually "active") hours are kept as little-endian value - * in EEPROM, but as seconds in a (possibly as small as 24-bit) - * atomic_t while running. - */ - atomic_set(&dd->ipath_active_time, 0); - dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8); - -done: - vfree(buf); - -bail:; -} - -/** - * ipath_update_eeprom_log - copy active-time and error counters to eeprom - * @dd: the infinipath device - * - * Although the time is kept as seconds in the ipath_devdata struct, it is - * rounded to hours for re-write, as we have only 16 bits in EEPROM. - * First-cut code reads whole (expected) struct ipath_flash, modifies, - * re-writes. Future direction: read/write only what we need, assuming - * that the EEPROM had to have been "good enough" for driver init, and - * if not, we aren't making it worse. - * - */ - -int ipath_update_eeprom_log(struct ipath_devdata *dd) -{ - void *buf; - struct ipath_flash *ifp; - int len, hi_water; - uint32_t new_time, new_hrs; - u8 csum; - int ret, idx; - unsigned long flags; - - /* first, check if we actually need to do anything. 
*/ - ret = 0; - for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) { - if (dd->ipath_eep_st_new_errs[idx]) { - ret = 1; - break; - } - } - new_time = atomic_read(&dd->ipath_active_time); - - if (ret == 0 && new_time < 3600) - return 0; - - /* - * The quick-check above determined that there is something worthy - * of logging, so get current contents and do a more detailed idea. - * read full flash, not just currently used part, since it may have - * been written with a newer definition - */ - len = sizeof(struct ipath_flash); - buf = vmalloc(len); - ret = 1; - if (!buf) { - ipath_dev_err(dd, "Couldn't allocate memory to read %u " - "bytes from eeprom for logging\n", len); - goto bail; - } - - /* Grab semaphore and read current EEPROM. If we get an - * error, let go, but if not, keep it until we finish write. - */ - ret = mutex_lock_interruptible(&dd->ipath_eep_lock); - if (ret) { - ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n"); - goto free_bail; - } - ret = ipath_eeprom_internal_read(dd, 0, buf, len); - if (ret) { - mutex_unlock(&dd->ipath_eep_lock); - ipath_dev_err(dd, "Unable read EEPROM for logging\n"); - goto free_bail; - } - ifp = (struct ipath_flash *)buf; - - csum = flash_csum(ifp, 0); - if (csum != ifp->if_csum) { - mutex_unlock(&dd->ipath_eep_lock); - ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n", - csum, ifp->if_csum); - ret = 1; - goto free_bail; - } - hi_water = 0; - spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); - for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) { - int new_val = dd->ipath_eep_st_new_errs[idx]; - if (new_val) { - /* - * If we have seen any errors, add to EEPROM values - * We need to saturate at 0xFF (255) and we also - * would need to adjust the checksum if we were - * trying to minimize EEPROM traffic - * Note that we add to actual current count in EEPROM, - * in case it was altered while we were running. 
- */ - new_val += ifp->if_errcntp[idx]; - if (new_val > 0xFF) - new_val = 0xFF; - if (ifp->if_errcntp[idx] != new_val) { - ifp->if_errcntp[idx] = new_val; - hi_water = offsetof(struct ipath_flash, - if_errcntp) + idx; - } - /* - * update our shadow (used to minimize EEPROM - * traffic), to match what we are about to write. - */ - dd->ipath_eep_st_errs[idx] = new_val; - dd->ipath_eep_st_new_errs[idx] = 0; - } - } - /* - * now update active-time. We would like to round to the nearest hour - * but unless atomic_t are sure to be proper signed ints we cannot, - * because we need to account for what we "transfer" to EEPROM and - * if we log an hour at 31 minutes, then we would need to set - * active_time to -29 to accurately count the _next_ hour. - */ - if (new_time >= 3600) { - new_hrs = new_time / 3600; - atomic_sub((new_hrs * 3600), &dd->ipath_active_time); - new_hrs += dd->ipath_eep_hrs; - if (new_hrs > 0xFFFF) - new_hrs = 0xFFFF; - dd->ipath_eep_hrs = new_hrs; - if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) { - ifp->if_powerhour[0] = new_hrs & 0xFF; - hi_water = offsetof(struct ipath_flash, if_powerhour); - } - if ((new_hrs >> 8) != ifp->if_powerhour[1]) { - ifp->if_powerhour[1] = new_hrs >> 8; - hi_water = offsetof(struct ipath_flash, if_powerhour) - + 1; - } - } - /* - * There is a tiny possibility that we could somehow fail to write - * the EEPROM after updating our shadows, but problems from holding - * the spinlock too long are a much bigger issue. - */ - spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); - if (hi_water) { - /* we made some change to the data, uopdate cksum and write */ - csum = flash_csum(ifp, 1); - ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1); - } - mutex_unlock(&dd->ipath_eep_lock); - if (ret) - ipath_dev_err(dd, "Failed updating EEPROM\n"); - -free_bail: - vfree(buf); -bail: - return ret; - -} - -/** - * ipath_inc_eeprom_err - increment one of the four error counters - * that are logged to EEPROM. 
- * @dd: the infinipath device - * @eidx: 0..3, the counter to increment - * @incr: how much to add - * - * Each counter is 8-bits, and saturates at 255 (0xFF). They - * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log() - * is called, but it can only be called in a context that allows sleep. - * This function can be called even at interrupt level. - */ - -void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr) -{ - uint new_val; - unsigned long flags; - - spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); - new_val = dd->ipath_eep_st_new_errs[eidx] + incr; - if (new_val > 255) - new_val = 255; - dd->ipath_eep_st_new_errs[eidx] = new_val; - spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); - return; -} - -static int ipath_tempsense_internal_read(struct ipath_devdata *dd, u8 regnum) -{ - int ret; - struct i2c_chain_desc *icd; - - ret = -ENOENT; - - icd = ipath_i2c_type(dd); - if (!icd) - goto bail; - - if (icd->temp_dev == IPATH_NO_DEV) { - /* tempsense only exists on new, real-I2C boards */ - ret = -ENXIO; - goto bail; - } - - if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) { - ipath_dbg("Failed tempsense startcmd\n"); - stop_cmd(dd); - ret = -ENXIO; - goto bail; - } - ret = wr_byte(dd, regnum); - stop_cmd(dd); - if (ret) { - ipath_dev_err(dd, "Failed tempsense WR command %02X\n", - regnum); - ret = -ENXIO; - goto bail; - } - if (i2c_startcmd(dd, icd->temp_dev | READ_CMD)) { - ipath_dbg("Failed tempsense RD startcmd\n"); - stop_cmd(dd); - ret = -ENXIO; - goto bail; - } - /* - * We can only clock out one byte per command, sensibly - */ - ret = rd_byte(dd); - stop_cmd(dd); - -bail: - return ret; -} - -#define VALID_TS_RD_REG_MASK 0xBF - -/** - * ipath_tempsense_read - read register of temp sensor via I2C - * @dd: the infinipath device - * @regnum: register to read from - * - * returns reg contents (0..255) or < 0 for error - */ -int ipath_tempsense_read(struct ipath_devdata *dd, u8 regnum) -{ - int ret; - - if (regnum > 7) - 
return -EINVAL; - - /* return a bogus value for (the one) register we do not have */ - if (!((1 << regnum) & VALID_TS_RD_REG_MASK)) - return 0; - - ret = mutex_lock_interruptible(&dd->ipath_eep_lock); - if (!ret) { - ret = ipath_tempsense_internal_read(dd, regnum); - mutex_unlock(&dd->ipath_eep_lock); - } - - /* - * There are three possibilities here: - * ret is actual value (0..255) - * ret is -ENXIO or -EINVAL from code in this file - * ret is -EINTR from mutex_lock_interruptible. - */ - return ret; -} - -static int ipath_tempsense_internal_write(struct ipath_devdata *dd, - u8 regnum, u8 data) -{ - int ret = -ENOENT; - struct i2c_chain_desc *icd; - - icd = ipath_i2c_type(dd); - if (!icd) - goto bail; - - if (icd->temp_dev == IPATH_NO_DEV) { - /* tempsense only exists on new, real-I2C boards */ - ret = -ENXIO; - goto bail; - } - if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) { - ipath_dbg("Failed tempsense startcmd\n"); - stop_cmd(dd); - ret = -ENXIO; - goto bail; - } - ret = wr_byte(dd, regnum); - if (ret) { - stop_cmd(dd); - ipath_dev_err(dd, "Failed to write tempsense command %02X\n", - regnum); - ret = -ENXIO; - goto bail; - } - ret = wr_byte(dd, data); - stop_cmd(dd); - ret = i2c_startcmd(dd, icd->temp_dev | READ_CMD); - if (ret) { - ipath_dev_err(dd, "Failed tempsense data wrt to %02X\n", - regnum); - ret = -ENXIO; - } - -bail: - return ret; -} - -#define VALID_TS_WR_REG_MASK ((1 << 9) | (1 << 0xB) | (1 << 0xD)) - -/** - * ipath_tempsense_write - write register of temp sensor via I2C - * @dd: the infinipath device - * @regnum: register to write - * @data: data to write - * - * returns 0 for success or < 0 for error - */ -int ipath_tempsense_write(struct ipath_devdata *dd, u8 regnum, u8 data) -{ - int ret; - - if (regnum > 15 || !((1 << regnum) & VALID_TS_WR_REG_MASK)) - return -EINVAL; - - ret = mutex_lock_interruptible(&dd->ipath_eep_lock); - if (!ret) { - ret = ipath_tempsense_internal_write(dd, regnum, data); - mutex_unlock(&dd->ipath_eep_lock); - } - - 
/* - * There are three possibilities here: - * ret is 0 for success - * ret is -ENXIO or -EINVAL from code in this file - * ret is -EINTR from mutex_lock_interruptible. - */ - return ret; -} diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c deleted file mode 100644 index 450d15965..000000000 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ /dev/null @@ -1,2620 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <linux/pci.h> -#include <linux/poll.h> -#include <linux/cdev.h> -#include <linux/swap.h> -#include <linux/export.h> -#include <linux/vmalloc.h> -#include <linux/slab.h> -#include <linux/highmem.h> -#include <linux/io.h> -#include <linux/jiffies.h> -#include <linux/cpu.h> -#include <linux/uio.h> -#include <asm/pgtable.h> - -#include "ipath_kernel.h" -#include "ipath_common.h" -#include "ipath_user_sdma.h" - -static int ipath_open(struct inode *, struct file *); -static int ipath_close(struct inode *, struct file *); -static ssize_t ipath_write(struct file *, const char __user *, size_t, - loff_t *); -static ssize_t ipath_write_iter(struct kiocb *, struct iov_iter *from); -static unsigned int ipath_poll(struct file *, struct poll_table_struct *); -static int ipath_mmap(struct file *, struct vm_area_struct *); - -/* - * This is really, really weird shit - write() and writev() here - * have completely unrelated semantics. Sucky userland ABI, - * film at 11. - */ -static const struct file_operations ipath_file_ops = { - .owner = THIS_MODULE, - .write = ipath_write, - .write_iter = ipath_write_iter, - .open = ipath_open, - .release = ipath_close, - .poll = ipath_poll, - .mmap = ipath_mmap, - .llseek = noop_llseek, -}; - -/* - * Convert kernel virtual addresses to physical addresses so they don't - * potentially conflict with the chip addresses used as mmap offsets. - * It doesn't really matter what mmap offset we use as long as we can - * interpret it correctly. 
- */ -static u64 cvt_kvaddr(void *p) -{ - struct page *page; - u64 paddr = 0; - - page = vmalloc_to_page(p); - if (page) - paddr = page_to_pfn(page) << PAGE_SHIFT; - - return paddr; -} - -static int ipath_get_base_info(struct file *fp, - void __user *ubase, size_t ubase_size) -{ - struct ipath_portdata *pd = port_fp(fp); - int ret = 0; - struct ipath_base_info *kinfo = NULL; - struct ipath_devdata *dd = pd->port_dd; - unsigned subport_cnt; - int shared, master; - size_t sz; - - subport_cnt = pd->port_subport_cnt; - if (!subport_cnt) { - shared = 0; - master = 0; - subport_cnt = 1; - } else { - shared = 1; - master = !subport_fp(fp); - } - - sz = sizeof(*kinfo); - /* If port sharing is not requested, allow the old size structure */ - if (!shared) - sz -= 7 * sizeof(u64); - if (ubase_size < sz) { - ipath_cdbg(PROC, - "Base size %zu, need %zu (version mismatch?)\n", - ubase_size, sz); - ret = -EINVAL; - goto bail; - } - - kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL); - if (kinfo == NULL) { - ret = -ENOMEM; - goto bail; - } - - ret = dd->ipath_f_get_base_info(pd, kinfo); - if (ret < 0) - goto bail; - - kinfo->spi_rcvhdr_cnt = dd->ipath_rcvhdrcnt; - kinfo->spi_rcvhdrent_size = dd->ipath_rcvhdrentsize; - kinfo->spi_tidegrcnt = dd->ipath_rcvegrcnt; - kinfo->spi_rcv_egrbufsize = dd->ipath_rcvegrbufsize; - /* - * have to mmap whole thing - */ - kinfo->spi_rcv_egrbuftotlen = - pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; - kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk; - kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen / - pd->port_rcvegrbuf_chunks; - kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt; - if (master) - kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt; - /* - * for this use, may be ipath_cfgports summed over all chips that - * are are configured and present - */ - kinfo->spi_nports = dd->ipath_cfgports; - /* unit (chip/board) our port is on */ - kinfo->spi_unit = dd->ipath_unit; - /* for now, only a single page */ - 
kinfo->spi_tid_maxsize = PAGE_SIZE; - - /* - * Doing this per port, and based on the skip value, etc. This has - * to be the actual buffer size, since the protocol code treats it - * as an array. - * - * These have to be set to user addresses in the user code via mmap. - * These values are used on return to user code for the mmap target - * addresses only. For 32 bit, same 44 bit address problem, so use - * the physical address, not virtual. Before 2.6.11, using the - * page_address() macro worked, but in 2.6.11, even that returns the - * full 64 bit address (upper bits all 1's). So far, using the - * physical addresses (or chip offsets, for chip mapping) works, but - * no doubt some future kernel release will change that, and we'll be - * on to yet another method of dealing with this. - */ - kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys; - kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys; - kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys; - kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys; - kinfo->spi_status = (u64) kinfo->spi_pioavailaddr + - (void *) dd->ipath_statusp - - (void *) dd->ipath_pioavailregs_dma; - if (!shared) { - kinfo->spi_piocnt = pd->port_piocnt; - kinfo->spi_piobufbase = (u64) pd->port_piobufs; - kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + - dd->ipath_ureg_align * pd->port_port; - } else if (master) { - kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) + - (pd->port_piocnt % subport_cnt); - /* Master's PIO buffers are after all the slave's */ - kinfo->spi_piobufbase = (u64) pd->port_piobufs + - dd->ipath_palign * - (pd->port_piocnt - kinfo->spi_piocnt); - } else { - unsigned slave = subport_fp(fp) - 1; - - kinfo->spi_piocnt = pd->port_piocnt / subport_cnt; - kinfo->spi_piobufbase = (u64) pd->port_piobufs + - dd->ipath_palign * kinfo->spi_piocnt * slave; - } - - if (shared) { - kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase + - dd->ipath_ureg_align * pd->port_port; - kinfo->spi_port_rcvegrbuf 
= kinfo->spi_rcv_egrbufs; - kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base; - kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr; - - kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase + - PAGE_SIZE * subport_fp(fp)); - - kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base + - pd->port_rcvhdrq_size * subport_fp(fp)); - kinfo->spi_rcvhdr_tailaddr = 0; - kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf + - pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size * - subport_fp(fp)); - - kinfo->spi_subport_uregbase = - cvt_kvaddr(pd->subport_uregbase); - kinfo->spi_subport_rcvegrbuf = - cvt_kvaddr(pd->subport_rcvegrbuf); - kinfo->spi_subport_rcvhdr_base = - cvt_kvaddr(pd->subport_rcvhdr_base); - ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n", - kinfo->spi_port, kinfo->spi_runtime_flags, - (unsigned long long) kinfo->spi_subport_uregbase, - (unsigned long long) kinfo->spi_subport_rcvegrbuf, - (unsigned long long) kinfo->spi_subport_rcvhdr_base); - } - - /* - * All user buffers are 2KB buffers. If we ever support - * giving 4KB buffers to user processes, this will need some - * work. - */ - kinfo->spi_pioindex = (kinfo->spi_piobufbase - - (dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign; - kinfo->spi_pioalign = dd->ipath_palign; - - kinfo->spi_qpair = IPATH_KD_QP; - /* - * user mode PIO buffers are always 2KB, even when 4KB can - * be received, and sent via the kernel; this is ibmaxlen - * for 2K MTU. - */ - kinfo->spi_piosize = dd->ipath_piosize2k - 2 * sizeof(u32); - kinfo->spi_mtu = dd->ipath_ibmaxlen; /* maxlen, not ibmtu */ - kinfo->spi_port = pd->port_port; - kinfo->spi_subport = subport_fp(fp); - kinfo->spi_sw_version = IPATH_KERN_SWVERSION; - kinfo->spi_hw_version = dd->ipath_revision; - - if (master) { - kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER; - } - - sz = (ubase_size < sizeof(*kinfo)) ? 
ubase_size : sizeof(*kinfo); - if (copy_to_user(ubase, kinfo, sz)) - ret = -EFAULT; - -bail: - kfree(kinfo); - return ret; -} - -/** - * ipath_tid_update - update a port TID - * @pd: the port - * @fp: the ipath device file - * @ti: the TID information - * - * The new implementation as of Oct 2004 is that the driver assigns - * the tid and returns it to the caller. To make it easier to - * catch bugs, and to reduce search time, we keep a cursor for - * each port, walking the shadow tid array to find one that's not - * in use. - * - * For now, if we can't allocate the full list, we fail, although - * in the long run, we'll allocate as many as we can, and the - * caller will deal with that by trying the remaining pages later. - * That means that when we fail, we have to mark the tids as not in - * use again, in our shadow copy. - * - * It's up to the caller to free the tids when they are done. - * We'll unlock the pages as they free them. - * - * Also, right now we are locking one page at a time, but since - * the intended use of this routine is for a single group of - * virtually contiguous pages, that should change to improve - * performance. - */ -static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp, - const struct ipath_tid_info *ti) -{ - int ret = 0, ntids; - u32 tid, porttid, cnt, i, tidcnt, tidoff; - u16 *tidlist; - struct ipath_devdata *dd = pd->port_dd; - u64 physaddr; - unsigned long vaddr; - u64 __iomem *tidbase; - unsigned long tidmap[8]; - struct page **pagep = NULL; - unsigned subport = subport_fp(fp); - - if (!dd->ipath_pageshadow) { - ret = -ENOMEM; - goto done; - } - - cnt = ti->tidcnt; - if (!cnt) { - ipath_dbg("After copyin, tidcnt 0, tidlist %llx\n", - (unsigned long long) ti->tidlist); - /* - * Should we treat as success? 
likely a bug - */ - ret = -EFAULT; - goto done; - } - porttid = pd->port_port * dd->ipath_rcvtidcnt; - if (!pd->port_subport_cnt) { - tidcnt = dd->ipath_rcvtidcnt; - tid = pd->port_tidcursor; - tidoff = 0; - } else if (!subport) { - tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) + - (dd->ipath_rcvtidcnt % pd->port_subport_cnt); - tidoff = dd->ipath_rcvtidcnt - tidcnt; - porttid += tidoff; - tid = tidcursor_fp(fp); - } else { - tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt; - tidoff = tidcnt * (subport - 1); - porttid += tidoff; - tid = tidcursor_fp(fp); - } - if (cnt > tidcnt) { - /* make sure it all fits in port_tid_pg_list */ - dev_info(&dd->pcidev->dev, "Process tried to allocate %u " - "TIDs, only trying max (%u)\n", cnt, tidcnt); - cnt = tidcnt; - } - pagep = &((struct page **) pd->port_tid_pg_list)[tidoff]; - tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff]; - - memset(tidmap, 0, sizeof(tidmap)); - /* before decrement; chip actual # */ - ntids = tidcnt; - tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) + - dd->ipath_rcvtidbase + - porttid * sizeof(*tidbase)); - - ipath_cdbg(VERBOSE, "Port%u %u tids, cursor %u, tidbase %p\n", - pd->port_port, cnt, tid, tidbase); - - /* virtual address of first page in transfer */ - vaddr = ti->tidvaddr; - if (!access_ok(VERIFY_WRITE, (void __user *) vaddr, - cnt * PAGE_SIZE)) { - ipath_dbg("Fail vaddr %p, %u pages, !access_ok\n", - (void *)vaddr, cnt); - ret = -EFAULT; - goto done; - } - ret = ipath_get_user_pages(vaddr, cnt, pagep); - if (ret) { - if (ret == -EBUSY) { - ipath_dbg("Failed to lock addr %p, %u pages " - "(already locked)\n", - (void *) vaddr, cnt); - /* - * for now, continue, and see what happens but with - * the new implementation, this should never happen, - * unless perhaps the user has mpin'ed the pages - * themselves (something we need to test) - */ - ret = 0; - } else { - dev_info(&dd->pcidev->dev, - "Failed to lock addr %p, %u pages: " - "errno %d\n", (void *) vaddr, 
cnt, -ret); - goto done; - } - } - for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) { - for (; ntids--; tid++) { - if (tid == tidcnt) - tid = 0; - if (!dd->ipath_pageshadow[porttid + tid]) - break; - } - if (ntids < 0) { - /* - * oops, wrapped all the way through their TIDs, - * and didn't have enough free; see comments at - * start of routine - */ - ipath_dbg("Not enough free TIDs for %u pages " - "(index %d), failing\n", cnt, i); - i--; /* last tidlist[i] not filled in */ - ret = -ENOMEM; - break; - } - tidlist[i] = tid + tidoff; - ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, " - "vaddr %lx\n", i, tid + tidoff, vaddr); - /* we "know" system pages and TID pages are same size */ - dd->ipath_pageshadow[porttid + tid] = pagep[i]; - dd->ipath_physshadow[porttid + tid] = ipath_map_page( - dd->pcidev, pagep[i], 0, PAGE_SIZE, - PCI_DMA_FROMDEVICE); - /* - * don't need atomic or it's overhead - */ - __set_bit(tid, tidmap); - physaddr = dd->ipath_physshadow[porttid + tid]; - ipath_stats.sps_pagelocks++; - ipath_cdbg(VERBOSE, - "TID %u, vaddr %lx, physaddr %llx pgp %p\n", - tid, vaddr, (unsigned long long) physaddr, - pagep[i]); - dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED, - physaddr); - /* - * don't check this tid in ipath_portshadow, since we - * just filled it in; start with the next one. - */ - tid++; - } - - if (ret) { - u32 limit; - cleanup: - /* jump here if copy out of updated info failed... 
*/ - ipath_dbg("After failure (ret=%d), undo %d of %d entries\n", - -ret, i, cnt); - /* same code that's in ipath_free_tid() */ - limit = sizeof(tidmap) * BITS_PER_BYTE; - if (limit > tidcnt) - /* just in case size changes in future */ - limit = tidcnt; - tid = find_first_bit((const unsigned long *)tidmap, limit); - for (; tid < limit; tid++) { - if (!test_bit(tid, tidmap)) - continue; - if (dd->ipath_pageshadow[porttid + tid]) { - ipath_cdbg(VERBOSE, "Freeing TID %u\n", - tid); - dd->ipath_f_put_tid(dd, &tidbase[tid], - RCVHQ_RCV_TYPE_EXPECTED, - dd->ipath_tidinvalid); - pci_unmap_page(dd->pcidev, - dd->ipath_physshadow[porttid + tid], - PAGE_SIZE, PCI_DMA_FROMDEVICE); - dd->ipath_pageshadow[porttid + tid] = NULL; - ipath_stats.sps_pageunlocks++; - } - } - ipath_release_user_pages(pagep, cnt); - } else { - /* - * Copy the updated array, with ipath_tid's filled in, back - * to user. Since we did the copy in already, this "should - * never fail" If it does, we have to clean up... - */ - if (copy_to_user((void __user *) - (unsigned long) ti->tidlist, - tidlist, cnt * sizeof(*tidlist))) { - ret = -EFAULT; - goto cleanup; - } - if (copy_to_user((void __user *) (unsigned long) ti->tidmap, - tidmap, sizeof tidmap)) { - ret = -EFAULT; - goto cleanup; - } - if (tid == tidcnt) - tid = 0; - if (!pd->port_subport_cnt) - pd->port_tidcursor = tid; - else - tidcursor_fp(fp) = tid; - } - -done: - if (ret) - ipath_dbg("Failed to map %u TID pages, failing with %d\n", - ti->tidcnt, -ret); - return ret; -} - -/** - * ipath_tid_free - free a port TID - * @pd: the port - * @subport: the subport - * @ti: the TID info - * - * right now we are unlocking one page at a time, but since - * the intended use of this routine is for a single group of - * virtually contiguous pages, that should change to improve - * performance. 
We check that the TID is in range for this port - * but otherwise don't check validity; if user has an error and - * frees the wrong tid, it's only their own data that can thereby - * be corrupted. We do check that the TID was in use, for sanity - * We always use our idea of the saved address, not the address that - * they pass in to us. - */ - -static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport, - const struct ipath_tid_info *ti) -{ - int ret = 0; - u32 tid, porttid, cnt, limit, tidcnt; - struct ipath_devdata *dd = pd->port_dd; - u64 __iomem *tidbase; - unsigned long tidmap[8]; - - if (!dd->ipath_pageshadow) { - ret = -ENOMEM; - goto done; - } - - if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap, - sizeof tidmap)) { - ret = -EFAULT; - goto done; - } - - porttid = pd->port_port * dd->ipath_rcvtidcnt; - if (!pd->port_subport_cnt) - tidcnt = dd->ipath_rcvtidcnt; - else if (!subport) { - tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) + - (dd->ipath_rcvtidcnt % pd->port_subport_cnt); - porttid += dd->ipath_rcvtidcnt - tidcnt; - } else { - tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt; - porttid += tidcnt * (subport - 1); - } - tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + - dd->ipath_rcvtidbase + - porttid * sizeof(*tidbase)); - - limit = sizeof(tidmap) * BITS_PER_BYTE; - if (limit > tidcnt) - /* just in case size changes in future */ - limit = tidcnt; - tid = find_first_bit(tidmap, limit); - ipath_cdbg(VERBOSE, "Port%u free %u tids; first bit (max=%d) " - "set is %d, porttid %u\n", pd->port_port, ti->tidcnt, - limit, tid, porttid); - for (cnt = 0; tid < limit; tid++) { - /* - * small optimization; if we detect a run of 3 or so without - * any set, use find_first_bit again. That's mainly to - * accelerate the case where we wrapped, so we have some at - * the beginning, and some at the end, and a big gap - * in the middle. 
- */ - if (!test_bit(tid, tidmap)) - continue; - cnt++; - if (dd->ipath_pageshadow[porttid + tid]) { - struct page *p; - p = dd->ipath_pageshadow[porttid + tid]; - dd->ipath_pageshadow[porttid + tid] = NULL; - ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n", - pid_nr(pd->port_pid), tid); - dd->ipath_f_put_tid(dd, &tidbase[tid], - RCVHQ_RCV_TYPE_EXPECTED, - dd->ipath_tidinvalid); - pci_unmap_page(dd->pcidev, - dd->ipath_physshadow[porttid + tid], - PAGE_SIZE, PCI_DMA_FROMDEVICE); - ipath_release_user_pages(&p, 1); - ipath_stats.sps_pageunlocks++; - } else - ipath_dbg("Unused tid %u, ignoring\n", tid); - } - if (cnt != ti->tidcnt) - ipath_dbg("passed in tidcnt %d, only %d bits set in map\n", - ti->tidcnt, cnt); -done: - if (ret) - ipath_dbg("Failed to unmap %u TID pages, failing with %d\n", - ti->tidcnt, -ret); - return ret; -} - -/** - * ipath_set_part_key - set a partition key - * @pd: the port - * @key: the key - * - * We can have up to 4 active at a time (other than the default, which is - * always allowed). This is somewhat tricky, since multiple ports may set - * the same key, so we reference count them, and clean up at exit. All 4 - * partition keys are packed into a single infinipath register. It's an - * error for a process to set the same pkey multiple times. We provide no - * mechanism to de-allocate a pkey at this time, we may eventually need to - * do that. I've used the atomic operations, and no locking, and only make - * a single pass through what's available. This should be more than - * adequate for some time. I'll think about spinlocks or the like if and as - * it's necessary. 
- */ -static int ipath_set_part_key(struct ipath_portdata *pd, u16 key) -{ - struct ipath_devdata *dd = pd->port_dd; - int i, any = 0, pidx = -1; - u16 lkey = key & 0x7FFF; - int ret; - - if (lkey == (IPATH_DEFAULT_P_KEY & 0x7FFF)) { - /* nothing to do; this key always valid */ - ret = 0; - goto bail; - } - - ipath_cdbg(VERBOSE, "p%u try to set pkey %hx, current keys " - "%hx:%x %hx:%x %hx:%x %hx:%x\n", - pd->port_port, key, dd->ipath_pkeys[0], - atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1], - atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2], - atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3], - atomic_read(&dd->ipath_pkeyrefs[3])); - - if (!lkey) { - ipath_cdbg(PROC, "p%u tries to set key 0, not allowed\n", - pd->port_port); - ret = -EINVAL; - goto bail; - } - - /* - * Set the full membership bit, because it has to be - * set in the register or the packet, and it seems - * cleaner to set in the register than to force all - * callers to set it. (see bug 4331) - */ - key |= 0x8000; - - for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { - if (!pd->port_pkeys[i] && pidx == -1) - pidx = i; - if (pd->port_pkeys[i] == key) { - ipath_cdbg(VERBOSE, "p%u tries to set same pkey " - "(%x) more than once\n", - pd->port_port, key); - ret = -EEXIST; - goto bail; - } - } - if (pidx == -1) { - ipath_dbg("All pkeys for port %u already in use, " - "can't set %x\n", pd->port_port, key); - ret = -EBUSY; - goto bail; - } - for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (!dd->ipath_pkeys[i]) { - any++; - continue; - } - if (dd->ipath_pkeys[i] == key) { - atomic_t *pkrefs = &dd->ipath_pkeyrefs[i]; - - if (atomic_inc_return(pkrefs) > 1) { - pd->port_pkeys[pidx] = key; - ipath_cdbg(VERBOSE, "p%u set key %x " - "matches #%d, count now %d\n", - pd->port_port, key, i, - atomic_read(pkrefs)); - ret = 0; - goto bail; - } else { - /* - * lost race, decrement count, catch below - */ - atomic_dec(pkrefs); - ipath_cdbg(VERBOSE, "Lost race, count was " - "0, 
after dec, it's %d\n", - atomic_read(pkrefs)); - any++; - } - } - if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) { - /* - * It makes no sense to have both the limited and - * full membership PKEY set at the same time since - * the unlimited one will disable the limited one. - */ - ret = -EEXIST; - goto bail; - } - } - if (!any) { - ipath_dbg("port %u, all pkeys already in use, " - "can't set %x\n", pd->port_port, key); - ret = -EBUSY; - goto bail; - } - for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (!dd->ipath_pkeys[i] && - atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) { - u64 pkey; - - /* for ipathstats, etc. */ - ipath_stats.sps_pkeys[i] = lkey; - pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key; - pkey = - (u64) dd->ipath_pkeys[0] | - ((u64) dd->ipath_pkeys[1] << 16) | - ((u64) dd->ipath_pkeys[2] << 32) | - ((u64) dd->ipath_pkeys[3] << 48); - ipath_cdbg(PROC, "p%u set key %x in #%d, " - "portidx %d, new pkey reg %llx\n", - pd->port_port, key, i, pidx, - (unsigned long long) pkey); - ipath_write_kreg( - dd, dd->ipath_kregs->kr_partitionkey, pkey); - - ret = 0; - goto bail; - } - } - ipath_dbg("port %u, all pkeys already in use 2nd pass, " - "can't set %x\n", pd->port_port, key); - ret = -EBUSY; - -bail: - return ret; -} - -/** - * ipath_manage_rcvq - manage a port's receive queue - * @pd: the port - * @subport: the subport - * @start_stop: action to carry out - * - * start_stop == 0 disables receive on the port, for use in queue - * overflow conditions. start_stop==1 re-enables, to be used to - * re-init the software copy of the head register - */ -static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport, - int start_stop) -{ - struct ipath_devdata *dd = pd->port_dd; - - ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n", - start_stop ? "en" : "dis", dd->ipath_unit, - pd->port_port, subport); - if (subport) - goto bail; - /* atomically clear receive enable port. 
*/ - if (start_stop) { - /* - * On enable, force in-memory copy of the tail register to - * 0, so that protocol code doesn't have to worry about - * whether or not the chip has yet updated the in-memory - * copy or not on return from the system call. The chip - * always resets it's tail register back to 0 on a - * transition from disabled to enabled. This could cause a - * problem if software was broken, and did the enable w/o - * the disable, but eventually the in-memory copy will be - * updated and correct itself, even in the face of software - * bugs. - */ - if (pd->port_rcvhdrtail_kvaddr) - ipath_clear_rcvhdrtail(pd); - set_bit(dd->ipath_r_portenable_shift + pd->port_port, - &dd->ipath_rcvctrl); - } else - clear_bit(dd->ipath_r_portenable_shift + pd->port_port, - &dd->ipath_rcvctrl); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - /* now be sure chip saw it before we return */ - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - if (start_stop) { - /* - * And try to be sure that tail reg update has happened too. - * This should in theory interlock with the RXE changes to - * the tail register. Don't assign it to the tail register - * in memory copy, since we could overwrite an update by the - * chip if we did. 
- */ - ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); - } - /* always; new head should be equal to new tail; see above */ -bail: - return 0; -} - -static void ipath_clean_part_key(struct ipath_portdata *pd, - struct ipath_devdata *dd) -{ - int i, j, pchanged = 0; - u64 oldpkey; - - /* for debugging only */ - oldpkey = (u64) dd->ipath_pkeys[0] | - ((u64) dd->ipath_pkeys[1] << 16) | - ((u64) dd->ipath_pkeys[2] << 32) | - ((u64) dd->ipath_pkeys[3] << 48); - - for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { - if (!pd->port_pkeys[i]) - continue; - ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i, - pd->port_pkeys[i]); - for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) { - /* check for match independent of the global bit */ - if ((dd->ipath_pkeys[j] & 0x7fff) != - (pd->port_pkeys[i] & 0x7fff)) - continue; - if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) { - ipath_cdbg(VERBOSE, "p%u clear key " - "%x matches #%d\n", - pd->port_port, - pd->port_pkeys[i], j); - ipath_stats.sps_pkeys[j] = - dd->ipath_pkeys[j] = 0; - pchanged++; - } - else ipath_cdbg( - VERBOSE, "p%u key %x matches #%d, " - "but ref still %d\n", pd->port_port, - pd->port_pkeys[i], j, - atomic_read(&dd->ipath_pkeyrefs[j])); - break; - } - pd->port_pkeys[i] = 0; - } - if (pchanged) { - u64 pkey = (u64) dd->ipath_pkeys[0] | - ((u64) dd->ipath_pkeys[1] << 16) | - ((u64) dd->ipath_pkeys[2] << 32) | - ((u64) dd->ipath_pkeys[3] << 48); - ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, " - "new pkey reg %llx\n", pd->port_port, - (unsigned long long) oldpkey, - (unsigned long long) pkey); - ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, - pkey); - } -} - -/* - * Initialize the port data with the receive buffer sizes - * so this can be done while the master port is locked. - * Otherwise, there is a race with a slave opening the port - * and seeing these fields uninitialized. 
- */ -static void init_user_egr_sizes(struct ipath_portdata *pd) -{ - struct ipath_devdata *dd = pd->port_dd; - unsigned egrperchunk, egrcnt, size; - - /* - * to avoid wasting a lot of memory, we allocate 32KB chunks of - * physically contiguous memory, advance through it until used up - * and then allocate more. Of course, we need memory to store those - * extra pointers, now. Started out with 256KB, but under heavy - * memory pressure (creating large files and then copying them over - * NFS while doing lots of MPI jobs), we hit some allocation - * failures, even though we can sleep... (2.6.10) Still get - * failures at 64K. 32K is the lowest we can go without wasting - * additional memory. - */ - size = 0x8000; - egrperchunk = size / dd->ipath_rcvegrbufsize; - egrcnt = dd->ipath_rcvegrcnt; - pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk; - pd->port_rcvegrbufs_perchunk = egrperchunk; - pd->port_rcvegrbuf_size = size; -} - -/** - * ipath_create_user_egr - allocate eager TID buffers - * @pd: the port to allocate TID buffers for - * - * This routine is now quite different for user and kernel, because - * the kernel uses skb's, for the accelerated network performance - * This is the user port version - * - * Allocate the eager TID buffers and program them into infinipath - * They are no longer completely contiguous, we do multiple allocation - * calls. - */ -static int ipath_create_user_egr(struct ipath_portdata *pd) -{ - struct ipath_devdata *dd = pd->port_dd; - unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; - size_t size; - int ret; - gfp_t gfp_flags; - - /* - * GFP_USER, but without GFP_FS, so buffer cache can be - * coalesced (we hope); otherwise, even at order 4, - * heavy filesystem activity makes these fail, and we can - * use compound pages. 
- */ - gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; - - egrcnt = dd->ipath_rcvegrcnt; - /* TID number offset for this port */ - egroff = (pd->port_port - 1) * egrcnt + dd->ipath_p0_rcvegrcnt; - egrsize = dd->ipath_rcvegrbufsize; - ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid " - "offset %x, egrsize %u\n", egrcnt, egroff, egrsize); - - chunk = pd->port_rcvegrbuf_chunks; - egrperchunk = pd->port_rcvegrbufs_perchunk; - size = pd->port_rcvegrbuf_size; - pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]), - GFP_KERNEL); - if (!pd->port_rcvegrbuf) { - ret = -ENOMEM; - goto bail; - } - pd->port_rcvegrbuf_phys = - kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]), - GFP_KERNEL); - if (!pd->port_rcvegrbuf_phys) { - ret = -ENOMEM; - goto bail_rcvegrbuf; - } - for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) { - - pd->port_rcvegrbuf[e] = dma_alloc_coherent( - &dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e], - gfp_flags); - - if (!pd->port_rcvegrbuf[e]) { - ret = -ENOMEM; - goto bail_rcvegrbuf_phys; - } - } - - pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0]; - - for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) { - dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk]; - unsigned i; - - for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) { - dd->ipath_f_put_tid(dd, e + egroff + - (u64 __iomem *) - ((char __iomem *) - dd->ipath_kregbase + - dd->ipath_rcvegrbase), - RCVHQ_RCV_TYPE_EAGER, pa); - pa += egrsize; - } - cond_resched(); /* don't hog the cpu */ - } - - ret = 0; - goto bail; - -bail_rcvegrbuf_phys: - for (e = 0; e < pd->port_rcvegrbuf_chunks && - pd->port_rcvegrbuf[e]; e++) { - dma_free_coherent(&dd->pcidev->dev, size, - pd->port_rcvegrbuf[e], - pd->port_rcvegrbuf_phys[e]); - - } - kfree(pd->port_rcvegrbuf_phys); - pd->port_rcvegrbuf_phys = NULL; -bail_rcvegrbuf: - kfree(pd->port_rcvegrbuf); - pd->port_rcvegrbuf = NULL; -bail: - return ret; -} - - -/* common code for the mappings on dma_alloc_coherent mem */ -static int 
ipath_mmap_mem(struct vm_area_struct *vma, - struct ipath_portdata *pd, unsigned len, int write_ok, - void *kvaddr, char *what) -{ - struct ipath_devdata *dd = pd->port_dd; - unsigned long pfn; - int ret; - - if ((vma->vm_end - vma->vm_start) > len) { - dev_info(&dd->pcidev->dev, - "FAIL on %s: len %lx > %x\n", what, - vma->vm_end - vma->vm_start, len); - ret = -EFAULT; - goto bail; - } - - if (!write_ok) { - if (vma->vm_flags & VM_WRITE) { - dev_info(&dd->pcidev->dev, - "%s must be mapped readonly\n", what); - ret = -EPERM; - goto bail; - } - - /* don't allow them to later change with mprotect */ - vma->vm_flags &= ~VM_MAYWRITE; - } - - pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT; - ret = remap_pfn_range(vma, vma->vm_start, pfn, - len, vma->vm_page_prot); - if (ret) - dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x " - "bytes r%c failed: %d\n", what, pd->port_port, - pfn, len, write_ok?'w':'o', ret); - else - ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes " - "r%c\n", what, pd->port_port, pfn, len, - write_ok?'w':'o'); -bail: - return ret; -} - -static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd, - u64 ureg) -{ - unsigned long phys; - int ret; - - /* - * This is real hardware, so use io_remap. This is the mechanism - * for the user process to update the head registers for their port - * in the chip. 
- */ - if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { - dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen " - "%lx > PAGE\n", vma->vm_end - vma->vm_start); - ret = -EFAULT; - } else { - phys = dd->ipath_physaddr + ureg; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; - ret = io_remap_pfn_range(vma, vma->vm_start, - phys >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, - vma->vm_page_prot); - } - return ret; -} - -static int mmap_piobufs(struct vm_area_struct *vma, - struct ipath_devdata *dd, - struct ipath_portdata *pd, - unsigned piobufs, unsigned piocnt) -{ - unsigned long phys; - int ret; - - /* - * When we map the PIO buffers in the chip, we want to map them as - * writeonly, no read possible. This prevents access to previous - * process data, and catches users who might try to read the i/o - * space due to a bug. - */ - if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) { - dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: " - "reqlen %lx > PAGE\n", - vma->vm_end - vma->vm_start); - ret = -EINVAL; - goto bail; - } - - phys = dd->ipath_physaddr + piobufs; - -#if defined(__powerpc__) - /* There isn't a generic way to specify writethrough mappings */ - pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; - pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU; - pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED; -#endif - - /* - * don't allow them to later change to readable with mprotect (for when - * not initially mapped readable, as is normally the case) - */ - vma->vm_flags &= ~VM_MAYREAD; - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; - - ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, - vma->vm_page_prot); -bail: - return ret; -} - -static int mmap_rcvegrbufs(struct vm_area_struct *vma, - struct ipath_portdata *pd) -{ - struct ipath_devdata *dd = pd->port_dd; - unsigned long start, size; - size_t total_size, i; - unsigned long pfn; - int ret; - - 
size = pd->port_rcvegrbuf_size; - total_size = pd->port_rcvegrbuf_chunks * size; - if ((vma->vm_end - vma->vm_start) > total_size) { - dev_info(&dd->pcidev->dev, "FAIL on egr bufs: " - "reqlen %lx > actual %lx\n", - vma->vm_end - vma->vm_start, - (unsigned long) total_size); - ret = -EINVAL; - goto bail; - } - - if (vma->vm_flags & VM_WRITE) { - dev_info(&dd->pcidev->dev, "Can't map eager buffers as " - "writable (flags=%lx)\n", vma->vm_flags); - ret = -EPERM; - goto bail; - } - /* don't allow them to later change to writeable with mprotect */ - vma->vm_flags &= ~VM_MAYWRITE; - - start = vma->vm_start; - - for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) { - pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT; - ret = remap_pfn_range(vma, start, pfn, size, - vma->vm_page_prot); - if (ret < 0) - goto bail; - } - ret = 0; - -bail: - return ret; -} - -/* - * ipath_file_vma_fault - handle a VMA page fault. - */ -static int ipath_file_vma_fault(struct vm_area_struct *vma, - struct vm_fault *vmf) -{ - struct page *page; - - page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT)); - if (!page) - return VM_FAULT_SIGBUS; - get_page(page); - vmf->page = page; - - return 0; -} - -static const struct vm_operations_struct ipath_file_vm_ops = { - .fault = ipath_file_vma_fault, -}; - -static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, - struct ipath_portdata *pd, unsigned subport) -{ - unsigned long len; - struct ipath_devdata *dd; - void *addr; - size_t size; - int ret = 0; - - /* If the port is not shared, all addresses should be physical */ - if (!pd->port_subport_cnt) - goto bail; - - dd = pd->port_dd; - size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; - - /* - * Each process has all the subport uregbase, rcvhdrq, and - * rcvegrbufs mmapped - as an array for all the processes, - * and also separately for this process. 
- */ - if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) { - addr = pd->subport_uregbase; - size = PAGE_SIZE * pd->port_subport_cnt; - } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) { - addr = pd->subport_rcvhdr_base; - size = pd->port_rcvhdrq_size * pd->port_subport_cnt; - } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) { - addr = pd->subport_rcvegrbuf; - size *= pd->port_subport_cnt; - } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase + - PAGE_SIZE * subport)) { - addr = pd->subport_uregbase + PAGE_SIZE * subport; - size = PAGE_SIZE; - } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base + - pd->port_rcvhdrq_size * subport)) { - addr = pd->subport_rcvhdr_base + - pd->port_rcvhdrq_size * subport; - size = pd->port_rcvhdrq_size; - } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf + - size * subport)) { - addr = pd->subport_rcvegrbuf + size * subport; - /* rcvegrbufs are read-only on the slave */ - if (vma->vm_flags & VM_WRITE) { - dev_info(&dd->pcidev->dev, - "Can't map eager buffers as " - "writable (flags=%lx)\n", vma->vm_flags); - ret = -EPERM; - goto bail; - } - /* - * Don't allow permission to later change to writeable - * with mprotect. - */ - vma->vm_flags &= ~VM_MAYWRITE; - } else { - goto bail; - } - len = vma->vm_end - vma->vm_start; - if (len > size) { - ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size); - ret = -EINVAL; - goto bail; - } - - vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; - vma->vm_ops = &ipath_file_vm_ops; - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; - ret = 1; - -bail: - return ret; -} - -/** - * ipath_mmap - mmap various structures into user space - * @fp: the file pointer - * @vma: the VM area - * - * We use this to have a shared buffer between the kernel and the user code - * for the rcvhdr queue, egr buffers, and the per-port user regs and pio - * buffers in the chip. We have the open and close entries so we can bump - * the ref count and keep the driver from being unloaded while still mapped. 
- */ -static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) -{ - struct ipath_portdata *pd; - struct ipath_devdata *dd; - u64 pgaddr, ureg; - unsigned piobufs, piocnt; - int ret; - - pd = port_fp(fp); - if (!pd) { - ret = -EINVAL; - goto bail; - } - dd = pd->port_dd; - - /* - * This is the ipath_do_user_init() code, mapping the shared buffers - * into the user process. The address referred to by vm_pgoff is the - * file offset passed via mmap(). For shared ports, this is the - * kernel vmalloc() address of the pages to share with the master. - * For non-shared or master ports, this is a physical address. - * We only do one mmap for each space mapped. - */ - pgaddr = vma->vm_pgoff << PAGE_SHIFT; - - /* - * Check for 0 in case one of the allocations failed, but user - * called mmap anyway. - */ - if (!pgaddr) { - ret = -EINVAL; - goto bail; - } - - ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n", - (unsigned long long) pgaddr, vma->vm_start, - vma->vm_end - vma->vm_start, dd->ipath_unit, - pd->port_port, subport_fp(fp)); - - /* - * Physical addresses must fit in 40 bits for our hardware. - * Check for kernel virtual addresses first, anything else must - * match a HW or memory address. 
- */ - ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp)); - if (ret) { - if (ret > 0) - ret = 0; - goto bail; - } - - ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port; - if (!pd->port_subport_cnt) { - /* port is not shared */ - piocnt = pd->port_piocnt; - piobufs = pd->port_piobufs; - } else if (!subport_fp(fp)) { - /* caller is the master */ - piocnt = (pd->port_piocnt / pd->port_subport_cnt) + - (pd->port_piocnt % pd->port_subport_cnt); - piobufs = pd->port_piobufs + - dd->ipath_palign * (pd->port_piocnt - piocnt); - } else { - unsigned slave = subport_fp(fp) - 1; - - /* caller is a slave */ - piocnt = pd->port_piocnt / pd->port_subport_cnt; - piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave; - } - - if (pgaddr == ureg) - ret = mmap_ureg(vma, dd, ureg); - else if (pgaddr == piobufs) - ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt); - else if (pgaddr == dd->ipath_pioavailregs_phys) - /* in-memory copy of pioavail registers */ - ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, - (void *) dd->ipath_pioavailregs_dma, - "pioavail registers"); - else if (pgaddr == pd->port_rcvegr_phys) - ret = mmap_rcvegrbufs(vma, pd); - else if (pgaddr == (u64) pd->port_rcvhdrq_phys) - /* - * The rcvhdrq itself; readonly except on HT (so have - * to allow writable mapping), multiple pages, contiguous - * from an i/o perspective. 
- */ - ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1, - pd->port_rcvhdrq, - "rcvhdrq"); - else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys) - /* in-memory copy of rcvhdrq tail register */ - ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, - pd->port_rcvhdrtail_kvaddr, - "rcvhdrq tail"); - else - ret = -EINVAL; - - vma->vm_private_data = NULL; - - if (ret < 0) - dev_info(&dd->pcidev->dev, - "Failure %d on off %llx len %lx\n", - -ret, (unsigned long long)pgaddr, - vma->vm_end - vma->vm_start); -bail: - return ret; -} - -static unsigned ipath_poll_hdrqfull(struct ipath_portdata *pd) -{ - unsigned pollflag = 0; - - if ((pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) && - pd->port_hdrqfull != pd->port_hdrqfull_poll) { - pollflag |= POLLIN | POLLRDNORM; - pd->port_hdrqfull_poll = pd->port_hdrqfull; - } - - return pollflag; -} - -static unsigned int ipath_poll_urgent(struct ipath_portdata *pd, - struct file *fp, - struct poll_table_struct *pt) -{ - unsigned pollflag = 0; - struct ipath_devdata *dd; - - dd = pd->port_dd; - - /* variable access in ipath_poll_hdrqfull() needs this */ - rmb(); - pollflag = ipath_poll_hdrqfull(pd); - - if (pd->port_urgent != pd->port_urgent_poll) { - pollflag |= POLLIN | POLLRDNORM; - pd->port_urgent_poll = pd->port_urgent; - } - - if (!pollflag) { - /* this saves a spin_lock/unlock in interrupt handler... */ - set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag); - /* flush waiting flag so don't miss an event... 
*/ - wmb(); - poll_wait(fp, &pd->port_wait, pt); - } - - return pollflag; -} - -static unsigned int ipath_poll_next(struct ipath_portdata *pd, - struct file *fp, - struct poll_table_struct *pt) -{ - u32 head; - u32 tail; - unsigned pollflag = 0; - struct ipath_devdata *dd; - - dd = pd->port_dd; - - /* variable access in ipath_poll_hdrqfull() needs this */ - rmb(); - pollflag = ipath_poll_hdrqfull(pd); - - head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port); - if (pd->port_rcvhdrtail_kvaddr) - tail = ipath_get_rcvhdrtail(pd); - else - tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); - - if (head != tail) - pollflag |= POLLIN | POLLRDNORM; - else { - /* this saves a spin_lock/unlock in interrupt handler */ - set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); - /* flush waiting flag so we don't miss an event */ - wmb(); - - set_bit(pd->port_port + dd->ipath_r_intravail_shift, - &dd->ipath_rcvctrl); - - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - - if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */ - ipath_write_ureg(dd, ur_rcvhdrhead, - dd->ipath_rhdrhead_intr_off | head, - pd->port_port); - - poll_wait(fp, &pd->port_wait, pt); - } - - return pollflag; -} - -static unsigned int ipath_poll(struct file *fp, - struct poll_table_struct *pt) -{ - struct ipath_portdata *pd; - unsigned pollflag; - - pd = port_fp(fp); - if (!pd) - pollflag = 0; - else if (pd->poll_type & IPATH_POLL_TYPE_URGENT) - pollflag = ipath_poll_urgent(pd, fp, pt); - else - pollflag = ipath_poll_next(pd, fp, pt); - - return pollflag; -} - -static int ipath_supports_subports(int user_swmajor, int user_swminor) -{ - /* no subport implementation prior to software version 1.3 */ - return (user_swmajor > 1) || (user_swminor >= 3); -} - -static int ipath_compatible_subports(int user_swmajor, int user_swminor) -{ - /* this code is written long-hand for clarity */ - if (IPATH_USER_SWMAJOR != user_swmajor) { - /* no promise of compatibility if major mismatch */ 
- return 0; - } - if (IPATH_USER_SWMAJOR == 1) { - switch (IPATH_USER_SWMINOR) { - case 0: - case 1: - case 2: - /* no subport implementation so cannot be compatible */ - return 0; - case 3: - /* 3 is only compatible with itself */ - return user_swminor == 3; - default: - /* >= 4 are compatible (or are expected to be) */ - return user_swminor >= 4; - } - } - /* make no promises yet for future major versions */ - return 0; -} - -static int init_subports(struct ipath_devdata *dd, - struct ipath_portdata *pd, - const struct ipath_user_info *uinfo) -{ - int ret = 0; - unsigned num_subports; - size_t size; - - /* - * If the user is requesting zero subports, - * skip the subport allocation. - */ - if (uinfo->spu_subport_cnt <= 0) - goto bail; - - /* Self-consistency check for ipath_compatible_subports() */ - if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) && - !ipath_compatible_subports(IPATH_USER_SWMAJOR, - IPATH_USER_SWMINOR)) { - dev_info(&dd->pcidev->dev, - "Inconsistent ipath_compatible_subports()\n"); - goto bail; - } - - /* Check for subport compatibility */ - if (!ipath_compatible_subports(uinfo->spu_userversion >> 16, - uinfo->spu_userversion & 0xffff)) { - dev_info(&dd->pcidev->dev, - "Mismatched user version (%d.%d) and driver " - "version (%d.%d) while port sharing. Ensure " - "that driver and library are from the same " - "release.\n", - (int) (uinfo->spu_userversion >> 16), - (int) (uinfo->spu_userversion & 0xffff), - IPATH_USER_SWMAJOR, - IPATH_USER_SWMINOR); - goto bail; - } - if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) { - ret = -EINVAL; - goto bail; - } - - num_subports = uinfo->spu_subport_cnt; - pd->subport_uregbase = vzalloc(PAGE_SIZE * num_subports); - if (!pd->subport_uregbase) { - ret = -ENOMEM; - goto bail; - } - /* Note: pd->port_rcvhdrq_size isn't initialized yet. 
*/ - size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * - sizeof(u32), PAGE_SIZE) * num_subports; - pd->subport_rcvhdr_base = vzalloc(size); - if (!pd->subport_rcvhdr_base) { - ret = -ENOMEM; - goto bail_ureg; - } - - pd->subport_rcvegrbuf = vzalloc(pd->port_rcvegrbuf_chunks * - pd->port_rcvegrbuf_size * - num_subports); - if (!pd->subport_rcvegrbuf) { - ret = -ENOMEM; - goto bail_rhdr; - } - - pd->port_subport_cnt = uinfo->spu_subport_cnt; - pd->port_subport_id = uinfo->spu_subport_id; - pd->active_slaves = 1; - set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag); - goto bail; - -bail_rhdr: - vfree(pd->subport_rcvhdr_base); -bail_ureg: - vfree(pd->subport_uregbase); - pd->subport_uregbase = NULL; -bail: - return ret; -} - -static int try_alloc_port(struct ipath_devdata *dd, int port, - struct file *fp, - const struct ipath_user_info *uinfo) -{ - struct ipath_portdata *pd; - int ret; - - if (!(pd = dd->ipath_pd[port])) { - void *ptmp; - - pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL); - - /* - * Allocate memory for use in ipath_tid_update() just once - * at open, not per call. Reduces cost of expected send - * setup. 
- */ - ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) + - dd->ipath_rcvtidcnt * sizeof(struct page **), - GFP_KERNEL); - if (!pd || !ptmp) { - ipath_dev_err(dd, "Unable to allocate portdata " - "memory, failing open\n"); - ret = -ENOMEM; - kfree(pd); - kfree(ptmp); - goto bail; - } - dd->ipath_pd[port] = pd; - dd->ipath_pd[port]->port_port = port; - dd->ipath_pd[port]->port_dd = dd; - dd->ipath_pd[port]->port_tid_pg_list = ptmp; - init_waitqueue_head(&dd->ipath_pd[port]->port_wait); - } - if (!pd->port_cnt) { - pd->userversion = uinfo->spu_userversion; - init_user_egr_sizes(pd); - if ((ret = init_subports(dd, pd, uinfo)) != 0) - goto bail; - ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n", - current->comm, current->pid, dd->ipath_unit, - port); - pd->port_cnt = 1; - port_fp(fp) = pd; - pd->port_pid = get_pid(task_pid(current)); - strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm)); - ipath_stats.sps_ports++; - ret = 0; - } else - ret = -EBUSY; - -bail: - return ret; -} - -static inline int usable(struct ipath_devdata *dd) -{ - return dd && - (dd->ipath_flags & IPATH_PRESENT) && - dd->ipath_kregbase && - dd->ipath_lid && - !(dd->ipath_flags & (IPATH_LINKDOWN | IPATH_DISABLED - | IPATH_LINKUNK)); -} - -static int find_free_port(int unit, struct file *fp, - const struct ipath_user_info *uinfo) -{ - struct ipath_devdata *dd = ipath_lookup(unit); - int ret, i; - - if (!dd) { - ret = -ENODEV; - goto bail; - } - - if (!usable(dd)) { - ret = -ENETDOWN; - goto bail; - } - - for (i = 1; i < dd->ipath_cfgports; i++) { - ret = try_alloc_port(dd, i, fp, uinfo); - if (ret != -EBUSY) - goto bail; - } - ret = -EBUSY; - -bail: - return ret; -} - -static int find_best_unit(struct file *fp, - const struct ipath_user_info *uinfo) -{ - int ret = 0, i, prefunit = -1, devmax; - int maxofallports, npresent, nup; - int ndev; - - devmax = ipath_count_units(&npresent, &nup, &maxofallports); - - /* - * This code is present to allow a knowledgeable person to - * specify the 
layout of processes to processors before opening - * this driver, and then we'll assign the process to the "closest" - * InfiniPath chip to that processor (we assume reasonable connectivity, - * for now). This code assumes that if affinity has been set - * before this point, that at most one cpu is set; for now this - * is reasonable. I check for both cpumask_empty() and cpumask_full(), - * in case some kernel variant sets none of the bits when no - * affinity is set. 2.6.11 and 12 kernels have all present - * cpus set. Some day we'll have to fix it up further to handle - * a cpu subset. This algorithm fails for two HT chips connected - * in tunnel fashion. Eventually this needs real topology - * information. There may be some issues with dual core numbering - * as well. This needs more work prior to release. - */ - if (!cpumask_empty(tsk_cpus_allowed(current)) && - !cpumask_full(tsk_cpus_allowed(current))) { - int ncpus = num_online_cpus(), curcpu = -1, nset = 0; - get_online_cpus(); - for_each_online_cpu(i) - if (cpumask_test_cpu(i, tsk_cpus_allowed(current))) { - ipath_cdbg(PROC, "%s[%u] affinity set for " - "cpu %d/%d\n", current->comm, - current->pid, i, ncpus); - curcpu = i; - nset++; - } - put_online_cpus(); - if (curcpu != -1 && nset != ncpus) { - if (npresent) { - prefunit = curcpu / (ncpus / npresent); - ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, " - "%d cpus/chip, select unit %d\n", - current->comm, current->pid, - npresent, ncpus, ncpus / npresent, - prefunit); - } - } - } - - /* - * user ports start at 1, kernel port is 0 - * For now, we do round-robin access across all chips - */ - - if (prefunit != -1) - devmax = prefunit + 1; -recheck: - for (i = 1; i < maxofallports; i++) { - for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax; - ndev++) { - struct ipath_devdata *dd = ipath_lookup(ndev); - - if (!usable(dd)) - continue; /* can't use this unit */ - if (i >= dd->ipath_cfgports) - /* - * Maxed out on users of this unit. Try - * next. 
- */ - continue; - ret = try_alloc_port(dd, i, fp, uinfo); - if (!ret) - goto done; - } - } - - if (npresent) { - if (nup == 0) { - ret = -ENETDOWN; - ipath_dbg("No ports available (none initialized " - "and ready)\n"); - } else { - if (prefunit > 0) { - /* if started above 0, retry from 0 */ - ipath_cdbg(PROC, - "%s[%u] no ports on prefunit " - "%d, clear and re-check\n", - current->comm, current->pid, - prefunit); - devmax = ipath_count_units(NULL, NULL, - NULL); - prefunit = -1; - goto recheck; - } - ret = -EBUSY; - ipath_dbg("No ports available\n"); - } - } else { - ret = -ENXIO; - ipath_dbg("No boards found\n"); - } - -done: - return ret; -} - -static int find_shared_port(struct file *fp, - const struct ipath_user_info *uinfo) -{ - int devmax, ndev, i; - int ret = 0; - - devmax = ipath_count_units(NULL, NULL, NULL); - - for (ndev = 0; ndev < devmax; ndev++) { - struct ipath_devdata *dd = ipath_lookup(ndev); - - if (!usable(dd)) - continue; - for (i = 1; i < dd->ipath_cfgports; i++) { - struct ipath_portdata *pd = dd->ipath_pd[i]; - - /* Skip ports which are not yet open */ - if (!pd || !pd->port_cnt) - continue; - /* Skip port if it doesn't match the requested one */ - if (pd->port_subport_id != uinfo->spu_subport_id) - continue; - /* Verify the sharing process matches the master */ - if (pd->port_subport_cnt != uinfo->spu_subport_cnt || - pd->userversion != uinfo->spu_userversion || - pd->port_cnt >= pd->port_subport_cnt) { - ret = -EINVAL; - goto done; - } - port_fp(fp) = pd; - subport_fp(fp) = pd->port_cnt++; - pd->port_subpid[subport_fp(fp)] = - get_pid(task_pid(current)); - tidcursor_fp(fp) = 0; - pd->active_slaves |= 1 << subport_fp(fp); - ipath_cdbg(PROC, - "%s[%u] %u sharing %s[%u] unit:port %u:%u\n", - current->comm, current->pid, - subport_fp(fp), - pd->port_comm, pid_nr(pd->port_pid), - dd->ipath_unit, pd->port_port); - ret = 1; - goto done; - } - } - -done: - return ret; -} - -static int ipath_open(struct inode *in, struct file *fp) -{ - /* The 
real work is performed later in ipath_assign_port() */ - fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL); - return fp->private_data ? 0 : -ENOMEM; -} - -/* Get port early, so can set affinity prior to memory allocation */ -static int ipath_assign_port(struct file *fp, - const struct ipath_user_info *uinfo) -{ - int ret; - int i_minor; - unsigned swmajor, swminor; - - /* Check to be sure we haven't already initialized this file */ - if (port_fp(fp)) { - ret = -EINVAL; - goto done; - } - - /* for now, if major version is different, bail */ - swmajor = uinfo->spu_userversion >> 16; - if (swmajor != IPATH_USER_SWMAJOR) { - ipath_dbg("User major version %d not same as driver " - "major %d\n", uinfo->spu_userversion >> 16, - IPATH_USER_SWMAJOR); - ret = -ENODEV; - goto done; - } - - swminor = uinfo->spu_userversion & 0xffff; - if (swminor != IPATH_USER_SWMINOR) - ipath_dbg("User minor version %d not same as driver " - "minor %d\n", swminor, IPATH_USER_SWMINOR); - - mutex_lock(&ipath_mutex); - - if (ipath_compatible_subports(swmajor, swminor) && - uinfo->spu_subport_cnt && - (ret = find_shared_port(fp, uinfo))) { - if (ret > 0) - ret = 0; - goto done_chk_sdma; - } - - i_minor = iminor(file_inode(fp)) - IPATH_USER_MINOR_BASE; - ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n", - (long)file_inode(fp)->i_rdev, i_minor); - - if (i_minor) - ret = find_free_port(i_minor - 1, fp, uinfo); - else - ret = find_best_unit(fp, uinfo); - -done_chk_sdma: - if (!ret) { - struct ipath_filedata *fd = fp->private_data; - const struct ipath_portdata *pd = fd->pd; - const struct ipath_devdata *dd = pd->port_dd; - - fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev, - dd->ipath_unit, - pd->port_port, - fd->subport); - - if (!fd->pq) - ret = -ENOMEM; - } - - mutex_unlock(&ipath_mutex); - -done: - return ret; -} - - -static int ipath_do_user_init(struct file *fp, - const struct ipath_user_info *uinfo) -{ - int ret; - struct ipath_portdata *pd = port_fp(fp); - struct 
ipath_devdata *dd; - u32 head32; - - /* Subports don't need to initialize anything since master did it. */ - if (subport_fp(fp)) { - ret = wait_event_interruptible(pd->port_wait, - !test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag)); - goto done; - } - - dd = pd->port_dd; - - if (uinfo->spu_rcvhdrsize) { - ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize); - if (ret) - goto done; - } - - /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */ - - /* some ports may get extra buffers, calculate that here */ - if (pd->port_port <= dd->ipath_ports_extrabuf) - pd->port_piocnt = dd->ipath_pbufsport + 1; - else - pd->port_piocnt = dd->ipath_pbufsport; - - /* for right now, kernel piobufs are at end, so port 1 is at 0 */ - if (pd->port_port <= dd->ipath_ports_extrabuf) - pd->port_pio_base = (dd->ipath_pbufsport + 1) - * (pd->port_port - 1); - else - pd->port_pio_base = dd->ipath_ports_extrabuf + - dd->ipath_pbufsport * (pd->port_port - 1); - pd->port_piobufs = dd->ipath_piobufbase + - pd->port_pio_base * dd->ipath_palign; - ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u," - " first pio %u\n", pd->port_port, pd->port_piobufs, - pd->port_piocnt, pd->port_pio_base); - ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0); - - /* - * Now allocate the rcvhdr Q and eager TIDs; skip the TID - * array for time being. If pd->port_port > chip-supported, - * we need to do extra stuff here to handle by handling overflow - * through port 0, someday - */ - ret = ipath_create_rcvhdrq(dd, pd); - if (!ret) - ret = ipath_create_user_egr(pd); - if (ret) - goto done; - - /* - * set the eager head register for this port to the current values - * of the tail pointers, since we don't know if they were - * updated on last use of the port. 
- */ - head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port); - ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port); - pd->port_lastrcvhdrqtail = -1; - ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n", - pd->port_port, head32); - pd->port_tidcursor = 0; /* start at beginning after open */ - - /* initialize poll variables... */ - pd->port_urgent = 0; - pd->port_urgent_poll = 0; - pd->port_hdrqfull_poll = pd->port_hdrqfull; - - /* - * Now enable the port for receive. - * For chips that are set to DMA the tail register to memory - * when they change (and when the update bit transitions from - * 0 to 1. So for those chips, we turn it off and then back on. - * This will (very briefly) affect any other open ports, but the - * duration is very short, and therefore isn't an issue. We - * explicitly set the in-memory tail copy to 0 beforehand, so we - * don't have to wait to be sure the DMA update has happened - * (chip resets head/tail to 0 on transition to enable). - */ - set_bit(dd->ipath_r_portenable_shift + pd->port_port, - &dd->ipath_rcvctrl); - if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) { - if (pd->port_rcvhdrtail_kvaddr) - ipath_clear_rcvhdrtail(pd); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl & - ~(1ULL << dd->ipath_r_tailupd_shift)); - } - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - /* Notify any waiting slaves */ - if (pd->port_subport_cnt) { - clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag); - wake_up(&pd->port_wait); - } -done: - return ret; -} - -/** - * unlock_exptid - unlock any expected TID entries port still had in use - * @pd: port - * - * We don't actually update the chip here, because we do a bulk update - * below, using ipath_f_clear_tids. 
- */ -static void unlock_expected_tids(struct ipath_portdata *pd) -{ - struct ipath_devdata *dd = pd->port_dd; - int port_tidbase = pd->port_port * dd->ipath_rcvtidcnt; - int i, cnt = 0, maxtid = port_tidbase + dd->ipath_rcvtidcnt; - - ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n", - pd->port_port); - for (i = port_tidbase; i < maxtid; i++) { - struct page *ps = dd->ipath_pageshadow[i]; - - if (!ps) - continue; - - dd->ipath_pageshadow[i] = NULL; - pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i], - PAGE_SIZE, PCI_DMA_FROMDEVICE); - ipath_release_user_pages_on_close(&ps, 1); - cnt++; - ipath_stats.sps_pageunlocks++; - } - if (cnt) - ipath_cdbg(VERBOSE, "Port %u locked %u expTID entries\n", - pd->port_port, cnt); - - if (ipath_stats.sps_pagelocks || ipath_stats.sps_pageunlocks) - ipath_cdbg(VERBOSE, "%llu pages locked, %llu unlocked\n", - (unsigned long long) ipath_stats.sps_pagelocks, - (unsigned long long) - ipath_stats.sps_pageunlocks); -} - -static int ipath_close(struct inode *in, struct file *fp) -{ - int ret = 0; - struct ipath_filedata *fd; - struct ipath_portdata *pd; - struct ipath_devdata *dd; - unsigned long flags; - unsigned port; - struct pid *pid; - - ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n", - (long)in->i_rdev, fp->private_data); - - mutex_lock(&ipath_mutex); - - fd = fp->private_data; - fp->private_data = NULL; - pd = fd->pd; - if (!pd) { - mutex_unlock(&ipath_mutex); - goto bail; - } - - dd = pd->port_dd; - - /* drain user sdma queue */ - ipath_user_sdma_queue_drain(dd, fd->pq); - ipath_user_sdma_queue_destroy(fd->pq); - - if (--pd->port_cnt) { - /* - * XXX If the master closes the port before the slave(s), - * revoke the mmap for the eager receive queue so - * the slave(s) don't wait for receive data forever. 
- */ - pd->active_slaves &= ~(1 << fd->subport); - put_pid(pd->port_subpid[fd->subport]); - pd->port_subpid[fd->subport] = NULL; - mutex_unlock(&ipath_mutex); - goto bail; - } - /* early; no interrupt users after this */ - spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); - port = pd->port_port; - dd->ipath_pd[port] = NULL; - pid = pd->port_pid; - pd->port_pid = NULL; - spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); - - if (pd->port_rcvwait_to || pd->port_piowait_to - || pd->port_rcvnowait || pd->port_pionowait) { - ipath_cdbg(VERBOSE, "port%u, %u rcv, %u pio wait timeo; " - "%u rcv %u, pio already\n", - pd->port_port, pd->port_rcvwait_to, - pd->port_piowait_to, pd->port_rcvnowait, - pd->port_pionowait); - pd->port_rcvwait_to = pd->port_piowait_to = - pd->port_rcvnowait = pd->port_pionowait = 0; - } - if (pd->port_flag) { - ipath_cdbg(PROC, "port %u port_flag set: 0x%lx\n", - pd->port_port, pd->port_flag); - pd->port_flag = 0; - } - - if (dd->ipath_kregbase) { - /* atomically clear receive enable port and intr avail. */ - clear_bit(dd->ipath_r_portenable_shift + port, - &dd->ipath_rcvctrl); - clear_bit(pd->port_port + dd->ipath_r_intravail_shift, - &dd->ipath_rcvctrl); - ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - /* and read back from chip to be sure that nothing - * else is in flight when we do the rest */ - (void)ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - - /* clean up the pkeys for this port user */ - ipath_clean_part_key(pd, dd); - /* - * be paranoid, and never write 0's to these, just use an - * unused part of the port 0 tail page. Of course, - * rcvhdraddr points to a large chunk of memory, so this - * could still trash things, but at least it won't trash - * page 0, and by disabling the port, it should stop "soon", - * even if a packet or two is in already in flight after we - * disabled the port. 
- */ - ipath_write_kreg_port(dd, - dd->ipath_kregs->kr_rcvhdrtailaddr, port, - dd->ipath_dummy_hdrq_phys); - ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr, - pd->port_port, dd->ipath_dummy_hdrq_phys); - - ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt); - ipath_chg_pioavailkernel(dd, pd->port_pio_base, - pd->port_piocnt, 1); - - dd->ipath_f_clear_tids(dd, pd->port_port); - - if (dd->ipath_pageshadow) - unlock_expected_tids(pd); - ipath_stats.sps_ports--; - ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n", - pd->port_comm, pid_nr(pid), - dd->ipath_unit, port); - } - - put_pid(pid); - mutex_unlock(&ipath_mutex); - ipath_free_pddata(dd, pd); /* after releasing the mutex */ - -bail: - kfree(fd); - return ret; -} - -static int ipath_port_info(struct ipath_portdata *pd, u16 subport, - struct ipath_port_info __user *uinfo) -{ - struct ipath_port_info info; - int nup; - int ret; - size_t sz; - - (void) ipath_count_units(NULL, &nup, NULL); - info.num_active = nup; - info.unit = pd->port_dd->ipath_unit; - info.port = pd->port_port; - info.subport = subport; - /* Don't return new fields if old library opened the port. */ - if (ipath_supports_subports(pd->userversion >> 16, - pd->userversion & 0xffff)) { - /* Number of user ports available for this device. 
*/ - info.num_ports = pd->port_dd->ipath_cfgports - 1; - info.num_subports = pd->port_subport_cnt; - sz = sizeof(info); - } else - sz = sizeof(info) - 2 * sizeof(u16); - - if (copy_to_user(uinfo, &info, sz)) { - ret = -EFAULT; - goto bail; - } - ret = 0; - -bail: - return ret; -} - -static int ipath_get_slave_info(struct ipath_portdata *pd, - void __user *slave_mask_addr) -{ - int ret = 0; - - if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32))) - ret = -EFAULT; - return ret; -} - -static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq, - u32 __user *inflightp) -{ - const u32 val = ipath_user_sdma_inflight_counter(pq); - - if (put_user(val, inflightp)) - return -EFAULT; - - return 0; -} - -static int ipath_sdma_get_complete(struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq, - u32 __user *completep) -{ - u32 val; - int err; - - err = ipath_user_sdma_make_progress(dd, pq); - if (err < 0) - return err; - - val = ipath_user_sdma_complete_counter(pq); - if (put_user(val, completep)) - return -EFAULT; - - return 0; -} - -static ssize_t ipath_write(struct file *fp, const char __user *data, - size_t count, loff_t *off) -{ - const struct ipath_cmd __user *ucmd; - struct ipath_portdata *pd; - const void __user *src; - size_t consumed, copy; - struct ipath_cmd cmd; - ssize_t ret = 0; - void *dest; - - if (count < sizeof(cmd.type)) { - ret = -EINVAL; - goto bail; - } - - ucmd = (const struct ipath_cmd __user *) data; - - if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) { - ret = -EFAULT; - goto bail; - } - - consumed = sizeof(cmd.type); - - switch (cmd.type) { - case IPATH_CMD_ASSIGN_PORT: - case __IPATH_CMD_USER_INIT: - case IPATH_CMD_USER_INIT: - copy = sizeof(cmd.cmd.user_info); - dest = &cmd.cmd.user_info; - src = &ucmd->cmd.user_info; - break; - case IPATH_CMD_RECV_CTRL: - copy = sizeof(cmd.cmd.recv_ctrl); - dest = &cmd.cmd.recv_ctrl; - src = &ucmd->cmd.recv_ctrl; - break; - case IPATH_CMD_PORT_INFO: - copy = 
sizeof(cmd.cmd.port_info); - dest = &cmd.cmd.port_info; - src = &ucmd->cmd.port_info; - break; - case IPATH_CMD_TID_UPDATE: - case IPATH_CMD_TID_FREE: - copy = sizeof(cmd.cmd.tid_info); - dest = &cmd.cmd.tid_info; - src = &ucmd->cmd.tid_info; - break; - case IPATH_CMD_SET_PART_KEY: - copy = sizeof(cmd.cmd.part_key); - dest = &cmd.cmd.part_key; - src = &ucmd->cmd.part_key; - break; - case __IPATH_CMD_SLAVE_INFO: - copy = sizeof(cmd.cmd.slave_mask_addr); - dest = &cmd.cmd.slave_mask_addr; - src = &ucmd->cmd.slave_mask_addr; - break; - case IPATH_CMD_PIOAVAILUPD: // force an update of PIOAvail reg - copy = 0; - src = NULL; - dest = NULL; - break; - case IPATH_CMD_POLL_TYPE: - copy = sizeof(cmd.cmd.poll_type); - dest = &cmd.cmd.poll_type; - src = &ucmd->cmd.poll_type; - break; - case IPATH_CMD_ARMLAUNCH_CTRL: - copy = sizeof(cmd.cmd.armlaunch_ctrl); - dest = &cmd.cmd.armlaunch_ctrl; - src = &ucmd->cmd.armlaunch_ctrl; - break; - case IPATH_CMD_SDMA_INFLIGHT: - copy = sizeof(cmd.cmd.sdma_inflight); - dest = &cmd.cmd.sdma_inflight; - src = &ucmd->cmd.sdma_inflight; - break; - case IPATH_CMD_SDMA_COMPLETE: - copy = sizeof(cmd.cmd.sdma_complete); - dest = &cmd.cmd.sdma_complete; - src = &ucmd->cmd.sdma_complete; - break; - default: - ret = -EINVAL; - goto bail; - } - - if (copy) { - if ((count - consumed) < copy) { - ret = -EINVAL; - goto bail; - } - - if (copy_from_user(dest, src, copy)) { - ret = -EFAULT; - goto bail; - } - - consumed += copy; - } - - pd = port_fp(fp); - if (!pd && cmd.type != __IPATH_CMD_USER_INIT && - cmd.type != IPATH_CMD_ASSIGN_PORT) { - ret = -EINVAL; - goto bail; - } - - switch (cmd.type) { - case IPATH_CMD_ASSIGN_PORT: - ret = ipath_assign_port(fp, &cmd.cmd.user_info); - if (ret) - goto bail; - break; - case __IPATH_CMD_USER_INIT: - /* backwards compatibility, get port first */ - ret = ipath_assign_port(fp, &cmd.cmd.user_info); - if (ret) - goto bail; - /* and fall through to current version. 
*/ - case IPATH_CMD_USER_INIT: - ret = ipath_do_user_init(fp, &cmd.cmd.user_info); - if (ret) - goto bail; - ret = ipath_get_base_info( - fp, (void __user *) (unsigned long) - cmd.cmd.user_info.spu_base_info, - cmd.cmd.user_info.spu_base_info_size); - break; - case IPATH_CMD_RECV_CTRL: - ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl); - break; - case IPATH_CMD_PORT_INFO: - ret = ipath_port_info(pd, subport_fp(fp), - (struct ipath_port_info __user *) - (unsigned long) cmd.cmd.port_info); - break; - case IPATH_CMD_TID_UPDATE: - ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info); - break; - case IPATH_CMD_TID_FREE: - ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info); - break; - case IPATH_CMD_SET_PART_KEY: - ret = ipath_set_part_key(pd, cmd.cmd.part_key); - break; - case __IPATH_CMD_SLAVE_INFO: - ret = ipath_get_slave_info(pd, - (void __user *) (unsigned long) - cmd.cmd.slave_mask_addr); - break; - case IPATH_CMD_PIOAVAILUPD: - ipath_force_pio_avail_update(pd->port_dd); - break; - case IPATH_CMD_POLL_TYPE: - pd->poll_type = cmd.cmd.poll_type; - break; - case IPATH_CMD_ARMLAUNCH_CTRL: - if (cmd.cmd.armlaunch_ctrl) - ipath_enable_armlaunch(pd->port_dd); - else - ipath_disable_armlaunch(pd->port_dd); - break; - case IPATH_CMD_SDMA_INFLIGHT: - ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp), - (u32 __user *) (unsigned long) - cmd.cmd.sdma_inflight); - break; - case IPATH_CMD_SDMA_COMPLETE: - ret = ipath_sdma_get_complete(pd->port_dd, - user_sdma_queue_fp(fp), - (u32 __user *) (unsigned long) - cmd.cmd.sdma_complete); - break; - } - - if (ret >= 0) - ret = consumed; - -bail: - return ret; -} - -static ssize_t ipath_write_iter(struct kiocb *iocb, struct iov_iter *from) -{ - struct file *filp = iocb->ki_filp; - struct ipath_filedata *fp = filp->private_data; - struct ipath_portdata *pd = port_fp(filp); - struct ipath_user_sdma_queue *pq = fp->pq; - - if (!iter_is_iovec(from) || !from->nr_segs) - return -EINVAL; - - return 
ipath_user_sdma_writev(pd->port_dd, pq, from->iov, from->nr_segs); -} - -static struct class *ipath_class; - -static int init_cdev(int minor, char *name, const struct file_operations *fops, - struct cdev **cdevp, struct device **devp) -{ - const dev_t dev = MKDEV(IPATH_MAJOR, minor); - struct cdev *cdev = NULL; - struct device *device = NULL; - int ret; - - cdev = cdev_alloc(); - if (!cdev) { - printk(KERN_ERR IPATH_DRV_NAME - ": Could not allocate cdev for minor %d, %s\n", - minor, name); - ret = -ENOMEM; - goto done; - } - - cdev->owner = THIS_MODULE; - cdev->ops = fops; - kobject_set_name(&cdev->kobj, name); - - ret = cdev_add(cdev, dev, 1); - if (ret < 0) { - printk(KERN_ERR IPATH_DRV_NAME - ": Could not add cdev for minor %d, %s (err %d)\n", - minor, name, -ret); - goto err_cdev; - } - - device = device_create(ipath_class, NULL, dev, NULL, name); - - if (IS_ERR(device)) { - ret = PTR_ERR(device); - printk(KERN_ERR IPATH_DRV_NAME ": Could not create " - "device for minor %d, %s (err %d)\n", - minor, name, -ret); - goto err_cdev; - } - - goto done; - -err_cdev: - cdev_del(cdev); - cdev = NULL; - -done: - if (ret >= 0) { - *cdevp = cdev; - *devp = device; - } else { - *cdevp = NULL; - *devp = NULL; - } - - return ret; -} - -int ipath_cdev_init(int minor, char *name, const struct file_operations *fops, - struct cdev **cdevp, struct device **devp) -{ - return init_cdev(minor, name, fops, cdevp, devp); -} - -static void cleanup_cdev(struct cdev **cdevp, - struct device **devp) -{ - struct device *dev = *devp; - - if (dev) { - device_unregister(dev); - *devp = NULL; - } - - if (*cdevp) { - cdev_del(*cdevp); - *cdevp = NULL; - } -} - -void ipath_cdev_cleanup(struct cdev **cdevp, - struct device **devp) -{ - cleanup_cdev(cdevp, devp); -} - -static struct cdev *wildcard_cdev; -static struct device *wildcard_dev; - -static const dev_t dev = MKDEV(IPATH_MAJOR, 0); - -static int user_init(void) -{ - int ret; - - ret = register_chrdev_region(dev, IPATH_NMINORS, 
IPATH_DRV_NAME); - if (ret < 0) { - printk(KERN_ERR IPATH_DRV_NAME ": Could not register " - "chrdev region (err %d)\n", -ret); - goto done; - } - - ipath_class = class_create(THIS_MODULE, IPATH_DRV_NAME); - - if (IS_ERR(ipath_class)) { - ret = PTR_ERR(ipath_class); - printk(KERN_ERR IPATH_DRV_NAME ": Could not create " - "device class (err %d)\n", -ret); - goto bail; - } - - goto done; -bail: - unregister_chrdev_region(dev, IPATH_NMINORS); -done: - return ret; -} - -static void user_cleanup(void) -{ - if (ipath_class) { - class_destroy(ipath_class); - ipath_class = NULL; - } - - unregister_chrdev_region(dev, IPATH_NMINORS); -} - -static atomic_t user_count = ATOMIC_INIT(0); -static atomic_t user_setup = ATOMIC_INIT(0); - -int ipath_user_add(struct ipath_devdata *dd) -{ - char name[10]; - int ret; - - if (atomic_inc_return(&user_count) == 1) { - ret = user_init(); - if (ret < 0) { - ipath_dev_err(dd, "Unable to set up user support: " - "error %d\n", -ret); - goto bail; - } - ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev, - &wildcard_dev); - if (ret < 0) { - ipath_dev_err(dd, "Could not create wildcard " - "minor: error %d\n", -ret); - goto bail_user; - } - - atomic_set(&user_setup, 1); - } - - snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit); - - ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops, - &dd->user_cdev, &dd->user_dev); - if (ret < 0) - ipath_dev_err(dd, "Could not create user minor %d, %s\n", - dd->ipath_unit + 1, name); - - goto bail; - -bail_user: - user_cleanup(); -bail: - return ret; -} - -void ipath_user_remove(struct ipath_devdata *dd) -{ - cleanup_cdev(&dd->user_cdev, &dd->user_dev); - - if (atomic_dec_return(&user_count) == 0) { - if (atomic_read(&user_setup) == 0) - goto bail; - - cleanup_cdev(&wildcard_cdev, &wildcard_dev); - user_cleanup(); - - atomic_set(&user_setup, 0); - } -bail: - return; -} diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c deleted file mode 100644 
index 25422a3a7..000000000 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ /dev/null @@ -1,422 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/mount.h> -#include <linux/pagemap.h> -#include <linux/init.h> -#include <linux/namei.h> -#include <linux/slab.h> - -#include "ipath_kernel.h" - -#define IPATHFS_MAGIC 0x726a77 - -static struct super_block *ipath_super; - -static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, - umode_t mode, const struct file_operations *fops, - void *data) -{ - int error; - struct inode *inode = new_inode(dir->i_sb); - - if (!inode) { - error = -EPERM; - goto bail; - } - - inode->i_ino = get_next_ino(); - inode->i_mode = mode; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_private = data; - if (S_ISDIR(mode)) { - inode->i_op = &simple_dir_inode_operations; - inc_nlink(inode); - inc_nlink(dir); - } - - inode->i_fop = fops; - - d_instantiate(dentry, inode); - error = 0; - -bail: - return error; -} - -static int create_file(const char *name, umode_t mode, - struct dentry *parent, struct dentry **dentry, - const struct file_operations *fops, void *data) -{ - int error; - - mutex_lock(&d_inode(parent)->i_mutex); - *dentry = lookup_one_len(name, parent, strlen(name)); - if (!IS_ERR(*dentry)) - error = ipathfs_mknod(d_inode(parent), *dentry, - mode, fops, data); - else - error = PTR_ERR(*dentry); - mutex_unlock(&d_inode(parent)->i_mutex); - - return error; -} - -static ssize_t atomic_stats_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - return simple_read_from_buffer(buf, count, ppos, &ipath_stats, - sizeof ipath_stats); -} - -static const struct file_operations atomic_stats_ops = { - .read = atomic_stats_read, - .llseek = default_llseek, -}; - -static ssize_t atomic_counters_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - struct infinipath_counters counters; - struct ipath_devdata *dd; - - dd = file_inode(file)->i_private; - dd->ipath_f_read_counters(dd, &counters); - - return simple_read_from_buffer(buf, count, 
ppos, &counters, - sizeof counters); -} - -static const struct file_operations atomic_counters_ops = { - .read = atomic_counters_read, - .llseek = default_llseek, -}; - -static ssize_t flash_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - struct ipath_devdata *dd; - ssize_t ret; - loff_t pos; - char *tmp; - - pos = *ppos; - - if ( pos < 0) { - ret = -EINVAL; - goto bail; - } - - if (pos >= sizeof(struct ipath_flash)) { - ret = 0; - goto bail; - } - - if (count > sizeof(struct ipath_flash) - pos) - count = sizeof(struct ipath_flash) - pos; - - tmp = kmalloc(count, GFP_KERNEL); - if (!tmp) { - ret = -ENOMEM; - goto bail; - } - - dd = file_inode(file)->i_private; - if (ipath_eeprom_read(dd, pos, tmp, count)) { - ipath_dev_err(dd, "failed to read from flash\n"); - ret = -ENXIO; - goto bail_tmp; - } - - if (copy_to_user(buf, tmp, count)) { - ret = -EFAULT; - goto bail_tmp; - } - - *ppos = pos + count; - ret = count; - -bail_tmp: - kfree(tmp); - -bail: - return ret; -} - -static ssize_t flash_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct ipath_devdata *dd; - ssize_t ret; - loff_t pos; - char *tmp; - - pos = *ppos; - - if (pos != 0) { - ret = -EINVAL; - goto bail; - } - - if (count != sizeof(struct ipath_flash)) { - ret = -EINVAL; - goto bail; - } - - tmp = kmalloc(count, GFP_KERNEL); - if (!tmp) { - ret = -ENOMEM; - goto bail; - } - - if (copy_from_user(tmp, buf, count)) { - ret = -EFAULT; - goto bail_tmp; - } - - dd = file_inode(file)->i_private; - if (ipath_eeprom_write(dd, pos, tmp, count)) { - ret = -ENXIO; - ipath_dev_err(dd, "failed to write to flash\n"); - goto bail_tmp; - } - - *ppos = pos + count; - ret = count; - -bail_tmp: - kfree(tmp); - -bail: - return ret; -} - -static const struct file_operations flash_ops = { - .read = flash_read, - .write = flash_write, - .llseek = default_llseek, -}; - -static int create_device_files(struct super_block *sb, - struct ipath_devdata *dd) -{ - struct 
dentry *dir, *tmp; - char unit[10]; - int ret; - - snprintf(unit, sizeof unit, "%02d", dd->ipath_unit); - ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir, - &simple_dir_operations, dd); - if (ret) { - printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret); - goto bail; - } - - ret = create_file("atomic_counters", S_IFREG|S_IRUGO, dir, &tmp, - &atomic_counters_ops, dd); - if (ret) { - printk(KERN_ERR "create_file(%s/atomic_counters) " - "failed: %d\n", unit, ret); - goto bail; - } - - ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp, - &flash_ops, dd); - if (ret) { - printk(KERN_ERR "create_file(%s/flash) " - "failed: %d\n", unit, ret); - goto bail; - } - -bail: - return ret; -} - -static int remove_file(struct dentry *parent, char *name) -{ - struct dentry *tmp; - int ret; - - tmp = lookup_one_len(name, parent, strlen(name)); - - if (IS_ERR(tmp)) { - ret = PTR_ERR(tmp); - goto bail; - } - - spin_lock(&tmp->d_lock); - if (simple_positive(tmp)) { - dget_dlock(tmp); - __d_drop(tmp); - spin_unlock(&tmp->d_lock); - simple_unlink(d_inode(parent), tmp); - } else - spin_unlock(&tmp->d_lock); - - ret = 0; -bail: - /* - * We don't expect clients to care about the return value, but - * it's there if they need it. 
- */ - return ret; -} - -static int remove_device_files(struct super_block *sb, - struct ipath_devdata *dd) -{ - struct dentry *dir, *root; - char unit[10]; - int ret; - - root = dget(sb->s_root); - mutex_lock(&d_inode(root)->i_mutex); - snprintf(unit, sizeof unit, "%02d", dd->ipath_unit); - dir = lookup_one_len(unit, root, strlen(unit)); - - if (IS_ERR(dir)) { - ret = PTR_ERR(dir); - printk(KERN_ERR "Lookup of %s failed\n", unit); - goto bail; - } - - remove_file(dir, "flash"); - remove_file(dir, "atomic_counters"); - d_delete(dir); - ret = simple_rmdir(d_inode(root), dir); - -bail: - mutex_unlock(&d_inode(root)->i_mutex); - dput(root); - return ret; -} - -static int ipathfs_fill_super(struct super_block *sb, void *data, - int silent) -{ - struct ipath_devdata *dd, *tmp; - unsigned long flags; - int ret; - - static struct tree_descr files[] = { - [2] = {"atomic_stats", &atomic_stats_ops, S_IRUGO}, - {""}, - }; - - ret = simple_fill_super(sb, IPATHFS_MAGIC, files); - if (ret) { - printk(KERN_ERR "simple_fill_super failed: %d\n", ret); - goto bail; - } - - spin_lock_irqsave(&ipath_devs_lock, flags); - - list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { - spin_unlock_irqrestore(&ipath_devs_lock, flags); - ret = create_device_files(sb, dd); - if (ret) - goto bail; - spin_lock_irqsave(&ipath_devs_lock, flags); - } - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - -bail: - return ret; -} - -static struct dentry *ipathfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) -{ - struct dentry *ret; - ret = mount_single(fs_type, flags, data, ipathfs_fill_super); - if (!IS_ERR(ret)) - ipath_super = ret->d_sb; - return ret; -} - -static void ipathfs_kill_super(struct super_block *s) -{ - kill_litter_super(s); - ipath_super = NULL; -} - -int ipathfs_add_device(struct ipath_devdata *dd) -{ - int ret; - - if (ipath_super == NULL) { - ret = 0; - goto bail; - } - - ret = create_device_files(ipath_super, dd); - -bail: - return 
ret; -} - -int ipathfs_remove_device(struct ipath_devdata *dd) -{ - int ret; - - if (ipath_super == NULL) { - ret = 0; - goto bail; - } - - ret = remove_device_files(ipath_super, dd); - -bail: - return ret; -} - -static struct file_system_type ipathfs_fs_type = { - .owner = THIS_MODULE, - .name = "ipathfs", - .mount = ipathfs_mount, - .kill_sb = ipathfs_kill_super, -}; -MODULE_ALIAS_FS("ipathfs"); - -int __init ipath_init_ipathfs(void) -{ - return register_filesystem(&ipathfs_fs_type); -} - -void __exit ipath_exit_ipathfs(void) -{ - unregister_filesystem(&ipathfs_fs_type); -} diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c deleted file mode 100644 index 7cc305488..000000000 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ /dev/null @@ -1,1940 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * This file contains all of the code that is specific to the InfiniPath - * HT chip. - */ - -#include <linux/vmalloc.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/htirq.h> -#include <rdma/ib_verbs.h> - -#include "ipath_kernel.h" -#include "ipath_registers.h" - -static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64); - - -/* - * This lists the InfiniPath registers, in the actual chip layout. - * This structure should never be directly accessed. - * - * The names are in InterCap form because they're taken straight from - * the chip specification. Since they're only used in this file, they - * don't pollute the rest of the source. 
-*/ - -struct _infinipath_do_not_use_kernel_regs { - unsigned long long Revision; - unsigned long long Control; - unsigned long long PageAlign; - unsigned long long PortCnt; - unsigned long long DebugPortSelect; - unsigned long long DebugPort; - unsigned long long SendRegBase; - unsigned long long UserRegBase; - unsigned long long CounterRegBase; - unsigned long long Scratch; - unsigned long long ReservedMisc1; - unsigned long long InterruptConfig; - unsigned long long IntBlocked; - unsigned long long IntMask; - unsigned long long IntStatus; - unsigned long long IntClear; - unsigned long long ErrorMask; - unsigned long long ErrorStatus; - unsigned long long ErrorClear; - unsigned long long HwErrMask; - unsigned long long HwErrStatus; - unsigned long long HwErrClear; - unsigned long long HwDiagCtrl; - unsigned long long MDIO; - unsigned long long IBCStatus; - unsigned long long IBCCtrl; - unsigned long long ExtStatus; - unsigned long long ExtCtrl; - unsigned long long GPIOOut; - unsigned long long GPIOMask; - unsigned long long GPIOStatus; - unsigned long long GPIOClear; - unsigned long long RcvCtrl; - unsigned long long RcvBTHQP; - unsigned long long RcvHdrSize; - unsigned long long RcvHdrCnt; - unsigned long long RcvHdrEntSize; - unsigned long long RcvTIDBase; - unsigned long long RcvTIDCnt; - unsigned long long RcvEgrBase; - unsigned long long RcvEgrCnt; - unsigned long long RcvBufBase; - unsigned long long RcvBufSize; - unsigned long long RxIntMemBase; - unsigned long long RxIntMemSize; - unsigned long long RcvPartitionKey; - unsigned long long ReservedRcv[10]; - unsigned long long SendCtrl; - unsigned long long SendPIOBufBase; - unsigned long long SendPIOSize; - unsigned long long SendPIOBufCnt; - unsigned long long SendPIOAvailAddr; - unsigned long long TxIntMemBase; - unsigned long long TxIntMemSize; - unsigned long long ReservedSend[9]; - unsigned long long SendBufferError; - unsigned long long SendBufferErrorCONT1; - unsigned long long SendBufferErrorCONT2; 
- unsigned long long SendBufferErrorCONT3; - unsigned long long ReservedSBE[4]; - unsigned long long RcvHdrAddr0; - unsigned long long RcvHdrAddr1; - unsigned long long RcvHdrAddr2; - unsigned long long RcvHdrAddr3; - unsigned long long RcvHdrAddr4; - unsigned long long RcvHdrAddr5; - unsigned long long RcvHdrAddr6; - unsigned long long RcvHdrAddr7; - unsigned long long RcvHdrAddr8; - unsigned long long ReservedRHA[7]; - unsigned long long RcvHdrTailAddr0; - unsigned long long RcvHdrTailAddr1; - unsigned long long RcvHdrTailAddr2; - unsigned long long RcvHdrTailAddr3; - unsigned long long RcvHdrTailAddr4; - unsigned long long RcvHdrTailAddr5; - unsigned long long RcvHdrTailAddr6; - unsigned long long RcvHdrTailAddr7; - unsigned long long RcvHdrTailAddr8; - unsigned long long ReservedRHTA[7]; - unsigned long long Sync; /* Software only */ - unsigned long long Dump; /* Software only */ - unsigned long long SimVer; /* Software only */ - unsigned long long ReservedSW[5]; - unsigned long long SerdesConfig0; - unsigned long long SerdesConfig1; - unsigned long long SerdesStatus; - unsigned long long XGXSConfig; - unsigned long long ReservedSW2[4]; -}; - -struct _infinipath_do_not_use_counters { - __u64 LBIntCnt; - __u64 LBFlowStallCnt; - __u64 Reserved1; - __u64 TxUnsupVLErrCnt; - __u64 TxDataPktCnt; - __u64 TxFlowPktCnt; - __u64 TxDwordCnt; - __u64 TxLenErrCnt; - __u64 TxMaxMinLenErrCnt; - __u64 TxUnderrunCnt; - __u64 TxFlowStallCnt; - __u64 TxDroppedPktCnt; - __u64 RxDroppedPktCnt; - __u64 RxDataPktCnt; - __u64 RxFlowPktCnt; - __u64 RxDwordCnt; - __u64 RxLenErrCnt; - __u64 RxMaxMinLenErrCnt; - __u64 RxICRCErrCnt; - __u64 RxVCRCErrCnt; - __u64 RxFlowCtrlErrCnt; - __u64 RxBadFormatCnt; - __u64 RxLinkProblemCnt; - __u64 RxEBPCnt; - __u64 RxLPCRCErrCnt; - __u64 RxBufOvflCnt; - __u64 RxTIDFullErrCnt; - __u64 RxTIDValidErrCnt; - __u64 RxPKeyMismatchCnt; - __u64 RxP0HdrEgrOvflCnt; - __u64 RxP1HdrEgrOvflCnt; - __u64 RxP2HdrEgrOvflCnt; - __u64 RxP3HdrEgrOvflCnt; - __u64 
RxP4HdrEgrOvflCnt; - __u64 RxP5HdrEgrOvflCnt; - __u64 RxP6HdrEgrOvflCnt; - __u64 RxP7HdrEgrOvflCnt; - __u64 RxP8HdrEgrOvflCnt; - __u64 Reserved6; - __u64 Reserved7; - __u64 IBStatusChangeCnt; - __u64 IBLinkErrRecoveryCnt; - __u64 IBLinkDownedCnt; - __u64 IBSymbolErrCnt; -}; - -#define IPATH_KREG_OFFSET(field) (offsetof( \ - struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64)) -#define IPATH_CREG_OFFSET(field) (offsetof( \ - struct _infinipath_do_not_use_counters, field) / sizeof(u64)) - -static const struct ipath_kregs ipath_ht_kregs = { - .kr_control = IPATH_KREG_OFFSET(Control), - .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase), - .kr_debugport = IPATH_KREG_OFFSET(DebugPort), - .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect), - .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear), - .kr_errormask = IPATH_KREG_OFFSET(ErrorMask), - .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus), - .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl), - .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus), - .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear), - .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask), - .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut), - .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus), - .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl), - .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear), - .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask), - .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus), - .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl), - .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus), - .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked), - .kr_intclear = IPATH_KREG_OFFSET(IntClear), - .kr_interruptconfig = IPATH_KREG_OFFSET(InterruptConfig), - .kr_intmask = IPATH_KREG_OFFSET(IntMask), - .kr_intstatus = IPATH_KREG_OFFSET(IntStatus), - .kr_mdio = IPATH_KREG_OFFSET(MDIO), - .kr_pagealign = IPATH_KREG_OFFSET(PageAlign), - .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey), - .kr_portcnt = IPATH_KREG_OFFSET(PortCnt), - .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP), - .kr_rcvbufbase = 
IPATH_KREG_OFFSET(RcvBufBase), - .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize), - .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl), - .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase), - .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt), - .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt), - .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize), - .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize), - .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase), - .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize), - .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase), - .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt), - .kr_revision = IPATH_KREG_OFFSET(Revision), - .kr_scratch = IPATH_KREG_OFFSET(Scratch), - .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError), - .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl), - .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr), - .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase), - .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt), - .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize), - .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase), - .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase), - .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize), - .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase), - .kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0), - .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1), - .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus), - .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig), - /* - * These should not be used directly via ipath_write_kreg64(), - * use them with ipath_write_kreg64_port(), - */ - .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), - .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0) -}; - -static const struct ipath_cregs ipath_ht_cregs = { - .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt), - .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt), - .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt), - .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt), - .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt), - 
.cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt), - .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt), - .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt), - .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt), - .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt), - .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt), - .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt), - /* calc from Reg_CounterRegBase + offset */ - .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt), - .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt), - .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt), - .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt), - .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt), - .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt), - .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt), - .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt), - .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt), - .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt), - .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt), - .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt), - .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt), - .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt), - .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt), - .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt), - .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt), - .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt), - .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt), - .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt), - .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt) -}; - -/* kr_intstatus, kr_intclear, kr_intmask bits */ -#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1) -#define INFINIPATH_I_RCVURG_SHIFT 0 -#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1) -#define INFINIPATH_I_RCVAVAIL_SHIFT 12 - -/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ -#define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0 -#define INFINIPATH_HWE_HTCMEMPARITYERR_MASK 0x3FFFFFULL -#define 
INFINIPATH_HWE_HTCLNKABYTE0CRCERR 0x0000000000800000ULL -#define INFINIPATH_HWE_HTCLNKABYTE1CRCERR 0x0000000001000000ULL -#define INFINIPATH_HWE_HTCLNKBBYTE0CRCERR 0x0000000002000000ULL -#define INFINIPATH_HWE_HTCLNKBBYTE1CRCERR 0x0000000004000000ULL -#define INFINIPATH_HWE_HTCMISCERR4 0x0000000008000000ULL -#define INFINIPATH_HWE_HTCMISCERR5 0x0000000010000000ULL -#define INFINIPATH_HWE_HTCMISCERR6 0x0000000020000000ULL -#define INFINIPATH_HWE_HTCMISCERR7 0x0000000040000000ULL -#define INFINIPATH_HWE_HTCBUSTREQPARITYERR 0x0000000080000000ULL -#define INFINIPATH_HWE_HTCBUSTRESPPARITYERR 0x0000000100000000ULL -#define INFINIPATH_HWE_HTCBUSIREQPARITYERR 0x0000000200000000ULL -#define INFINIPATH_HWE_COREPLL_FBSLIP 0x0080000000000000ULL -#define INFINIPATH_HWE_COREPLL_RFSLIP 0x0100000000000000ULL -#define INFINIPATH_HWE_HTBPLL_FBSLIP 0x0200000000000000ULL -#define INFINIPATH_HWE_HTBPLL_RFSLIP 0x0400000000000000ULL -#define INFINIPATH_HWE_HTAPLL_FBSLIP 0x0800000000000000ULL -#define INFINIPATH_HWE_HTAPLL_RFSLIP 0x1000000000000000ULL -#define INFINIPATH_HWE_SERDESPLLFAILED 0x2000000000000000ULL - -#define IBA6110_IBCS_LINKTRAININGSTATE_MASK 0xf -#define IBA6110_IBCS_LINKSTATE_SHIFT 4 - -/* kr_extstatus bits */ -#define INFINIPATH_EXTS_FREQSEL 0x2 -#define INFINIPATH_EXTS_SERDESSEL 0x4 -#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000 -#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000 - - -/* TID entries (memory), HT-only */ -#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */ -#define INFINIPATH_RT_VALID 0x8000000000000000ULL -#define INFINIPATH_RT_ADDR_SHIFT 0 -#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL -#define INFINIPATH_RT_BUFSIZE_SHIFT 48 - -#define INFINIPATH_R_INTRAVAIL_SHIFT 16 -#define INFINIPATH_R_TAILUPD_SHIFT 31 - -/* kr_xgxsconfig bits */ -#define INFINIPATH_XGXS_RESET 0x7ULL - -/* - * masks and bits that are different in different chips, or present only - * in one - */ -static const ipath_err_t 
infinipath_hwe_htcmemparityerr_mask = - INFINIPATH_HWE_HTCMEMPARITYERR_MASK; -static const ipath_err_t infinipath_hwe_htcmemparityerr_shift = - INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT; - -static const ipath_err_t infinipath_hwe_htclnkabyte0crcerr = - INFINIPATH_HWE_HTCLNKABYTE0CRCERR; -static const ipath_err_t infinipath_hwe_htclnkabyte1crcerr = - INFINIPATH_HWE_HTCLNKABYTE1CRCERR; -static const ipath_err_t infinipath_hwe_htclnkbbyte0crcerr = - INFINIPATH_HWE_HTCLNKBBYTE0CRCERR; -static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr = - INFINIPATH_HWE_HTCLNKBBYTE1CRCERR; - -#define _IPATH_GPIO_SDA_NUM 1 -#define _IPATH_GPIO_SCL_NUM 0 - -#define IPATH_GPIO_SDA \ - (1ULL << (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) -#define IPATH_GPIO_SCL \ - (1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) - -/* keep the code below somewhat more readable; not used elsewhere */ -#define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \ - infinipath_hwe_htclnkabyte1crcerr) -#define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr | \ - infinipath_hwe_htclnkbbyte1crcerr) -#define _IPATH_HTLANE0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \ - infinipath_hwe_htclnkbbyte0crcerr) -#define _IPATH_HTLANE1_CRCBITS (infinipath_hwe_htclnkabyte1crcerr | \ - infinipath_hwe_htclnkbbyte1crcerr) - -static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs, - char *msg, size_t msgl) -{ - char bitsmsg[64]; - ipath_err_t crcbits = hwerrs & - (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS); - /* don't check if 8bit HT */ - if (dd->ipath_flags & IPATH_8BIT_IN_HT0) - crcbits &= ~infinipath_hwe_htclnkabyte1crcerr; - /* don't check if 8bit HT */ - if (dd->ipath_flags & IPATH_8BIT_IN_HT1) - crcbits &= ~infinipath_hwe_htclnkbbyte1crcerr; - /* - * we'll want to ignore link errors on link that is - * not in use, if any. 
For now, complain about both - */ - if (crcbits) { - u16 ctrl0, ctrl1; - snprintf(bitsmsg, sizeof bitsmsg, - "[HT%s lane %s CRC (%llx); powercycle to completely clear]", - !(crcbits & _IPATH_HTLINK1_CRCBITS) ? - "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS) - ? "1 (B)" : "0+1 (A+B)"), - !(crcbits & _IPATH_HTLANE1_CRCBITS) ? "0" - : (!(crcbits & _IPATH_HTLANE0_CRCBITS) ? "1" : - "0+1"), (unsigned long long) crcbits); - strlcat(msg, bitsmsg, msgl); - - /* - * print extra info for debugging. slave/primary - * config word 4, 8 (link control 0, 1) - */ - - if (pci_read_config_word(dd->pcidev, - dd->ipath_ht_slave_off + 0x4, - &ctrl0)) - dev_info(&dd->pcidev->dev, "Couldn't read " - "linkctrl0 of slave/primary " - "config block\n"); - else if (!(ctrl0 & 1 << 6)) - /* not if EOC bit set */ - ipath_dbg("HT linkctrl0 0x%x%s%s\n", ctrl0, - ((ctrl0 >> 8) & 7) ? " CRC" : "", - ((ctrl0 >> 4) & 1) ? "linkfail" : - ""); - if (pci_read_config_word(dd->pcidev, - dd->ipath_ht_slave_off + 0x8, - &ctrl1)) - dev_info(&dd->pcidev->dev, "Couldn't read " - "linkctrl1 of slave/primary " - "config block\n"); - else if (!(ctrl1 & 1 << 6)) - /* not if EOC bit set */ - ipath_dbg("HT linkctrl1 0x%x%s%s\n", ctrl1, - ((ctrl1 >> 8) & 7) ? " CRC" : "", - ((ctrl1 >> 4) & 1) ? "linkfail" : - ""); - - /* disable until driver reloaded */ - dd->ipath_hwerrmask &= ~crcbits; - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask); - ipath_dbg("HT crc errs: %s\n", msg); - } else - ipath_dbg("ignoring HT crc errors 0x%llx, " - "not in use\n", (unsigned long long) - (hwerrs & (_IPATH_HTLINK0_CRCBITS | - _IPATH_HTLINK1_CRCBITS))); -} - -/* 6110 specific hardware errors... 
*/ -static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = { - INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"), - INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"), - INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"), - INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"), - INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"), - INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"), - INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"), - INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), -}; - -#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \ - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \ - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) -#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \ - << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) - -static void ipath_ht_txe_recover(struct ipath_devdata *dd) -{ - ++ipath_stats.sps_txeparity; - dev_info(&dd->pcidev->dev, - "Recovering from TXE PIO parity error\n"); -} - - -/** - * ipath_ht_handle_hwerrors - display hardware errors. - * @dd: the infinipath device - * @msg: the output buffer - * @msgl: the size of the output buffer - * - * Use same msg buffer as regular errors to avoid excessive stack - * use. Most hardware errors are catastrophic, but for right now, - * we'll print them and continue. We reuse the same message buffer as - * ipath_handle_errors() to avoid excessive stack usage. - */ -static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, - size_t msgl) -{ - ipath_err_t hwerrs; - u32 bits, ctrl; - int isfatal = 0; - char bitsmsg[64]; - int log_idx; - - hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); - - if (!hwerrs) { - ipath_cdbg(VERBOSE, "Called but no hardware errors set\n"); - /* - * better than printing cofusing messages - * This seems to be related to clearing the crc error, or - * the pll error during init. 
- */ - goto bail; - } else if (hwerrs == -1LL) { - ipath_dev_err(dd, "Read of hardware error status failed " - "(all bits set); ignoring\n"); - goto bail; - } - ipath_stats.sps_hwerrs++; - - /* Always clear the error status register, except MEMBISTFAIL, - * regardless of whether we continue or stop using the chip. - * We want that set so we know it failed, even across driver reload. - * We'll still ignore it in the hwerrmask. We do this partly for - * diagnostics, but also for support */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, - hwerrs&~INFINIPATH_HWE_MEMBISTFAILED); - - hwerrs &= dd->ipath_hwerrmask; - - /* We log some errors to EEPROM, check if we have any of those. */ - for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) - if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log) - ipath_inc_eeprom_err(dd, log_idx, 1); - - /* - * make sure we get this much out, unless told to be quiet, - * it's a parity error we may recover from, - * or it's occurred within the last 5 seconds - */ - if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY | - RXE_EAGER_PARITY)) || - (ipath_debug & __IPATH_VERBDBG)) - dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " - "(cleared)\n", (unsigned long long) hwerrs); - dd->ipath_lasthwerror |= hwerrs; - - if (hwerrs & ~dd->ipath_hwe_bitsextant) - ipath_dev_err(dd, "hwerror interrupt with unknown errors " - "%llx set\n", (unsigned long long) - (hwerrs & ~dd->ipath_hwe_bitsextant)); - - ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); - if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) { - /* - * parity errors in send memory are recoverable, - * just cancel the send (if indicated in * sendbuffererror), - * count the occurrence, unfreeze (if no other handled - * hardware error bits are set), and continue. They can - * occur if a processor speculative read is done to the PIO - * buffer while we are sending a packet, for example. 
- */ - if (hwerrs & TXE_PIO_PARITY) { - ipath_ht_txe_recover(dd); - hwerrs &= ~TXE_PIO_PARITY; - } - - if (!hwerrs) { - ipath_dbg("Clearing freezemode on ignored or " - "recovered hardware error\n"); - ipath_clear_freeze(dd); - } - } - - *msg = '\0'; - - /* - * may someday want to decode into which bits are which - * functional area for parity errors, etc. - */ - if (hwerrs & (infinipath_hwe_htcmemparityerr_mask - << INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT)) { - bits = (u32) ((hwerrs >> - INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) & - INFINIPATH_HWE_HTCMEMPARITYERR_MASK); - snprintf(bitsmsg, sizeof bitsmsg, "[HTC Parity Errs %x] ", - bits); - strlcat(msg, bitsmsg, msgl); - } - - ipath_format_hwerrors(hwerrs, - ipath_6110_hwerror_msgs, - ARRAY_SIZE(ipath_6110_hwerror_msgs), - msg, msgl); - - if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS)) - hwerr_crcbits(dd, hwerrs, msg, msgl); - - if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) { - strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]", - msgl); - /* ignore from now on, so disable until driver reloaded */ - dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED; - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask); - } -#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \ - INFINIPATH_HWE_COREPLL_RFSLIP | \ - INFINIPATH_HWE_HTBPLL_FBSLIP | \ - INFINIPATH_HWE_HTBPLL_RFSLIP | \ - INFINIPATH_HWE_HTAPLL_FBSLIP | \ - INFINIPATH_HWE_HTAPLL_RFSLIP) - - if (hwerrs & _IPATH_PLL_FAIL) { - snprintf(bitsmsg, sizeof bitsmsg, - "[PLL failed (%llx), InfiniPath hardware unusable]", - (unsigned long long) (hwerrs & _IPATH_PLL_FAIL)); - strlcat(msg, bitsmsg, msgl); - /* ignore from now on, so disable until driver reloaded */ - dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask); - } - - if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) { - /* - * If it occurs, it is left masked since the eternal - * interface is 
unused - */ - dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED; - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask); - } - - if (hwerrs) { - /* - * if any set that we aren't ignoring; only - * make the complaint once, in case it's stuck - * or recurring, and we get here multiple - * times. - * force link down, so switch knows, and - * LEDs are turned off - */ - if (dd->ipath_flags & IPATH_INITTED) { - ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); - ipath_setup_ht_setextled(dd, - INFINIPATH_IBCS_L_STATE_DOWN, - INFINIPATH_IBCS_LT_STATE_DISABLED); - ipath_dev_err(dd, "Fatal Hardware Error (freeze " - "mode), no longer usable, SN %.16s\n", - dd->ipath_serial); - isfatal = 1; - } - *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; - /* mark as having had error */ - *dd->ipath_statusp |= IPATH_STATUS_HWERROR; - /* - * mark as not usable, at a minimum until driver - * is reloaded, probably until reboot, since no - * other reset is possible. - */ - dd->ipath_flags &= ~IPATH_INITTED; - } - else - *msg = 0; /* recovered from all of them */ - if (*msg) - ipath_dev_err(dd, "%s hardware error\n", msg); - if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) - /* - * for status file; if no trailing brace is copied, - * we'll know it was truncated. - */ - snprintf(dd->ipath_freezemsg, - dd->ipath_freezelen, "{%s}", msg); - -bail:; -} - -/** - * ipath_ht_boardname - fill in the board name - * @dd: the infinipath device - * @name: the output buffer - * @namelen: the size of the output buffer - * - * fill in the board name, based on the board revision register - */ -static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, - size_t namelen) -{ - char *n = NULL; - u8 boardrev = dd->ipath_boardrev; - int ret = 0; - - switch (boardrev) { - case 5: - /* - * original production board; two production levels, with - * different serial number ranges. See ipath_ht_early_init() for - * case where we enable IPATH_GPIO_INTR for later serial # range. 
- * Original 112* serial number is no longer supported. - */ - n = "InfiniPath_QHT7040"; - break; - case 7: - /* small form factor production board */ - n = "InfiniPath_QHT7140"; - break; - default: /* don't know, just print the number */ - ipath_dev_err(dd, "Don't yet know about board " - "with ID %u\n", boardrev); - snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u", - boardrev); - break; - } - if (n) - snprintf(name, namelen, "%s", n); - - if (ret) { - ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name); - goto bail; - } - if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || - dd->ipath_minrev > 4)) { - /* - * This version of the driver only supports Rev 3.2 - 3.4 - */ - ipath_dev_err(dd, - "Unsupported InfiniPath hardware revision %u.%u!\n", - dd->ipath_majrev, dd->ipath_minrev); - ret = 1; - goto bail; - } - /* - * pkt/word counters are 32 bit, and therefore wrap fast enough - * that we snapshot them from a timer, and maintain 64 bit shadow - * copies - */ - dd->ipath_flags |= IPATH_32BITCOUNTERS; - dd->ipath_flags |= IPATH_GPIO_INTR; - if (dd->ipath_lbus_speed != 800) - ipath_dev_err(dd, - "Incorrectly configured for HT @ %uMHz\n", - dd->ipath_lbus_speed); - - /* - * set here, not in ipath_init_*_funcs because we have to do - * it after we can read chip registers. 
- */ - dd->ipath_ureg_align = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign); - -bail: - return ret; -} - -static void ipath_check_htlink(struct ipath_devdata *dd) -{ - u8 linkerr, link_off, i; - - for (i = 0; i < 2; i++) { - link_off = dd->ipath_ht_slave_off + i * 4 + 0xd; - if (pci_read_config_byte(dd->pcidev, link_off, &linkerr)) - dev_info(&dd->pcidev->dev, "Couldn't read " - "linkerror%d of HT slave/primary block\n", - i); - else if (linkerr & 0xf0) { - ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, " - "clearing\n", linkerr >> 4, i); - /* - * writing the linkerr bits that are set should - * clear them - */ - if (pci_write_config_byte(dd->pcidev, link_off, - linkerr)) - ipath_dbg("Failed write to clear HT " - "linkerror%d\n", i); - if (pci_read_config_byte(dd->pcidev, link_off, - &linkerr)) - dev_info(&dd->pcidev->dev, - "Couldn't reread linkerror%d of " - "HT slave/primary block\n", i); - else if (linkerr & 0xf0) - dev_info(&dd->pcidev->dev, - "HT linkerror%d bits 0x%x " - "couldn't be cleared\n", - i, linkerr >> 4); - } - } -} - -static int ipath_setup_ht_reset(struct ipath_devdata *dd) -{ - ipath_dbg("No reset possible for this InfiniPath hardware\n"); - return 0; -} - -#define HT_INTR_DISC_CONFIG 0x80 /* HT interrupt and discovery cap */ -#define HT_INTR_REG_INDEX 2 /* intconfig requires indirect accesses */ - -/* - * Bits 13-15 of command==0 is slave/primary block. Clear any HT CRC - * errors. We only bother to do this at load time, because it's OK if - * it happened before we were loaded (first time after boot/reset), - * but any time after that, it's fatal anyway. Also need to not check - * for upper byte errors if we are in 8 bit mode, so figure out - * our width. For now, at least, also complain if it's 8 bit. 
- */ -static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev, - int pos, u8 cap_type) -{ - u8 linkwidth = 0, linkerr, link_a_b_off, link_off; - u16 linkctrl = 0; - int i; - - dd->ipath_ht_slave_off = pos; - /* command word, master_host bit */ - /* master host || slave */ - if ((cap_type >> 2) & 1) - link_a_b_off = 4; - else - link_a_b_off = 0; - ipath_cdbg(VERBOSE, "HT%u (Link %c) connected to processor\n", - link_a_b_off ? 1 : 0, - link_a_b_off ? 'B' : 'A'); - - link_a_b_off += pos; - - /* - * check both link control registers; clear both HT CRC sets if - * necessary. - */ - for (i = 0; i < 2; i++) { - link_off = pos + i * 4 + 0x4; - if (pci_read_config_word(pdev, link_off, &linkctrl)) - ipath_dev_err(dd, "Couldn't read HT link control%d " - "register\n", i); - else if (linkctrl & (0xf << 8)) { - ipath_cdbg(VERBOSE, "Clear linkctrl%d CRC Error " - "bits %x\n", i, linkctrl & (0xf << 8)); - /* - * now write them back to clear the error. - */ - pci_write_config_word(pdev, link_off, - linkctrl & (0xf << 8)); - } - } - - /* - * As with HT CRC bits, same for protocol errors that might occur - * during boot. 
- */ - for (i = 0; i < 2; i++) { - link_off = pos + i * 4 + 0xd; - if (pci_read_config_byte(pdev, link_off, &linkerr)) - dev_info(&pdev->dev, "Couldn't read linkerror%d " - "of HT slave/primary block\n", i); - else if (linkerr & 0xf0) { - ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, " - "clearing\n", linkerr >> 4, i); - /* - * writing the linkerr bits that are set will clear - * them - */ - if (pci_write_config_byte - (pdev, link_off, linkerr)) - ipath_dbg("Failed write to clear HT " - "linkerror%d\n", i); - if (pci_read_config_byte(pdev, link_off, &linkerr)) - dev_info(&pdev->dev, "Couldn't reread " - "linkerror%d of HT slave/primary " - "block\n", i); - else if (linkerr & 0xf0) - dev_info(&pdev->dev, "HT linkerror%d bits " - "0x%x couldn't be cleared\n", - i, linkerr >> 4); - } - } - - /* - * this is just for our link to the host, not devices connected - * through tunnel. - */ - - if (pci_read_config_byte(pdev, link_a_b_off + 7, &linkwidth)) - ipath_dev_err(dd, "Couldn't read HT link width " - "config register\n"); - else { - u32 width; - switch (linkwidth & 7) { - case 5: - width = 4; - break; - case 4: - width = 2; - break; - case 3: - width = 32; - break; - case 1: - width = 16; - break; - case 0: - default: /* if wrong, assume 8 bit */ - width = 8; - break; - } - - dd->ipath_lbus_width = width; - - if (linkwidth != 0x11) { - ipath_dev_err(dd, "Not configured for 16 bit HT " - "(%x)\n", linkwidth); - if (!(linkwidth & 0xf)) { - ipath_dbg("Will ignore HT lane1 errors\n"); - dd->ipath_flags |= IPATH_8BIT_IN_HT0; - } - } - } - - /* - * this is just for our link to the host, not devices connected - * through tunnel. 
- */ - if (pci_read_config_byte(pdev, link_a_b_off + 0xd, &linkwidth)) - ipath_dev_err(dd, "Couldn't read HT link frequency " - "config register\n"); - else { - u32 speed; - switch (linkwidth & 0xf) { - case 6: - speed = 1000; - break; - case 5: - speed = 800; - break; - case 4: - speed = 600; - break; - case 3: - speed = 500; - break; - case 2: - speed = 400; - break; - case 1: - speed = 300; - break; - default: - /* - * assume reserved and vendor-specific are 200... - */ - case 0: - speed = 200; - break; - } - dd->ipath_lbus_speed = speed; - } - - snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info), - "HyperTransport,%uMHz,x%u\n", - dd->ipath_lbus_speed, - dd->ipath_lbus_width); -} - -static int ipath_ht_intconfig(struct ipath_devdata *dd) -{ - int ret; - - if (dd->ipath_intconfig) { - ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig, - dd->ipath_intconfig); /* interrupt address */ - ret = 0; - } else { - ipath_dev_err(dd, "No interrupts enabled, couldn't setup " - "interrupt address\n"); - ret = -EINVAL; - } - - return ret; -} - -static void ipath_ht_irq_update(struct pci_dev *dev, int irq, - struct ht_irq_msg *msg) -{ - struct ipath_devdata *dd = pci_get_drvdata(dev); - u64 prev_intconfig = dd->ipath_intconfig; - - dd->ipath_intconfig = msg->address_lo; - dd->ipath_intconfig |= ((u64) msg->address_hi) << 32; - - /* - * If the previous value of dd->ipath_intconfig is zero, we're - * getting configured for the first time, and must not program the - * intconfig register here (it will be programmed later, when the - * hardware is ready). Otherwise, we should. - */ - if (prev_intconfig) - ipath_ht_intconfig(dd); -} - -/** - * ipath_setup_ht_config - setup the interruptconfig register - * @dd: the infinipath device - * @pdev: the PCI device - * - * setup the interruptconfig register from the HT config info. - * Also clear CRC errors in HT linkcontrol, if necessary. - * This is done only for the real hardware. 
It is done before - * chip address space is initted, so can't touch infinipath registers - */ -static int ipath_setup_ht_config(struct ipath_devdata *dd, - struct pci_dev *pdev) -{ - int pos, ret; - - ret = __ht_create_irq(pdev, 0, ipath_ht_irq_update); - if (ret < 0) { - ipath_dev_err(dd, "Couldn't create interrupt handler: " - "err %d\n", ret); - goto bail; - } - dd->ipath_irq = ret; - ret = 0; - - /* - * Handle clearing CRC errors in linkctrl register if necessary. We - * do this early, before we ever enable errors or hardware errors, - * mostly to avoid causing the chip to enter freeze mode. - */ - pos = pci_find_capability(pdev, PCI_CAP_ID_HT); - if (!pos) { - ipath_dev_err(dd, "Couldn't find HyperTransport " - "capability; no interrupts\n"); - ret = -ENODEV; - goto bail; - } - do { - u8 cap_type; - - /* - * The HT capability type byte is 3 bytes after the - * capability byte. - */ - if (pci_read_config_byte(pdev, pos + 3, &cap_type)) { - dev_info(&pdev->dev, "Couldn't read config " - "command @ %d\n", pos); - continue; - } - if (!(cap_type & 0xE0)) - slave_or_pri_blk(dd, pdev, pos, cap_type); - } while ((pos = pci_find_next_capability(pdev, pos, - PCI_CAP_ID_HT))); - - dd->ipath_flags |= IPATH_SWAP_PIOBUFS; - -bail: - return ret; -} - -/** - * ipath_setup_ht_cleanup - clean up any per-chip chip-specific stuff - * @dd: the infinipath device - * - * Called during driver unload. - * This is currently a nop for the HT chip, not for all chips - */ -static void ipath_setup_ht_cleanup(struct ipath_devdata *dd) -{ -} - -/** - * ipath_setup_ht_setextled - set the state of the two external LEDs - * @dd: the infinipath device - * @lst: the L state - * @ltst: the LT state - * - * Set the state of the two external LEDs, to indicate physical and - * logical state of IB link. 
For this chip (at least with recommended - * board pinouts), LED1 is Green (physical state), and LED2 is Yellow - * (logical state) - * - * Note: We try to match the Mellanox HCA LED behavior as best - * we can. Green indicates physical link state is OK (something is - * plugged in, and we can train). - * Amber indicates the link is logically up (ACTIVE). - * Mellanox further blinks the amber LED to indicate data packet - * activity, but we have no hardware support for that, so it would - * require waking up every 10-20 msecs and checking the counters - * on the chip, and then turning the LED off if appropriate. That's - * visible overhead, so not something we will do. - * - */ -static void ipath_setup_ht_setextled(struct ipath_devdata *dd, - u64 lst, u64 ltst) -{ - u64 extctl; - unsigned long flags = 0; - - /* the diags use the LED to indicate diag info, so we leave - * the external LED alone when the diags are running */ - if (ipath_diag_inuse) - return; - - /* Allow override of LED display for, e.g. Locating system in rack */ - if (dd->ipath_led_override) { - ltst = (dd->ipath_led_override & IPATH_LED_PHYS) - ? INFINIPATH_IBCS_LT_STATE_LINKUP - : INFINIPATH_IBCS_LT_STATE_DISABLED; - lst = (dd->ipath_led_override & IPATH_LED_LOG) - ? INFINIPATH_IBCS_L_STATE_ACTIVE - : INFINIPATH_IBCS_L_STATE_DOWN; - } - - spin_lock_irqsave(&dd->ipath_gpio_lock, flags); - /* - * start by setting both LED control bits to off, then turn - * on the appropriate bit(s). 
- */ - if (dd->ipath_boardrev == 8) { /* LS/X-1 uses different pins */ - /* - * major difference is that INFINIPATH_EXTC_LEDGBLERR_OFF - * is inverted, because it is normally used to indicate - * a hardware fault at reset, if there were errors - */ - extctl = (dd->ipath_extctrl & ~INFINIPATH_EXTC_LEDGBLOK_ON) - | INFINIPATH_EXTC_LEDGBLERR_OFF; - if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP) - extctl &= ~INFINIPATH_EXTC_LEDGBLERR_OFF; - if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE) - extctl |= INFINIPATH_EXTC_LEDGBLOK_ON; - } - else { - extctl = dd->ipath_extctrl & - ~(INFINIPATH_EXTC_LED1PRIPORT_ON | - INFINIPATH_EXTC_LED2PRIPORT_ON); - if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP) - extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON; - if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE) - extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON; - } - dd->ipath_extctrl = extctl; - ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl); - spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); -} - -static void ipath_init_ht_variables(struct ipath_devdata *dd) -{ - /* - * setup the register offsets, since they are different for each - * chip - */ - dd->ipath_kregs = &ipath_ht_kregs; - dd->ipath_cregs = &ipath_ht_cregs; - - dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; - dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; - dd->ipath_gpio_sda = IPATH_GPIO_SDA; - dd->ipath_gpio_scl = IPATH_GPIO_SCL; - - /* - * Fill in data for field-values that change in newer chips. - * We dynamically specify only the mask for LINKTRAININGSTATE - * and only the shift for LINKSTATE, as they are the only ones - * that change. Also precalculate the 3 link states of interest - * and the combined mask. 
- */ - dd->ibcs_ls_shift = IBA6110_IBCS_LINKSTATE_SHIFT; - dd->ibcs_lts_mask = IBA6110_IBCS_LINKTRAININGSTATE_MASK; - dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK << - dd->ibcs_ls_shift) | dd->ibcs_lts_mask; - dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP << - INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | - (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift); - dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP << - INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | - (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift); - dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP << - INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | - (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift); - - /* - * Fill in data for ibcc field-values that change in newer chips. - * We dynamically specify only the mask for LINKINITCMD - * and only the shift for LINKCMD and MAXPKTLEN, as they are - * the only ones that change. - */ - dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK; - dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT; - dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT; - - /* Fill in shifts for RcvCtrl. 
*/ - dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT; - dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT; - dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT; - dd->ipath_r_portcfg_shift = 0; /* Not on IBA6110 */ - - dd->ipath_i_bitsextant = - (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) | - (INFINIPATH_I_RCVAVAIL_MASK << - INFINIPATH_I_RCVAVAIL_SHIFT) | - INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT | - INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO; - - dd->ipath_e_bitsextant = - INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC | - INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN | - INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN | - INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR | - INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP | - INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION | - INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | - INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN | - INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK | - INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN | - INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN | - INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT | - INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | - INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED | - INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET | - INFINIPATH_E_HARDWARE; - - dd->ipath_hwe_bitsextant = - (INFINIPATH_HWE_HTCMEMPARITYERR_MASK << - INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) | - (INFINIPATH_HWE_TXEMEMPARITYERR_MASK << - INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) | - (INFINIPATH_HWE_RXEMEMPARITYERR_MASK << - INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) | - INFINIPATH_HWE_HTCLNKABYTE0CRCERR | - INFINIPATH_HWE_HTCLNKABYTE1CRCERR | - INFINIPATH_HWE_HTCLNKBBYTE0CRCERR | - INFINIPATH_HWE_HTCLNKBBYTE1CRCERR | - INFINIPATH_HWE_HTCMISCERR4 | - INFINIPATH_HWE_HTCMISCERR5 | INFINIPATH_HWE_HTCMISCERR6 | - INFINIPATH_HWE_HTCMISCERR7 | - INFINIPATH_HWE_HTCBUSTREQPARITYERR | - INFINIPATH_HWE_HTCBUSTRESPPARITYERR | - 
INFINIPATH_HWE_HTCBUSIREQPARITYERR | - INFINIPATH_HWE_RXDSYNCMEMPARITYERR | - INFINIPATH_HWE_MEMBISTFAILED | - INFINIPATH_HWE_COREPLL_FBSLIP | - INFINIPATH_HWE_COREPLL_RFSLIP | - INFINIPATH_HWE_HTBPLL_FBSLIP | - INFINIPATH_HWE_HTBPLL_RFSLIP | - INFINIPATH_HWE_HTAPLL_FBSLIP | - INFINIPATH_HWE_HTAPLL_RFSLIP | - INFINIPATH_HWE_SERDESPLLFAILED | - INFINIPATH_HWE_IBCBUSTOSPCPARITYERR | - INFINIPATH_HWE_IBCBUSFRSPCPARITYERR; - - dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; - dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; - dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT; - dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT; - - /* - * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. - * 2 is Some Misc, 3 is reserved for future. - */ - dd->ipath_eep_st_masks[0].hwerrs_to_log = - INFINIPATH_HWE_TXEMEMPARITYERR_MASK << - INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT; - - dd->ipath_eep_st_masks[1].hwerrs_to_log = - INFINIPATH_HWE_RXEMEMPARITYERR_MASK << - INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT; - - dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET; - - dd->delay_mult = 2; /* SDR, 4X, can't change */ - - dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; - dd->ipath_link_speed_supported = IPATH_IB_SDR; - dd->ipath_link_width_enabled = IB_WIDTH_4X; - dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported; - /* these can't change for this chip, so set once */ - dd->ipath_link_width_active = dd->ipath_link_width_enabled; - dd->ipath_link_speed_active = dd->ipath_link_speed_enabled; -} - -/** - * ipath_ht_init_hwerrors - enable hardware errors - * @dd: the infinipath device - * - * now that we have finished initializing everything that might reasonably - * cause a hardware error, and cleared those errors bits as they occur, - * we can enable hardware errors in the mask (potentially enabling - * freeze mode), and enable hardware errors as errors (along with - * everything else) in errormask - */ -static void 
ipath_ht_init_hwerrors(struct ipath_devdata *dd) -{ - ipath_err_t val; - u64 extsval; - - extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); - - if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) - ipath_dev_err(dd, "MemBIST did not complete!\n"); - if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT) - ipath_dbg("MemBIST corrected\n"); - - ipath_check_htlink(dd); - - /* barring bugs, all hwerrors become interrupts, which can */ - val = -1LL; - /* don't look at crc lane1 if 8 bit */ - if (dd->ipath_flags & IPATH_8BIT_IN_HT0) - val &= ~infinipath_hwe_htclnkabyte1crcerr; - /* don't look at crc lane1 if 8 bit */ - if (dd->ipath_flags & IPATH_8BIT_IN_HT1) - val &= ~infinipath_hwe_htclnkbbyte1crcerr; - - /* - * disable RXDSYNCMEMPARITY because external serdes is unused, - * and therefore the logic will never be used or initialized, - * and uninitialized state will normally result in this error - * being asserted. Similarly for the external serdess pll - * lock signal. - */ - val &= ~(INFINIPATH_HWE_SERDESPLLFAILED | - INFINIPATH_HWE_RXDSYNCMEMPARITYERR); - - /* - * Disable MISCERR4 because of an inversion in the HT core - * logic checking for errors that cause this bit to be set. - * The errata can also cause the protocol error bit to be set - * in the HT config space linkerror register(s). 
- */ - val &= ~INFINIPATH_HWE_HTCMISCERR4; - - /* - * PLL ignored because unused MDIO interface has a logic problem - */ - if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9) - val &= ~INFINIPATH_HWE_SERDESPLLFAILED; - dd->ipath_hwerrmask = val; -} - - - - -/** - * ipath_ht_bringup_serdes - bring up the serdes - * @dd: the infinipath device - */ -static int ipath_ht_bringup_serdes(struct ipath_devdata *dd) -{ - u64 val, config1; - int ret = 0, change = 0; - - ipath_dbg("Trying to bringup serdes\n"); - - if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) & - INFINIPATH_HWE_SERDESPLLFAILED) - { - ipath_dbg("At start, serdes PLL failed bit set in " - "hwerrstatus, clearing and continuing\n"); - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, - INFINIPATH_HWE_SERDESPLLFAILED); - } - - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); - config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1); - - ipath_cdbg(VERBOSE, "Initial serdes status is config0=%llx " - "config1=%llx, sstatus=%llx xgxs %llx\n", - (unsigned long long) val, (unsigned long long) config1, - (unsigned long long) - ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus), - (unsigned long long) - ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig)); - - /* force reset on */ - val |= INFINIPATH_SERDC0_RESET_PLL - /* | INFINIPATH_SERDC0_RESET_MASK */ - ; - ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); - udelay(15); /* need pll reset set at least for a bit */ - - if (val & INFINIPATH_SERDC0_RESET_PLL) { - u64 val2 = val &= ~INFINIPATH_SERDC0_RESET_PLL; - /* set lane resets, and tx idle, during pll reset */ - val2 |= INFINIPATH_SERDC0_RESET_MASK | - INFINIPATH_SERDC0_TXIDLE; - ipath_cdbg(VERBOSE, "Clearing serdes PLL reset (writing " - "%llx)\n", (unsigned long long) val2); - ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, - val2); - /* - * be sure chip saw it - */ - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - /* - * need pll 
reset clear at least 11 usec before lane - * resets cleared; give it a few more - */ - udelay(15); - val = val2; /* for check below */ - } - - if (val & (INFINIPATH_SERDC0_RESET_PLL | - INFINIPATH_SERDC0_RESET_MASK | - INFINIPATH_SERDC0_TXIDLE)) { - val &= ~(INFINIPATH_SERDC0_RESET_PLL | - INFINIPATH_SERDC0_RESET_MASK | - INFINIPATH_SERDC0_TXIDLE); - /* clear them */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, - val); - } - - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); - if (val & INFINIPATH_XGXS_RESET) { - /* normally true after boot */ - val &= ~INFINIPATH_XGXS_RESET; - change = 1; - } - if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) & - INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) { - /* need to compensate for Tx inversion in partner */ - val &= ~(INFINIPATH_XGXS_RX_POL_MASK << - INFINIPATH_XGXS_RX_POL_SHIFT); - val |= dd->ipath_rx_pol_inv << - INFINIPATH_XGXS_RX_POL_SHIFT; - change = 1; - } - if (change) - ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); - - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); - - /* clear current and de-emphasis bits */ - config1 &= ~0x0ffffffff00ULL; - /* set current to 20ma */ - config1 |= 0x00000000000ULL; - /* set de-emphasis to -5.68dB */ - config1 |= 0x0cccc000000ULL; - ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1); - - ipath_cdbg(VERBOSE, "After setup: serdes status is config0=%llx " - "config1=%llx, sstatus=%llx xgxs %llx\n", - (unsigned long long) val, (unsigned long long) config1, - (unsigned long long) - ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus), - (unsigned long long) - ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig)); - - return ret; /* for now, say we always succeeded */ -} - -/** - * ipath_ht_quiet_serdes - set serdes to txidle - * @dd: the infinipath device - * driver is being unloaded - */ -static void ipath_ht_quiet_serdes(struct ipath_devdata *dd) -{ - u64 val = ipath_read_kreg64(dd, 
dd->ipath_kregs->kr_serdesconfig0); - - val |= INFINIPATH_SERDC0_TXIDLE; - ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n", - (unsigned long long) val); - ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); -} - -/** - * ipath_pe_put_tid - write a TID in chip - * @dd: the infinipath device - * @tidptr: pointer to the expected TID (in chip) to update - * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected - * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing - * - * This exists as a separate routine to allow for special locking etc. - * It's used for both the full cleanup on exit, as well as the normal - * setup and teardown. - */ -static void ipath_ht_put_tid(struct ipath_devdata *dd, - u64 __iomem *tidptr, u32 type, - unsigned long pa) -{ - if (!dd->ipath_kregbase) - return; - - if (pa != dd->ipath_tidinvalid) { - if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) { - dev_info(&dd->pcidev->dev, - "physaddr %lx has more than " - "40 bits, using only 40!!!\n", pa); - pa &= INFINIPATH_RT_ADDR_MASK; - } - if (type == RCVHQ_RCV_TYPE_EAGER) - pa |= dd->ipath_tidtemplate; - else { - /* in words (fixed, full page). */ - u64 lenvalid = PAGE_SIZE >> 2; - lenvalid <<= INFINIPATH_RT_BUFSIZE_SHIFT; - pa |= lenvalid | INFINIPATH_RT_VALID; - } - } - - writeq(pa, tidptr); -} - - -/** - * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager - * @dd: the infinipath device - * @port: the port - * - * Used from ipath_close(), and at chip initialization. 
- */ -static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port) -{ - u64 __iomem *tidbase; - int i; - - if (!dd->ipath_kregbase) - return; - - ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port); - - /* - * need to invalidate all of the expected TID entries for this - * port, so we don't have valid entries that might somehow get - * used (early in next use of this port, or through some bug) - */ - tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + - dd->ipath_rcvtidbase + - port * dd->ipath_rcvtidcnt * - sizeof(*tidbase)); - for (i = 0; i < dd->ipath_rcvtidcnt; i++) - ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED, - dd->ipath_tidinvalid); - - tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + - dd->ipath_rcvegrbase + - port * dd->ipath_rcvegrcnt * - sizeof(*tidbase)); - - for (i = 0; i < dd->ipath_rcvegrcnt; i++) - ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER, - dd->ipath_tidinvalid); -} - -/** - * ipath_ht_tidtemplate - setup constants for TID updates - * @dd: the infinipath device - * - * We setup stuff that we use a lot, to avoid calculating each time - */ -static void ipath_ht_tidtemplate(struct ipath_devdata *dd) -{ - dd->ipath_tidtemplate = dd->ipath_ibmaxlen >> 2; - dd->ipath_tidtemplate <<= INFINIPATH_RT_BUFSIZE_SHIFT; - dd->ipath_tidtemplate |= INFINIPATH_RT_VALID; - - /* - * work around chip errata bug 7358, by marking invalid tids - * as having max length - */ - dd->ipath_tidinvalid = (-1LL & INFINIPATH_RT_BUFSIZE_MASK) << - INFINIPATH_RT_BUFSIZE_SHIFT; -} - -static int ipath_ht_early_init(struct ipath_devdata *dd) -{ - u32 __iomem *piobuf; - u32 pioincr, val32; - int i; - - /* - * one cache line; long IB headers will spill over into received - * buffer - */ - dd->ipath_rcvhdrentsize = 16; - dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; - - /* - * For HT, we allocate a somewhat overly large eager buffer, - * such that we can guarantee that we can receive the largest - * packet that 
we can send out. To truly support a 4KB MTU, - * we need to bump this to a large value. To date, other than - * testing, we have never encountered an HCA that can really - * send 4KB MTU packets, so we do not handle that (we'll get - * errors interrupts if we ever see one). - */ - dd->ipath_rcvegrbufsize = dd->ipath_piosize2k; - - /* - * the min() check here is currently a nop, but it may not - * always be, depending on just how we do ipath_rcvegrbufsize - */ - dd->ipath_ibmaxlen = min(dd->ipath_piosize2k, - dd->ipath_rcvegrbufsize); - dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen; - ipath_ht_tidtemplate(dd); - - /* - * zero all the TID entries at startup. We do this for sanity, - * in case of a previous driver crash of some kind, and also - * because the chip powers up with these memories in an unknown - * state. Use portcnt, not cfgports, since this is for the - * full chip, not for current (possibly different) configuration - * value. - * Chip Errata bug 6447 - */ - for (val32 = 0; val32 < dd->ipath_portcnt; val32++) - ipath_ht_clear_tids(dd, val32); - - /* - * write the pbc of each buffer, to be sure it's initialized, then - * cancel all the buffers, and also abort any packets that might - * have been in flight for some reason (the latter is for driver - * unload/reload, but isn't a bad idea at first init). PIO send - * isn't enabled at this point, so there is no danger of sending - * these out on the wire. - * Chip Errata bug 6610 - */ - piobuf = (u32 __iomem *) (((char __iomem *)(dd->ipath_kregbase)) + - dd->ipath_piobufbase); - pioincr = dd->ipath_palign / sizeof(*piobuf); - for (i = 0; i < dd->ipath_piobcnt2k; i++) { - /* - * reasonable word count, just to init pbc - */ - writel(16, piobuf); - piobuf += pioincr; - } - - ipath_get_eeprom_info(dd); - if (dd->ipath_boardrev == 5) { - /* - * Later production QHT7040 has same changes as QHT7140, so - * can use GPIO interrupts. They have serial #'s starting - * with 128, rather than 112. 
- */ - if (dd->ipath_serial[0] == '1' && - dd->ipath_serial[1] == '2' && - dd->ipath_serial[2] == '8') - dd->ipath_flags |= IPATH_GPIO_INTR; - else { - ipath_dev_err(dd, "Unsupported InfiniPath board " - "(serial number %.16s)!\n", - dd->ipath_serial); - return 1; - } - } - - if (dd->ipath_minrev >= 4) { - /* Rev4+ reports extra errors via internal GPIO pins */ - dd->ipath_flags |= IPATH_GPIO_ERRINTRS; - dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK; - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, - dd->ipath_gpio_mask); - } - - return 0; -} - - -/** - * ipath_init_ht_get_base_info - set chip-specific flags for user code - * @dd: the infinipath device - * @kbase: ipath_base_info pointer - * - * We set the PCIE flag because the lower bandwidth on PCIe vs - * HyperTransport can affect some user packet algorithms. - */ -static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase) -{ - struct ipath_base_info *kinfo = kbase; - - kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT | - IPATH_RUNTIME_PIO_REGSWAPPED; - - if (pd->port_dd->ipath_minrev < 4) - kinfo->spi_runtime_flags |= IPATH_RUNTIME_RCVHDR_COPY; - - return 0; -} - -static void ipath_ht_free_irq(struct ipath_devdata *dd) -{ - free_irq(dd->ipath_irq, dd); - ht_destroy_irq(dd->ipath_irq); - dd->ipath_irq = 0; - dd->ipath_intconfig = 0; -} - -static struct ipath_message_header * -ipath_ht_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr) -{ - return (struct ipath_message_header *) - &rhf_addr[sizeof(u64) / sizeof(u32)]; -} - -static void ipath_ht_config_ports(struct ipath_devdata *dd, ushort cfgports) -{ - dd->ipath_portcnt = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt); - dd->ipath_p0_rcvegrcnt = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt); -} - -static void ipath_ht_read_counters(struct ipath_devdata *dd, - struct infinipath_counters *cntrs) -{ - cntrs->LBIntCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt)); - cntrs->LBFlowStallCnt = - ipath_snap_cntr(dd, 
IPATH_CREG_OFFSET(LBFlowStallCnt)); - cntrs->TxSDmaDescCnt = 0; - cntrs->TxUnsupVLErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt)); - cntrs->TxDataPktCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt)); - cntrs->TxFlowPktCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt)); - cntrs->TxDwordCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt)); - cntrs->TxLenErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt)); - cntrs->TxMaxMinLenErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt)); - cntrs->TxUnderrunCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt)); - cntrs->TxFlowStallCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt)); - cntrs->TxDroppedPktCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt)); - cntrs->RxDroppedPktCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt)); - cntrs->RxDataPktCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt)); - cntrs->RxFlowPktCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt)); - cntrs->RxDwordCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt)); - cntrs->RxLenErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt)); - cntrs->RxMaxMinLenErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt)); - cntrs->RxICRCErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt)); - cntrs->RxVCRCErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt)); - cntrs->RxFlowCtrlErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt)); - cntrs->RxBadFormatCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt)); - cntrs->RxLinkProblemCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt)); - cntrs->RxEBPCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt)); - cntrs->RxLPCRCErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt)); - cntrs->RxBufOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt)); - cntrs->RxTIDFullErrCnt = - ipath_snap_cntr(dd, 
IPATH_CREG_OFFSET(RxTIDFullErrCnt)); - cntrs->RxTIDValidErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt)); - cntrs->RxPKeyMismatchCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt)); - cntrs->RxP0HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt)); - cntrs->RxP1HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt)); - cntrs->RxP2HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt)); - cntrs->RxP3HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt)); - cntrs->RxP4HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt)); - cntrs->RxP5HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP5HdrEgrOvflCnt)); - cntrs->RxP6HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP6HdrEgrOvflCnt)); - cntrs->RxP7HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP7HdrEgrOvflCnt)); - cntrs->RxP8HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP8HdrEgrOvflCnt)); - cntrs->RxP9HdrEgrOvflCnt = 0; - cntrs->RxP10HdrEgrOvflCnt = 0; - cntrs->RxP11HdrEgrOvflCnt = 0; - cntrs->RxP12HdrEgrOvflCnt = 0; - cntrs->RxP13HdrEgrOvflCnt = 0; - cntrs->RxP14HdrEgrOvflCnt = 0; - cntrs->RxP15HdrEgrOvflCnt = 0; - cntrs->RxP16HdrEgrOvflCnt = 0; - cntrs->IBStatusChangeCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt)); - cntrs->IBLinkErrRecoveryCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt)); - cntrs->IBLinkDownedCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt)); - cntrs->IBSymbolErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt)); - cntrs->RxVL15DroppedPktCnt = 0; - cntrs->RxOtherLocalPhyErrCnt = 0; - cntrs->PcieRetryBufDiagQwordCnt = 0; - cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs; - cntrs->LocalLinkIntegrityErrCnt = - (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ? 
- dd->ipath_lli_errs : dd->ipath_lli_errors; - cntrs->RxVlErrCnt = 0; - cntrs->RxDlidFltrCnt = 0; -} - - -/* no interrupt fallback for these chips */ -static int ipath_ht_nointr_fallback(struct ipath_devdata *dd) -{ - return 0; -} - - -/* - * reset the XGXS (between serdes and IBC). Slightly less intrusive - * than resetting the IBC or external link state, and useful in some - * cases to cause some retraining. To do this right, we reset IBC - * as well. - */ -static void ipath_ht_xgxs_reset(struct ipath_devdata *dd) -{ - u64 val, prev_val; - - prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); - val = prev_val | INFINIPATH_XGXS_RESET; - prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - dd->ipath_control & ~INFINIPATH_C_LINKENABLE); - ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); - ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val); - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - dd->ipath_control); -} - - -static int ipath_ht_get_ib_cfg(struct ipath_devdata *dd, int which) -{ - int ret; - - switch (which) { - case IPATH_IB_CFG_LWID: - ret = dd->ipath_link_width_active; - break; - case IPATH_IB_CFG_SPD: - ret = dd->ipath_link_speed_active; - break; - case IPATH_IB_CFG_LWID_ENB: - ret = dd->ipath_link_width_enabled; - break; - case IPATH_IB_CFG_SPD_ENB: - ret = dd->ipath_link_speed_enabled; - break; - default: - ret = -ENOTSUPP; - break; - } - return ret; -} - - -/* we assume range checking is already done, if needed */ -static int ipath_ht_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val) -{ - int ret = 0; - - if (which == IPATH_IB_CFG_LWID_ENB) - dd->ipath_link_width_enabled = val; - else if (which == IPATH_IB_CFG_SPD_ENB) - dd->ipath_link_speed_enabled = val; - else - ret = -ENOTSUPP; - return ret; -} - - -static void ipath_ht_config_jint(struct ipath_devdata *dd, u16 a, u16 b) -{ -} - - -static int 
ipath_ht_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) -{ - ipath_setup_ht_setextled(dd, ipath_ib_linkstate(dd, ibcs), - ipath_ib_linktrstate(dd, ibcs)); - return 0; -} - - -/** - * ipath_init_iba6110_funcs - set up the chip-specific function pointers - * @dd: the infinipath device - * - * This is global, and is called directly at init to set up the - * chip-specific function pointers for later use. - */ -void ipath_init_iba6110_funcs(struct ipath_devdata *dd) -{ - dd->ipath_f_intrsetup = ipath_ht_intconfig; - dd->ipath_f_bus = ipath_setup_ht_config; - dd->ipath_f_reset = ipath_setup_ht_reset; - dd->ipath_f_get_boardname = ipath_ht_boardname; - dd->ipath_f_init_hwerrors = ipath_ht_init_hwerrors; - dd->ipath_f_early_init = ipath_ht_early_init; - dd->ipath_f_handle_hwerrors = ipath_ht_handle_hwerrors; - dd->ipath_f_quiet_serdes = ipath_ht_quiet_serdes; - dd->ipath_f_bringup_serdes = ipath_ht_bringup_serdes; - dd->ipath_f_clear_tids = ipath_ht_clear_tids; - dd->ipath_f_put_tid = ipath_ht_put_tid; - dd->ipath_f_cleanup = ipath_setup_ht_cleanup; - dd->ipath_f_setextled = ipath_setup_ht_setextled; - dd->ipath_f_get_base_info = ipath_ht_get_base_info; - dd->ipath_f_free_irq = ipath_ht_free_irq; - dd->ipath_f_tidtemplate = ipath_ht_tidtemplate; - dd->ipath_f_intr_fallback = ipath_ht_nointr_fallback; - dd->ipath_f_get_msgheader = ipath_ht_get_msgheader; - dd->ipath_f_config_ports = ipath_ht_config_ports; - dd->ipath_f_read_counters = ipath_ht_read_counters; - dd->ipath_f_xgxs_reset = ipath_ht_xgxs_reset; - dd->ipath_f_get_ib_cfg = ipath_ht_get_ib_cfg; - dd->ipath_f_set_ib_cfg = ipath_ht_set_ib_cfg; - dd->ipath_f_config_jint = ipath_ht_config_jint; - dd->ipath_f_ib_updown = ipath_ht_ib_updown; - - /* - * initialize chip-specific variables - */ - ipath_init_ht_variables(dd); -} diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c deleted file mode 100644 index be2a60e14..000000000 --- 
a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ /dev/null @@ -1,1066 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <linux/pci.h> -#include <linux/netdevice.h> -#include <linux/moduleparam.h> -#include <linux/slab.h> -#include <linux/stat.h> -#include <linux/vmalloc.h> - -#include "ipath_kernel.h" -#include "ipath_common.h" - -/* - * min buffers we want to have per port, after driver - */ -#define IPATH_MIN_USER_PORT_BUFCNT 7 - -/* - * Number of ports we are configured to use (to allow for more pio - * buffers per port, etc.) Zero means use chip value. - */ -static ushort ipath_cfgports; - -module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO); -MODULE_PARM_DESC(cfgports, "Set max number of ports to use"); - -/* - * Number of buffers reserved for driver (verbs and layered drivers.) - * Initialized based on number of PIO buffers if not set via module interface. - * The problem with this is that it's global, but we'll use different - * numbers for different chip types. - */ -static ushort ipath_kpiobufs; - -static int ipath_set_kpiobufs(const char *val, struct kernel_param *kp); - -module_param_call(kpiobufs, ipath_set_kpiobufs, param_get_ushort, - &ipath_kpiobufs, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver"); - -/** - * create_port0_egr - allocate the eager TID buffers - * @dd: the infinipath device - * - * This code is now quite different for user and kernel, because - * the kernel uses skb's, for the accelerated network performance. - * This is the kernel (port0) version. - * - * Allocate the eager TID buffers and program them into infinipath. - * We use the network layer alloc_skb() allocator to allocate the - * memory, and either use the buffers as is for things like verbs - * packets, or pass the buffers up to the ipath layered driver and - * thence the network layer, replacing them as we do so (see - * ipath_rcv_layer()). 
- */ -static int create_port0_egr(struct ipath_devdata *dd) -{ - unsigned e, egrcnt; - struct ipath_skbinfo *skbinfo; - int ret; - - egrcnt = dd->ipath_p0_rcvegrcnt; - - skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt); - if (skbinfo == NULL) { - ipath_dev_err(dd, "allocation error for eager TID " - "skb array\n"); - ret = -ENOMEM; - goto bail; - } - for (e = 0; e < egrcnt; e++) { - /* - * This is a bit tricky in that we allocate extra - * space for 2 bytes of the 14 byte ethernet header. - * These two bytes are passed in the ipath header so - * the rest of the data is word aligned. We allocate - * 4 bytes so that the data buffer stays word aligned. - * See ipath_kreceive() for more details. - */ - skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL); - if (!skbinfo[e].skb) { - ipath_dev_err(dd, "SKB allocation error for " - "eager TID %u\n", e); - while (e != 0) - dev_kfree_skb(skbinfo[--e].skb); - vfree(skbinfo); - ret = -ENOMEM; - goto bail; - } - } - /* - * After loop above, so we can test non-NULL to see if ready - * to use at receive, etc. 
- */ - dd->ipath_port0_skbinfo = skbinfo; - - for (e = 0; e < egrcnt; e++) { - dd->ipath_port0_skbinfo[e].phys = - ipath_map_single(dd->pcidev, - dd->ipath_port0_skbinfo[e].skb->data, - dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE); - dd->ipath_f_put_tid(dd, e + (u64 __iomem *) - ((char __iomem *) dd->ipath_kregbase + - dd->ipath_rcvegrbase), - RCVHQ_RCV_TYPE_EAGER, - dd->ipath_port0_skbinfo[e].phys); - } - - ret = 0; - -bail: - return ret; -} - -static int bringup_link(struct ipath_devdata *dd) -{ - u64 val, ibc; - int ret = 0; - - /* hold IBC in reset */ - dd->ipath_control &= ~INFINIPATH_C_LINKENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - dd->ipath_control); - - /* - * set initial max size pkt IBC will send, including ICRC; it's the - * PIO buffer size in dwords, less 1; also see ipath_set_mtu() - */ - val = (dd->ipath_ibmaxlen >> 2) + 1; - ibc = val << dd->ibcc_mpl_shift; - - /* flowcontrolwatermark is in units of KBytes */ - ibc |= 0x5ULL << INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT; - /* - * How often flowctrl sent. More or less in usecs; balance against - * watermark value, so that in theory senders always get a flow - * control update in time to not let the IB link go idle. - */ - ibc |= 0x3ULL << INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT; - /* max error tolerance */ - ibc |= 0xfULL << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT; - /* use "real" buffer space for */ - ibc |= 4ULL << INFINIPATH_IBCC_CREDITSCALE_SHIFT; - /* IB credit flow control. */ - ibc |= 0xfULL << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT; - /* initially come up waiting for TS1, without sending anything. */ - dd->ipath_ibcctrl = ibc; - /* - * Want to start out with both LINKCMD and LINKINITCMD in NOP - * (0 and 0). Don't put linkinitcmd in ipath_ibcctrl, want that - * to stay a NOP. Flag that we are disabled, for the (unlikely) - * case that some recovery path is trying to bring the link up - * before we are ready. 
- */ - ibc |= INFINIPATH_IBCC_LINKINITCMD_DISABLE << - INFINIPATH_IBCC_LINKINITCMD_SHIFT; - dd->ipath_flags |= IPATH_IB_LINK_DISABLED; - ipath_cdbg(VERBOSE, "Writing 0x%llx to ibcctrl\n", - (unsigned long long) ibc); - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, ibc); - - // be sure chip saw it - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - - ret = dd->ipath_f_bringup_serdes(dd); - - if (ret) - dev_info(&dd->pcidev->dev, "Could not initialize SerDes, " - "not usable\n"); - else { - /* enable IBC */ - dd->ipath_control |= INFINIPATH_C_LINKENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - dd->ipath_control); - } - - return ret; -} - -static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd) -{ - struct ipath_portdata *pd = NULL; - - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (pd) { - pd->port_dd = dd; - pd->port_cnt = 1; - /* The port 0 pkey table is used by the layer interface. */ - pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY; - pd->port_seq_cnt = 1; - } - return pd; -} - -static int init_chip_first(struct ipath_devdata *dd) -{ - struct ipath_portdata *pd; - int ret = 0; - u64 val; - - spin_lock_init(&dd->ipath_kernel_tid_lock); - spin_lock_init(&dd->ipath_user_tid_lock); - spin_lock_init(&dd->ipath_sendctrl_lock); - spin_lock_init(&dd->ipath_uctxt_lock); - spin_lock_init(&dd->ipath_sdma_lock); - spin_lock_init(&dd->ipath_gpio_lock); - spin_lock_init(&dd->ipath_eep_st_lock); - spin_lock_init(&dd->ipath_sdepb_lock); - mutex_init(&dd->ipath_eep_lock); - - /* - * skip cfgports stuff because we are not allocating memory, - * and we don't want problems if the portcnt changed due to - * cfgports. We do still check and report a difference, if - * not same (should be impossible). 
- */ - dd->ipath_f_config_ports(dd, ipath_cfgports); - if (!ipath_cfgports) - dd->ipath_cfgports = dd->ipath_portcnt; - else if (ipath_cfgports <= dd->ipath_portcnt) { - dd->ipath_cfgports = ipath_cfgports; - ipath_dbg("Configured to use %u ports out of %u in chip\n", - dd->ipath_cfgports, ipath_read_kreg32(dd, - dd->ipath_kregs->kr_portcnt)); - } else { - dd->ipath_cfgports = dd->ipath_portcnt; - ipath_dbg("Tried to configured to use %u ports; chip " - "only supports %u\n", ipath_cfgports, - ipath_read_kreg32(dd, - dd->ipath_kregs->kr_portcnt)); - } - /* - * Allocate full portcnt array, rather than just cfgports, because - * cleanup iterates across all possible ports. - */ - dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_portcnt, - GFP_KERNEL); - - if (!dd->ipath_pd) { - ipath_dev_err(dd, "Unable to allocate portdata array, " - "failing\n"); - ret = -ENOMEM; - goto done; - } - - pd = create_portdata0(dd); - if (!pd) { - ipath_dev_err(dd, "Unable to allocate portdata for port " - "0, failing\n"); - ret = -ENOMEM; - goto done; - } - dd->ipath_pd[0] = pd; - - dd->ipath_rcvtidcnt = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt); - dd->ipath_rcvtidbase = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase); - dd->ipath_rcvegrcnt = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt); - dd->ipath_rcvegrbase = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase); - dd->ipath_palign = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign); - dd->ipath_piobufbase = - ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufbase); - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize); - dd->ipath_piosize2k = val & ~0U; - dd->ipath_piosize4k = val >> 32; - if (dd->ipath_piosize4k == 0 && ipath_mtu4096) - ipath_mtu4096 = 0; /* 4KB not supported by this chip */ - dd->ipath_ibmtu = ipath_mtu4096 ? 
4096 : 2048; - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt); - dd->ipath_piobcnt2k = val & ~0U; - dd->ipath_piobcnt4k = val >> 32; - dd->ipath_pio2kbase = - (u32 __iomem *) (((char __iomem *) dd->ipath_kregbase) + - (dd->ipath_piobufbase & 0xffffffff)); - if (dd->ipath_piobcnt4k) { - dd->ipath_pio4kbase = (u32 __iomem *) - (((char __iomem *) dd->ipath_kregbase) + - (dd->ipath_piobufbase >> 32)); - /* - * 4K buffers take 2 pages; we use roundup just to be - * paranoid; we calculate it once here, rather than on - * ever buf allocate - */ - dd->ipath_4kalign = ALIGN(dd->ipath_piosize4k, - dd->ipath_palign); - ipath_dbg("%u 2k(%x) piobufs @ %p, %u 4k(%x) @ %p " - "(%x aligned)\n", - dd->ipath_piobcnt2k, dd->ipath_piosize2k, - dd->ipath_pio2kbase, dd->ipath_piobcnt4k, - dd->ipath_piosize4k, dd->ipath_pio4kbase, - dd->ipath_4kalign); - } - else ipath_dbg("%u 2k piobufs @ %p\n", - dd->ipath_piobcnt2k, dd->ipath_pio2kbase); - -done: - return ret; -} - -/** - * init_chip_reset - re-initialize after a reset, or enable - * @dd: the infinipath device - * - * sanity check at least some of the values after reset, and - * ensure no receive or transmit (explicitly, in case reset - * failed - */ -static int init_chip_reset(struct ipath_devdata *dd) -{ - u32 rtmp; - int i; - unsigned long flags; - - /* - * ensure chip does no sends or receives, tail updates, or - * pioavail updates while we re-initialize - */ - dd->ipath_rcvctrl &= ~(1ULL << dd->ipath_r_tailupd_shift); - for (i = 0; i < dd->ipath_portcnt; i++) { - clear_bit(dd->ipath_r_portenable_shift + i, - &dd->ipath_rcvctrl); - clear_bit(dd->ipath_r_intravail_shift + i, - &dd->ipath_rcvctrl); - } - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl = 0U; /* no sdma, etc */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - 
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL); - - rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt); - if (rtmp != dd->ipath_rcvtidcnt) - dev_info(&dd->pcidev->dev, "tidcnt was %u before " - "reset, now %u, using original\n", - dd->ipath_rcvtidcnt, rtmp); - rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase); - if (rtmp != dd->ipath_rcvtidbase) - dev_info(&dd->pcidev->dev, "tidbase was %u before " - "reset, now %u, using original\n", - dd->ipath_rcvtidbase, rtmp); - rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt); - if (rtmp != dd->ipath_rcvegrcnt) - dev_info(&dd->pcidev->dev, "egrcnt was %u before " - "reset, now %u, using original\n", - dd->ipath_rcvegrcnt, rtmp); - rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase); - if (rtmp != dd->ipath_rcvegrbase) - dev_info(&dd->pcidev->dev, "egrbase was %u before " - "reset, now %u, using original\n", - dd->ipath_rcvegrbase, rtmp); - - return 0; -} - -static int init_pioavailregs(struct ipath_devdata *dd) -{ - int ret; - - dd->ipath_pioavailregs_dma = dma_alloc_coherent( - &dd->pcidev->dev, PAGE_SIZE, &dd->ipath_pioavailregs_phys, - GFP_KERNEL); - if (!dd->ipath_pioavailregs_dma) { - ipath_dev_err(dd, "failed to allocate PIOavail reg area " - "in memory\n"); - ret = -ENOMEM; - goto done; - } - - /* - * we really want L2 cache aligned, but for current CPUs of - * interest, they are the same. 
- */ - dd->ipath_statusp = (u64 *) - ((char *)dd->ipath_pioavailregs_dma + - ((2 * L1_CACHE_BYTES + - dd->ipath_pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES)); - /* copy the current value now that it's really allocated */ - *dd->ipath_statusp = dd->_ipath_status; - /* - * setup buffer to hold freeze msg, accessible to apps, - * following statusp - */ - dd->ipath_freezemsg = (char *)&dd->ipath_statusp[1]; - /* and its length */ - dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]); - - ret = 0; - -done: - return ret; -} - -/** - * init_shadow_tids - allocate the shadow TID array - * @dd: the infinipath device - * - * allocate the shadow TID array, so we can ipath_munlock previous - * entries. It may make more sense to move the pageshadow to the - * port data structure, so we only allocate memory for ports actually - * in use, since we at 8k per port, now. - */ -static void init_shadow_tids(struct ipath_devdata *dd) -{ - struct page **pages; - dma_addr_t *addrs; - - pages = vzalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * - sizeof(struct page *)); - if (!pages) { - ipath_dev_err(dd, "failed to allocate shadow page * " - "array, no expected sends!\n"); - dd->ipath_pageshadow = NULL; - return; - } - - addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * - sizeof(dma_addr_t)); - if (!addrs) { - ipath_dev_err(dd, "failed to allocate shadow dma handle " - "array, no expected sends!\n"); - vfree(pages); - dd->ipath_pageshadow = NULL; - return; - } - - dd->ipath_pageshadow = pages; - dd->ipath_physshadow = addrs; -} - -static void enable_chip(struct ipath_devdata *dd, int reinit) -{ - u32 val; - u64 rcvmask; - unsigned long flags; - int i; - - if (!reinit) - init_waitqueue_head(&ipath_state_wait); - - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - /* Enable PIO send, and update of PIOavail regs to memory. 
*/ - dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE | - INFINIPATH_S_PIOBUFAVAILUPD; - - /* - * Set the PIO avail update threshold to host memory - * on chips that support it. - */ - if (dd->ipath_pioupd_thresh) - dd->ipath_sendctrl |= dd->ipath_pioupd_thresh - << INFINIPATH_S_UPDTHRESH_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - - /* - * Enable kernel ports' receive and receive interrupt. - * Other ports done as user opens and inits them. - */ - rcvmask = 1ULL; - dd->ipath_rcvctrl |= (rcvmask << dd->ipath_r_portenable_shift) | - (rcvmask << dd->ipath_r_intravail_shift); - if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) - dd->ipath_rcvctrl |= (1ULL << dd->ipath_r_tailupd_shift); - - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - - /* - * now ready for use. this should be cleared whenever we - * detect a reset, or initiate one. - */ - dd->ipath_flags |= IPATH_INITTED; - - /* - * Init our shadow copies of head from tail values, - * and write head values to match. - */ - val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0); - ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0); - - /* Initialize so we interrupt on next packet received */ - ipath_write_ureg(dd, ur_rcvhdrhead, - dd->ipath_rhdrhead_intr_off | - dd->ipath_pd[0]->port_head, 0); - - /* - * by now pioavail updates to memory should have occurred, so - * copy them into our working/shadow registers; this is in - * case something went wrong with abort, but mostly to get the - * initial values of the generation bit correct. - */ - for (i = 0; i < dd->ipath_pioavregs; i++) { - __le64 pioavail; - - /* - * Chip Errata bug 6641; even and odd qwords>3 are swapped. 
- */ - if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) - pioavail = dd->ipath_pioavailregs_dma[i ^ 1]; - else - pioavail = dd->ipath_pioavailregs_dma[i]; - /* - * don't need to worry about ipath_pioavailkernel here - * because we will call ipath_chg_pioavailkernel() later - * in initialization, to busy out buffers as needed - */ - dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail); - } - /* can get counters, stats, etc. */ - dd->ipath_flags |= IPATH_PRESENT; -} - -static int init_housekeeping(struct ipath_devdata *dd, int reinit) -{ - char boardn[40]; - int ret = 0; - - /* - * have to clear shadow copies of registers at init that are - * not otherwise set here, or all kinds of bizarre things - * happen with driver on chip reset - */ - dd->ipath_rcvhdrsize = 0; - - /* - * Don't clear ipath_flags as 8bit mode was set before - * entering this func. However, we do set the linkstate to - * unknown, so we can watch for a transition. - * PRESENT is set because we want register reads to work, - * and the kernel infrastructure saw it in config space; - * We clear it if we have failures. 
- */ - dd->ipath_flags |= IPATH_LINKUNK | IPATH_PRESENT; - dd->ipath_flags &= ~(IPATH_LINKACTIVE | IPATH_LINKARMED | - IPATH_LINKDOWN | IPATH_LINKINIT); - - ipath_cdbg(VERBOSE, "Try to read spc chip revision\n"); - dd->ipath_revision = - ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision); - - /* - * set up fundamental info we need to use the chip; we assume - * if the revision reg and these regs are OK, we don't need to - * special case the rest - */ - dd->ipath_sregbase = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_sendregbase); - dd->ipath_cregbase = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_counterregbase); - dd->ipath_uregbase = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_userregbase); - ipath_cdbg(VERBOSE, "ipath_kregbase %p, sendbase %x usrbase %x, " - "cntrbase %x\n", dd->ipath_kregbase, dd->ipath_sregbase, - dd->ipath_uregbase, dd->ipath_cregbase); - if ((dd->ipath_revision & 0xffffffff) == 0xffffffff - || (dd->ipath_sregbase & 0xffffffff) == 0xffffffff - || (dd->ipath_cregbase & 0xffffffff) == 0xffffffff - || (dd->ipath_uregbase & 0xffffffff) == 0xffffffff) { - ipath_dev_err(dd, "Register read failures from chip, " - "giving up initialization\n"); - dd->ipath_flags &= ~IPATH_PRESENT; - ret = -ENODEV; - goto done; - } - - - /* clear diagctrl register, in case diags were running and crashed */ - ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0); - - /* clear the initial reset flag, in case first driver load */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, - INFINIPATH_E_RESET); - - ipath_cdbg(VERBOSE, "Revision %llx (PCI %x)\n", - (unsigned long long) dd->ipath_revision, - dd->ipath_pcirev); - - if (((dd->ipath_revision >> INFINIPATH_R_SOFTWARE_SHIFT) & - INFINIPATH_R_SOFTWARE_MASK) != IPATH_CHIP_SWVERSION) { - ipath_dev_err(dd, "Driver only handles version %d, " - "chip swversion is %d (%llx), failng\n", - IPATH_CHIP_SWVERSION, - (int)(dd->ipath_revision >> - INFINIPATH_R_SOFTWARE_SHIFT) & - INFINIPATH_R_SOFTWARE_MASK, - (unsigned long 
long) dd->ipath_revision); - ret = -ENOSYS; - goto done; - } - dd->ipath_majrev = (u8) ((dd->ipath_revision >> - INFINIPATH_R_CHIPREVMAJOR_SHIFT) & - INFINIPATH_R_CHIPREVMAJOR_MASK); - dd->ipath_minrev = (u8) ((dd->ipath_revision >> - INFINIPATH_R_CHIPREVMINOR_SHIFT) & - INFINIPATH_R_CHIPREVMINOR_MASK); - dd->ipath_boardrev = (u8) ((dd->ipath_revision >> - INFINIPATH_R_BOARDID_SHIFT) & - INFINIPATH_R_BOARDID_MASK); - - ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn); - - snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion), - "ChipABI %u.%u, %s, InfiniPath%u %u.%u, PCI %u, " - "SW Compat %u\n", - IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn, - (unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) & - INFINIPATH_R_ARCH_MASK, - dd->ipath_majrev, dd->ipath_minrev, dd->ipath_pcirev, - (unsigned)(dd->ipath_revision >> - INFINIPATH_R_SOFTWARE_SHIFT) & - INFINIPATH_R_SOFTWARE_MASK); - - ipath_dbg("%s", dd->ipath_boardversion); - - if (ret) - goto done; - - if (reinit) - ret = init_chip_reset(dd); - else - ret = init_chip_first(dd); - -done: - return ret; -} - -static void verify_interrupt(unsigned long opaque) -{ - struct ipath_devdata *dd = (struct ipath_devdata *) opaque; - - if (!dd) - return; /* being torn down */ - - /* - * If we don't have any interrupts, let the user know and - * don't bother checking again. - */ - if (dd->ipath_int_counter == 0) { - if (!dd->ipath_f_intr_fallback(dd)) - dev_err(&dd->pcidev->dev, "No interrupts detected, " - "not usable.\n"); - else /* re-arm the timer to see if fallback works */ - mod_timer(&dd->ipath_intrchk_timer, jiffies + HZ/2); - } else - ipath_cdbg(VERBOSE, "%u interrupts at timer check\n", - dd->ipath_int_counter); -} - -/** - * ipath_init_chip - do the actual initialization sequence on the chip - * @dd: the infinipath device - * @reinit: reinitializing, so don't allocate new memory - * - * Do the actual initialization sequence on the chip. 
This is done - * both from the init routine called from the PCI infrastructure, and - * when we reset the chip, or detect that it was reset internally, - * or it's administratively re-enabled. - * - * Memory allocation here and in called routines is only done in - * the first case (reinit == 0). We have to be careful, because even - * without memory allocation, we need to re-write all the chip registers - * TIDs, etc. after the reset or enable has completed. - */ -int ipath_init_chip(struct ipath_devdata *dd, int reinit) -{ - int ret = 0; - u32 kpiobufs, defkbufs; - u32 piobufs, uports; - u64 val; - struct ipath_portdata *pd; - gfp_t gfp_flags = GFP_USER | __GFP_COMP; - - ret = init_housekeeping(dd, reinit); - if (ret) - goto done; - - /* - * We could bump this to allow for full rcvegrcnt + rcvtidcnt, - * but then it no longer nicely fits power of two, and since - * we now use routines that backend onto __get_free_pages, the - * rest would be wasted. - */ - dd->ipath_rcvhdrcnt = max(dd->ipath_p0_rcvegrcnt, dd->ipath_rcvegrcnt); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrcnt, - dd->ipath_rcvhdrcnt); - - /* - * Set up the shadow copies of the piobufavail registers, - * which we compare against the chip registers for now, and - * the in memory DMA'ed copies of the registers. This has to - * be done early, before we calculate lastport, etc. - */ - piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; - /* - * calc number of pioavail registers, and save it; we have 2 - * bits per buffer. - */ - dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) - / (sizeof(u64) * BITS_PER_BYTE / 2); - uports = dd->ipath_cfgports ? 
dd->ipath_cfgports - 1 : 0; - if (piobufs > 144) - defkbufs = 32 + dd->ipath_pioreserved; - else - defkbufs = 16 + dd->ipath_pioreserved; - - if (ipath_kpiobufs && (ipath_kpiobufs + - (uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) { - int i = (int) piobufs - - (int) (uports * IPATH_MIN_USER_PORT_BUFCNT); - if (i < 1) - i = 1; - dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of " - "%d for kernel leaves too few for %d user ports " - "(%d each); using %u\n", ipath_kpiobufs, - piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i); - /* - * shouldn't change ipath_kpiobufs, because could be - * different for different devices... - */ - kpiobufs = i; - } else if (ipath_kpiobufs) - kpiobufs = ipath_kpiobufs; - else - kpiobufs = defkbufs; - dd->ipath_lastport_piobuf = piobufs - kpiobufs; - dd->ipath_pbufsport = - uports ? dd->ipath_lastport_piobuf / uports : 0; - /* if not an even divisor, some user ports get extra buffers */ - dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf - - (dd->ipath_pbufsport * uports); - if (dd->ipath_ports_extrabuf) - ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to " - "ports <= %u\n", dd->ipath_pbufsport, - dd->ipath_ports_extrabuf); - dd->ipath_lastpioindex = 0; - dd->ipath_lastpioindexl = dd->ipath_piobcnt2k; - /* ipath_pioavailshadow initialized earlier */ - ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u " - "each for %u user ports\n", kpiobufs, - piobufs, dd->ipath_pbufsport, uports); - ret = dd->ipath_f_early_init(dd); - if (ret) { - ipath_dev_err(dd, "Early initialization failure\n"); - goto done; - } - - /* - * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be - * done after early_init. 
- */ - dd->ipath_hdrqlast = - dd->ipath_rcvhdrentsize * (dd->ipath_rcvhdrcnt - 1); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrentsize, - dd->ipath_rcvhdrentsize); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize, - dd->ipath_rcvhdrsize); - - if (!reinit) { - ret = init_pioavailregs(dd); - init_shadow_tids(dd); - if (ret) - goto done; - } - - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr, - dd->ipath_pioavailregs_phys); - - /* - * this is to detect s/w errors, which the h/w works around by - * ignoring the low 6 bits of address, if it wasn't aligned. - */ - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpioavailaddr); - if (val != dd->ipath_pioavailregs_phys) { - ipath_dev_err(dd, "Catastrophic software error, " - "SendPIOAvailAddr written as %lx, " - "read back as %llx\n", - (unsigned long) dd->ipath_pioavailregs_phys, - (unsigned long long) val); - ret = -EINVAL; - goto done; - } - - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvbthqp, IPATH_KD_QP); - - /* - * make sure we are not in freeze, and PIO send enabled, so - * writes to pbc happen - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, 0ULL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, - ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED); - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL); - - /* - * before error clears, since we expect serdes pll errors during - * this, the first time after reset - */ - if (bringup_link(dd)) { - dev_info(&dd->pcidev->dev, "Failed to bringup IB link\n"); - ret = -ENETDOWN; - goto done; - } - - /* - * clear any "expected" hwerrs from reset and/or initialization - * clear any that aren't enabled (at least this once), and then - * set the enable mask - */ - dd->ipath_f_init_hwerrors(dd); - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, - ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED); - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask); - - /* clear all */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); - 
/* enable errors that are masked, at least this first time. */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, - ~dd->ipath_maskederrs); - dd->ipath_maskederrs = 0; /* don't re-enable ignored in timer */ - dd->ipath_errormask = - ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask); - /* clear any interrupts up to this point (ints still not enabled) */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); - - dd->ipath_f_tidtemplate(dd); - - /* - * Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing - * re-init, the simplest way to handle this is to free - * existing, and re-allocate. - * Need to re-create rest of port 0 portdata as well. - */ - pd = dd->ipath_pd[0]; - if (reinit) { - struct ipath_portdata *npd; - - /* - * Alloc and init new ipath_portdata for port0, - * Then free old pd. Could lead to fragmentation, but also - * makes later support for hot-swap easier. - */ - npd = create_portdata0(dd); - if (npd) { - ipath_free_pddata(dd, pd); - dd->ipath_pd[0] = npd; - pd = npd; - } else { - ipath_dev_err(dd, "Unable to allocate portdata" - " for port 0, failing\n"); - ret = -ENOMEM; - goto done; - } - } - ret = ipath_create_rcvhdrq(dd, pd); - if (!ret) - ret = create_port0_egr(dd); - if (ret) { - ipath_dev_err(dd, "failed to allocate kernel port's " - "rcvhdrq and/or egr bufs\n"); - goto done; - } - else - enable_chip(dd, reinit); - - /* after enable_chip, so pioavailshadow setup */ - ipath_chg_pioavailkernel(dd, 0, piobufs, 1); - - /* - * Cancel any possible active sends from early driver load. - * Follows early_init because some chips have to initialize - * PIO buffers in early_init to avoid false parity errors. - * After enable and ipath_chg_pioavailkernel so we can safely - * enable pioavail updates and PIOENABLE; packets are now - * ready to go out. - */ - ipath_cancel_sends(dd, 1); - - if (!reinit) { - /* - * Used when we close a port, for DMA already in flight - * at close. 
- */ - dd->ipath_dummy_hdrq = dma_alloc_coherent( - &dd->pcidev->dev, dd->ipath_pd[0]->port_rcvhdrq_size, - &dd->ipath_dummy_hdrq_phys, - gfp_flags); - if (!dd->ipath_dummy_hdrq) { - dev_info(&dd->pcidev->dev, - "Couldn't allocate 0x%lx bytes for dummy hdrq\n", - dd->ipath_pd[0]->port_rcvhdrq_size); - /* fallback to just 0'ing */ - dd->ipath_dummy_hdrq_phys = 0UL; - } - } - - /* - * cause retrigger of pending interrupts ignored during init, - * even if we had errors - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); - - if (!dd->ipath_stats_timer_active) { - /* - * first init, or after an admin disable/enable - * set up stats retrieval timer, even if we had errors - * in last portion of setup - */ - init_timer(&dd->ipath_stats_timer); - dd->ipath_stats_timer.function = ipath_get_faststats; - dd->ipath_stats_timer.data = (unsigned long) dd; - /* every 5 seconds; */ - dd->ipath_stats_timer.expires = jiffies + 5 * HZ; - /* takes ~16 seconds to overflow at full IB 4x bandwdith */ - add_timer(&dd->ipath_stats_timer); - dd->ipath_stats_timer_active = 1; - } - - /* Set up SendDMA if chip supports it */ - if (dd->ipath_flags & IPATH_HAS_SEND_DMA) - ret = setup_sdma(dd); - - /* Set up HoL state */ - init_timer(&dd->ipath_hol_timer); - dd->ipath_hol_timer.function = ipath_hol_event; - dd->ipath_hol_timer.data = (unsigned long)dd; - dd->ipath_hol_state = IPATH_HOL_UP; - -done: - if (!ret) { - *dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT; - if (!dd->ipath_f_intrsetup(dd)) { - /* now we can enable all interrupts from the chip */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, - -1LL); - /* force re-interrupt of any pending interrupts. 
*/ - ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, - 0ULL); - /* chip is usable; mark it as initialized */ - *dd->ipath_statusp |= IPATH_STATUS_INITTED; - - /* - * setup to verify we get an interrupt, and fallback - * to an alternate if necessary and possible - */ - if (!reinit) { - init_timer(&dd->ipath_intrchk_timer); - dd->ipath_intrchk_timer.function = - verify_interrupt; - dd->ipath_intrchk_timer.data = - (unsigned long) dd; - } - dd->ipath_intrchk_timer.expires = jiffies + HZ/2; - add_timer(&dd->ipath_intrchk_timer); - } else - ipath_dev_err(dd, "No interrupts enabled, couldn't " - "setup interrupt address\n"); - - if (dd->ipath_cfgports > ipath_stats.sps_nports) - /* - * sps_nports is a global, so, we set it to - * the highest number of ports of any of the - * chips we find; we never decrement it, at - * least for now. Since this might have changed - * over disable/enable or prior to reset, always - * do the check and potentially adjust. - */ - ipath_stats.sps_nports = dd->ipath_cfgports; - } else - ipath_dbg("Failed (%d) to initialize chip\n", ret); - - /* if ret is non-zero, we probably should do some cleanup - here... 
*/ - return ret; -} - /* - * Set the number of PIO buffers reserved for the kernel from the string - * str (parsed by ipath_parse_ushort); kp is not used. Zero or an - * unparsable value is rejected. With ipath_devs_lock held, every device - * on ipath_dev_list that is not skipped must be left with at least - * IPATH_MIN_USER_PORT_BUFCNT buffers per configured port, otherwise - * -EINVAL is returned; a device that passes gets ipath_lastport_piobuf - * set to its total (2k + 4k) buffer count minus val. On success val is - * stored in ipath_kpiobufs and 0 is returned. - * NOTE(review): the devices skipped are those with a non-NULL - * ipath_kregbase, which looks inverted, and the message prints - * ipath_cfgports - 1 while the check multiplies by ipath_cfgports; - * confirm both. - */ -static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp) -{ - struct ipath_devdata *dd; - unsigned long flags; - unsigned short val; - int ret; - - ret = ipath_parse_ushort(str, &val); - - spin_lock_irqsave(&ipath_devs_lock, flags); - - if (ret < 0) - goto bail; - - if (val == 0) { - ret = -EINVAL; - goto bail; - } - - list_for_each_entry(dd, &ipath_dev_list, ipath_list) { - if (dd->ipath_kregbase) - continue; - if (val > (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - - (dd->ipath_cfgports * - IPATH_MIN_USER_PORT_BUFCNT))) - { - ipath_dev_err( - dd, - "Allocating %d PIO bufs for kernel leaves " - "too few for %d user ports (%d each)\n", - val, dd->ipath_cfgports - 1, - IPATH_MIN_USER_PORT_BUFCNT); - ret = -EINVAL; - goto bail; - } - dd->ipath_lastport_piobuf = - dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val; - } - - ipath_kpiobufs = val; - ret = 0; -bail: - spin_unlock_irqrestore(&ipath_devs_lock, flags); - - return ret; -} diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c deleted file mode 100644 index 01ba79279..000000000 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ /dev/null @@ -1,1273 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/sched.h> - -#include "ipath_kernel.h" -#include "ipath_verbs.h" -#include "ipath_common.h" - - -/* - * Called when we might have an error that is specific to a particular - * PIO buffer, and may need to cancel that buffer, so it can be re-used. 
- */ -void ipath_disarm_senderrbufs(struct ipath_devdata *dd) -{ - u32 piobcnt; - unsigned long sbuf[4]; - /* - * it's possible that sendbuffererror could have bits set; might - * have already done this as a result of hardware error handling - */ - piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; - /* - * read these before writing errorclear. Words 2 and 3 are only read - * when the chip has more than 128 (resp. 192) buffers; sbuf[2] is left - * unset otherwise and is only examined below under the same - * piobcnt > 128 test. - */ - sbuf[0] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror); - sbuf[1] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror + 1); - if (piobcnt > 128) - sbuf[2] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror + 2); - if (piobcnt > 192) - sbuf[3] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror + 3); - else - sbuf[3] = 0; - - if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) { - int i; - if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) && - time_after(dd->ipath_lastcancel, jiffies)) { - __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG, - "SendbufErrs %lx %lx", sbuf[0], - sbuf[1]); - if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128) - printk(" %lx %lx ", sbuf[2], sbuf[3]); - printk("\n"); - } - /* disarm each buffer whose error bit is set */ - for (i = 0; i < piobcnt; i++) - if (test_bit(i, sbuf)) - ipath_disarm_piobufs(dd, i, 1); - /* ignore armlaunch errs for a bit */ - dd->ipath_lastcancel = jiffies+3; - } -} - - -/* These are all rcv-related errors which we want to count for stats */ -#define E_SUM_PKTERRS \ - (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \ - INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \ - INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \ - INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \ - INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \ - INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP) - -/* These are all send-related errors which we want to count for stats */ -#define E_SUM_ERRS \ - (INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \ - INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \ - INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \ - 
INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \ - INFINIPATH_E_INVALIDADDR) - -/* - * this is similar to E_SUM_ERRS, but can't ignore armlaunch, don't ignore - * errors not related to freeze and cancelling buffers. Can't ignore - * armlaunch because could get more while still cleaning up, and need - * to cancel those as they happen. - */ -#define E_SPKT_ERRS_IGNORE \ - (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \ - INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \ - INFINIPATH_E_SPKTLEN) - -/* - * these are errors that can occur when the link changes state while - * a packet is being sent or received. This doesn't cover things - * like EBP or VCRC that can be the result of a sender having the - * link change state, so we receive a "known bad" packet. - */ -#define E_SUM_LINK_PKTERRS \ - (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \ - INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \ - INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RMINPKTLEN | \ - INFINIPATH_E_RUNEXPCHAR) - /* - * Disarm any PIO buffers flagged in sendbuffererror, then return the - * subset of errs that the caller should ignore this time: the - * E_SUM_LINK_PKTERRS bits when the link is not ACTIVE, otherwise 0. - */ -static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) -{ - u64 ignore_this_time = 0; - - ipath_disarm_senderrbufs(dd); - if ((errs & E_SUM_LINK_PKTERRS) && - !(dd->ipath_flags & IPATH_LINKACTIVE)) { - /* - * This can happen when SMA is trying to bring the link - * up, but the IB link changes state at the "wrong" time. - * The IB logic then complains that the packet isn't - * valid. We don't want to confuse people, so we just - * don't print them, except at debug - */ - ipath_dbg("Ignoring packet errors %llx, because link not " - "ACTIVE\n", (unsigned long long) errs); - ignore_this_time = errs & E_SUM_LINK_PKTERRS; - } - - return ignore_this_time; -} - -/* generic hw error messages... 
*/ -#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \ - { \ - .mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a << \ - INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ), \ - .msg = "TXE " #a " Memory Parity" \ - } -#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \ - { \ - .mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a << \ - INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ), \ - .msg = "RXE " #a " Memory Parity" \ - } - /* mask and text pairs that ipath_format_hwerrors checks before the caller supplied table */ -static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = { - INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"), - INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"), - - INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF), - INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC), - INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO), - - INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF), - INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ), - INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID), - INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID), - INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF), - INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO), - INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO), -}; - -/** - * ipath_format_hwmsg - append a single hwerror message, wrapped in [ and ] - * @msg: message buffer; it is appended to with strlcat, so it must already - * hold a NUL-terminated string - * @msgl: length of message buffer - * @hwmsg: message to add to message buffer - */ -static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg) -{ - strlcat(msg, "[", msgl); - strlcat(msg, hwmsg, msgl); - strlcat(msg, "]", msgl); -} - -/** - * ipath_format_hwerrors - format hardware error messages for display - * @hwerrs: hardware errors bit vector - * @hwerrmsgs: hardware error descriptions - * @nhwerrmsgs: number of hwerrmsgs - * @msg: message buffer; matching messages are appended, it is not cleared here - * @msgl: message buffer length - * - * The generic table is checked first, then the hwerrmsgs entries. - */ -void ipath_format_hwerrors(u64 hwerrs, - const struct ipath_hwerror_msgs *hwerrmsgs, - size_t nhwerrmsgs, - char *msg, size_t msgl) -{ - int i; - const int glen = - ARRAY_SIZE(ipath_generic_hwerror_msgs); - - for (i=0; i<glen; i++) { - if (hwerrs & ipath_generic_hwerror_msgs[i].mask) { - ipath_format_hwmsg(msg, msgl, - 
ipath_generic_hwerror_msgs[i].msg); - } - } - /* NOTE(review): i is an int compared against the size_t nhwerrmsgs */ - for (i=0; i<nhwerrmsgs; i++) { - if (hwerrs & hwerrmsgs[i].mask) { - ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg); - } - } -} - -/* - * return the name of the IB state encoded in ibcs: Init, Arm or Active; - * every other state is reported as Down - */ -static char *ib_linkstate(struct ipath_devdata *dd, u64 ibcs) -{ - char *ret; - u32 state; - - state = ipath_ib_state(dd, ibcs); - if (state == dd->ib_init) - ret = "Init"; - else if (state == dd->ib_arm) - ret = "Arm"; - else if (state == dd->ib_active) - ret = "Active"; - else - ret = "Down"; - return ret; -} - /* - * Build an ib_event of type ev for port 1 of the verbs device of dd and - * pass it to ib_dispatch_event(). - */ -void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev) -{ - struct ib_event event; - - event.device = &dd->verbs_dev->ibdev; - event.element.port_num = 1; - event.event = ev; - ib_dispatch_event(&event); -} - /* - * Handle an IB status change: read kr_ibcstatus, compare it with the - * value saved in ipath_lastibcstat, let the chip-specific ipath_f_ib_updown - * hook see up and down transitions, and bring ipath_flags and - * *ipath_statusp in line with the new link state. errs is not used. - */ -static void handle_e_ibstatuschanged(struct ipath_devdata *dd, - ipath_err_t errs) -{ - u32 ltstate, lstate, ibstate, lastlstate; - u32 init = dd->ib_init; - u32 arm = dd->ib_arm; - u32 active = dd->ib_active; - const u64 ibcs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); - - lstate = ipath_ib_linkstate(dd, ibcs); /* linkstate */ - ibstate = ipath_ib_state(dd, ibcs); - /* linkstate at last interrupt */ - lastlstate = ipath_ib_linkstate(dd, dd->ipath_lastibcstat); - ltstate = ipath_ib_linktrstate(dd, ibcs); /* link training state */ - - /* - * Since going into a recovery state causes the link state to go - * down and since recovery is transitory, it is better if we "miss" - * ever seeing the link training state go into recovery (i.e., - * ignore this transition for link state special handling purposes) - * without even updating ipath_lastibcstat. 
- */ - if ((ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN) || - (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT) || - (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERIDLE)) - goto done; - - /* - * if linkstate transitions into INIT from any of the various down - * states, or if it transitions from any of the up (INIT or better) - * states into any of the down states (except link recovery), then - * call the chip-specific code to take appropriate actions. - */ - if (lstate >= INFINIPATH_IBCS_L_STATE_INIT && - lastlstate == INFINIPATH_IBCS_L_STATE_DOWN) { - /* transitioned to UP; a nonzero return means the chip code handled it */ - if (dd->ipath_f_ib_updown(dd, 1, ibcs)) { - /* link came up, so we must no longer be disabled */ - dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED; - ipath_cdbg(LINKVERB, "LinkUp handled, skipped\n"); - goto skip_ibchange; /* chip-code handled */ - } - } else if ((lastlstate >= INFINIPATH_IBCS_L_STATE_INIT || - (dd->ipath_flags & IPATH_IB_FORCE_NOTIFY)) && - ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT && - ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) { - int handled; - handled = dd->ipath_f_ib_updown(dd, 0, ibcs); - dd->ipath_flags &= ~IPATH_IB_FORCE_NOTIFY; /* cleared whether or not the chip code handled it */ - if (handled) { - ipath_cdbg(LINKVERB, "LinkDown handled, skipped\n"); - goto skip_ibchange; /* chip-code handled */ - } - } - - /* - * Significant enough to always print and get into logs, if it was - * unexpected. If it was a requested state change, we'll have - * already cleared the flags, so we won't print this warning - */ - if ((ibstate != arm && ibstate != active) && - (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) { - dev_info(&dd->pcidev->dev, "Link state changed from %s " - "to %s\n", (dd->ipath_flags & IPATH_LINKARMED) ? 
- "ARM" : "ACTIVE", ib_linkstate(dd, ibcs)); - } - - if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE || - ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) { - u32 lastlts; - lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat); - /* - * Ignore cycling back and forth from Polling.Active to - * Polling.Quiet while waiting for the other end of the link - * to come up, except to try and decide if we are connected - * to a live IB device or not. We will cycle back and - * forth between them if no cable is plugged in, the other - * device is powered off or disabled, etc. - * NOCABLE is flagged on the 40th such poll-to-poll change - * counted since ipath_ibpollcnt was last reset, unless - * autonegotiation is in progress. - */ - if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE || - lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) { - if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) && - (++dd->ipath_ibpollcnt == 40)) { - dd->ipath_flags |= IPATH_NOCABLE; - *dd->ipath_statusp |= - IPATH_STATUS_IB_NOCABLE; - ipath_cdbg(LINKVERB, "Set NOCABLE\n"); - } - ipath_cdbg(LINKVERB, "POLL change to %s (%x)\n", - ipath_ibcstatus_str[ltstate], ibstate); - goto skip_ibchange; - } - } - - dd->ipath_ibpollcnt = 0; /* not poll*, now */ - ipath_stats.sps_iblink++; - - if (ibstate != init && dd->ipath_lastlinkrecov && ipath_linkrecovery) { - u64 linkrecov; - linkrecov = ipath_snap_cntr(dd, - dd->ipath_cregs->cr_iblinkerrrecovcnt); - if (linkrecov != dd->ipath_lastlinkrecov) { - ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n", - (unsigned long long) ibcs, - ib_linkstate(dd, ibcs), - ipath_ibcstatus_str[ltstate], - (unsigned long long) linkrecov); - /* - * the recovery counter moved since it was snapshotted: - * force the link down, and no more until active again - */ - dd->ipath_lastlinkrecov = 0; - ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); - goto skip_ibchange; - } - } - - if (ibstate == init || ibstate == arm || ibstate == active) { - *dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE; - if (ibstate == init || ibstate == arm) { - *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; - if (dd->ipath_flags & IPATH_LINKACTIVE) - signal_ib_event(dd, IB_EVENT_PORT_ERR); - } - if (ibstate == arm) { - dd->ipath_flags |= 
IPATH_LINKARMED; - dd->ipath_flags &= ~(IPATH_LINKUNK | - IPATH_LINKINIT | IPATH_LINKDOWN | - IPATH_LINKACTIVE | IPATH_NOCABLE); - ipath_hol_down(dd); - } else if (ibstate == init) { - /* - * set INIT and DOWN. Down is checked by - * most of the other code, but INIT is - * useful to know in a few places. - */ - dd->ipath_flags |= IPATH_LINKINIT | - IPATH_LINKDOWN; - dd->ipath_flags &= ~(IPATH_LINKUNK | - IPATH_LINKARMED | IPATH_LINKACTIVE | - IPATH_NOCABLE); - ipath_hol_down(dd); - } else { /* active: snapshot the recovery counter for the comparison made on later status changes */ - dd->ipath_lastlinkrecov = ipath_snap_cntr(dd, - dd->ipath_cregs->cr_iblinkerrrecovcnt); - *dd->ipath_statusp |= - IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF; - dd->ipath_flags |= IPATH_LINKACTIVE; - dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT - | IPATH_LINKDOWN | IPATH_LINKARMED | - IPATH_NOCABLE); - if (dd->ipath_flags & IPATH_HAS_SEND_DMA) - ipath_restart_sdma(dd); - signal_ib_event(dd, IB_EVENT_PORT_ACTIVE); - /* LED active not handled in chip _f_updown */ - dd->ipath_f_setextled(dd, lstate, ltstate); - ipath_hol_up(dd); - } - - /* - * print after we've already done the work, so as not to - * delay the state changes and notifications, for debugging - */ - if (lstate == lastlstate) - ipath_cdbg(LINKVERB, "Unchanged from last: %s " - "(%x)\n", ib_linkstate(dd, ibcs), ibstate); - else - ipath_cdbg(VERBOSE, "Unit %u: link up to %s %s (%x)\n", - dd->ipath_unit, ib_linkstate(dd, ibcs), - ipath_ibcstatus_str[ltstate], ibstate); - } else { /* down: PORT_ERR is signalled only if the ACTIVE flag was still set */ - if (dd->ipath_flags & IPATH_LINKACTIVE) - signal_ib_event(dd, IB_EVENT_PORT_ERR); - dd->ipath_flags |= IPATH_LINKDOWN; - dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT - | IPATH_LINKACTIVE | - IPATH_LINKARMED); - *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; - dd->ipath_lli_counter = 0; - - if (lastlstate != INFINIPATH_IBCS_L_STATE_DOWN) - ipath_cdbg(VERBOSE, "Unit %u link state down " - "(state 0x%x), from %s\n", - dd->ipath_unit, lstate, - ib_linkstate(dd, dd->ipath_lastibcstat)); - else - 
ipath_cdbg(LINKVERB, "Unit %u link state changed " - "to %s (0x%x) from down (%x)\n", - dd->ipath_unit, - ipath_ibcstatus_str[ltstate], - ibstate, lastlstate); - } - /* every path except the recovery-state early exit records the status just read */ -skip_ibchange: - dd->ipath_lastibcstat = ibcs; -done: - return; -} - /* - * Report that supp_msgs messages were suppressed for the errors - * accumulated in ipath_lasterror. msg (msgsz bytes) is scratch space that - * ipath_decode_err fills with the error names. Errors outside the - * queue-full, packet-error and SDMADISABLED set are reported with - * ipath_dev_err; the rest only at debug level. - */ -static void handle_supp_msgs(struct ipath_devdata *dd, - unsigned supp_msgs, char *msg, u32 msgsz) -{ - /* - * Print the message unless it's ibc status change only, which - * happens so often we never want to count it. - */ - if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) { - int iserr; - ipath_err_t mask; - iserr = ipath_decode_err(dd, msg, msgsz, - dd->ipath_lasterror & - ~INFINIPATH_E_IBSTATUSCHANGED); - - mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | - INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED; - - /* if we're in debug, then don't mask SDMADISABLED msgs */ - if (ipath_debug & __IPATH_DBG) - mask &= ~INFINIPATH_E_SDMADISABLED; - - if (dd->ipath_lasterror & ~mask) - ipath_dev_err(dd, "Suppressed %u messages for " - "fast-repeating errors (%s) (%llx)\n", - supp_msgs, msg, - (unsigned long long) - dd->ipath_lasterror); - else { - /* - * rcvegrfull and rcvhdrqfull are "normal", for some - * types of processes (mostly benchmarks) that send - * huge numbers of messages, while not processing - * them. So only complain about these at debug - * level. - */ - if (iserr) - ipath_dbg("Suppressed %u messages for %s\n", - supp_msgs, msg); - else - ipath_cdbg(ERRPKT, - "Suppressed %u messages for %s\n", - supp_msgs, msg); - } - } -} - /* - * Rate-limit error reporting. Sets *noprint to 1 when the caller should - * stay quiet and returns the current count of suppressed messages. - * errs is not used here. - * NOTE(review): nextmsg_time, nmsgs and supp_msgs are function-level - * statics, so this throttle state is shared by every device. - */ -static unsigned handle_frequent_errors(struct ipath_devdata *dd, - ipath_err_t errs, char *msg, - u32 msgsz, int *noprint) -{ - unsigned long nc; - static unsigned long nextmsg_time; - static unsigned nmsgs, supp_msgs; - - /* - * Throttle back "fast" messages to no more than 10 per 5 seconds. - * This isn't perfect, but it's a reasonable heuristic. If we get - * more than 10, give a 6x longer delay. 
- */ - nc = jiffies; - if (nmsgs > 10) { - if (time_before(nc, nextmsg_time)) { - *noprint = 1; - if (!supp_msgs++) - nextmsg_time = nc + HZ * 3; - } - else if (supp_msgs) { - handle_supp_msgs(dd, supp_msgs, msg, msgsz); - supp_msgs = 0; - nmsgs = 0; - } - } - else if (!nmsgs++ || time_after(nc, nextmsg_time)) - nextmsg_time = nc + HZ / 2; - - return supp_msgs; -} - /* - * Handle send DMA error bits in errs: optionally log the decoded errors - * and the send DMA registers, mark SDMA disabled under ipath_sdma_lock, - * and cancel sends unless the ABORTING bit shows an abort was already - * under way. - */ -static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs) -{ - unsigned long flags; - int expected; - - if (ipath_debug & __IPATH_DBG) { - char msg[128]; - ipath_decode_err(dd, msg, sizeof msg, errs & - INFINIPATH_E_SDMAERRS); - ipath_dbg("errors %lx (%s)\n", (unsigned long)errs, msg); - } - if (ipath_debug & __IPATH_VERBDBG) { - unsigned long tl, hd, status, lengen; - tl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail); - hd = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead); - status = ipath_read_kreg64(dd - , dd->ipath_kregs->kr_senddmastatus); - lengen = ipath_read_kreg64(dd, - dd->ipath_kregs->kr_senddmalengen); - ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx " - "lengen 0x%lx\n", tl, hd, status, lengen); - } - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); - expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - if (!expected) - ipath_cancel_sends(dd, 1); -} - /* - * Service the send DMA bits of the interrupt status istat: run - * ipath_sdma_intr for SDMAINT unless shutting down, and for SDMADISABLED - * mark SDMA disabled, cancel sends if the disable was unexpected, and - * schedule the abort tasklet unless shutting down. - * NOTE(review): ABORTING is sampled here before ipath_sdma_lock is taken, - * whereas handle_sdma_errors samples it under the lock; confirm that the - * difference is intended. - */ -static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat) -{ - unsigned long flags; - int expected; - - if ((istat & INFINIPATH_I_SDMAINT) && - !test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) - ipath_sdma_intr(dd); - - if (istat & INFINIPATH_I_SDMADISABLED) { - expected = test_bit(IPATH_SDMA_ABORTING, - &dd->ipath_sdma_status); - ipath_dbg("%s SDmaDisabled intr\n", - expected ? 
"expected" : "unexpected"); - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - if (!expected) - ipath_cancel_sends(dd, 1); - if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) - tasklet_hi_schedule(&dd->ipath_sdma_abort_task); - } -} - /* - * Handle a receive header queue full error. Counts the event in - * ipath_stats, and for each open user port whose head is one entry past - * its tail (or head 0 with tail at ipath_hdrqlast) and whose tail moved - * since the last report, bumps port_hdrqfull and wakes port_wait. - * Returns a mask with bit 0 set when port 0, the kernel port, has entries - * between head and tail; user ports never set a bit. - */ -static int handle_hdrq_full(struct ipath_devdata *dd) -{ - int chkerrpkts = 0; - u32 hd, tl; - u32 i; - - ipath_stats.sps_hdrqfull++; - for (i = 0; i < dd->ipath_cfgports; i++) { - struct ipath_portdata *pd = dd->ipath_pd[i]; - - if (i == 0) { - /* - * For kernel receive queues, we just want to know - * if there are packets in the queue that we can - * process. - */ - if (pd->port_head != ipath_get_hdrqtail(pd)) - chkerrpkts |= 1 << i; - continue; - } - - /* Skip if user context is not open */ - if (!pd || !pd->port_cnt) - continue; - - /* Don't report the same point multiple times. */ - if (dd->ipath_flags & IPATH_NODMA_RTAIL) - tl = ipath_read_ureg32(dd, ur_rcvhdrtail, i); - else - tl = ipath_get_rcvhdrtail(pd); - if (tl == pd->port_lastrcvhdrqtail) - continue; - - hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i); - if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) { - pd->port_lastrcvhdrqtail = tl; - pd->port_hdrqfull++; - /* flush hdrqfull so that poll() sees it */ - wmb(); - wake_up_interruptible(&pd->port_wait); - } - } - - return chkerrpkts; -} - /* - * Process the error status bits in errs: apply the error masks and the - * message throttle, dispatch to the hardware, SDMA, link-state and reset - * handling, update statistics, and write errs to kr_errorclear. - * Returns chkerrpkts, which is nonzero when receive packet or CRC errors - * were seen or the kernel port has entries pending. - */ -static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) -{ - char msg[128]; - u64 ignore_this_time = 0; - u64 iserr = 0; - int chkerrpkts = 0, noprint = 0; - unsigned supp_msgs; - int log_idx; - - /* - * don't report errors that are masked, either at init - * (not set in ipath_errormask), or temporarily (set in - * ipath_maskederrs) - */ - errs &= dd->ipath_errormask & ~dd->ipath_maskederrs; - - supp_msgs = handle_frequent_errors(dd, errs, msg, (u32)sizeof msg, - &noprint); - - /* do these first, they are most important */ - if (errs & 
INFINIPATH_E_HARDWARE) { - /* reuse same msg buf */ - dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg); - } else { - u64 mask; - for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) { - mask = dd->ipath_eep_st_masks[log_idx].errs_to_log; - if (errs & mask) - ipath_inc_eeprom_err(dd, log_idx, 1); - } - } - - if (errs & INFINIPATH_E_SDMAERRS) - handle_sdma_errors(dd, errs); - - if (!noprint && (errs & ~dd->ipath_e_bitsextant)) - ipath_dev_err(dd, "error interrupt with unknown errors " - "%llx set\n", (unsigned long long) - (errs & ~dd->ipath_e_bitsextant)); - - if (errs & E_SUM_ERRS) - ignore_this_time = handle_e_sum_errs(dd, errs); - else if ((errs & E_SUM_LINK_PKTERRS) && - !(dd->ipath_flags & IPATH_LINKACTIVE)) { - /* - * This can happen when SMA is trying to bring the link - * up, but the IB link changes state at the "wrong" time. - * The IB logic then complains that the packet isn't - * valid. We don't want to confuse people, so we just - * don't print them, except at debug - */ - ipath_dbg("Ignoring packet errors %llx, because link not " - "ACTIVE\n", (unsigned long long) errs); - ignore_this_time = errs & E_SUM_LINK_PKTERRS; - } - /* when the suppressed count reaches exactly 250000, mask these errors in the chip */ - if (supp_msgs == 250000) { - int s_iserr; - /* - * It's not entirely reasonable assuming that the errors set - * in the last clear period are all responsible for the - * problem, but the alternative is to assume it's the only - * ones on this particular interrupt, which also isn't great - */ - dd->ipath_maskederrs |= dd->ipath_lasterror | errs; - - dd->ipath_errormask &= ~dd->ipath_maskederrs; - ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, - dd->ipath_errormask); - s_iserr = ipath_decode_err(dd, msg, sizeof msg, - dd->ipath_maskederrs); - - if (dd->ipath_maskederrs & - ~(INFINIPATH_E_RRCVEGRFULL | - INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS)) - ipath_dev_err(dd, "Temporarily disabling " - "error(s) %llx reporting; too frequent (%s)\n", - (unsigned long long) dd->ipath_maskederrs, - msg); - else { - /* - * 
rcvegrfull and rcvhdrqfull are "normal", - * for some types of processes (mostly benchmarks) - * that send huge numbers of messages, while not - * processing them. So only complain about - * these at debug level. - */ - if (s_iserr) - ipath_dbg("Temporarily disabling reporting " - "too frequent queue full errors (%s)\n", - msg); - else - ipath_cdbg(ERRPKT, - "Temporarily disabling reporting too" - " frequent packet errors (%s)\n", - msg); - } - - /* - * Re-enable the masked errors after around 3 minutes - * (HZ * 180), in ipath_get_faststats(). If we have a series of fast - * repeating but different errors, the interval will keep - * stretching out, but that's OK, as that's pretty - * catastrophic. - */ - dd->ipath_unmasktime = jiffies + HZ * 180; - } - /* clear everything being handled, including the bits ignored just below */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, errs); - if (ignore_this_time) - errs &= ~ignore_this_time; - if (errs & ~dd->ipath_lasterror) { - errs &= ~dd->ipath_lasterror; - /* never suppress duplicate hwerrors or ibstatuschange */ - dd->ipath_lasterror |= errs & - ~(INFINIPATH_E_HARDWARE | - INFINIPATH_E_IBSTATUSCHANGED); - } - - if (errs & INFINIPATH_E_SENDSPECIALTRIGGER) { - dd->ipath_spectriggerhit++; - ipath_dbg("%lu special trigger hits\n", - dd->ipath_spectriggerhit); - } - - /* likely due to cancel; so suppress message unless verbose */ - if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) && - time_after(dd->ipath_lastcancel, jiffies)) { - /* armlaunch takes precedence; it often causes both. */ - ipath_cdbg(VERBOSE, - "Suppressed %s error (%llx) after sendbuf cancel\n", - (errs & INFINIPATH_E_SPIOARMLAUNCH) ? - "armlaunch" : "sendpktlen", (unsigned long long)errs); - errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN); - } - - if (!errs) - return 0; - - if (!noprint) { - ipath_err_t mask; - /* - * The ones we mask off are handled specially below - * or above. Also mask SDMADISABLED by default as it - * is too chatty. 
- */ - mask = INFINIPATH_E_IBSTATUSCHANGED | - INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | - INFINIPATH_E_HARDWARE | INFINIPATH_E_SDMADISABLED; - - /* if we're in debug, then don't mask SDMADISABLED msgs */ - if (ipath_debug & __IPATH_DBG) - mask &= ~INFINIPATH_E_SDMADISABLED; - - ipath_decode_err(dd, msg, sizeof msg, errs & ~mask); - } else - /* so we don't need if (!noprint) at strlcat's below */ - *msg = 0; - - if (errs & E_SUM_PKTERRS) { - ipath_stats.sps_pkterrs++; - chkerrpkts = 1; - } - if (errs & E_SUM_ERRS) - ipath_stats.sps_errs++; - - if (errs & (INFINIPATH_E_RICRC | INFINIPATH_E_RVCRC)) { - ipath_stats.sps_crcerrs++; - chkerrpkts = 1; - } - iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS); - /* iserr: any bit besides the packet errors; it gates the final ipath_dev_err print */ - - /* - * We don't want to print these two as they happen, or we can make - * the situation even worse, because it takes so long to print - * messages to serial consoles. Kernel ports get printed from - * fast_stats, no more than every 5 seconds, user ports get printed - * on close - */ - if (errs & INFINIPATH_E_RRCVHDRFULL) - chkerrpkts |= handle_hdrq_full(dd); - if (errs & INFINIPATH_E_RRCVEGRFULL) { - struct ipath_portdata *pd = dd->ipath_pd[0]; - - /* - * since this is of less importance and not likely to - * happen without also getting hdrfull, only count - * occurrences; don't check each port (or even the kernel - * vs user) - */ - ipath_stats.sps_etidfull++; - if (pd->port_head != ipath_get_hdrqtail(pd)) - chkerrpkts |= 1; - } - - /* - * do this before IBSTATUSCHANGED, in case both bits set in a single - * interrupt; we want the STATUSCHANGE to "win", so we do our - * internal copy of state machine correctly - */ - if (errs & INFINIPATH_E_RIBLOSTLINK) { - /* - * force through the IBSTATUSCHANGED block below - */ - errs |= INFINIPATH_E_IBSTATUSCHANGED; - ipath_stats.sps_iblink++; - dd->ipath_flags |= IPATH_LINKDOWN; - dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT - | IPATH_LINKARMED | IPATH_LINKACTIVE); - *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; - - 
ipath_dbg("Lost link, link now down (%s)\n", - ipath_ibcstatus_str[ipath_read_kreg64(dd, - dd->ipath_kregs->kr_ibcstatus) & 0xf]); - } - if (errs & INFINIPATH_E_IBSTATUSCHANGED) - handle_e_ibstatuschanged(dd, errs); - - if (errs & INFINIPATH_E_RESET) { - if (!noprint) - ipath_dev_err(dd, "Got reset, requires re-init " - "(unload and reload driver)\n"); - dd->ipath_flags &= ~IPATH_INITTED; /* needs re-init */ - /* mark as having had error */ - *dd->ipath_statusp |= IPATH_STATUS_HWERROR; - *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF; - } - - if (!noprint && *msg) { - if (iserr) - ipath_dev_err(dd, "%s error\n", msg); - } - if (dd->ipath_state_wanted & dd->ipath_flags) { /* some wanted state flag is now set: wake the waiters */ - ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, " - "waking\n", dd->ipath_state_wanted, - dd->ipath_flags); - wake_up_interruptible(&ipath_state_wait); - } - - return chkerrpkts; -} - -/* - * try to cleanup as much as possible for anything that might have gone - * wrong while in freeze mode, such as pio buffers being written by user - * processes (causing armlaunch), send errors due to going into freeze mode, - * etc., and try to avoid causing extra interrupts while doing so. - * Forcibly update the in-memory pioavail register copies after cleanup - * because the chip won't do it while in freeze mode (the register values - * themselves are kept correct). - * Make sure that we don't lose any important interrupts by using the chip - * feature that says that writing 0 to a bit in *clear that is set in - * *status will cause an interrupt to be generated again (if allowed by - * the *mask value). 
- */ -void ipath_clear_freeze(struct ipath_devdata *dd) -{ - /* disable error interrupts, to avoid confusion */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL); - - /* also disable interrupts; errormask is sometimes overwritten */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); - - ipath_cancel_sends(dd, 1); - - /* clear the freeze, and be sure chip saw it */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - dd->ipath_control); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - - /* force in-memory update now we are out of freeze */ - ipath_force_pio_avail_update(dd); - - /* - * force new interrupt if any hwerr, error or interrupt bits are - * still set, and clear "safe" send packet errors related to freeze - * and cancelling sends. Re-enable error interrupts before possible - * force of re-interrupt on pending interrupts. - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, - E_SPKT_ERRS_IGNORE); - ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, - dd->ipath_errormask); - ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); -} - - -/* - * Count an interrupt that arrived while the device is not marked - * initialized; *unexpectp is the running count kept by the caller. - * This is separate to allow for better optimization of ipath_intr() - */ - -static noinline void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp) -{ - /* - * sometimes happen during driver init and unload, don't want - * to process any interrupts at that point - */ - - /* this is just a bandaid, not a fix, if something goes badly - * wrong. - * NOTE(review): once past 100 the counter is incremented twice per - * call, once in each of the two tests below; confirm that is intended */ - if (++*unexpectp > 100) { - if (++*unexpectp > 105) { - /* - * ok, we must be taking somebody else's interrupts, - * due to a messed up mptable and/or PIRQ table, so - * unregister the interrupt. We've seen this during - * linuxbios development work, and it may happen in - * the future again. 
- */ - if (dd->pcidev && dd->ipath_irq) { - ipath_dev_err(dd, "Now %u unexpected " - "interrupts, unregistering " - "interrupt handler\n", - *unexpectp); - ipath_dbg("free_irq of irq %d\n", - dd->ipath_irq); - dd->ipath_f_free_irq(dd); - } - } - if (ipath_read_ireg(dd, dd->ipath_kregs->kr_intmask)) { - ipath_dev_err(dd, "%u unexpected interrupts, " - "disabling interrupts completely\n", - *unexpectp); - /* - * disable all interrupts, something is very wrong - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, - 0ULL); - } - } else if (*unexpectp > 1) - ipath_dbg("Interrupt when not ready, should not happen, " - "ignoring\n"); -} - /* - * Called when the interrupt status register reads back with all bits - * set. The failure is logged on every call; from the second call on all - * interrupts are masked, and on exactly the second call the irq is freed - * as well. allbits is a function-level static, so the count is shared by - * every device. - * NOTE(review): inside the if (allbits++) body allbits is always at - * least 2, so the final else branch looks unreachable; confirm. - */ -static noinline void ipath_bad_regread(struct ipath_devdata *dd) -{ - static int allbits; - - /* separate routine, for better optimization of ipath_intr() */ - - /* - * We print the message and disable interrupts, in hope of - * having a better chance of debugging the problem. - */ - ipath_dev_err(dd, - "Read of interrupt status failed (all bits set)\n"); - if (allbits++) { - /* disable all interrupts, something is very wrong */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); - if (allbits == 2) { - ipath_dev_err(dd, "Still bad interrupt status, " - "unregistering interrupt\n"); - dd->ipath_f_free_irq(dd); - } else if (allbits > 2) { - if ((allbits % 10000) == 0) - printk("."); - } else - ipath_dev_err(dd, "Disabling interrupts, " - "multiple errors\n"); - } -} - /* - * Call ipath_ib_piobufavail() for the verbs device; when it returns a - * positive value, set INFINIPATH_S_PIOINTBUFAVAIL in the cached sendctrl - * value and write it to the chip under ipath_sendctrl_lock. The scratch - * register read follows the write, presumably to flush it (confirm). - */ -static void handle_layer_pioavail(struct ipath_devdata *dd) -{ - unsigned long flags; - int ret; - - ret = ipath_ib_piobufavail(dd->verbs_dev); - if (ret > 0) - goto set; - - return; -set: - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); -} - -/* - * Handle receive interrupts for user ports; this means a user - * process 
was waiting for a packet to arrive, and didn't want - * to poll - */ -static void handle_urcv(struct ipath_devdata *dd, u64 istat) -{ - u64 portr; - int i; - int rcvdint = 0; - - /* - * test_and_clear_bit(IPATH_PORT_WAITING_RCV) and - * test_and_clear_bit(IPATH_PORT_WAITING_URG) below - * would both like timely updates of the bits so that - * we don't pass them by unnecessarily. the rmb() - * here ensures that we see them promptly -- the - * corresponding wmb()'s are in ipath_poll_urgent() - * and ipath_poll_next()... - */ - rmb(); - portr = ((istat >> dd->ipath_i_rcvavail_shift) & - dd->ipath_i_rcvavail_mask) | - ((istat >> dd->ipath_i_rcvurg_shift) & - dd->ipath_i_rcvurg_mask); - for (i = 1; i < dd->ipath_cfgports; i++) { - struct ipath_portdata *pd = dd->ipath_pd[i]; - - if (portr & (1 << i) && pd && pd->port_cnt) { - if (test_and_clear_bit(IPATH_PORT_WAITING_RCV, - &pd->port_flag)) { - clear_bit(i + dd->ipath_r_intravail_shift, - &dd->ipath_rcvctrl); - wake_up_interruptible(&pd->port_wait); - rcvdint = 1; - } else if (test_and_clear_bit(IPATH_PORT_WAITING_URG, - &pd->port_flag)) { - pd->port_urgent++; - wake_up_interruptible(&pd->port_wait); - } - } - } - if (rcvdint) { - /* only want to take one interrupt, so turn off the rcv - * interrupt for all the ports that we set the rcv_waiting - * (but never for kernel port) - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - } -} - -irqreturn_t ipath_intr(int irq, void *data) -{ - struct ipath_devdata *dd = data; - u64 istat, chk0rcv = 0; - ipath_err_t estat = 0; - irqreturn_t ret; - static unsigned unexpected = 0; - u64 kportrbits; - - ipath_stats.sps_ints++; - - if (dd->ipath_int_counter != (u32) -1) - dd->ipath_int_counter++; - - if (!(dd->ipath_flags & IPATH_PRESENT)) { - /* - * This return value is not great, but we do not want the - * interrupt core code to remove our interrupt handler - * because we don't appear to be handling an interrupt - * during a chip reset. 
- */ - return IRQ_HANDLED; - } - - /* - * this needs to be flags&initted, not statusp, so we keep - * taking interrupts even after link goes down, etc. - * Also, we *must* clear the interrupt at some point, or we won't - * take it again, which can be real bad for errors, etc... - */ - - if (!(dd->ipath_flags & IPATH_INITTED)) { - ipath_bad_intr(dd, &unexpected); - ret = IRQ_NONE; - goto bail; - } - - istat = ipath_read_ireg(dd, dd->ipath_kregs->kr_intstatus); - - if (unlikely(!istat)) { - ipath_stats.sps_nullintr++; - ret = IRQ_NONE; /* not our interrupt, or already handled */ - goto bail; - } - if (unlikely(istat == -1)) { - ipath_bad_regread(dd); - /* don't know if it was our interrupt or not */ - ret = IRQ_NONE; - goto bail; - } - - if (unexpected) - unexpected = 0; - - if (unlikely(istat & ~dd->ipath_i_bitsextant)) - ipath_dev_err(dd, - "interrupt with unknown interrupts %Lx set\n", - (unsigned long long) - istat & ~dd->ipath_i_bitsextant); - else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */ - ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n", - (unsigned long long) istat); - - if (istat & INFINIPATH_I_ERROR) { - ipath_stats.sps_errints++; - estat = ipath_read_kreg64(dd, - dd->ipath_kregs->kr_errorstatus); - if (!estat) - dev_info(&dd->pcidev->dev, "error interrupt (%Lx), " - "but no error bits set!\n", - (unsigned long long) istat); - else if (estat == -1LL) - /* - * should we try clearing all, or hope next read - * works? - */ - ipath_dev_err(dd, "Read of error status failed " - "(all bits set); ignoring\n"); - else - chk0rcv |= handle_errors(dd, estat); - } - - if (istat & INFINIPATH_I_GPIO) { - /* - * GPIO interrupts fall in two broad classes: - * GPIO_2 indicates (on some HT4xx boards) that a packet - * has arrived for Port 0. Checking for this - * is controlled by flag IPATH_GPIO_INTR. - * GPIO_3..5 on IBA6120 Rev2 and IBA6110 Rev4 chips indicate - * errors that we need to count. Checking for this - * is controlled by flag IPATH_GPIO_ERRINTRS. 
- */ - u32 gpiostatus; - u32 to_clear = 0; - - gpiostatus = ipath_read_kreg32( - dd, dd->ipath_kregs->kr_gpio_status); - /* First the error-counter case. */ - if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) && - (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) { - /* want to clear the bits we see asserted. */ - to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK); - - /* - * Count appropriately, clear bits out of our copy, - * as they have been "handled". - */ - if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) { - ipath_dbg("FlowCtl on UnsupVL\n"); - dd->ipath_rxfc_unsupvl_errs++; - } - if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) { - ipath_dbg("Overrun Threshold exceeded\n"); - dd->ipath_overrun_thresh_errs++; - } - if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) { - ipath_dbg("Local Link Integrity error\n"); - dd->ipath_lli_errs++; - } - gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK; - } - /* Now the Port0 Receive case */ - if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) && - (dd->ipath_flags & IPATH_GPIO_INTR)) { - /* - * GPIO status bit 2 is set, and we expected it. - * clear it and indicate in p0bits. - * This probably only happens if a Port0 pkt - * arrives at _just_ the wrong time, and we - * handle that by seting chk0rcv; - */ - to_clear |= (1 << IPATH_GPIO_PORT0_BIT); - gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT); - chk0rcv = 1; - } - if (gpiostatus) { - /* - * Some unexpected bits remain. If they could have - * caused the interrupt, complain and clear. - * To avoid repetition of this condition, also clear - * the mask. It is almost certainly due to error. 
- */ - const u32 mask = (u32) dd->ipath_gpio_mask; - - if (mask & gpiostatus) { - ipath_dbg("Unexpected GPIO IRQ bits %x\n", - gpiostatus & mask); - to_clear |= (gpiostatus & mask); - dd->ipath_gpio_mask &= ~(gpiostatus & mask); - ipath_write_kreg(dd, - dd->ipath_kregs->kr_gpio_mask, - dd->ipath_gpio_mask); - } - } - if (to_clear) { - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, - (u64) to_clear); - } - } - - /* - * Clear the interrupt bits we found set, unless they are receive - * related, in which case we already cleared them above, and don't - * want to clear them again, because we might lose an interrupt. - * Clear it early, so we "know" know the chip will have seen this by - * the time we process the queue, and will re-interrupt if necessary. - * The processor itself won't take the interrupt again until we return. - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat); - - /* - * Handle kernel receive queues before checking for pio buffers - * available since receives can overflow; piobuf waiters can afford - * a few extra cycles, since they were waiting anyway, and user's - * waiting for receive are at the bottom. 
- */ - kportrbits = (1ULL << dd->ipath_i_rcvavail_shift) | - (1ULL << dd->ipath_i_rcvurg_shift); - if (chk0rcv || (istat & kportrbits)) { - istat &= ~kportrbits; - ipath_kreceive(dd->ipath_pd[0]); - } - - if (istat & ((dd->ipath_i_rcvavail_mask << dd->ipath_i_rcvavail_shift) | - (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift))) - handle_urcv(dd, istat); - - if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED)) - handle_sdma_intr(dd, istat); - - if (istat & INFINIPATH_I_SPIOBUFAVAIL) { - unsigned long flags; - - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl &= ~INFINIPATH_S_PIOINTBUFAVAIL; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - - /* always process; sdma verbs uses PIO for acks and VL15 */ - handle_layer_pioavail(dd); - } - - ret = IRQ_HANDLED; - -bail: - return ret; -} diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h deleted file mode 100644 index f0f947122..000000000 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ /dev/null @@ -1,1373 +0,0 @@ -#ifndef _IPATH_KERNEL_H -#define _IPATH_KERNEL_H -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * This header file is the base header file for infinipath kernel code - * ipath_user.h serves a similar purpose for user code. 
- */ - -#include <linux/interrupt.h> -#include <linux/pci.h> -#include <linux/dma-mapping.h> -#include <linux/mutex.h> -#include <linux/list.h> -#include <linux/scatterlist.h> -#include <asm/io.h> -#include <rdma/ib_verbs.h> - -#include "ipath_common.h" -#include "ipath_debug.h" -#include "ipath_registers.h" - -/* only s/w major version of InfiniPath we can handle */ -#define IPATH_CHIP_VERS_MAJ 2U - -/* don't care about this except printing */ -#define IPATH_CHIP_VERS_MIN 0U - -/* temporary, maybe always */ -extern struct infinipath_stats ipath_stats; - -#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ -/* - * First-cut criterion for "device is active" is - * two thousand dwords combined Tx, Rx traffic per - * 5-second interval. SMA packets are 64 dwords, - * and occur "a few per second", presumably each way. - */ -#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000) -/* - * Struct used to indicate which errors are logged in each of the - * error-counters that are logged to EEPROM. A counter is incremented - * _once_ (saturating at 255) for each event with any bits set in - * the error or hwerror register masks below. 
- */ -#define IPATH_EEP_LOG_CNT (4) -struct ipath_eep_log_mask { - u64 errs_to_log; - u64 hwerrs_to_log; -}; - -struct ipath_portdata { - void **port_rcvegrbuf; - dma_addr_t *port_rcvegrbuf_phys; - /* rcvhdrq base, needs mmap before useful */ - void *port_rcvhdrq; - /* kernel virtual address where hdrqtail is updated */ - void *port_rcvhdrtail_kvaddr; - /* - * temp buffer for expected send setup, allocated at open, instead - * of each setup call - */ - void *port_tid_pg_list; - /* when waiting for rcv or pioavail */ - wait_queue_head_t port_wait; - /* - * rcvegr bufs base, physical, must fit - * in 44 bits so 32 bit programs mmap64 44 bit works) - */ - dma_addr_t port_rcvegr_phys; - /* mmap of hdrq, must fit in 44 bits */ - dma_addr_t port_rcvhdrq_phys; - dma_addr_t port_rcvhdrqtailaddr_phys; - /* - * number of opens (including slave subports) on this instance - * (ignoring forks, dup, etc. for now) - */ - int port_cnt; - /* - * how much space to leave at start of eager TID entries for - * protocol use, on each TID - * NOTE(review): no field follows this comment; it looks orphaned. - */ - /* - * instead of calculating it - * NOTE(review): presumably this port's number within the device -- - * confirm against the code that sets port_port. - */ - unsigned port_port; - /* non-zero if port is being shared. */ - u16 port_subport_cnt; - /* NOTE(review): the comment here repeated port_subport_cnt's ("non-zero if port is being shared"); presumably an identifier related to subport sharing -- confirm. 
*/ - u16 port_subport_id; - /* number of pio bufs for this port (all procs, if shared) */ - u32 port_piocnt; - /* first pio buffer for this port */ - u32 port_pio_base; - /* chip offset of PIO buffers for this port */ - u32 port_piobufs; - /* how many alloc_pages() chunks in port_rcvegrbuf_pages */ - u32 port_rcvegrbuf_chunks; - /* how many egrbufs per chunk */ - u32 port_rcvegrbufs_perchunk; - /* order for port_rcvegrbuf_pages */ - size_t port_rcvegrbuf_size; - /* rcvhdrq size (for freeing) */ - size_t port_rcvhdrq_size; - /* next expected TID to check when looking for free */ - u32 port_tidcursor; - /* - * IPATH_PORT_WAITING_RCV / IPATH_PORT_WAITING_URG bits; handle_urcv() - * tests and clears them with test_and_clear_bit() - */ - unsigned long port_flag; - /* what happened */ - unsigned long int_flag; - /* WAIT_RCV that timed out, no interrupt */ - u32 port_rcvwait_to; - /* WAIT_PIO that timed out, no interrupt */ - u32 port_piowait_to; - /* WAIT_RCV already happened, no wait */ - u32 port_rcvnowait; - /* WAIT_PIO already happened, no wait */ - u32 port_pionowait; - /* total number of rcvhdrqfull errors */ - u32 port_hdrqfull; - /* - * Used to suppress multiple instances of same - * port staying stuck at same point. 
- */ - u32 port_lastrcvhdrqtail; - /* saved total number of rcvhdrqfull errors for poll edge trigger */ - u32 port_hdrqfull_poll; - /* total number of polled urgent packets */ - u32 port_urgent; - /* saved total number of polled urgent packets for poll edge trigger */ - u32 port_urgent_poll; - /* pid of process using this port */ - struct pid *port_pid; - struct pid *port_subpid[INFINIPATH_MAX_SUBPORT]; - /* same size as task_struct .comm[] */ - char port_comm[16]; - /* pkeys set by this use of this port */ - u16 port_pkeys[4]; - /* so file ops can get at unit */ - struct ipath_devdata *port_dd; - /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */ - void *subport_uregbase; - /* An array of pages for the eager receive buffers * N */ - void *subport_rcvegrbuf; - /* An array of pages for the eager header queue entries * N */ - void *subport_rcvhdr_base; - /* The version of the library which opened this port */ - u32 userversion; - /* Bitmask of active slaves */ - u32 active_slaves; - /* Type of packets or conditions we want to poll for */ - u16 poll_type; - /* port rcvhdrq head offset */ - u32 port_head; - /* receive packet sequence counter */ - u32 port_seq_cnt; -}; - -struct sk_buff; -struct ipath_sge_state; -struct ipath_verbs_txreq; - -/* - * control information for layered drivers - */ -struct _ipath_layer { - void *l_arg; -}; - -struct ipath_skbinfo { - struct sk_buff *skb; - dma_addr_t phys; -}; - -struct ipath_sdma_txreq { - int flags; - int sg_count; - union { - struct scatterlist *sg; - void *map_addr; - }; - void (*callback)(void *, int); - void *callback_cookie; - int callback_status; - u16 start_idx; /* sdma private */ - u16 next_descq_idx; /* sdma private */ - struct list_head list; /* sdma private */ -}; - -struct ipath_sdma_desc { - __le64 qw[2]; -}; - -#define IPATH_SDMA_TXREQ_F_USELARGEBUF 0x1 -#define IPATH_SDMA_TXREQ_F_HEADTOHOST 0x2 -#define IPATH_SDMA_TXREQ_F_INTREQ 0x4 -#define IPATH_SDMA_TXREQ_F_FREEBUF 0x8 -#define 
IPATH_SDMA_TXREQ_F_FREEDESC 0x10 -#define IPATH_SDMA_TXREQ_F_VL15 0x20 - -#define IPATH_SDMA_TXREQ_S_OK 0 -#define IPATH_SDMA_TXREQ_S_SENDERROR 1 -#define IPATH_SDMA_TXREQ_S_ABORTED 2 -#define IPATH_SDMA_TXREQ_S_SHUTDOWN 3 - -#define IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG (1ull << 63) -#define IPATH_SDMA_STATUS_ABORT_IN_PROG (1ull << 62) -#define IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE (1ull << 61) -#define IPATH_SDMA_STATUS_SCB_EMPTY (1ull << 30) - -/* max dwords in small buffer packet */ -#define IPATH_SMALLBUF_DWORDS (dd->ipath_piosize2k >> 2) - -/* - * Possible IB config parameters for ipath_f_get/set_ib_cfg() - */ -#define IPATH_IB_CFG_LIDLMC 0 /* Get/set LID (LS16b) and Mask (MS16b) */ -#define IPATH_IB_CFG_HRTBT 1 /* Get/set Heartbeat off/enable/auto */ -#define IPATH_IB_HRTBT_ON 3 /* Heartbeat enabled, sent every 100msec */ -#define IPATH_IB_HRTBT_OFF 0 /* Heartbeat off */ -#define IPATH_IB_CFG_LWID_ENB 2 /* Get/set allowed Link-width */ -#define IPATH_IB_CFG_LWID 3 /* Get currently active Link-width */ -#define IPATH_IB_CFG_SPD_ENB 4 /* Get/set allowed Link speeds */ -#define IPATH_IB_CFG_SPD 5 /* Get current Link spd */ -#define IPATH_IB_CFG_RXPOL_ENB 6 /* Get/set Auto-RX-polarity enable */ -#define IPATH_IB_CFG_LREV_ENB 7 /* Get/set Auto-Lane-reversal enable */ -#define IPATH_IB_CFG_LINKLATENCY 8 /* Get link latency (NOTE(review): comment was a copy of LREV_ENB's; confirm) */ - - -struct ipath_devdata { - struct list_head ipath_list; - - struct ipath_kregs const *ipath_kregs; - struct ipath_cregs const *ipath_cregs; - - /* mem-mapped pointer to base of chip regs */ - u64 __iomem *ipath_kregbase; - /* end of mem-mapped chip space; range checking */ - u64 __iomem *ipath_kregend; - /* physical address of chip for io_remap, etc. 
*/ - unsigned long ipath_physaddr; - /* base of memory alloced for ipath_kregbase, for free */ - u64 *ipath_kregalloc; - /* ipath_cfgports pointers */ - struct ipath_portdata **ipath_pd; - /* sk_buffs used by port 0 eager receive queue */ - struct ipath_skbinfo *ipath_port0_skbinfo; - /* kvirt address of 1st 2k pio buffer */ - void __iomem *ipath_pio2kbase; - /* kvirt address of 1st 4k pio buffer */ - void __iomem *ipath_pio4kbase; - /* - * points to area where PIOavail registers will be DMA'ed. - * Has to be on a page of it's own, because the page will be - * mapped into user program space. This copy is *ONLY* ever - * written by DMA, not by the driver! Need a copy per device - * when we get to multiple devices - */ - volatile __le64 *ipath_pioavailregs_dma; - /* physical address where updates occur */ - dma_addr_t ipath_pioavailregs_phys; - struct _ipath_layer ipath_layer; - /* setup intr */ - int (*ipath_f_intrsetup)(struct ipath_devdata *); - /* fallback to alternate interrupt type if possible */ - int (*ipath_f_intr_fallback)(struct ipath_devdata *); - /* setup on-chip bus config */ - int (*ipath_f_bus)(struct ipath_devdata *, struct pci_dev *); - /* hard reset chip */ - int (*ipath_f_reset)(struct ipath_devdata *); - int (*ipath_f_get_boardname)(struct ipath_devdata *, char *, - size_t); - void (*ipath_f_init_hwerrors)(struct ipath_devdata *); - void (*ipath_f_handle_hwerrors)(struct ipath_devdata *, char *, - size_t); - void (*ipath_f_quiet_serdes)(struct ipath_devdata *); - int (*ipath_f_bringup_serdes)(struct ipath_devdata *); - int (*ipath_f_early_init)(struct ipath_devdata *); - void (*ipath_f_clear_tids)(struct ipath_devdata *, unsigned); - void (*ipath_f_put_tid)(struct ipath_devdata *, u64 __iomem*, - u32, unsigned long); - void (*ipath_f_tidtemplate)(struct ipath_devdata *); - void (*ipath_f_cleanup)(struct ipath_devdata *); - void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64); - /* fill out chip-specific fields */ - int 
(*ipath_f_get_base_info)(struct ipath_portdata *, void *); - /* free irq */ - void (*ipath_f_free_irq)(struct ipath_devdata *); - struct ipath_message_header *(*ipath_f_get_msgheader) - (struct ipath_devdata *, __le32 *); - void (*ipath_f_config_ports)(struct ipath_devdata *, ushort); - int (*ipath_f_get_ib_cfg)(struct ipath_devdata *, int); - int (*ipath_f_set_ib_cfg)(struct ipath_devdata *, int, u32); - void (*ipath_f_config_jint)(struct ipath_devdata *, u16 , u16); - void (*ipath_f_read_counters)(struct ipath_devdata *, - struct infinipath_counters *); - void (*ipath_f_xgxs_reset)(struct ipath_devdata *); - /* per chip actions needed for IB Link up/down changes */ - int (*ipath_f_ib_updown)(struct ipath_devdata *, int, u64); - - unsigned ipath_lastegr_idx; - struct ipath_ibdev *verbs_dev; - struct timer_list verbs_timer; - /* total dwords sent (summed from counter) */ - u64 ipath_sword; - /* total dwords rcvd (summed from counter) */ - u64 ipath_rword; - /* total packets sent (summed from counter) */ - u64 ipath_spkts; - /* total packets rcvd (summed from counter) */ - u64 ipath_rpkts; - /* ipath_statusp initially points to this. */ - u64 _ipath_status; - /* GUID for this interface, in network order */ - __be64 ipath_guid; - /* - * aggregate of error bits reported since last cleared, for - * limiting of error reporting - */ - ipath_err_t ipath_lasterror; - /* - * aggregate of error bits reported since last cleared, for - * limiting of hwerror reporting - */ - ipath_err_t ipath_lasthwerror; - /* errors masked because they occur too fast */ - ipath_err_t ipath_maskederrs; - u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */ - /* these 5 fields are used to establish deltas for IB Symbol - * errors and linkrecovery errors. They can be reported on - * some chips during link negotiation prior to INIT, and with - * DDR when faking DDR negotiations with non-IBTA switches. - * The chip counters are adjusted at driver unload if there is - * a non-zero delta. 
- */ - u64 ibdeltainprog; - u64 ibsymdelta; - u64 ibsymsnap; - u64 iblnkerrdelta; - u64 iblnkerrsnap; - - /* time in jiffies at which to re-enable maskederrs */ - unsigned long ipath_unmasktime; - /* count of egrfull errors, combined for all ports */ - u64 ipath_last_tidfull; - /* for ipath_qcheck() */ - u64 ipath_lastport0rcv_cnt; - /* template for writing TIDs */ - u64 ipath_tidtemplate; - /* value to write to free TIDs */ - u64 ipath_tidinvalid; - /* IBA6120 rcv interrupt setup */ - u64 ipath_rhdrhead_intr_off; - - /* size of memory at ipath_kregbase */ - u32 ipath_kregsize; - /* number of registers used for pioavail */ - u32 ipath_pioavregs; - /* IPATH_POLL, etc. */ - u32 ipath_flags; - /* ipath_flags driver is waiting for */ - u32 ipath_state_wanted; - /* last buffer for user use, first buf for kernel use is this - * index. */ - u32 ipath_lastport_piobuf; - /* is a stats timer active */ - u32 ipath_stats_timer_active; - /* number of interrupts for this device -- saturates... */ - u32 ipath_int_counter; - /* dwords sent read from counter */ - u32 ipath_lastsword; - /* dwords received read from counter */ - u32 ipath_lastrword; - /* sent packets read from counter */ - u32 ipath_lastspkts; - /* received packets read from counter */ - u32 ipath_lastrpkts; - /* pio bufs allocated per port */ - u32 ipath_pbufsport; - /* if remainder on bufs/port, ports < extrabuf get 1 extra */ - u32 ipath_ports_extrabuf; - u32 ipath_pioupd_thresh; /* update threshold, some chips */ - /* - * number of ports configured as max; zero is set to number chip - * supports, less gives more pio bufs/port, etc. - */ - u32 ipath_cfgports; - /* count of port 0 hdrqfull errors */ - u32 ipath_p0_hdrqfull; - /* port 0 number of receive eager buffers */ - u32 ipath_p0_rcvegrcnt; - - /* - * index of last piobuffer we used. Speeds up searching, by - * starting at this point. Doesn't matter if multiple cpu's use and - * update, last updater is only write that matters. 
Whenever it - * wraps, we update shadow copies. Need a copy per device when we - * get to multiple devices - */ - u32 ipath_lastpioindex; - u32 ipath_lastpioindexl; - /* max length of freezemsg */ - u32 ipath_freezelen; - /* - * consecutive times we wanted a PIO buffer but were unable to - * get one - */ - u32 ipath_consec_nopiobuf; - /* - * hint that we should update ipath_pioavailshadow before - * looking for a PIO buffer - */ - u32 ipath_upd_pio_shadow; - /* so we can rewrite it after a chip reset */ - u32 ipath_pcibar0; - /* so we can rewrite it after a chip reset */ - u32 ipath_pcibar1; - u32 ipath_x1_fix_tries; - u32 ipath_autoneg_tries; - u32 serdes_first_init_done; - - struct ipath_relock { - atomic_t ipath_relock_timer_active; - struct timer_list ipath_relock_timer; - unsigned int ipath_relock_interval; /* in jiffies */ - } ipath_relock_singleton; - - /* interrupt number */ - int ipath_irq; - /* HT/PCI Vendor ID (here for NodeInfo) */ - u16 ipath_vendorid; - /* HT/PCI Device ID (here for NodeInfo) */ - u16 ipath_deviceid; - /* offset in HT config space of slave/primary interface block */ - u8 ipath_ht_slave_off; - /* for write combining settings */ - int wc_cookie; - /* ref count for each pkey */ - atomic_t ipath_pkeyrefs[4]; - /* shadow copy of struct page *'s for exp tid pages */ - struct page **ipath_pageshadow; - /* shadow copy of dma handles for exp tid pages */ - dma_addr_t *ipath_physshadow; - u64 __iomem *ipath_egrtidbase; - /* lock to workaround chip bug 9437 and others */ - spinlock_t ipath_kernel_tid_lock; - spinlock_t ipath_user_tid_lock; - spinlock_t ipath_sendctrl_lock; - /* around ipath_pd and (user ports) port_cnt use (intr vs free) */ - spinlock_t ipath_uctxt_lock; - - /* - * IPATH_STATUS_*, - * this address is mapped readonly into user processes so they can - * get status cheaply, whenever they want. 
- */ - u64 *ipath_statusp; - /* freeze msg if hw error put chip in freeze */ - char *ipath_freezemsg; - /* pci access data structure */ - struct pci_dev *pcidev; - struct cdev *user_cdev; - struct cdev *diag_cdev; - struct device *user_dev; - struct device *diag_dev; - /* timer used to prevent stats overflow, error throttling, etc. */ - struct timer_list ipath_stats_timer; - /* timer to verify interrupts work, and fallback if possible */ - struct timer_list ipath_intrchk_timer; - void *ipath_dummy_hdrq; /* used after port close */ - dma_addr_t ipath_dummy_hdrq_phys; - - /* SendDMA related entries */ - spinlock_t ipath_sdma_lock; - unsigned long ipath_sdma_status; - unsigned long ipath_sdma_abort_jiffies; - unsigned long ipath_sdma_abort_intr_timeout; - unsigned long ipath_sdma_buf_jiffies; - struct ipath_sdma_desc *ipath_sdma_descq; - u64 ipath_sdma_descq_added; - u64 ipath_sdma_descq_removed; - int ipath_sdma_desc_nreserved; - u16 ipath_sdma_descq_cnt; - u16 ipath_sdma_descq_tail; - u16 ipath_sdma_descq_head; - u16 ipath_sdma_next_intr; - u16 ipath_sdma_reset_wait; - u8 ipath_sdma_generation; - struct tasklet_struct ipath_sdma_abort_task; - struct tasklet_struct ipath_sdma_notify_task; - struct list_head ipath_sdma_activelist; - struct list_head ipath_sdma_notifylist; - atomic_t ipath_sdma_vl15_count; - struct timer_list ipath_sdma_vl15_timer; - - dma_addr_t ipath_sdma_descq_phys; - volatile __le64 *ipath_sdma_head_dma; - dma_addr_t ipath_sdma_head_phys; - - unsigned long ipath_ureg_align; /* user register alignment */ - - struct delayed_work ipath_autoneg_work; - wait_queue_head_t ipath_autoneg_wait; - - /* HoL blocking / user app forward-progress state */ - unsigned ipath_hol_state; - unsigned ipath_hol_next; - struct timer_list ipath_hol_timer; - - /* - * Shadow copies of registers; size indicates read access size. 
- * Most of them are readonly, but some are write-only registers, - * where we manipulate the bits in the shadow copy, and then write - * the shadow copy to infinipath. - * - * We deliberately make most of these 32 bits, since they have - * restricted range. For any that we read, we want to generate 32 - * bit accesses, since Opteron will generate 2 separate 32 bit HT - * transactions for a 64 bit read, and we want to avoid unnecessary - * HT transactions. - */ - - /* This is the 64 bit group */ - - /* - * shadow of pioavail, check to be sure it's large enough at - * init time. - */ - unsigned long ipath_pioavailshadow[8]; - /* bitmap of send buffers available for the kernel to use with PIO. */ - unsigned long ipath_pioavailkernel[8]; - /* shadow of kr_gpio_out, for rmw ops */ - u64 ipath_gpio_out; - /* shadow the gpio mask register */ - u64 ipath_gpio_mask; - /* shadow the gpio output enable, etc... */ - u64 ipath_extctrl; - /* kr_revision shadow */ - u64 ipath_revision; - /* - * shadow of ibcctrl, for interrupt handling of link changes, - * etc. 
- */ - u64 ipath_ibcctrl; - /* - * last ibcstatus, to suppress "duplicate" status change messages, - * mostly from 2 to 3 - */ - u64 ipath_lastibcstat; - /* hwerrmask shadow */ - ipath_err_t ipath_hwerrmask; - ipath_err_t ipath_errormask; /* errormask shadow */ - /* interrupt config reg shadow */ - u64 ipath_intconfig; - /* kr_sendpiobufbase value */ - u64 ipath_piobufbase; - /* kr_ibcddrctrl shadow */ - u64 ipath_ibcddrctrl; - - /* these are the "32 bit" regs */ - - /* - * number of GUIDs in the flash for this interface; may need some - * rethinking for setting on other ifaces - */ - u32 ipath_nguid; - /* - * the following two are 32-bit bitmasks, but {test,clear,set}_bit - * all expect bit fields to be "unsigned long" - */ - /* shadow kr_rcvctrl */ - unsigned long ipath_rcvctrl; - /* shadow kr_sendctrl */ - unsigned long ipath_sendctrl; - /* to not count armlaunch after cancel */ - unsigned long ipath_lastcancel; - /* count cases where special trigger was needed (double write) */ - unsigned long ipath_spectriggerhit; - - /* value we put in kr_rcvhdrcnt */ - u32 ipath_rcvhdrcnt; - /* value we put in kr_rcvhdrsize */ - u32 ipath_rcvhdrsize; - /* value we put in kr_rcvhdrentsize */ - u32 ipath_rcvhdrentsize; - /* offset of last entry in rcvhdrq */ - u32 ipath_hdrqlast; - /* kr_portcnt value */ - u32 ipath_portcnt; - /* kr_pagealign value */ - u32 ipath_palign; - /* number of "2KB" PIO buffers */ - u32 ipath_piobcnt2k; - /* size in bytes of "2KB" PIO buffers */ - u32 ipath_piosize2k; - /* number of "4KB" PIO buffers */ - u32 ipath_piobcnt4k; - /* size in bytes of "4KB" PIO buffers */ - u32 ipath_piosize4k; - u32 ipath_pioreserved; /* reserved special-inkernel; */ - /* kr_rcvegrbase value */ - u32 ipath_rcvegrbase; - /* kr_rcvegrcnt value */ - u32 ipath_rcvegrcnt; - /* kr_rcvtidbase value */ - u32 ipath_rcvtidbase; - /* kr_rcvtidcnt value */ - u32 ipath_rcvtidcnt; - /* kr_sendregbase */ - u32 ipath_sregbase; - /* kr_userregbase */ - u32 ipath_uregbase; - /* 
kr_counterregbase */ - u32 ipath_cregbase; - /* shadow the control register contents */ - u32 ipath_control; - /* PCI revision register (HTC rev on FPGA) */ - u32 ipath_pcirev; - - /* chip address space used by 4k pio buffers */ - u32 ipath_4kalign; - /* The MTU programmed for this unit */ - u32 ipath_ibmtu; - /* - * The max size IB packet, included IB headers that we can send. - * Starts same as ipath_piosize, but is affected when ibmtu is - * changed, or by size of eager buffers - */ - u32 ipath_ibmaxlen; - /* - * ibmaxlen at init time, limited by chip and by receive buffer - * size. Not changed after init. - */ - u32 ipath_init_ibmaxlen; - /* size of each rcvegrbuffer */ - u32 ipath_rcvegrbufsize; - /* localbus width (1, 2,4,8,16,32) from config space */ - u32 ipath_lbus_width; - /* localbus speed (HT: 200,400,800,1000; PCIe 2500) */ - u32 ipath_lbus_speed; - /* - * number of sequential ibcstatus change for polling active/quiet - * (i.e., link not coming up). - */ - u32 ipath_ibpollcnt; - /* low and high portions of MSI capability/vector */ - u32 ipath_msi_lo; - /* saved after PCIe init for restore after reset */ - u32 ipath_msi_hi; - /* MSI data (vector) saved for restore */ - u16 ipath_msi_data; - /* MLID programmed for this instance */ - u16 ipath_mlid; - /* LID programmed for this instance */ - u16 ipath_lid; - /* list of pkeys programmed; 0 if not set */ - u16 ipath_pkeys[4]; - /* - * ASCII serial number, from flash, large enough for original - * all digit strings, and longer QLogic serial number format - */ - u8 ipath_serial[16]; - /* human readable board version */ - u8 ipath_boardversion[96]; - u8 ipath_lbus_info[32]; /* human readable localbus info */ - /* chip major rev, from ipath_revision */ - u8 ipath_majrev; - /* chip minor rev, from ipath_revision */ - u8 ipath_minrev; - /* board rev, from ipath_revision */ - u8 ipath_boardrev; - /* saved for restore after reset */ - u8 ipath_pci_cacheline; - /* LID mask control */ - u8 ipath_lmc; - /* link width 
supported */ - u8 ipath_link_width_supported; - /* link speed supported */ - u8 ipath_link_speed_supported; - u8 ipath_link_width_enabled; - u8 ipath_link_speed_enabled; - u8 ipath_link_width_active; - u8 ipath_link_speed_active; - /* Rx Polarity inversion (compensate for ~tx on partner) */ - u8 ipath_rx_pol_inv; - - u8 ipath_r_portenable_shift; - u8 ipath_r_intravail_shift; - u8 ipath_r_tailupd_shift; - u8 ipath_r_portcfg_shift; - - /* unit # of this chip, if present */ - int ipath_unit; - - /* local link integrity counter */ - u32 ipath_lli_counter; - /* local link integrity errors */ - u32 ipath_lli_errors; - /* - * Above counts only cases where _successive_ LocalLinkIntegrity - * errors were seen in the receive headers of kern-packets. - * Below are the three (monotonically increasing) counters - * maintained via GPIO interrupts on iba6120-rev2. - */ - u32 ipath_rxfc_unsupvl_errs; - u32 ipath_overrun_thresh_errs; - u32 ipath_lli_errs; - - /* - * Not all devices managed by a driver instance are the same - * type, so these fields must be per-device. - */ - u64 ipath_i_bitsextant; - ipath_err_t ipath_e_bitsextant; - ipath_err_t ipath_hwe_bitsextant; - - /* - * Below should be computable from number of ports, - * since they are never modified. - */ - u64 ipath_i_rcvavail_mask; - u64 ipath_i_rcvurg_mask; - u16 ipath_i_rcvurg_shift; - u16 ipath_i_rcvavail_shift; - - /* - * Register bits for selecting i2c direction and values, used for - * I2C serial flash. - */ - u8 ipath_gpio_sda_num; - u8 ipath_gpio_scl_num; - u8 ipath_i2c_chain_type; - u64 ipath_gpio_sda; - u64 ipath_gpio_scl; - - /* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */ - spinlock_t ipath_gpio_lock; - - /* - * IB link and linktraining states and masks that vary per chip in - * some way. 
Set at init, to avoid each IB status change interrupt - */ - u8 ibcs_ls_shift; - u8 ibcs_lts_mask; - u32 ibcs_mask; - u32 ib_init; - u32 ib_arm; - u32 ib_active; - - u16 ipath_rhf_offset; /* offset of RHF within receive header entry */ - - /* - * shift/mask for linkcmd, linkinitcmd, maxpktlen in ibccontol - * reg. Changes for IBA7220 - */ - u8 ibcc_lic_mask; /* LinkInitCmd */ - u8 ibcc_lc_shift; /* LinkCmd */ - u8 ibcc_mpl_shift; /* Maxpktlen */ - - u8 delay_mult; - - /* used to override LED behavior */ - u8 ipath_led_override; /* Substituted for normal value, if non-zero */ - u16 ipath_led_override_timeoff; /* delta to next timer event */ - u8 ipath_led_override_vals[2]; /* Alternates per blink-frame */ - u8 ipath_led_override_phase; /* Just counts, LSB picks from vals[] */ - atomic_t ipath_led_override_timer_active; - /* Used to flash LEDs in override mode */ - struct timer_list ipath_led_override_timer; - - /* Support (including locks) for EEPROM logging of errors and time */ - /* control access to actual counters, timer */ - spinlock_t ipath_eep_st_lock; - /* control high-level access to EEPROM */ - struct mutex ipath_eep_lock; - /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */ - uint64_t ipath_traffic_wds; - /* active time is kept in seconds, but logged in hours */ - atomic_t ipath_active_time; - /* Below are nominal shadow of EEPROM, new since last EEPROM update */ - uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT]; - uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT]; - uint16_t ipath_eep_hrs; - /* - * masks for which bits of errs, hwerrs that cause - * each of the counters to increment. - */ - struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT]; - - /* interrupt mitigation reload register info */ - u16 ipath_jint_idle_ticks; /* idle clock ticks */ - u16 ipath_jint_max_packets; /* max packets across all ports */ - - /* - * lock for access to SerDes, and flags to sequence preset - * versus steady-state. 7220-only at the moment. 
- */ - spinlock_t ipath_sdepb_lock; - u8 ipath_presets_needed; /* Set if presets to be restored next DOWN */ -}; - -/* ipath_hol_state values (stopping/starting user proc, send flushing) */ -#define IPATH_HOL_UP 0 -#define IPATH_HOL_DOWN 1 -/* ipath_hol_next toggle values, used when hol_state IPATH_HOL_DOWN */ -#define IPATH_HOL_DOWNSTOP 0 -#define IPATH_HOL_DOWNCONT 1 - -/* bit positions for sdma_status */ -#define IPATH_SDMA_ABORTING 0 -#define IPATH_SDMA_DISARMED 1 -#define IPATH_SDMA_DISABLED 2 -#define IPATH_SDMA_LAYERBUF 3 -#define IPATH_SDMA_RUNNING 30 -#define IPATH_SDMA_SHUTDOWN 31 - -/* bit combinations that correspond to abort states */ -#define IPATH_SDMA_ABORT_NONE 0 -#define IPATH_SDMA_ABORT_ABORTING (1UL << IPATH_SDMA_ABORTING) -#define IPATH_SDMA_ABORT_DISARMED ((1UL << IPATH_SDMA_ABORTING) | \ - (1UL << IPATH_SDMA_DISARMED)) -#define IPATH_SDMA_ABORT_DISABLED ((1UL << IPATH_SDMA_ABORTING) | \ - (1UL << IPATH_SDMA_DISABLED)) -#define IPATH_SDMA_ABORT_ABORTED ((1UL << IPATH_SDMA_ABORTING) | \ - (1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED)) -#define IPATH_SDMA_ABORT_MASK ((1UL<<IPATH_SDMA_ABORTING) | \ - (1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED)) - -#define IPATH_SDMA_BUF_NONE 0 -#define IPATH_SDMA_BUF_MASK (1UL<<IPATH_SDMA_LAYERBUF) - -/* Private data for file operations */ -struct ipath_filedata { - struct ipath_portdata *pd; - unsigned subport; - unsigned tidcursor; - struct ipath_user_sdma_queue *pq; -}; -extern struct list_head ipath_dev_list; -extern spinlock_t ipath_devs_lock; -extern struct ipath_devdata *ipath_lookup(int unit); - -int ipath_init_chip(struct ipath_devdata *, int); -int ipath_enable_wc(struct ipath_devdata *dd); -void ipath_disable_wc(struct ipath_devdata *dd); -int ipath_count_units(int *npresentp, int *nupp, int *maxportsp); -void ipath_shutdown_device(struct ipath_devdata *); -void ipath_clear_freeze(struct ipath_devdata *); - -struct file_operations; -int ipath_cdev_init(int minor, char 
*name, const struct file_operations *fops, - struct cdev **cdevp, struct device **devp); -void ipath_cdev_cleanup(struct cdev **cdevp, - struct device **devp); - -int ipath_diag_add(struct ipath_devdata *); -void ipath_diag_remove(struct ipath_devdata *); - -extern wait_queue_head_t ipath_state_wait; - -int ipath_user_add(struct ipath_devdata *dd); -void ipath_user_remove(struct ipath_devdata *dd); - -struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t); - -extern int ipath_diag_inuse; - -irqreturn_t ipath_intr(int irq, void *devid); -int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen, - ipath_err_t err); -#if __IPATH_INFO || __IPATH_DBG -extern const char *ipath_ibcstatus_str[]; -#endif - -/* clean up any per-chip chip-specific stuff */ -void ipath_chip_cleanup(struct ipath_devdata *); -/* clean up any chip type-specific stuff */ -void ipath_chip_done(void); - -void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first, - unsigned cnt); -void ipath_cancel_sends(struct ipath_devdata *, int); - -int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *); -void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *); - -int ipath_parse_ushort(const char *str, unsigned short *valp); - -void ipath_kreceive(struct ipath_portdata *); -int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned); -int ipath_reset_device(int); -void ipath_get_faststats(unsigned long); -int ipath_wait_linkstate(struct ipath_devdata *, u32, int); -int ipath_set_linkstate(struct ipath_devdata *, u8); -int ipath_set_mtu(struct ipath_devdata *, u16); -int ipath_set_lid(struct ipath_devdata *, u32, u8); -int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); -void ipath_enable_armlaunch(struct ipath_devdata *); -void ipath_disable_armlaunch(struct ipath_devdata *); -void ipath_hol_down(struct ipath_devdata *); -void ipath_hol_up(struct ipath_devdata *); -void ipath_hol_event(unsigned long); -void ipath_toggle_rclkrls(struct 
ipath_devdata *); -void ipath_sd7220_clr_ibpar(struct ipath_devdata *); -void ipath_set_relock_poll(struct ipath_devdata *, int); -void ipath_shutdown_relock_poll(struct ipath_devdata *); - -/* for use in system calls, where we want to know device type, etc. */ -#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd -#define subport_fp(fp) \ - ((struct ipath_filedata *)(fp)->private_data)->subport -#define tidcursor_fp(fp) \ - ((struct ipath_filedata *)(fp)->private_data)->tidcursor -#define user_sdma_queue_fp(fp) \ - ((struct ipath_filedata *)(fp)->private_data)->pq - -/* - * values for ipath_flags - */ - /* chip can report link latency (IB 1.2) */ -#define IPATH_HAS_LINK_LATENCY 0x1 - /* The chip is up and initted */ -#define IPATH_INITTED 0x2 - /* set if any user code has set kr_rcvhdrsize */ -#define IPATH_RCVHDRSZ_SET 0x4 - /* The chip is present and valid for accesses */ -#define IPATH_PRESENT 0x8 - /* HT link0 is only 8 bits wide, ignore upper byte crc - * errors, etc. */ -#define IPATH_8BIT_IN_HT0 0x10 - /* HT link1 is only 8 bits wide, ignore upper byte crc - * errors, etc. 
*/ -#define IPATH_8BIT_IN_HT1 0x20 - /* The link is down */ -#define IPATH_LINKDOWN 0x40 - /* The link level is up (0x11) */ -#define IPATH_LINKINIT 0x80 - /* The link is in the armed (0x21) state */ -#define IPATH_LINKARMED 0x100 - /* The link is in the active (0x31) state */ -#define IPATH_LINKACTIVE 0x200 - /* link current state is unknown */ -#define IPATH_LINKUNK 0x400 - /* Write combining flush needed for PIO */ -#define IPATH_PIO_FLUSH_WC 0x1000 - /* DMA Receive tail pointer */ -#define IPATH_NODMA_RTAIL 0x2000 - /* no IB cable, or no device on IB cable */ -#define IPATH_NOCABLE 0x4000 - /* Supports port zero per packet receive interrupts via - * GPIO */ -#define IPATH_GPIO_INTR 0x8000 - /* uses the coded 4byte TID, not 8 byte */ -#define IPATH_4BYTE_TID 0x10000 - /* packet/word counters are 32 bit, else those 4 counters - * are 64bit */ -#define IPATH_32BITCOUNTERS 0x20000 - /* Interrupt register is 64 bits */ -#define IPATH_INTREG_64 0x40000 - /* can miss port0 rx interrupts */ -#define IPATH_DISABLED 0x80000 /* administratively disabled */ - /* Use GPIO interrupts for new counters */ -#define IPATH_GPIO_ERRINTRS 0x100000 -#define IPATH_SWAP_PIOBUFS 0x200000 - /* Supports Send DMA */ -#define IPATH_HAS_SEND_DMA 0x400000 - /* Supports Send Count (not just word count) in PBC */ -#define IPATH_HAS_PBC_CNT 0x800000 - /* Suppress heartbeat, even if turning off loopback */ -#define IPATH_NO_HRTBT 0x1000000 -#define IPATH_HAS_THRESH_UPDATE 0x4000000 -#define IPATH_HAS_MULT_IB_SPEED 0x8000000 -#define IPATH_IB_AUTONEG_INPROG 0x10000000 -#define IPATH_IB_AUTONEG_FAILED 0x20000000 - /* Linkdown-disable intentionally, Do not attempt to bring up */ -#define IPATH_IB_LINK_DISABLED 0x40000000 -#define IPATH_IB_FORCE_NOTIFY 0x80000000 /* force notify on next ib change */ - -/* Bits in GPIO for the added interrupts */ -#define IPATH_GPIO_PORT0_BIT 2 -#define IPATH_GPIO_RXUVL_BIT 3 -#define IPATH_GPIO_OVRUN_BIT 4 -#define IPATH_GPIO_LLI_BIT 5 -#define 
IPATH_GPIO_ERRINTR_MASK 0x38 - -/* portdata flag bit offsets */ - /* waiting for a packet to arrive */ -#define IPATH_PORT_WAITING_RCV 2 - /* master has not finished initializing */ -#define IPATH_PORT_MASTER_UNINIT 4 - /* waiting for an urgent packet to arrive */ -#define IPATH_PORT_WAITING_URG 5 - -/* free up any allocated data at closes */ -void ipath_free_data(struct ipath_portdata *dd); -u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32, u32 *); -void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start, - unsigned len, int avail); -void ipath_init_iba6110_funcs(struct ipath_devdata *); -void ipath_get_eeprom_info(struct ipath_devdata *); -int ipath_update_eeprom_log(struct ipath_devdata *dd); -void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr); -u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); -void ipath_disarm_senderrbufs(struct ipath_devdata *); -void ipath_force_pio_avail_update(struct ipath_devdata *); -void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev); - -/* - * Set LED override, only the two LSBs have "public" meaning, but - * any non-zero value substitutes them for the Link and LinkTrain - * LED states. - */ -#define IPATH_LED_PHYS 1 /* Physical (linktraining) GREEN LED */ -#define IPATH_LED_LOG 2 /* Logical (link) YELLOW LED */ -void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val); - -/* send dma routines */ -int setup_sdma(struct ipath_devdata *); -void teardown_sdma(struct ipath_devdata *); -void ipath_restart_sdma(struct ipath_devdata *); -void ipath_sdma_intr(struct ipath_devdata *); -int ipath_sdma_verbs_send(struct ipath_devdata *, struct ipath_sge_state *, - u32, struct ipath_verbs_txreq *); -/* ipath_sdma_lock should be locked before calling this. 
*/ -int ipath_sdma_make_progress(struct ipath_devdata *dd); - -/* must be called under ipath_sdma_lock */ -static inline u16 ipath_sdma_descq_freecnt(const struct ipath_devdata *dd) -{ - return dd->ipath_sdma_descq_cnt - - (dd->ipath_sdma_descq_added - dd->ipath_sdma_descq_removed) - - 1 - dd->ipath_sdma_desc_nreserved; -} - -static inline void ipath_sdma_desc_reserve(struct ipath_devdata *dd, u16 cnt) -{ - dd->ipath_sdma_desc_nreserved += cnt; -} - -static inline void ipath_sdma_desc_unreserve(struct ipath_devdata *dd, u16 cnt) -{ - dd->ipath_sdma_desc_nreserved -= cnt; -} - -/* - * number of words used for protocol header if not set by ipath_userinit(); - */ -#define IPATH_DFLT_RCVHDRSIZE 9 - -int ipath_get_user_pages(unsigned long, size_t, struct page **); -void ipath_release_user_pages(struct page **, size_t); -void ipath_release_user_pages_on_close(struct page **, size_t); -int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int); -int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int); -int ipath_tempsense_read(struct ipath_devdata *, u8 regnum); -int ipath_tempsense_write(struct ipath_devdata *, u8 regnum, u8 data); - -/* these are used for the registers that vary with port */ -void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg, - unsigned, u64); - -/* - * We could have a single register get/put routine, that takes a group type, - * but this is somewhat clearer and cleaner. It also gives us some error - * checking. 64 bit register reads should always work, but are inefficient - * on opteron (the northbridge always generates 2 separate HT 32 bit reads), - * so we use kreg32 wherever possible. User register and counter register - * reads are always 32 bit reads, so only one form of those routines. - */ - -/* - * At the moment, none of the s-registers are writable, so no - * ipath_write_sreg(). 
- */ - -/** - * ipath_read_ureg32 - read 32-bit virtualized per-port register - * @dd: device - * @regno: register number - * @port: port number - * - * Return the contents of a register that is virtualized to be per port. - * Returns -1 on errors (not distinguishable from valid contents at - * runtime; we may add a separate error variable at some point). - */ -static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd, - ipath_ureg regno, int port) -{ - if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) - return 0; - - return readl(regno + (u64 __iomem *) - (dd->ipath_uregbase + - (char __iomem *)dd->ipath_kregbase + - dd->ipath_ureg_align * port)); -} - -/** - * ipath_write_ureg - write 32-bit virtualized per-port register - * @dd: device - * @regno: register number - * @value: value - * @port: port - * - * Write the contents of a register that is virtualized to be per port. - */ -static inline void ipath_write_ureg(const struct ipath_devdata *dd, - ipath_ureg regno, u64 value, int port) -{ - u64 __iomem *ubase = (u64 __iomem *) - (dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase + - dd->ipath_ureg_align * port); - if (dd->ipath_kregbase) - writeq(value, &ubase[regno]); -} - -static inline u32 ipath_read_kreg32(const struct ipath_devdata *dd, - ipath_kreg regno) -{ - if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) - return -1; - return readl((u32 __iomem *) & dd->ipath_kregbase[regno]); -} - -static inline u64 ipath_read_kreg64(const struct ipath_devdata *dd, - ipath_kreg regno) -{ - if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) - return -1; - - return readq(&dd->ipath_kregbase[regno]); -} - -static inline void ipath_write_kreg(const struct ipath_devdata *dd, - ipath_kreg regno, u64 value) -{ - if (dd->ipath_kregbase) - writeq(value, &dd->ipath_kregbase[regno]); -} - -static inline u64 ipath_read_creg(const struct ipath_devdata *dd, - ipath_sreg regno) -{ - if (!dd->ipath_kregbase || !(dd->ipath_flags & 
IPATH_PRESENT)) - return 0; - - return readq(regno + (u64 __iomem *) - (dd->ipath_cregbase + - (char __iomem *)dd->ipath_kregbase)); -} - -static inline u32 ipath_read_creg32(const struct ipath_devdata *dd, - ipath_sreg regno) -{ - if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) - return 0; - return readl(regno + (u64 __iomem *) - (dd->ipath_cregbase + - (char __iomem *)dd->ipath_kregbase)); -} - -static inline void ipath_write_creg(const struct ipath_devdata *dd, - ipath_creg regno, u64 value) -{ - if (dd->ipath_kregbase) - writeq(value, regno + (u64 __iomem *) - (dd->ipath_cregbase + - (char __iomem *)dd->ipath_kregbase)); -} - -static inline void ipath_clear_rcvhdrtail(const struct ipath_portdata *pd) -{ - *((u64 *) pd->port_rcvhdrtail_kvaddr) = 0ULL; -} - -static inline u32 ipath_get_rcvhdrtail(const struct ipath_portdata *pd) -{ - return (u32) le64_to_cpu(*((volatile __le64 *) - pd->port_rcvhdrtail_kvaddr)); -} - -static inline u32 ipath_get_hdrqtail(const struct ipath_portdata *pd) -{ - const struct ipath_devdata *dd = pd->port_dd; - u32 hdrqtail; - - if (dd->ipath_flags & IPATH_NODMA_RTAIL) { - __le32 *rhf_addr; - u32 seq; - - rhf_addr = (__le32 *) pd->port_rcvhdrq + - pd->port_head + dd->ipath_rhf_offset; - seq = ipath_hdrget_seq(rhf_addr); - hdrqtail = pd->port_head; - if (seq == pd->port_seq_cnt) - hdrqtail++; - } else - hdrqtail = ipath_get_rcvhdrtail(pd); - - return hdrqtail; -} - -static inline u64 ipath_read_ireg(const struct ipath_devdata *dd, ipath_kreg r) -{ - return (dd->ipath_flags & IPATH_INTREG_64) ? - ipath_read_kreg64(dd, r) : ipath_read_kreg32(dd, r); -} - -/* - * from contents of IBCStatus (or a saved copy), return linkstate - * Report ACTIVE_DEFER as ACTIVE, because we treat them the same - * everywhere, anyway (and should be, for almost all purposes). 
- */ -static inline u32 ipath_ib_linkstate(struct ipath_devdata *dd, u64 ibcs) -{ - u32 state = (u32)(ibcs >> dd->ibcs_ls_shift) & - INFINIPATH_IBCS_LINKSTATE_MASK; - if (state == INFINIPATH_IBCS_L_STATE_ACT_DEFER) - state = INFINIPATH_IBCS_L_STATE_ACTIVE; - return state; -} - -/* from contents of IBCStatus (or a saved copy), return linktrainingstate */ -static inline u32 ipath_ib_linktrstate(struct ipath_devdata *dd, u64 ibcs) -{ - return (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & - dd->ibcs_lts_mask; -} - -/* - * from contents of IBCStatus (or a saved copy), return logical link state - * combination of link state and linktraining state (down, active, init, - * arm, etc. - */ -static inline u32 ipath_ib_state(struct ipath_devdata *dd, u64 ibcs) -{ - u32 ibs; - ibs = (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & - dd->ibcs_lts_mask; - ibs |= (u32)(ibcs & - (INFINIPATH_IBCS_LINKSTATE_MASK << dd->ibcs_ls_shift)); - return ibs; -} - -/* - * sysfs interface. - */ - -struct device_driver; - -extern const char ib_ipath_version[]; - -extern const struct attribute_group *ipath_driver_attr_groups[]; - -int ipath_device_create_group(struct device *, struct ipath_devdata *); -void ipath_device_remove_group(struct device *, struct ipath_devdata *); -int ipath_expose_reset(struct device *); - -int ipath_init_ipathfs(void); -void ipath_exit_ipathfs(void); -int ipathfs_add_device(struct ipath_devdata *); -int ipathfs_remove_device(struct ipath_devdata *); - -/* - * dma_addr wrappers - all 0's invalid for hw - */ -dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long, - size_t, int); -dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int); -const char *ipath_get_unit_name(int unit); - -/* - * Flush write combining store buffers (if present) and perform a write - * barrier. 
- */ -#if defined(CONFIG_X86_64) -#define ipath_flush_wc() asm volatile("sfence" ::: "memory") -#else -#define ipath_flush_wc() wmb() -#endif - -extern unsigned ipath_debug; /* debugging bit mask */ -extern unsigned ipath_linkrecovery; -extern unsigned ipath_mtu4096; -extern struct mutex ipath_mutex; - -#define IPATH_DRV_NAME "ib_ipath" -#define IPATH_MAJOR 233 -#define IPATH_USER_MINOR_BASE 0 -#define IPATH_DIAGPKT_MINOR 127 -#define IPATH_DIAG_MINOR_BASE 129 -#define IPATH_NMINORS 255 - -#define ipath_dev_err(dd,fmt,...) \ - do { \ - const struct ipath_devdata *__dd = (dd); \ - if (__dd->pcidev) \ - dev_err(&__dd->pcidev->dev, "%s: " fmt, \ - ipath_get_unit_name(__dd->ipath_unit), \ - ##__VA_ARGS__); \ - else \ - printk(KERN_ERR IPATH_DRV_NAME ": %s: " fmt, \ - ipath_get_unit_name(__dd->ipath_unit), \ - ##__VA_ARGS__); \ - } while (0) - -#if _IPATH_DEBUGGING - -# define __IPATH_DBG_WHICH(which,fmt,...) \ - do { \ - if (unlikely(ipath_debug & (which))) \ - printk(KERN_DEBUG IPATH_DRV_NAME ": %s: " fmt, \ - __func__,##__VA_ARGS__); \ - } while(0) - -# define ipath_dbg(fmt,...) \ - __IPATH_DBG_WHICH(__IPATH_DBG,fmt,##__VA_ARGS__) -# define ipath_cdbg(which,fmt,...) \ - __IPATH_DBG_WHICH(__IPATH_##which##DBG,fmt,##__VA_ARGS__) - -#else /* ! _IPATH_DEBUGGING */ - -# define ipath_dbg(fmt,...) -# define ipath_cdbg(which,fmt,...) - -#endif /* _IPATH_DEBUGGING */ - -/* - * this is used for formatting hw error messages... - */ -struct ipath_hwerror_msgs { - u64 mask; - const char *msg; -}; - -#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b } - -/* in ipath_intr.c... 
*/ -void ipath_format_hwerrors(u64 hwerrs, - const struct ipath_hwerror_msgs *hwerrmsgs, - size_t nhwerrmsgs, - char *msg, size_t lmsg); - -#endif /* _IPATH_KERNEL_H */ diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c deleted file mode 100644 index c0e933fec..000000000 --- a/drivers/infiniband/hw/ipath/ipath_keys.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <asm/io.h> - -#include "ipath_verbs.h" -#include "ipath_kernel.h" - -/** - * ipath_alloc_lkey - allocate an lkey - * @rkt: lkey table in which to allocate the lkey - * @mr: memory region that this lkey protects - * - * Returns 1 if successful, otherwise returns 0. - */ - -int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr) -{ - unsigned long flags; - u32 r; - u32 n; - int ret; - - spin_lock_irqsave(&rkt->lock, flags); - - /* Find the next available LKEY */ - r = n = rkt->next; - for (;;) { - if (rkt->table[r] == NULL) - break; - r = (r + 1) & (rkt->max - 1); - if (r == n) { - spin_unlock_irqrestore(&rkt->lock, flags); - ipath_dbg("LKEY table full\n"); - ret = 0; - goto bail; - } - } - rkt->next = (r + 1) & (rkt->max - 1); - /* - * Make sure lkey is never zero which is reserved to indicate an - * unrestricted LKEY. - */ - rkt->gen++; - mr->lkey = (r << (32 - ib_ipath_lkey_table_size)) | - ((((1 << (24 - ib_ipath_lkey_table_size)) - 1) & rkt->gen) - << 8); - if (mr->lkey == 0) { - mr->lkey |= 1 << 8; - rkt->gen++; - } - rkt->table[r] = mr; - spin_unlock_irqrestore(&rkt->lock, flags); - - ret = 1; - -bail: - return ret; -} - -/** - * ipath_free_lkey - free an lkey - * @rkt: table from which to free the lkey - * @lkey: lkey id to free - */ -void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey) -{ - unsigned long flags; - u32 r; - - if (lkey == 0) - return; - r = lkey >> (32 - ib_ipath_lkey_table_size); - spin_lock_irqsave(&rkt->lock, flags); - rkt->table[r] = NULL; - spin_unlock_irqrestore(&rkt->lock, flags); -} - -/** - * ipath_lkey_ok - check IB SGE for validity and initialize - * @rkt: table containing lkey to check SGE against - * @isge: outgoing internal SGE - * @sge: SGE to check - * @acc: access flags - * - * Return 1 if valid and successful, otherwise returns 0. - * - * Check the IB SGE for validity and initialize our internal version - * of it. 
- */ -int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, - struct ib_sge *sge, int acc) -{ - struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; - struct ipath_mregion *mr; - unsigned n, m; - size_t off; - int ret; - - /* - * We use LKEY == zero for kernel virtual addresses - * (see ipath_get_dma_mr and ipath_dma.c). - */ - if (sge->lkey == 0) { - /* always a kernel port, no locking needed */ - struct ipath_pd *pd = to_ipd(qp->ibqp.pd); - - if (pd->user) { - ret = 0; - goto bail; - } - isge->mr = NULL; - isge->vaddr = (void *) sge->addr; - isge->length = sge->length; - isge->sge_length = sge->length; - ret = 1; - goto bail; - } - mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))]; - if (unlikely(mr == NULL || mr->lkey != sge->lkey || - qp->ibqp.pd != mr->pd)) { - ret = 0; - goto bail; - } - - off = sge->addr - mr->user_base; - if (unlikely(sge->addr < mr->user_base || - off + sge->length > mr->length || - (mr->access_flags & acc) != acc)) { - ret = 0; - goto bail; - } - - off += mr->offset; - m = 0; - n = 0; - while (off >= mr->map[m]->segs[n].length) { - off -= mr->map[m]->segs[n].length; - n++; - if (n >= IPATH_SEGSZ) { - m++; - n = 0; - } - } - isge->mr = mr; - isge->vaddr = mr->map[m]->segs[n].vaddr + off; - isge->length = mr->map[m]->segs[n].length - off; - isge->sge_length = sge->length; - isge->m = m; - isge->n = n; - - ret = 1; - -bail: - return ret; -} - -/** - * ipath_rkey_ok - check the IB virtual address, length, and RKEY - * @dev: infiniband device - * @ss: SGE state - * @len: length of data - * @vaddr: virtual address to place data - * @rkey: rkey to check - * @acc: access flags - * - * Return 1 if successful, otherwise 0. 
- */ -int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, - u32 len, u64 vaddr, u32 rkey, int acc) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - struct ipath_lkey_table *rkt = &dev->lk_table; - struct ipath_sge *sge = &ss->sge; - struct ipath_mregion *mr; - unsigned n, m; - size_t off; - int ret; - - /* - * We use RKEY == zero for kernel virtual addresses - * (see ipath_get_dma_mr and ipath_dma.c). - */ - if (rkey == 0) { - /* always a kernel port, no locking needed */ - struct ipath_pd *pd = to_ipd(qp->ibqp.pd); - - if (pd->user) { - ret = 0; - goto bail; - } - sge->mr = NULL; - sge->vaddr = (void *) vaddr; - sge->length = len; - sge->sge_length = len; - ss->sg_list = NULL; - ss->num_sge = 1; - ret = 1; - goto bail; - } - - mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))]; - if (unlikely(mr == NULL || mr->lkey != rkey || - qp->ibqp.pd != mr->pd)) { - ret = 0; - goto bail; - } - - off = vaddr - mr->iova; - if (unlikely(vaddr < mr->iova || off + len > mr->length || - (mr->access_flags & acc) == 0)) { - ret = 0; - goto bail; - } - - off += mr->offset; - m = 0; - n = 0; - while (off >= mr->map[m]->segs[n].length) { - off -= mr->map[m]->segs[n].length; - n++; - if (n >= IPATH_SEGSZ) { - m++; - n = 0; - } - } - sge->mr = mr; - sge->vaddr = mr->map[m]->segs[n].vaddr + off; - sge->length = mr->map[m]->segs[n].length - off; - sge->sge_length = len; - sge->m = m; - sge->n = n; - ss->sg_list = NULL; - ss->num_sge = 1; - - ret = 1; - -bail: - return ret; -} diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c deleted file mode 100644 index ad3a926ab..000000000 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ /dev/null @@ -1,1521 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <rdma/ib_smi.h> -#include <rdma/ib_pma.h> - -#include "ipath_kernel.h" -#include "ipath_verbs.h" -#include "ipath_common.h" - -#define IB_SMP_UNSUP_VERSION cpu_to_be16(0x0004) -#define IB_SMP_UNSUP_METHOD cpu_to_be16(0x0008) -#define IB_SMP_UNSUP_METH_ATTR cpu_to_be16(0x000C) -#define IB_SMP_INVALID_FIELD cpu_to_be16(0x001C) - -static int reply(struct ib_smp *smp) -{ - /* - * The verbs framework will handle the directed/LID route - * packet changes. 
- */ - smp->method = IB_MGMT_METHOD_GET_RESP; - if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - smp->status |= IB_SMP_DIRECTION; - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; -} - -static int recv_subn_get_nodedescription(struct ib_smp *smp, - struct ib_device *ibdev) -{ - if (smp->attr_mod) - smp->status |= IB_SMP_INVALID_FIELD; - - memcpy(smp->data, ibdev->node_desc, sizeof(smp->data)); - - return reply(smp); -} - -struct nodeinfo { - u8 base_version; - u8 class_version; - u8 node_type; - u8 num_ports; - __be64 sys_guid; - __be64 node_guid; - __be64 port_guid; - __be16 partition_cap; - __be16 device_id; - __be32 revision; - u8 local_port_num; - u8 vendor_id[3]; -} __attribute__ ((packed)); - -static int recv_subn_get_nodeinfo(struct ib_smp *smp, - struct ib_device *ibdev, u8 port) -{ - struct nodeinfo *nip = (struct nodeinfo *)&smp->data; - struct ipath_devdata *dd = to_idev(ibdev)->dd; - u32 vendor, majrev, minrev; - - /* GUID 0 is illegal */ - if (smp->attr_mod || (dd->ipath_guid == 0)) - smp->status |= IB_SMP_INVALID_FIELD; - - nip->base_version = 1; - nip->class_version = 1; - nip->node_type = 1; /* channel adapter */ - /* - * XXX The num_ports value will need a layer function to get - * the value if we ever have more than one IB port on a chip. - * We will also need to get the GUID for the port. 
- */ - nip->num_ports = ibdev->phys_port_cnt; - /* This is already in network order */ - nip->sys_guid = to_idev(ibdev)->sys_image_guid; - nip->node_guid = dd->ipath_guid; - nip->port_guid = dd->ipath_guid; - nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd)); - nip->device_id = cpu_to_be16(dd->ipath_deviceid); - majrev = dd->ipath_majrev; - minrev = dd->ipath_minrev; - nip->revision = cpu_to_be32((majrev << 16) | minrev); - nip->local_port_num = port; - vendor = dd->ipath_vendorid; - nip->vendor_id[0] = IPATH_SRC_OUI_1; - nip->vendor_id[1] = IPATH_SRC_OUI_2; - nip->vendor_id[2] = IPATH_SRC_OUI_3; - - return reply(smp); -} - -static int recv_subn_get_guidinfo(struct ib_smp *smp, - struct ib_device *ibdev) -{ - u32 startgx = 8 * be32_to_cpu(smp->attr_mod); - __be64 *p = (__be64 *) smp->data; - - /* 32 blocks of 8 64-bit GUIDs per block */ - - memset(smp->data, 0, sizeof(smp->data)); - - /* - * We only support one GUID for now. If this changes, the - * portinfo.guid_cap field needs to be updated too. - */ - if (startgx == 0) { - __be64 g = to_idev(ibdev)->dd->ipath_guid; - if (g == 0) - /* GUID 0 is illegal */ - smp->status |= IB_SMP_INVALID_FIELD; - else - /* The first is a copy of the read-only HW GUID. */ - *p = g; - } else - smp->status |= IB_SMP_INVALID_FIELD; - - return reply(smp); -} - -static void set_link_width_enabled(struct ipath_devdata *dd, u32 w) -{ - (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, w); -} - -static void set_link_speed_enabled(struct ipath_devdata *dd, u32 s) -{ - (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, s); -} - -static int get_overrunthreshold(struct ipath_devdata *dd) -{ - return (dd->ipath_ibcctrl >> - INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; -} - -/** - * set_overrunthreshold - set the overrun threshold - * @dd: the infinipath device - * @n: the new threshold - * - * Note that this will only take effect when the link state changes. 
- */ -static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n) -{ - unsigned v; - - v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; - if (v != n) { - dd->ipath_ibcctrl &= - ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK << - INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT); - dd->ipath_ibcctrl |= - (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - } - return 0; -} - -static int get_phyerrthreshold(struct ipath_devdata *dd) -{ - return (dd->ipath_ibcctrl >> - INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; -} - -/** - * set_phyerrthreshold - set the physical error threshold - * @dd: the infinipath device - * @n: the new threshold - * - * Note that this will only take effect when the link state changes. - */ -static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n) -{ - unsigned v; - - v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; - if (v != n) { - dd->ipath_ibcctrl &= - ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK << - INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT); - dd->ipath_ibcctrl |= - (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - } - return 0; -} - -/** - * get_linkdowndefaultstate - get the default linkdown state - * @dd: the infinipath device - * - * Returns zero if the default is POLL, 1 if the default is SLEEP. 
- */ -static int get_linkdowndefaultstate(struct ipath_devdata *dd) -{ - return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE); -} - -static int recv_subn_get_portinfo(struct ib_smp *smp, - struct ib_device *ibdev, u8 port) -{ - struct ipath_ibdev *dev; - struct ipath_devdata *dd; - struct ib_port_info *pip = (struct ib_port_info *)smp->data; - u16 lid; - u8 ibcstat; - u8 mtu; - int ret; - - if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) { - smp->status |= IB_SMP_INVALID_FIELD; - ret = reply(smp); - goto bail; - } - - dev = to_idev(ibdev); - dd = dev->dd; - - /* Clear all fields. Only set the non-zero fields. */ - memset(smp->data, 0, sizeof(smp->data)); - - /* Only return the mkey if the protection field allows it. */ - if (smp->method == IB_MGMT_METHOD_SET || dev->mkey == smp->mkey || - dev->mkeyprot == 0) - pip->mkey = dev->mkey; - pip->gid_prefix = dev->gid_prefix; - lid = dd->ipath_lid; - pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE; - pip->sm_lid = cpu_to_be16(dev->sm_lid); - pip->cap_mask = cpu_to_be32(dev->port_cap_flags); - /* pip->diag_code; */ - pip->mkey_lease_period = cpu_to_be16(dev->mkey_lease_period); - pip->local_port_num = port; - pip->link_width_enabled = dd->ipath_link_width_enabled; - pip->link_width_supported = dd->ipath_link_width_supported; - pip->link_width_active = dd->ipath_link_width_active; - pip->linkspeed_portstate = dd->ipath_link_speed_supported << 4; - ibcstat = dd->ipath_lastibcstat; - /* map LinkState to IB portinfo values. */ - pip->linkspeed_portstate |= ipath_ib_linkstate(dd, ibcstat) + 1; - - pip->portphysstate_linkdown = - (ipath_cvt_physportstate[ibcstat & dd->ibcs_lts_mask] << 4) | - (get_linkdowndefaultstate(dd) ? 
1 : 2); - pip->mkeyprot_resv_lmc = (dev->mkeyprot << 6) | dd->ipath_lmc; - pip->linkspeedactive_enabled = (dd->ipath_link_speed_active << 4) | - dd->ipath_link_speed_enabled; - switch (dd->ipath_ibmtu) { - case 4096: - mtu = IB_MTU_4096; - break; - case 2048: - mtu = IB_MTU_2048; - break; - case 1024: - mtu = IB_MTU_1024; - break; - case 512: - mtu = IB_MTU_512; - break; - case 256: - mtu = IB_MTU_256; - break; - default: /* oops, something is wrong */ - mtu = IB_MTU_2048; - break; - } - pip->neighbormtu_mastersmsl = (mtu << 4) | dev->sm_sl; - pip->vlcap_inittype = 0x10; /* VLCap = VL0, InitType = 0 */ - pip->vl_high_limit = dev->vl_high_limit; - /* pip->vl_arb_high_cap; // only one VL */ - /* pip->vl_arb_low_cap; // only one VL */ - /* InitTypeReply = 0 */ - /* our mtu cap depends on whether 4K MTU enabled or not */ - pip->inittypereply_mtucap = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048; - /* HCAs ignore VLStallCount and HOQLife */ - /* pip->vlstallcnt_hoqlife; */ - pip->operationalvl_pei_peo_fpi_fpo = 0x10; /* OVLs = 1 */ - pip->mkey_violations = cpu_to_be16(dev->mkey_violations); - /* P_KeyViolations are counted by hardware. */ - pip->pkey_violations = - cpu_to_be16((ipath_get_cr_errpkey(dd) - - dev->z_pkey_violations) & 0xFFFF); - pip->qkey_violations = cpu_to_be16(dev->qkey_violations); - /* Only the hardware GUID is supported for now */ - pip->guid_cap = 1; - pip->clientrereg_resv_subnetto = dev->subnet_timeout; - /* 32.768 usec. 
response time (guessing) */ - pip->resv_resptimevalue = 3; - pip->localphyerrors_overrunerrors = - (get_phyerrthreshold(dd) << 4) | - get_overrunthreshold(dd); - /* pip->max_credit_hint; */ - if (dev->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) { - u32 v; - - v = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LINKLATENCY); - pip->link_roundtrip_latency[0] = v >> 16; - pip->link_roundtrip_latency[1] = v >> 8; - pip->link_roundtrip_latency[2] = v; - } - - ret = reply(smp); - -bail: - return ret; -} - -/** - * get_pkeys - return the PKEY table for port 0 - * @dd: the infinipath device - * @pkeys: the pkey table is placed here - */ -static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys) -{ - /* always a kernel port, no locking needed */ - struct ipath_portdata *pd = dd->ipath_pd[0]; - - memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys)); - - return 0; -} - -static int recv_subn_get_pkeytable(struct ib_smp *smp, - struct ib_device *ibdev) -{ - u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff); - u16 *p = (u16 *) smp->data; - __be16 *q = (__be16 *) smp->data; - - /* 64 blocks of 32 16-bit P_Key entries */ - - memset(smp->data, 0, sizeof(smp->data)); - if (startpx == 0) { - struct ipath_ibdev *dev = to_idev(ibdev); - unsigned i, n = ipath_get_npkeys(dev->dd); - - get_pkeys(dev->dd, p); - - for (i = 0; i < n; i++) - q[i] = cpu_to_be16(p[i]); - } else - smp->status |= IB_SMP_INVALID_FIELD; - - return reply(smp); -} - -static int recv_subn_set_guidinfo(struct ib_smp *smp, - struct ib_device *ibdev) -{ - /* The only GUID we support is the first read-only entry. */ - return recv_subn_get_guidinfo(smp, ibdev); -} - -/** - * set_linkdowndefaultstate - set the default linkdown state - * @dd: the infinipath device - * @sleep: the new state - * - * Note that this will only take effect when the link state changes. 
- */ -static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep) -{ - if (sleep) - dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; - else - dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - return 0; -} - -/** - * recv_subn_set_portinfo - set port information - * @smp: the incoming SM packet - * @ibdev: the infiniband device - * @port: the port on the device - * - * Set Portinfo (see ch. 14.2.5.6). - */ -static int recv_subn_set_portinfo(struct ib_smp *smp, - struct ib_device *ibdev, u8 port) -{ - struct ib_port_info *pip = (struct ib_port_info *)smp->data; - struct ib_event event; - struct ipath_ibdev *dev; - struct ipath_devdata *dd; - char clientrereg = 0; - u16 lid, smlid; - u8 lwe; - u8 lse; - u8 state; - u16 lstate; - u32 mtu; - int ret, ore; - - if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) - goto err; - - dev = to_idev(ibdev); - dd = dev->dd; - event.device = ibdev; - event.element.port_num = port; - - dev->mkey = pip->mkey; - dev->gid_prefix = pip->gid_prefix; - dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period); - - lid = be16_to_cpu(pip->lid); - if (dd->ipath_lid != lid || - dd->ipath_lmc != (pip->mkeyprot_resv_lmc & 7)) { - /* Must be a valid unicast LID address. */ - if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) - goto err; - ipath_set_lid(dd, lid, pip->mkeyprot_resv_lmc & 7); - event.event = IB_EVENT_LID_CHANGE; - ib_dispatch_event(&event); - } - - smlid = be16_to_cpu(pip->sm_lid); - if (smlid != dev->sm_lid) { - /* Must be a valid unicast LID address. */ - if (smlid == 0 || smlid >= IPATH_MULTICAST_LID_BASE) - goto err; - dev->sm_lid = smlid; - event.event = IB_EVENT_SM_CHANGE; - ib_dispatch_event(&event); - } - - /* Allow 1x or 4x to be set (see 14.2.6.6). 
*/ - lwe = pip->link_width_enabled; - if (lwe) { - if (lwe == 0xFF) - lwe = dd->ipath_link_width_supported; - else if (lwe >= 16 || (lwe & ~dd->ipath_link_width_supported)) - goto err; - set_link_width_enabled(dd, lwe); - } - - /* Allow 2.5 or 5.0 Gbs. */ - lse = pip->linkspeedactive_enabled & 0xF; - if (lse) { - if (lse == 15) - lse = dd->ipath_link_speed_supported; - else if (lse >= 8 || (lse & ~dd->ipath_link_speed_supported)) - goto err; - set_link_speed_enabled(dd, lse); - } - - /* Set link down default state. */ - switch (pip->portphysstate_linkdown & 0xF) { - case 0: /* NOP */ - break; - case 1: /* SLEEP */ - if (set_linkdowndefaultstate(dd, 1)) - goto err; - break; - case 2: /* POLL */ - if (set_linkdowndefaultstate(dd, 0)) - goto err; - break; - default: - goto err; - } - - dev->mkeyprot = pip->mkeyprot_resv_lmc >> 6; - dev->vl_high_limit = pip->vl_high_limit; - - switch ((pip->neighbormtu_mastersmsl >> 4) & 0xF) { - case IB_MTU_256: - mtu = 256; - break; - case IB_MTU_512: - mtu = 512; - break; - case IB_MTU_1024: - mtu = 1024; - break; - case IB_MTU_2048: - mtu = 2048; - break; - case IB_MTU_4096: - if (!ipath_mtu4096) - goto err; - mtu = 4096; - break; - default: - /* XXX We have already partially updated our state! */ - goto err; - } - ipath_set_mtu(dd, mtu); - - dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF; - - /* We only support VL0 */ - if (((pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF) > 1) - goto err; - - if (pip->mkey_violations == 0) - dev->mkey_violations = 0; - - /* - * Hardware counter can't be reset so snapshot and subtract - * later. 
- */ - if (pip->pkey_violations == 0) - dev->z_pkey_violations = ipath_get_cr_errpkey(dd); - - if (pip->qkey_violations == 0) - dev->qkey_violations = 0; - - ore = pip->localphyerrors_overrunerrors; - if (set_phyerrthreshold(dd, (ore >> 4) & 0xF)) - goto err; - - if (set_overrunthreshold(dd, (ore & 0xF))) - goto err; - - dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F; - - if (pip->clientrereg_resv_subnetto & 0x80) { - clientrereg = 1; - event.event = IB_EVENT_CLIENT_REREGISTER; - ib_dispatch_event(&event); - } - - /* - * Do the port state change now that the other link parameters - * have been set. - * Changing the port physical state only makes sense if the link - * is down or is being set to down. - */ - state = pip->linkspeed_portstate & 0xF; - lstate = (pip->portphysstate_linkdown >> 4) & 0xF; - if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP)) - goto err; - - /* - * Only state changes of DOWN, ARM, and ACTIVE are valid - * and must be in the correct state to take effect (see 7.2.6). - */ - switch (state) { - case IB_PORT_NOP: - if (lstate == 0) - break; - /* FALLTHROUGH */ - case IB_PORT_DOWN: - if (lstate == 0) - lstate = IPATH_IB_LINKDOWN_ONLY; - else if (lstate == 1) - lstate = IPATH_IB_LINKDOWN_SLEEP; - else if (lstate == 2) - lstate = IPATH_IB_LINKDOWN; - else if (lstate == 3) - lstate = IPATH_IB_LINKDOWN_DISABLE; - else - goto err; - ipath_set_linkstate(dd, lstate); - if (lstate == IPATH_IB_LINKDOWN_DISABLE) { - ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - goto done; - } - ipath_wait_linkstate(dd, IPATH_LINKINIT | IPATH_LINKARMED | - IPATH_LINKACTIVE, 1000); - break; - case IB_PORT_ARMED: - ipath_set_linkstate(dd, IPATH_IB_LINKARM); - break; - case IB_PORT_ACTIVE: - ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE); - break; - default: - /* XXX We have already partially updated our state! 
*/ - goto err; - } - - ret = recv_subn_get_portinfo(smp, ibdev, port); - - if (clientrereg) - pip->clientrereg_resv_subnetto |= 0x80; - - goto done; - -err: - smp->status |= IB_SMP_INVALID_FIELD; - ret = recv_subn_get_portinfo(smp, ibdev, port); - -done: - return ret; -} - -/** - * rm_pkey - decrecment the reference count for the given PKEY - * @dd: the infinipath device - * @key: the PKEY index - * - * Return true if this was the last reference and the hardware table entry - * needs to be changed. - */ -static int rm_pkey(struct ipath_devdata *dd, u16 key) -{ - int i; - int ret; - - for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (dd->ipath_pkeys[i] != key) - continue; - if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) { - dd->ipath_pkeys[i] = 0; - ret = 1; - goto bail; - } - break; - } - - ret = 0; - -bail: - return ret; -} - -/** - * add_pkey - add the given PKEY to the hardware table - * @dd: the infinipath device - * @key: the PKEY - * - * Return an error code if unable to add the entry, zero if no change, - * or 1 if the hardware PKEY register needs to be updated. - */ -static int add_pkey(struct ipath_devdata *dd, u16 key) -{ - int i; - u16 lkey = key & 0x7FFF; - int any = 0; - int ret; - - if (lkey == 0x7FFF) { - ret = 0; - goto bail; - } - - /* Look for an empty slot or a matching PKEY. */ - for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (!dd->ipath_pkeys[i]) { - any++; - continue; - } - /* If it matches exactly, try to increment the ref count */ - if (dd->ipath_pkeys[i] == key) { - if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) { - ret = 0; - goto bail; - } - /* Lost the race. Look for an empty slot below. */ - atomic_dec(&dd->ipath_pkeyrefs[i]); - any++; - } - /* - * It makes no sense to have both the limited and unlimited - * PKEY set at the same time since the unlimited one will - * disable the limited one. 
- */ - if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) { - ret = -EEXIST; - goto bail; - } - } - if (!any) { - ret = -EBUSY; - goto bail; - } - for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (!dd->ipath_pkeys[i] && - atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) { - /* for ipathstats, etc. */ - ipath_stats.sps_pkeys[i] = lkey; - dd->ipath_pkeys[i] = key; - ret = 1; - goto bail; - } - } - ret = -EBUSY; - -bail: - return ret; -} - -/** - * set_pkeys - set the PKEY table for port 0 - * @dd: the infinipath device - * @pkeys: the PKEY table - */ -static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys, u8 port) -{ - struct ipath_portdata *pd; - int i; - int changed = 0; - - /* always a kernel port, no locking needed */ - pd = dd->ipath_pd[0]; - - for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { - u16 key = pkeys[i]; - u16 okey = pd->port_pkeys[i]; - - if (key == okey) - continue; - /* - * The value of this PKEY table entry is changing. - * Remove the old entry in the hardware's array of PKEYs. 
- */ - if (okey & 0x7FFF) - changed |= rm_pkey(dd, okey); - if (key & 0x7FFF) { - int ret = add_pkey(dd, key); - - if (ret < 0) - key = 0; - else - changed |= ret; - } - pd->port_pkeys[i] = key; - } - if (changed) { - u64 pkey; - struct ib_event event; - - pkey = (u64) dd->ipath_pkeys[0] | - ((u64) dd->ipath_pkeys[1] << 16) | - ((u64) dd->ipath_pkeys[2] << 32) | - ((u64) dd->ipath_pkeys[3] << 48); - ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n", - (unsigned long long) pkey); - ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, - pkey); - - event.event = IB_EVENT_PKEY_CHANGE; - event.device = &dd->verbs_dev->ibdev; - event.element.port_num = port; - ib_dispatch_event(&event); - } - return 0; -} - -static int recv_subn_set_pkeytable(struct ib_smp *smp, - struct ib_device *ibdev, u8 port) -{ - u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff); - __be16 *p = (__be16 *) smp->data; - u16 *q = (u16 *) smp->data; - struct ipath_ibdev *dev = to_idev(ibdev); - unsigned i, n = ipath_get_npkeys(dev->dd); - - for (i = 0; i < n; i++) - q[i] = be16_to_cpu(p[i]); - - if (startpx != 0 || set_pkeys(dev->dd, q, port) != 0) - smp->status |= IB_SMP_INVALID_FIELD; - - return recv_subn_get_pkeytable(smp, ibdev); -} - -static int recv_pma_get_classportinfo(struct ib_pma_mad *pmp) -{ - struct ib_class_port_info *p = - (struct ib_class_port_info *)pmp->data; - - memset(pmp->data, 0, sizeof(pmp->data)); - - if (pmp->mad_hdr.attr_mod != 0) - pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; - - /* Indicate AllPortSelect is valid (only one port anyway) */ - p->capability_mask = cpu_to_be16(1 << 8); - p->base_version = 1; - p->class_version = 1; - /* - * Expected response time is 4.096 usec. * 2^18 == 1.073741824 - * sec. - */ - p->resp_time_value = 18; - - return reply((struct ib_smp *) pmp); -} - -/* - * The PortSamplesControl.CounterMasks field is an array of 3 bit fields - * which specify the N'th counter's capabilities. See ch. 16.1.3.2. 
- * We support 5 counters which only count the mandatory quantities. - */ -#define COUNTER_MASK(q, n) (q << ((9 - n) * 3)) -#define COUNTER_MASK0_9 cpu_to_be32(COUNTER_MASK(1, 0) | \ - COUNTER_MASK(1, 1) | \ - COUNTER_MASK(1, 2) | \ - COUNTER_MASK(1, 3) | \ - COUNTER_MASK(1, 4)) - -static int recv_pma_get_portsamplescontrol(struct ib_pma_mad *pmp, - struct ib_device *ibdev, u8 port) -{ - struct ib_pma_portsamplescontrol *p = - (struct ib_pma_portsamplescontrol *)pmp->data; - struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_cregs const *crp = dev->dd->ipath_cregs; - unsigned long flags; - u8 port_select = p->port_select; - - memset(pmp->data, 0, sizeof(pmp->data)); - - p->port_select = port_select; - if (pmp->mad_hdr.attr_mod != 0 || - (port_select != port && port_select != 0xFF)) - pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; - /* - * Ticks are 10x the link transfer period which for 2.5Gbs is 4 - * nsec. 0 == 4 nsec., 1 == 8 nsec., ..., 255 == 1020 nsec. Sample - * intervals are counted in ticks. Since we use Linux timers, that - * count in jiffies, we can't sample for less than 1000 ticks if HZ - * == 1000 (4000 ticks if HZ is 250). link_speed_active returns 2 for - * DDR, 1 for SDR, set the tick to 1 for DDR, 0 for SDR on chips that - * have hardware support for delaying packets. - */ - if (crp->cr_psstat) - p->tick = dev->dd->ipath_link_speed_active - 1; - else - p->tick = 250; /* 1 usec. 
*/ - p->counter_width = 4; /* 32 bit counters */ - p->counter_mask0_9 = COUNTER_MASK0_9; - spin_lock_irqsave(&dev->pending_lock, flags); - if (crp->cr_psstat) - p->sample_status = ipath_read_creg32(dev->dd, crp->cr_psstat); - else - p->sample_status = dev->pma_sample_status; - p->sample_start = cpu_to_be32(dev->pma_sample_start); - p->sample_interval = cpu_to_be32(dev->pma_sample_interval); - p->tag = cpu_to_be16(dev->pma_tag); - p->counter_select[0] = dev->pma_counter_select[0]; - p->counter_select[1] = dev->pma_counter_select[1]; - p->counter_select[2] = dev->pma_counter_select[2]; - p->counter_select[3] = dev->pma_counter_select[3]; - p->counter_select[4] = dev->pma_counter_select[4]; - spin_unlock_irqrestore(&dev->pending_lock, flags); - - return reply((struct ib_smp *) pmp); -} - -static int recv_pma_set_portsamplescontrol(struct ib_pma_mad *pmp, - struct ib_device *ibdev, u8 port) -{ - struct ib_pma_portsamplescontrol *p = - (struct ib_pma_portsamplescontrol *)pmp->data; - struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_cregs const *crp = dev->dd->ipath_cregs; - unsigned long flags; - u8 status; - int ret; - - if (pmp->mad_hdr.attr_mod != 0 || - (p->port_select != port && p->port_select != 0xFF)) { - pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; - ret = reply((struct ib_smp *) pmp); - goto bail; - } - - spin_lock_irqsave(&dev->pending_lock, flags); - if (crp->cr_psstat) - status = ipath_read_creg32(dev->dd, crp->cr_psstat); - else - status = dev->pma_sample_status; - if (status == IB_PMA_SAMPLE_STATUS_DONE) { - dev->pma_sample_start = be32_to_cpu(p->sample_start); - dev->pma_sample_interval = be32_to_cpu(p->sample_interval); - dev->pma_tag = be16_to_cpu(p->tag); - dev->pma_counter_select[0] = p->counter_select[0]; - dev->pma_counter_select[1] = p->counter_select[1]; - dev->pma_counter_select[2] = p->counter_select[2]; - dev->pma_counter_select[3] = p->counter_select[3]; - dev->pma_counter_select[4] = p->counter_select[4]; - if (crp->cr_psstat) { - 
ipath_write_creg(dev->dd, crp->cr_psinterval, - dev->pma_sample_interval); - ipath_write_creg(dev->dd, crp->cr_psstart, - dev->pma_sample_start); - } else - dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_STARTED; - } - spin_unlock_irqrestore(&dev->pending_lock, flags); - - ret = recv_pma_get_portsamplescontrol(pmp, ibdev, port); - -bail: - return ret; -} - -static u64 get_counter(struct ipath_ibdev *dev, - struct ipath_cregs const *crp, - __be16 sel) -{ - u64 ret; - - switch (sel) { - case IB_PMA_PORT_XMIT_DATA: - ret = (crp->cr_psxmitdatacount) ? - ipath_read_creg32(dev->dd, crp->cr_psxmitdatacount) : - dev->ipath_sword; - break; - case IB_PMA_PORT_RCV_DATA: - ret = (crp->cr_psrcvdatacount) ? - ipath_read_creg32(dev->dd, crp->cr_psrcvdatacount) : - dev->ipath_rword; - break; - case IB_PMA_PORT_XMIT_PKTS: - ret = (crp->cr_psxmitpktscount) ? - ipath_read_creg32(dev->dd, crp->cr_psxmitpktscount) : - dev->ipath_spkts; - break; - case IB_PMA_PORT_RCV_PKTS: - ret = (crp->cr_psrcvpktscount) ? - ipath_read_creg32(dev->dd, crp->cr_psrcvpktscount) : - dev->ipath_rpkts; - break; - case IB_PMA_PORT_XMIT_WAIT: - ret = (crp->cr_psxmitwaitcount) ? - ipath_read_creg32(dev->dd, crp->cr_psxmitwaitcount) : - dev->ipath_xmit_wait; - break; - default: - ret = 0; - } - - return ret; -} - -static int recv_pma_get_portsamplesresult(struct ib_pma_mad *pmp, - struct ib_device *ibdev) -{ - struct ib_pma_portsamplesresult *p = - (struct ib_pma_portsamplesresult *)pmp->data; - struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_cregs const *crp = dev->dd->ipath_cregs; - u8 status; - int i; - - memset(pmp->data, 0, sizeof(pmp->data)); - p->tag = cpu_to_be16(dev->pma_tag); - if (crp->cr_psstat) - status = ipath_read_creg32(dev->dd, crp->cr_psstat); - else - status = dev->pma_sample_status; - p->sample_status = cpu_to_be16(status); - for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++) - p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 
0 : - cpu_to_be32( - get_counter(dev, crp, dev->pma_counter_select[i])); - - return reply((struct ib_smp *) pmp); -} - -static int recv_pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp, - struct ib_device *ibdev) -{ - struct ib_pma_portsamplesresult_ext *p = - (struct ib_pma_portsamplesresult_ext *)pmp->data; - struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_cregs const *crp = dev->dd->ipath_cregs; - u8 status; - int i; - - memset(pmp->data, 0, sizeof(pmp->data)); - p->tag = cpu_to_be16(dev->pma_tag); - if (crp->cr_psstat) - status = ipath_read_creg32(dev->dd, crp->cr_psstat); - else - status = dev->pma_sample_status; - p->sample_status = cpu_to_be16(status); - /* 64 bits */ - p->extended_width = cpu_to_be32(0x80000000); - for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++) - p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 : - cpu_to_be64( - get_counter(dev, crp, dev->pma_counter_select[i])); - - return reply((struct ib_smp *) pmp); -} - -static int recv_pma_get_portcounters(struct ib_pma_mad *pmp, - struct ib_device *ibdev, u8 port) -{ - struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) - pmp->data; - struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_verbs_counters cntrs; - u8 port_select = p->port_select; - - ipath_get_counters(dev->dd, &cntrs); - - /* Adjust counters for any resets done. 
*/ - cntrs.symbol_error_counter -= dev->z_symbol_error_counter; - cntrs.link_error_recovery_counter -= - dev->z_link_error_recovery_counter; - cntrs.link_downed_counter -= dev->z_link_downed_counter; - cntrs.port_rcv_errors += dev->rcv_errors; - cntrs.port_rcv_errors -= dev->z_port_rcv_errors; - cntrs.port_rcv_remphys_errors -= dev->z_port_rcv_remphys_errors; - cntrs.port_xmit_discards -= dev->z_port_xmit_discards; - cntrs.port_xmit_data -= dev->z_port_xmit_data; - cntrs.port_rcv_data -= dev->z_port_rcv_data; - cntrs.port_xmit_packets -= dev->z_port_xmit_packets; - cntrs.port_rcv_packets -= dev->z_port_rcv_packets; - cntrs.local_link_integrity_errors -= - dev->z_local_link_integrity_errors; - cntrs.excessive_buffer_overrun_errors -= - dev->z_excessive_buffer_overrun_errors; - cntrs.vl15_dropped -= dev->z_vl15_dropped; - cntrs.vl15_dropped += dev->n_vl15_dropped; - - memset(pmp->data, 0, sizeof(pmp->data)); - - p->port_select = port_select; - if (pmp->mad_hdr.attr_mod != 0 || - (port_select != port && port_select != 0xFF)) - pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; - - if (cntrs.symbol_error_counter > 0xFFFFUL) - p->symbol_error_counter = cpu_to_be16(0xFFFF); - else - p->symbol_error_counter = - cpu_to_be16((u16)cntrs.symbol_error_counter); - if (cntrs.link_error_recovery_counter > 0xFFUL) - p->link_error_recovery_counter = 0xFF; - else - p->link_error_recovery_counter = - (u8)cntrs.link_error_recovery_counter; - if (cntrs.link_downed_counter > 0xFFUL) - p->link_downed_counter = 0xFF; - else - p->link_downed_counter = (u8)cntrs.link_downed_counter; - if (cntrs.port_rcv_errors > 0xFFFFUL) - p->port_rcv_errors = cpu_to_be16(0xFFFF); - else - p->port_rcv_errors = - cpu_to_be16((u16) cntrs.port_rcv_errors); - if (cntrs.port_rcv_remphys_errors > 0xFFFFUL) - p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF); - else - p->port_rcv_remphys_errors = - cpu_to_be16((u16)cntrs.port_rcv_remphys_errors); - if (cntrs.port_xmit_discards > 0xFFFFUL) - p->port_xmit_discards = 
cpu_to_be16(0xFFFF); - else - p->port_xmit_discards = - cpu_to_be16((u16)cntrs.port_xmit_discards); - if (cntrs.local_link_integrity_errors > 0xFUL) - cntrs.local_link_integrity_errors = 0xFUL; - if (cntrs.excessive_buffer_overrun_errors > 0xFUL) - cntrs.excessive_buffer_overrun_errors = 0xFUL; - p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) | - cntrs.excessive_buffer_overrun_errors; - if (cntrs.vl15_dropped > 0xFFFFUL) - p->vl15_dropped = cpu_to_be16(0xFFFF); - else - p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped); - if (cntrs.port_xmit_data > 0xFFFFFFFFUL) - p->port_xmit_data = cpu_to_be32(0xFFFFFFFF); - else - p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data); - if (cntrs.port_rcv_data > 0xFFFFFFFFUL) - p->port_rcv_data = cpu_to_be32(0xFFFFFFFF); - else - p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data); - if (cntrs.port_xmit_packets > 0xFFFFFFFFUL) - p->port_xmit_packets = cpu_to_be32(0xFFFFFFFF); - else - p->port_xmit_packets = - cpu_to_be32((u32)cntrs.port_xmit_packets); - if (cntrs.port_rcv_packets > 0xFFFFFFFFUL) - p->port_rcv_packets = cpu_to_be32(0xFFFFFFFF); - else - p->port_rcv_packets = - cpu_to_be32((u32) cntrs.port_rcv_packets); - - return reply((struct ib_smp *) pmp); -} - -static int recv_pma_get_portcounters_ext(struct ib_pma_mad *pmp, - struct ib_device *ibdev, u8 port) -{ - struct ib_pma_portcounters_ext *p = - (struct ib_pma_portcounters_ext *)pmp->data; - struct ipath_ibdev *dev = to_idev(ibdev); - u64 swords, rwords, spkts, rpkts, xwait; - u8 port_select = p->port_select; - - ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts, - &rpkts, &xwait); - - /* Adjust counters for any resets done. 
*/ - swords -= dev->z_port_xmit_data; - rwords -= dev->z_port_rcv_data; - spkts -= dev->z_port_xmit_packets; - rpkts -= dev->z_port_rcv_packets; - - memset(pmp->data, 0, sizeof(pmp->data)); - - p->port_select = port_select; - if (pmp->mad_hdr.attr_mod != 0 || - (port_select != port && port_select != 0xFF)) - pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; - - p->port_xmit_data = cpu_to_be64(swords); - p->port_rcv_data = cpu_to_be64(rwords); - p->port_xmit_packets = cpu_to_be64(spkts); - p->port_rcv_packets = cpu_to_be64(rpkts); - p->port_unicast_xmit_packets = cpu_to_be64(dev->n_unicast_xmit); - p->port_unicast_rcv_packets = cpu_to_be64(dev->n_unicast_rcv); - p->port_multicast_xmit_packets = cpu_to_be64(dev->n_multicast_xmit); - p->port_multicast_rcv_packets = cpu_to_be64(dev->n_multicast_rcv); - - return reply((struct ib_smp *) pmp); -} - -static int recv_pma_set_portcounters(struct ib_pma_mad *pmp, - struct ib_device *ibdev, u8 port) -{ - struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) - pmp->data; - struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_verbs_counters cntrs; - - /* - * Since the HW doesn't support clearing counters, we save the - * current count and subtract it from future responses. 
- */ - ipath_get_counters(dev->dd, &cntrs); - - if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR) - dev->z_symbol_error_counter = cntrs.symbol_error_counter; - - if (p->counter_select & IB_PMA_SEL_LINK_ERROR_RECOVERY) - dev->z_link_error_recovery_counter = - cntrs.link_error_recovery_counter; - - if (p->counter_select & IB_PMA_SEL_LINK_DOWNED) - dev->z_link_downed_counter = cntrs.link_downed_counter; - - if (p->counter_select & IB_PMA_SEL_PORT_RCV_ERRORS) - dev->z_port_rcv_errors = - cntrs.port_rcv_errors + dev->rcv_errors; - - if (p->counter_select & IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS) - dev->z_port_rcv_remphys_errors = - cntrs.port_rcv_remphys_errors; - - if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS) - dev->z_port_xmit_discards = cntrs.port_xmit_discards; - - if (p->counter_select & IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS) - dev->z_local_link_integrity_errors = - cntrs.local_link_integrity_errors; - - if (p->counter_select & IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS) - dev->z_excessive_buffer_overrun_errors = - cntrs.excessive_buffer_overrun_errors; - - if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) { - dev->n_vl15_dropped = 0; - dev->z_vl15_dropped = cntrs.vl15_dropped; - } - - if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA) - dev->z_port_xmit_data = cntrs.port_xmit_data; - - if (p->counter_select & IB_PMA_SEL_PORT_RCV_DATA) - dev->z_port_rcv_data = cntrs.port_rcv_data; - - if (p->counter_select & IB_PMA_SEL_PORT_XMIT_PACKETS) - dev->z_port_xmit_packets = cntrs.port_xmit_packets; - - if (p->counter_select & IB_PMA_SEL_PORT_RCV_PACKETS) - dev->z_port_rcv_packets = cntrs.port_rcv_packets; - - return recv_pma_get_portcounters(pmp, ibdev, port); -} - -static int recv_pma_set_portcounters_ext(struct ib_pma_mad *pmp, - struct ib_device *ibdev, u8 port) -{ - struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) - pmp->data; - struct ipath_ibdev *dev = to_idev(ibdev); - u64 swords, rwords, spkts, rpkts, xwait; - - ipath_snapshot_counters(dev->dd, 
&swords, &rwords, &spkts, - &rpkts, &xwait); - - if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA) - dev->z_port_xmit_data = swords; - - if (p->counter_select & IB_PMA_SELX_PORT_RCV_DATA) - dev->z_port_rcv_data = rwords; - - if (p->counter_select & IB_PMA_SELX_PORT_XMIT_PACKETS) - dev->z_port_xmit_packets = spkts; - - if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS) - dev->z_port_rcv_packets = rpkts; - - if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS) - dev->n_unicast_xmit = 0; - - if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS) - dev->n_unicast_rcv = 0; - - if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS) - dev->n_multicast_xmit = 0; - - if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS) - dev->n_multicast_rcv = 0; - - return recv_pma_get_portcounters_ext(pmp, ibdev, port); -} - -static int process_subn(struct ib_device *ibdev, int mad_flags, - u8 port_num, const struct ib_mad *in_mad, - struct ib_mad *out_mad) -{ - struct ib_smp *smp = (struct ib_smp *)out_mad; - struct ipath_ibdev *dev = to_idev(ibdev); - int ret; - - *out_mad = *in_mad; - if (smp->class_version != 1) { - smp->status |= IB_SMP_UNSUP_VERSION; - ret = reply(smp); - goto bail; - } - - /* Is the mkey in the process of expiring? */ - if (dev->mkey_lease_timeout && - time_after_eq(jiffies, dev->mkey_lease_timeout)) { - /* Clear timeout and mkey protection field. 
*/ - dev->mkey_lease_timeout = 0; - dev->mkeyprot = 0; - } - - /* - * M_Key checking depends on - * Portinfo:M_Key_protect_bits - */ - if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && dev->mkey != 0 && - dev->mkey != smp->mkey && - (smp->method == IB_MGMT_METHOD_SET || - (smp->method == IB_MGMT_METHOD_GET && - dev->mkeyprot >= 2))) { - if (dev->mkey_violations != 0xFFFF) - ++dev->mkey_violations; - if (dev->mkey_lease_timeout || - dev->mkey_lease_period == 0) { - ret = IB_MAD_RESULT_SUCCESS | - IB_MAD_RESULT_CONSUMED; - goto bail; - } - dev->mkey_lease_timeout = jiffies + - dev->mkey_lease_period * HZ; - /* Future: Generate a trap notice. */ - ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - goto bail; - } else if (dev->mkey_lease_timeout) - dev->mkey_lease_timeout = 0; - - switch (smp->method) { - case IB_MGMT_METHOD_GET: - switch (smp->attr_id) { - case IB_SMP_ATTR_NODE_DESC: - ret = recv_subn_get_nodedescription(smp, ibdev); - goto bail; - case IB_SMP_ATTR_NODE_INFO: - ret = recv_subn_get_nodeinfo(smp, ibdev, port_num); - goto bail; - case IB_SMP_ATTR_GUID_INFO: - ret = recv_subn_get_guidinfo(smp, ibdev); - goto bail; - case IB_SMP_ATTR_PORT_INFO: - ret = recv_subn_get_portinfo(smp, ibdev, port_num); - goto bail; - case IB_SMP_ATTR_PKEY_TABLE: - ret = recv_subn_get_pkeytable(smp, ibdev); - goto bail; - case IB_SMP_ATTR_SM_INFO: - if (dev->port_cap_flags & IB_PORT_SM_DISABLED) { - ret = IB_MAD_RESULT_SUCCESS | - IB_MAD_RESULT_CONSUMED; - goto bail; - } - if (dev->port_cap_flags & IB_PORT_SM) { - ret = IB_MAD_RESULT_SUCCESS; - goto bail; - } - /* FALLTHROUGH */ - default: - smp->status |= IB_SMP_UNSUP_METH_ATTR; - ret = reply(smp); - goto bail; - } - - case IB_MGMT_METHOD_SET: - switch (smp->attr_id) { - case IB_SMP_ATTR_GUID_INFO: - ret = recv_subn_set_guidinfo(smp, ibdev); - goto bail; - case IB_SMP_ATTR_PORT_INFO: - ret = recv_subn_set_portinfo(smp, ibdev, port_num); - goto bail; - case IB_SMP_ATTR_PKEY_TABLE: - ret = recv_subn_set_pkeytable(smp, ibdev, 
port_num); - goto bail; - case IB_SMP_ATTR_SM_INFO: - if (dev->port_cap_flags & IB_PORT_SM_DISABLED) { - ret = IB_MAD_RESULT_SUCCESS | - IB_MAD_RESULT_CONSUMED; - goto bail; - } - if (dev->port_cap_flags & IB_PORT_SM) { - ret = IB_MAD_RESULT_SUCCESS; - goto bail; - } - /* FALLTHROUGH */ - default: - smp->status |= IB_SMP_UNSUP_METH_ATTR; - ret = reply(smp); - goto bail; - } - - case IB_MGMT_METHOD_TRAP: - case IB_MGMT_METHOD_REPORT: - case IB_MGMT_METHOD_REPORT_RESP: - case IB_MGMT_METHOD_TRAP_REPRESS: - case IB_MGMT_METHOD_GET_RESP: - /* - * The ib_mad module will call us to process responses - * before checking for other consumers. - * Just tell the caller to process it normally. - */ - ret = IB_MAD_RESULT_SUCCESS; - goto bail; - default: - smp->status |= IB_SMP_UNSUP_METHOD; - ret = reply(smp); - } - -bail: - return ret; -} - -static int process_perf(struct ib_device *ibdev, u8 port_num, - const struct ib_mad *in_mad, - struct ib_mad *out_mad) -{ - struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad; - int ret; - - *out_mad = *in_mad; - if (pmp->mad_hdr.class_version != 1) { - pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION; - ret = reply((struct ib_smp *) pmp); - goto bail; - } - - switch (pmp->mad_hdr.method) { - case IB_MGMT_METHOD_GET: - switch (pmp->mad_hdr.attr_id) { - case IB_PMA_CLASS_PORT_INFO: - ret = recv_pma_get_classportinfo(pmp); - goto bail; - case IB_PMA_PORT_SAMPLES_CONTROL: - ret = recv_pma_get_portsamplescontrol(pmp, ibdev, - port_num); - goto bail; - case IB_PMA_PORT_SAMPLES_RESULT: - ret = recv_pma_get_portsamplesresult(pmp, ibdev); - goto bail; - case IB_PMA_PORT_SAMPLES_RESULT_EXT: - ret = recv_pma_get_portsamplesresult_ext(pmp, - ibdev); - goto bail; - case IB_PMA_PORT_COUNTERS: - ret = recv_pma_get_portcounters(pmp, ibdev, - port_num); - goto bail; - case IB_PMA_PORT_COUNTERS_EXT: - ret = recv_pma_get_portcounters_ext(pmp, ibdev, - port_num); - goto bail; - default: - pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; - ret = reply((struct 
ib_smp *) pmp); - goto bail; - } - - case IB_MGMT_METHOD_SET: - switch (pmp->mad_hdr.attr_id) { - case IB_PMA_PORT_SAMPLES_CONTROL: - ret = recv_pma_set_portsamplescontrol(pmp, ibdev, - port_num); - goto bail; - case IB_PMA_PORT_COUNTERS: - ret = recv_pma_set_portcounters(pmp, ibdev, - port_num); - goto bail; - case IB_PMA_PORT_COUNTERS_EXT: - ret = recv_pma_set_portcounters_ext(pmp, ibdev, - port_num); - goto bail; - default: - pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; - ret = reply((struct ib_smp *) pmp); - goto bail; - } - - case IB_MGMT_METHOD_GET_RESP: - /* - * The ib_mad module will call us to process responses - * before checking for other consumers. - * Just tell the caller to process it normally. - */ - ret = IB_MAD_RESULT_SUCCESS; - goto bail; - default: - pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD; - ret = reply((struct ib_smp *) pmp); - } - -bail: - return ret; -} - -/** - * ipath_process_mad - process an incoming MAD packet - * @ibdev: the infiniband device this packet came in on - * @mad_flags: MAD flags - * @port_num: the port number this packet came in on - * @in_wc: the work completion entry for this packet - * @in_grh: the global route header for this packet - * @in_mad: the incoming MAD - * @out_mad: any outgoing MAD reply - * - * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not - * interested in processing. - * - * Note that the verbs framework has already done the MAD sanity checks, - * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE - * MADs. - * - * This is called by the ib_mad module. 
- */ -int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) -{ - int ret; - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - - switch (in_mad->mad_hdr.mgmt_class) { - case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: - case IB_MGMT_CLASS_SUBN_LID_ROUTED: - ret = process_subn(ibdev, mad_flags, port_num, - in_mad, out_mad); - goto bail; - case IB_MGMT_CLASS_PERF_MGMT: - ret = process_perf(ibdev, port_num, in_mad, out_mad); - goto bail; - default: - ret = IB_MAD_RESULT_SUCCESS; - } - -bail: - return ret; -} diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c deleted file mode 100644 index e73274229..000000000 --- a/drivers/infiniband/hw/ipath/ipath_mmap.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/module.h> -#include <linux/vmalloc.h> -#include <linux/slab.h> -#include <linux/mm.h> -#include <linux/errno.h> -#include <asm/pgtable.h> - -#include "ipath_verbs.h" - -/** - * ipath_release_mmap_info - free mmap info structure - * @ref: a pointer to the kref within struct ipath_mmap_info - */ -void ipath_release_mmap_info(struct kref *ref) -{ - struct ipath_mmap_info *ip = - container_of(ref, struct ipath_mmap_info, ref); - struct ipath_ibdev *dev = to_idev(ip->context->device); - - spin_lock_irq(&dev->pending_lock); - list_del(&ip->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); - - vfree(ip->obj); - kfree(ip); -} - -/* - * open and close keep track of how many times the CQ is mapped, - * to avoid releasing it. 
- */ -static void ipath_vma_open(struct vm_area_struct *vma) -{ - struct ipath_mmap_info *ip = vma->vm_private_data; - - kref_get(&ip->ref); -} - -static void ipath_vma_close(struct vm_area_struct *vma) -{ - struct ipath_mmap_info *ip = vma->vm_private_data; - - kref_put(&ip->ref, ipath_release_mmap_info); -} - -static const struct vm_operations_struct ipath_vm_ops = { - .open = ipath_vma_open, - .close = ipath_vma_close, -}; - -/** - * ipath_mmap - create a new mmap region - * @context: the IB user context of the process making the mmap() call - * @vma: the VMA to be initialized - * Return zero if the mmap is OK. Otherwise, return an errno. - */ -int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) -{ - struct ipath_ibdev *dev = to_idev(context->device); - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; - unsigned long size = vma->vm_end - vma->vm_start; - struct ipath_mmap_info *ip, *pp; - int ret = -EINVAL; - - /* - * Search the device's list of objects waiting for a mmap call. - * Normally, this list is very short since a call to create a - * CQ, QP, or SRQ is soon followed by a call to mmap(). - */ - spin_lock_irq(&dev->pending_lock); - list_for_each_entry_safe(ip, pp, &dev->pending_mmaps, - pending_mmaps) { - /* Only the creator is allowed to mmap the object */ - if (context != ip->context || (__u64) offset != ip->offset) - continue; - /* Don't allow a mmap larger than the object. 
*/ - if (size > ip->size) - break; - - list_del_init(&ip->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); - - ret = remap_vmalloc_range(vma, ip->obj, 0); - if (ret) - goto done; - vma->vm_ops = &ipath_vm_ops; - vma->vm_private_data = ip; - ipath_vma_open(vma); - goto done; - } - spin_unlock_irq(&dev->pending_lock); -done: - return ret; -} - -/* - * Allocate information for ipath_mmap - */ -struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev, - u32 size, - struct ib_ucontext *context, - void *obj) { - struct ipath_mmap_info *ip; - - ip = kmalloc(sizeof *ip, GFP_KERNEL); - if (!ip) - goto bail; - - size = PAGE_ALIGN(size); - - spin_lock_irq(&dev->mmap_offset_lock); - if (dev->mmap_offset == 0) - dev->mmap_offset = PAGE_SIZE; - ip->offset = dev->mmap_offset; - dev->mmap_offset += size; - spin_unlock_irq(&dev->mmap_offset_lock); - - INIT_LIST_HEAD(&ip->pending_mmaps); - ip->size = size; - ip->context = context; - ip->obj = obj; - kref_init(&ip->ref); - -bail: - return ip; -} - -void ipath_update_mmap_info(struct ipath_ibdev *dev, - struct ipath_mmap_info *ip, - u32 size, void *obj) { - size = PAGE_ALIGN(size); - - spin_lock_irq(&dev->mmap_offset_lock); - if (dev->mmap_offset == 0) - dev->mmap_offset = PAGE_SIZE; - ip->offset = dev->mmap_offset; - dev->mmap_offset += size; - spin_unlock_irq(&dev->mmap_offset_lock); - - ip->size = size; - ip->obj = obj; -} diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c deleted file mode 100644 index c7278f6a8..000000000 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ /dev/null @@ -1,425 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/slab.h> - -#include <rdma/ib_umem.h> -#include <rdma/ib_pack.h> -#include <rdma/ib_smi.h> - -#include "ipath_verbs.h" - -/* Fast memory region */ -struct ipath_fmr { - struct ib_fmr ibfmr; - u8 page_shift; - struct ipath_mregion mr; /* must be last */ -}; - -static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr) -{ - return container_of(ibfmr, struct ipath_fmr, ibfmr); -} - -/** - * ipath_get_dma_mr - get a DMA memory region - * @pd: protection domain for this memory region - * @acc: access flags - * - * Returns the memory region on success, otherwise returns an errno. - * Note that all DMA addresses should be created via the - * struct ib_dma_mapping_ops functions (see ipath_dma.c). 
- */ -struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc) -{ - struct ipath_mr *mr; - struct ib_mr *ret; - - mr = kzalloc(sizeof *mr, GFP_KERNEL); - if (!mr) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - mr->mr.access_flags = acc; - ret = &mr->ibmr; - -bail: - return ret; -} - -static struct ipath_mr *alloc_mr(int count, - struct ipath_lkey_table *lk_table) -{ - struct ipath_mr *mr; - int m, i = 0; - - /* Allocate struct plus pointers to first level page tables. */ - m = (count + IPATH_SEGSZ - 1) / IPATH_SEGSZ; - mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); - if (!mr) - goto done; - - /* Allocate first level page tables. */ - for (; i < m; i++) { - mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL); - if (!mr->mr.map[i]) - goto bail; - } - mr->mr.mapsz = m; - - /* - * ib_reg_phys_mr() will initialize mr->ibmr except for - * lkey and rkey. - */ - if (!ipath_alloc_lkey(lk_table, &mr->mr)) - goto bail; - mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey; - - goto done; - -bail: - while (i) { - i--; - kfree(mr->mr.map[i]); - } - kfree(mr); - mr = NULL; - -done: - return mr; -} - -/** - * ipath_reg_phys_mr - register a physical memory region - * @pd: protection domain for this memory region - * @buffer_list: pointer to the list of physical buffers to register - * @num_phys_buf: the number of physical buffers to register - * @iova_start: the starting address passed over IB which maps to this MR - * - * Returns the memory region on success, otherwise returns an errno. 
- */ -struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *buffer_list, - int num_phys_buf, int acc, u64 *iova_start) -{ - struct ipath_mr *mr; - int n, m, i; - struct ib_mr *ret; - - mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table); - if (mr == NULL) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - mr->mr.pd = pd; - mr->mr.user_base = *iova_start; - mr->mr.iova = *iova_start; - mr->mr.length = 0; - mr->mr.offset = 0; - mr->mr.access_flags = acc; - mr->mr.max_segs = num_phys_buf; - mr->umem = NULL; - - m = 0; - n = 0; - for (i = 0; i < num_phys_buf; i++) { - mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr; - mr->mr.map[m]->segs[n].length = buffer_list[i].size; - mr->mr.length += buffer_list[i].size; - n++; - if (n == IPATH_SEGSZ) { - m++; - n = 0; - } - } - - ret = &mr->ibmr; - -bail: - return ret; -} - -/** - * ipath_reg_user_mr - register a userspace memory region - * @pd: protection domain for this memory region - * @start: starting userspace address - * @length: length of region to register - * @virt_addr: virtual address to use (from HCA's point of view) - * @mr_access_flags: access flags for this memory region - * @udata: unused by the InfiniPath driver - * - * Returns the memory region on success, otherwise returns an errno. 
- */ -struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct ib_udata *udata) -{ - struct ipath_mr *mr; - struct ib_umem *umem; - int n, m, entry; - struct scatterlist *sg; - struct ib_mr *ret; - - if (length == 0) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - - umem = ib_umem_get(pd->uobject->context, start, length, - mr_access_flags, 0); - if (IS_ERR(umem)) - return (void *) umem; - - n = umem->nmap; - mr = alloc_mr(n, &to_idev(pd->device)->lk_table); - if (!mr) { - ret = ERR_PTR(-ENOMEM); - ib_umem_release(umem); - goto bail; - } - - mr->mr.pd = pd; - mr->mr.user_base = start; - mr->mr.iova = virt_addr; - mr->mr.length = length; - mr->mr.offset = ib_umem_offset(umem); - mr->mr.access_flags = mr_access_flags; - mr->mr.max_segs = n; - mr->umem = umem; - - m = 0; - n = 0; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - void *vaddr; - - vaddr = page_address(sg_page(sg)); - if (!vaddr) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - mr->mr.map[m]->segs[n].vaddr = vaddr; - mr->mr.map[m]->segs[n].length = umem->page_size; - n++; - if (n == IPATH_SEGSZ) { - m++; - n = 0; - } - } - ret = &mr->ibmr; - -bail: - return ret; -} - -/** - * ipath_dereg_mr - unregister and free a memory region - * @ibmr: the memory region to free - * - * Returns 0 on success. - * - * Note that this is called to free MRs created by ipath_get_dma_mr() - * or ipath_reg_user_mr(). 
- */ -int ipath_dereg_mr(struct ib_mr *ibmr) -{ - struct ipath_mr *mr = to_imr(ibmr); - int i; - - ipath_free_lkey(&to_idev(ibmr->device)->lk_table, ibmr->lkey); - i = mr->mr.mapsz; - while (i) { - i--; - kfree(mr->mr.map[i]); - } - - if (mr->umem) - ib_umem_release(mr->umem); - - kfree(mr); - return 0; -} - -/** - * ipath_alloc_fmr - allocate a fast memory region - * @pd: the protection domain for this memory region - * @mr_access_flags: access flags for this memory region - * @fmr_attr: fast memory region attributes - * - * Returns the memory region on success, otherwise returns an errno. - */ -struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags, - struct ib_fmr_attr *fmr_attr) -{ - struct ipath_fmr *fmr; - int m, i = 0; - struct ib_fmr *ret; - - /* Allocate struct plus pointers to first level page tables. */ - m = (fmr_attr->max_pages + IPATH_SEGSZ - 1) / IPATH_SEGSZ; - fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); - if (!fmr) - goto bail; - - /* Allocate first level page tables. */ - for (; i < m; i++) { - fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0], - GFP_KERNEL); - if (!fmr->mr.map[i]) - goto bail; - } - fmr->mr.mapsz = m; - - /* - * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & - * rkey. - */ - if (!ipath_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr)) - goto bail; - fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mr.lkey; - /* - * Resources are allocated but no valid mapping (RKEY can't be - * used). 
- */ - fmr->mr.pd = pd; - fmr->mr.user_base = 0; - fmr->mr.iova = 0; - fmr->mr.length = 0; - fmr->mr.offset = 0; - fmr->mr.access_flags = mr_access_flags; - fmr->mr.max_segs = fmr_attr->max_pages; - fmr->page_shift = fmr_attr->page_shift; - - ret = &fmr->ibfmr; - goto done; - -bail: - while (i) - kfree(fmr->mr.map[--i]); - kfree(fmr); - ret = ERR_PTR(-ENOMEM); - -done: - return ret; -} - -/** - * ipath_map_phys_fmr - set up a fast memory region - * @ibmfr: the fast memory region to set up - * @page_list: the list of pages to associate with the fast memory region - * @list_len: the number of pages to associate with the fast memory region - * @iova: the virtual address of the start of the fast memory region - * - * This may be called from interrupt context. - */ - -int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list, - int list_len, u64 iova) -{ - struct ipath_fmr *fmr = to_ifmr(ibfmr); - struct ipath_lkey_table *rkt; - unsigned long flags; - int m, n, i; - u32 ps; - int ret; - - if (list_len > fmr->mr.max_segs) { - ret = -EINVAL; - goto bail; - } - rkt = &to_idev(ibfmr->device)->lk_table; - spin_lock_irqsave(&rkt->lock, flags); - fmr->mr.user_base = iova; - fmr->mr.iova = iova; - ps = 1 << fmr->page_shift; - fmr->mr.length = list_len * ps; - m = 0; - n = 0; - ps = 1 << fmr->page_shift; - for (i = 0; i < list_len; i++) { - fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i]; - fmr->mr.map[m]->segs[n].length = ps; - if (++n == IPATH_SEGSZ) { - m++; - n = 0; - } - } - spin_unlock_irqrestore(&rkt->lock, flags); - ret = 0; - -bail: - return ret; -} - -/** - * ipath_unmap_fmr - unmap fast memory regions - * @fmr_list: the list of fast memory regions to unmap - * - * Returns 0 on success. 
- */ -int ipath_unmap_fmr(struct list_head *fmr_list) -{ - struct ipath_fmr *fmr; - struct ipath_lkey_table *rkt; - unsigned long flags; - - list_for_each_entry(fmr, fmr_list, ibfmr.list) { - rkt = &to_idev(fmr->ibfmr.device)->lk_table; - spin_lock_irqsave(&rkt->lock, flags); - fmr->mr.user_base = 0; - fmr->mr.iova = 0; - fmr->mr.length = 0; - spin_unlock_irqrestore(&rkt->lock, flags); - } - return 0; -} - -/** - * ipath_dealloc_fmr - deallocate a fast memory region - * @ibfmr: the fast memory region to deallocate - * - * Returns 0 on success. - */ -int ipath_dealloc_fmr(struct ib_fmr *ibfmr) -{ - struct ipath_fmr *fmr = to_ifmr(ibfmr); - int i; - - ipath_free_lkey(&to_idev(ibfmr->device)->lk_table, ibfmr->lkey); - i = fmr->mr.mapsz; - while (i) - kfree(fmr->mr.map[--i]); - kfree(fmr); - return 0; -} diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c deleted file mode 100644 index face87602..000000000 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ /dev/null @@ -1,1080 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/err.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> - -#include "ipath_verbs.h" -#include "ipath_kernel.h" - -#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE) -#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) -#define mk_qpn(qpt, map, off) (((map) - (qpt)->map) * BITS_PER_PAGE + \ - (off)) -#define find_next_offset(map, off) find_next_zero_bit((map)->page, \ - BITS_PER_PAGE, off) - -/* - * Convert the AETH credit code into the number of credits. - */ -static u32 credit_table[31] = { - 0, /* 0 */ - 1, /* 1 */ - 2, /* 2 */ - 3, /* 3 */ - 4, /* 4 */ - 6, /* 5 */ - 8, /* 6 */ - 12, /* 7 */ - 16, /* 8 */ - 24, /* 9 */ - 32, /* A */ - 48, /* B */ - 64, /* C */ - 96, /* D */ - 128, /* E */ - 192, /* F */ - 256, /* 10 */ - 384, /* 11 */ - 512, /* 12 */ - 768, /* 13 */ - 1024, /* 14 */ - 1536, /* 15 */ - 2048, /* 16 */ - 3072, /* 17 */ - 4096, /* 18 */ - 6144, /* 19 */ - 8192, /* 1A */ - 12288, /* 1B */ - 16384, /* 1C */ - 24576, /* 1D */ - 32768 /* 1E */ -}; - - -static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map) -{ - unsigned long page = get_zeroed_page(GFP_KERNEL); - unsigned long flags; - - /* - * Free the page if someone raced with us installing it. 
- */ - - spin_lock_irqsave(&qpt->lock, flags); - if (map->page) - free_page(page); - else - map->page = (void *)page; - spin_unlock_irqrestore(&qpt->lock, flags); -} - - -static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type) -{ - u32 i, offset, max_scan, qpn; - struct qpn_map *map; - u32 ret = -1; - - if (type == IB_QPT_SMI) - ret = 0; - else if (type == IB_QPT_GSI) - ret = 1; - - if (ret != -1) { - map = &qpt->map[0]; - if (unlikely(!map->page)) { - get_map_page(qpt, map); - if (unlikely(!map->page)) { - ret = -ENOMEM; - goto bail; - } - } - if (!test_and_set_bit(ret, map->page)) - atomic_dec(&map->n_free); - else - ret = -EBUSY; - goto bail; - } - - qpn = qpt->last + 1; - if (qpn >= QPN_MAX) - qpn = 2; - offset = qpn & BITS_PER_PAGE_MASK; - map = &qpt->map[qpn / BITS_PER_PAGE]; - max_scan = qpt->nmaps - !offset; - for (i = 0;;) { - if (unlikely(!map->page)) { - get_map_page(qpt, map); - if (unlikely(!map->page)) - break; - } - if (likely(atomic_read(&map->n_free))) { - do { - if (!test_and_set_bit(offset, map->page)) { - atomic_dec(&map->n_free); - qpt->last = qpn; - ret = qpn; - goto bail; - } - offset = find_next_offset(map, offset); - qpn = mk_qpn(qpt, map, offset); - /* - * This test differs from alloc_pidmap(). - * If find_next_offset() does find a zero - * bit, we don't need to check for QPN - * wrapping around past our starting QPN. - * We just need to be sure we don't loop - * forever. - */ - } while (offset < BITS_PER_PAGE && qpn < QPN_MAX); - } - /* - * In order to keep the number of pages allocated to a - * minimum, we scan the all existing pages before increasing - * the size of the bitmap table. 
- */ - if (++i > max_scan) { - if (qpt->nmaps == QPNMAP_ENTRIES) - break; - map = &qpt->map[qpt->nmaps++]; - offset = 0; - } else if (map < &qpt->map[qpt->nmaps]) { - ++map; - offset = 0; - } else { - map = &qpt->map[0]; - offset = 2; - } - qpn = mk_qpn(qpt, map, offset); - } - - ret = -ENOMEM; - -bail: - return ret; -} - -static void free_qpn(struct ipath_qp_table *qpt, u32 qpn) -{ - struct qpn_map *map; - - map = qpt->map + qpn / BITS_PER_PAGE; - if (map->page) - clear_bit(qpn & BITS_PER_PAGE_MASK, map->page); - atomic_inc(&map->n_free); -} - -/** - * ipath_alloc_qpn - allocate a QP number - * @qpt: the QP table - * @qp: the QP - * @type: the QP type (IB_QPT_SMI and IB_QPT_GSI are special) - * - * Allocate the next available QPN and put the QP into the hash table. - * The hash table holds a reference to the QP. - */ -static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp, - enum ib_qp_type type) -{ - unsigned long flags; - int ret; - - ret = alloc_qpn(qpt, type); - if (ret < 0) - goto bail; - qp->ibqp.qp_num = ret; - - /* Add the QP to the hash table. */ - spin_lock_irqsave(&qpt->lock, flags); - - ret %= qpt->max; - qp->next = qpt->table[ret]; - qpt->table[ret] = qp; - atomic_inc(&qp->refcount); - - spin_unlock_irqrestore(&qpt->lock, flags); - ret = 0; - -bail: - return ret; -} - -/** - * ipath_free_qp - remove a QP from the QP table - * @qpt: the QP table - * @qp: the QP to remove - * - * Remove the QP from the table so it can't be found asynchronously by - * the receive interrupt routine. - */ -static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) -{ - struct ipath_qp *q, **qpp; - unsigned long flags; - - spin_lock_irqsave(&qpt->lock, flags); - - /* Remove QP from the hash table. 
*/ - qpp = &qpt->table[qp->ibqp.qp_num % qpt->max]; - for (; (q = *qpp) != NULL; qpp = &q->next) { - if (q == qp) { - *qpp = qp->next; - qp->next = NULL; - atomic_dec(&qp->refcount); - break; - } - } - - spin_unlock_irqrestore(&qpt->lock, flags); -} - -/** - * ipath_free_all_qps - check for QPs still in use - * @qpt: the QP table to empty - * - * There should not be any QPs still in use. - * Free memory for table. - */ -unsigned ipath_free_all_qps(struct ipath_qp_table *qpt) -{ - unsigned long flags; - struct ipath_qp *qp; - u32 n, qp_inuse = 0; - - spin_lock_irqsave(&qpt->lock, flags); - for (n = 0; n < qpt->max; n++) { - qp = qpt->table[n]; - qpt->table[n] = NULL; - - for (; qp; qp = qp->next) - qp_inuse++; - } - spin_unlock_irqrestore(&qpt->lock, flags); - - for (n = 0; n < ARRAY_SIZE(qpt->map); n++) - if (qpt->map[n].page) - free_page((unsigned long) qpt->map[n].page); - return qp_inuse; -} - -/** - * ipath_lookup_qpn - return the QP with the given QPN - * @qpt: the QP table - * @qpn: the QP number to look up - * - * The caller is responsible for decrementing the QP reference count - * when done. 
- */ -struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn) -{ - unsigned long flags; - struct ipath_qp *qp; - - spin_lock_irqsave(&qpt->lock, flags); - - for (qp = qpt->table[qpn % qpt->max]; qp; qp = qp->next) { - if (qp->ibqp.qp_num == qpn) { - atomic_inc(&qp->refcount); - break; - } - } - - spin_unlock_irqrestore(&qpt->lock, flags); - return qp; -} - -/** - * ipath_reset_qp - initialize the QP state to the reset state - * @qp: the QP to reset - * @type: the QP type - */ -static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) -{ - qp->remote_qpn = 0; - qp->qkey = 0; - qp->qp_access_flags = 0; - atomic_set(&qp->s_dma_busy, 0); - qp->s_flags &= IPATH_S_SIGNAL_REQ_WR; - qp->s_hdrwords = 0; - qp->s_wqe = NULL; - qp->s_pkt_delay = 0; - qp->s_draining = 0; - qp->s_psn = 0; - qp->r_psn = 0; - qp->r_msn = 0; - if (type == IB_QPT_RC) { - qp->s_state = IB_OPCODE_RC_SEND_LAST; - qp->r_state = IB_OPCODE_RC_SEND_LAST; - } else { - qp->s_state = IB_OPCODE_UC_SEND_LAST; - qp->r_state = IB_OPCODE_UC_SEND_LAST; - } - qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; - qp->r_nak_state = 0; - qp->r_aflags = 0; - qp->r_flags = 0; - qp->s_rnr_timeout = 0; - qp->s_head = 0; - qp->s_tail = 0; - qp->s_cur = 0; - qp->s_last = 0; - qp->s_ssn = 1; - qp->s_lsn = 0; - memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); - qp->r_head_ack_queue = 0; - qp->s_tail_ack_queue = 0; - qp->s_num_rd_atomic = 0; - if (qp->r_rq.wq) { - qp->r_rq.wq->head = 0; - qp->r_rq.wq->tail = 0; - } -} - -/** - * ipath_error_qp - put a QP into the error state - * @qp: the QP to put into the error state - * @err: the receive completion error to signal if a RWQE is active - * - * Flushes both send and receive work queues. - * Returns true if last WQE event should be generated. - * The QP s_lock should be held and interrupts disabled. - * If we are already in error state, just return. 
- */ - -int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - struct ib_wc wc; - int ret = 0; - - if (qp->state == IB_QPS_ERR) - goto bail; - - qp->state = IB_QPS_ERR; - - spin_lock(&dev->pending_lock); - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - if (!list_empty(&qp->piowait)) - list_del_init(&qp->piowait); - spin_unlock(&dev->pending_lock); - - /* Schedule the sending tasklet to drain the send work queue. */ - if (qp->s_last != qp->s_head) - ipath_schedule_send(qp); - - memset(&wc, 0, sizeof(wc)); - wc.qp = &qp->ibqp; - wc.opcode = IB_WC_RECV; - - if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) { - wc.wr_id = qp->r_wr_id; - wc.status = err; - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); - } - wc.status = IB_WC_WR_FLUSH_ERR; - - if (qp->r_rq.wq) { - struct ipath_rwq *wq; - u32 head; - u32 tail; - - spin_lock(&qp->r_rq.lock); - - /* sanity check pointers before trusting them */ - wq = qp->r_rq.wq; - head = wq->head; - if (head >= qp->r_rq.size) - head = 0; - tail = wq->tail; - if (tail >= qp->r_rq.size) - tail = 0; - while (tail != head) { - wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; - if (++tail >= qp->r_rq.size) - tail = 0; - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); - } - wq->tail = tail; - - spin_unlock(&qp->r_rq.lock); - } else if (qp->ibqp.event_handler) - ret = 1; - -bail: - return ret; -} - -/** - * ipath_modify_qp - modify the attributes of a queue pair - * @ibqp: the queue pair who's attributes we're modifying - * @attr: the new attributes - * @attr_mask: the mask of attributes to modify - * @udata: user data for ipathverbs.so - * - * Returns 0 on success, otherwise returns an errno. 
- */ -int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata) -{ - struct ipath_ibdev *dev = to_idev(ibqp->device); - struct ipath_qp *qp = to_iqp(ibqp); - enum ib_qp_state cur_state, new_state; - int lastwqe = 0; - int ret; - - spin_lock_irq(&qp->s_lock); - - cur_state = attr_mask & IB_QP_CUR_STATE ? - attr->cur_qp_state : qp->state; - new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; - - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, - attr_mask, IB_LINK_LAYER_UNSPECIFIED)) - goto inval; - - if (attr_mask & IB_QP_AV) { - if (attr->ah_attr.dlid == 0 || - attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE) - goto inval; - - if ((attr->ah_attr.ah_flags & IB_AH_GRH) && - (attr->ah_attr.grh.sgid_index > 1)) - goto inval; - } - - if (attr_mask & IB_QP_PKEY_INDEX) - if (attr->pkey_index >= ipath_get_npkeys(dev->dd)) - goto inval; - - if (attr_mask & IB_QP_MIN_RNR_TIMER) - if (attr->min_rnr_timer > 31) - goto inval; - - if (attr_mask & IB_QP_PORT) - if (attr->port_num == 0 || - attr->port_num > ibqp->device->phys_port_cnt) - goto inval; - - /* - * don't allow invalid Path MTU values or greater than 2048 - * unless we are configured for a 4KB MTU - */ - if ((attr_mask & IB_QP_PATH_MTU) && - (ib_mtu_enum_to_int(attr->path_mtu) == -1 || - (attr->path_mtu > IB_MTU_2048 && !ipath_mtu4096))) - goto inval; - - if (attr_mask & IB_QP_PATH_MIG_STATE) - if (attr->path_mig_state != IB_MIG_MIGRATED && - attr->path_mig_state != IB_MIG_REARM) - goto inval; - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) - if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC) - goto inval; - - switch (new_state) { - case IB_QPS_RESET: - if (qp->state != IB_QPS_RESET) { - qp->state = IB_QPS_RESET; - spin_lock(&dev->pending_lock); - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - if (!list_empty(&qp->piowait)) - list_del_init(&qp->piowait); - spin_unlock(&dev->pending_lock); - qp->s_flags &= ~IPATH_S_ANY_WAIT; - 
spin_unlock_irq(&qp->s_lock); - /* Stop the sending tasklet */ - tasklet_kill(&qp->s_task); - wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); - spin_lock_irq(&qp->s_lock); - } - ipath_reset_qp(qp, ibqp->qp_type); - break; - - case IB_QPS_SQD: - qp->s_draining = qp->s_last != qp->s_cur; - qp->state = new_state; - break; - - case IB_QPS_SQE: - if (qp->ibqp.qp_type == IB_QPT_RC) - goto inval; - qp->state = new_state; - break; - - case IB_QPS_ERR: - lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); - break; - - default: - qp->state = new_state; - break; - } - - if (attr_mask & IB_QP_PKEY_INDEX) - qp->s_pkey_index = attr->pkey_index; - - if (attr_mask & IB_QP_DEST_QPN) - qp->remote_qpn = attr->dest_qp_num; - - if (attr_mask & IB_QP_SQ_PSN) { - qp->s_psn = qp->s_next_psn = attr->sq_psn; - qp->s_last_psn = qp->s_next_psn - 1; - } - - if (attr_mask & IB_QP_RQ_PSN) - qp->r_psn = attr->rq_psn; - - if (attr_mask & IB_QP_ACCESS_FLAGS) - qp->qp_access_flags = attr->qp_access_flags; - - if (attr_mask & IB_QP_AV) { - qp->remote_ah_attr = attr->ah_attr; - qp->s_dmult = ipath_ib_rate_to_mult(attr->ah_attr.static_rate); - } - - if (attr_mask & IB_QP_PATH_MTU) - qp->path_mtu = attr->path_mtu; - - if (attr_mask & IB_QP_RETRY_CNT) - qp->s_retry = qp->s_retry_cnt = attr->retry_cnt; - - if (attr_mask & IB_QP_RNR_RETRY) { - qp->s_rnr_retry = attr->rnr_retry; - if (qp->s_rnr_retry > 7) - qp->s_rnr_retry = 7; - qp->s_rnr_retry_cnt = qp->s_rnr_retry; - } - - if (attr_mask & IB_QP_MIN_RNR_TIMER) - qp->r_min_rnr_timer = attr->min_rnr_timer; - - if (attr_mask & IB_QP_TIMEOUT) - qp->timeout = attr->timeout; - - if (attr_mask & IB_QP_QKEY) - qp->qkey = attr->qkey; - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) - qp->r_max_rd_atomic = attr->max_dest_rd_atomic; - - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) - qp->s_max_rd_atomic = attr->max_rd_atomic; - - spin_unlock_irq(&qp->s_lock); - - if (lastwqe) { - struct ib_event ev; - - ev.device = qp->ibqp.device; - ev.element.qp = &qp->ibqp; - 
ev.event = IB_EVENT_QP_LAST_WQE_REACHED; - qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); - } - ret = 0; - goto bail; - -inval: - spin_unlock_irq(&qp->s_lock); - ret = -EINVAL; - -bail: - return ret; -} - -int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_qp_init_attr *init_attr) -{ - struct ipath_qp *qp = to_iqp(ibqp); - - attr->qp_state = qp->state; - attr->cur_qp_state = attr->qp_state; - attr->path_mtu = qp->path_mtu; - attr->path_mig_state = 0; - attr->qkey = qp->qkey; - attr->rq_psn = qp->r_psn; - attr->sq_psn = qp->s_next_psn; - attr->dest_qp_num = qp->remote_qpn; - attr->qp_access_flags = qp->qp_access_flags; - attr->cap.max_send_wr = qp->s_size - 1; - attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1; - attr->cap.max_send_sge = qp->s_max_sge; - attr->cap.max_recv_sge = qp->r_rq.max_sge; - attr->cap.max_inline_data = 0; - attr->ah_attr = qp->remote_ah_attr; - memset(&attr->alt_ah_attr, 0, sizeof(attr->alt_ah_attr)); - attr->pkey_index = qp->s_pkey_index; - attr->alt_pkey_index = 0; - attr->en_sqd_async_notify = 0; - attr->sq_draining = qp->s_draining; - attr->max_rd_atomic = qp->s_max_rd_atomic; - attr->max_dest_rd_atomic = qp->r_max_rd_atomic; - attr->min_rnr_timer = qp->r_min_rnr_timer; - attr->port_num = 1; - attr->timeout = qp->timeout; - attr->retry_cnt = qp->s_retry_cnt; - attr->rnr_retry = qp->s_rnr_retry_cnt; - attr->alt_port_num = 0; - attr->alt_timeout = 0; - - init_attr->event_handler = qp->ibqp.event_handler; - init_attr->qp_context = qp->ibqp.qp_context; - init_attr->send_cq = qp->ibqp.send_cq; - init_attr->recv_cq = qp->ibqp.recv_cq; - init_attr->srq = qp->ibqp.srq; - init_attr->cap = attr->cap; - if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR) - init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; - else - init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; - init_attr->qp_type = qp->ibqp.qp_type; - init_attr->port_num = 1; - return 0; -} - -/** - * ipath_compute_aeth - compute the AETH (syndrome + MSN) - * @qp: 
the queue pair to compute the AETH for - * - * Returns the AETH. - */ -__be32 ipath_compute_aeth(struct ipath_qp *qp) -{ - u32 aeth = qp->r_msn & IPATH_MSN_MASK; - - if (qp->ibqp.srq) { - /* - * Shared receive queues don't generate credits. - * Set the credit field to the invalid value. - */ - aeth |= IPATH_AETH_CREDIT_INVAL << IPATH_AETH_CREDIT_SHIFT; - } else { - u32 min, max, x; - u32 credits; - struct ipath_rwq *wq = qp->r_rq.wq; - u32 head; - u32 tail; - - /* sanity check pointers before trusting them */ - head = wq->head; - if (head >= qp->r_rq.size) - head = 0; - tail = wq->tail; - if (tail >= qp->r_rq.size) - tail = 0; - /* - * Compute the number of credits available (RWQEs). - * XXX Not holding the r_rq.lock here so there is a small - * chance that the pair of reads are not atomic. - */ - credits = head - tail; - if ((int)credits < 0) - credits += qp->r_rq.size; - /* - * Binary search the credit table to find the code to - * use. - */ - min = 0; - max = 31; - for (;;) { - x = (min + max) / 2; - if (credit_table[x] == credits) - break; - if (credit_table[x] > credits) - max = x; - else if (min == x) - break; - else - min = x; - } - aeth |= x << IPATH_AETH_CREDIT_SHIFT; - } - return cpu_to_be32(aeth); -} - -/** - * ipath_create_qp - create a queue pair for a device - * @ibpd: the protection domain who's device we create the queue pair for - * @init_attr: the attributes of the queue pair - * @udata: unused by InfiniPath - * - * Returns the queue pair on success, otherwise returns an errno. - * - * Called by the ib_create_qp() core verbs function. 
- */ -struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) -{ - struct ipath_qp *qp; - int err; - struct ipath_swqe *swq = NULL; - struct ipath_ibdev *dev; - size_t sz; - size_t sg_list_sz; - struct ib_qp *ret; - - if (init_attr->create_flags) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - - if (init_attr->cap.max_send_sge > ib_ipath_max_sges || - init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - - /* Check receive queue parameters if no SRQ is specified. */ - if (!init_attr->srq) { - if (init_attr->cap.max_recv_sge > ib_ipath_max_sges || - init_attr->cap.max_recv_wr > ib_ipath_max_qp_wrs) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - if (init_attr->cap.max_send_sge + - init_attr->cap.max_send_wr + - init_attr->cap.max_recv_sge + - init_attr->cap.max_recv_wr == 0) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - } - - switch (init_attr->qp_type) { - case IB_QPT_UC: - case IB_QPT_RC: - case IB_QPT_UD: - case IB_QPT_SMI: - case IB_QPT_GSI: - sz = sizeof(struct ipath_sge) * - init_attr->cap.max_send_sge + - sizeof(struct ipath_swqe); - swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); - if (swq == NULL) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - sz = sizeof(*qp); - sg_list_sz = 0; - if (init_attr->srq) { - struct ipath_srq *srq = to_isrq(init_attr->srq); - - if (srq->rq.max_sge > 1) - sg_list_sz = sizeof(*qp->r_sg_list) * - (srq->rq.max_sge - 1); - } else if (init_attr->cap.max_recv_sge > 1) - sg_list_sz = sizeof(*qp->r_sg_list) * - (init_attr->cap.max_recv_sge - 1); - qp = kmalloc(sz + sg_list_sz, GFP_KERNEL); - if (!qp) { - ret = ERR_PTR(-ENOMEM); - goto bail_swq; - } - if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD || - init_attr->qp_type == IB_QPT_SMI || - init_attr->qp_type == IB_QPT_GSI)) { - qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL); - if (!qp->r_ud_sg_list) { - ret = ERR_PTR(-ENOMEM); - goto bail_qp; - } - } else - qp->r_ud_sg_list = 
NULL; - if (init_attr->srq) { - sz = 0; - qp->r_rq.size = 0; - qp->r_rq.max_sge = 0; - qp->r_rq.wq = NULL; - init_attr->cap.max_recv_wr = 0; - init_attr->cap.max_recv_sge = 0; - } else { - qp->r_rq.size = init_attr->cap.max_recv_wr + 1; - qp->r_rq.max_sge = init_attr->cap.max_recv_sge; - sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + - sizeof(struct ipath_rwqe); - qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + - qp->r_rq.size * sz); - if (!qp->r_rq.wq) { - ret = ERR_PTR(-ENOMEM); - goto bail_sg_list; - } - } - - /* - * ib_create_qp() will initialize qp->ibqp - * except for qp->ibqp.qp_num. - */ - spin_lock_init(&qp->s_lock); - spin_lock_init(&qp->r_rq.lock); - atomic_set(&qp->refcount, 0); - init_waitqueue_head(&qp->wait); - init_waitqueue_head(&qp->wait_dma); - tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp); - INIT_LIST_HEAD(&qp->piowait); - INIT_LIST_HEAD(&qp->timerwait); - qp->state = IB_QPS_RESET; - qp->s_wq = swq; - qp->s_size = init_attr->cap.max_send_wr + 1; - qp->s_max_sge = init_attr->cap.max_send_sge; - if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) - qp->s_flags = IPATH_S_SIGNAL_REQ_WR; - else - qp->s_flags = 0; - dev = to_idev(ibpd->device); - err = ipath_alloc_qpn(&dev->qp_table, qp, - init_attr->qp_type); - if (err) { - ret = ERR_PTR(err); - vfree(qp->r_rq.wq); - goto bail_sg_list; - } - qp->ip = NULL; - qp->s_tx = NULL; - ipath_reset_qp(qp, init_attr->qp_type); - break; - - default: - /* Don't support raw QPs */ - ret = ERR_PTR(-ENOSYS); - goto bail; - } - - init_attr->cap.max_inline_data = 0; - - /* - * Return the address of the RWQ as the offset to mmap. - * See ipath_mmap() for details. 
- */ - if (udata && udata->outlen >= sizeof(__u64)) { - if (!qp->r_rq.wq) { - __u64 offset = 0; - - err = ib_copy_to_udata(udata, &offset, - sizeof(offset)); - if (err) { - ret = ERR_PTR(err); - goto bail_ip; - } - } else { - u32 s = sizeof(struct ipath_rwq) + - qp->r_rq.size * sz; - - qp->ip = - ipath_create_mmap_info(dev, s, - ibpd->uobject->context, - qp->r_rq.wq); - if (!qp->ip) { - ret = ERR_PTR(-ENOMEM); - goto bail_ip; - } - - err = ib_copy_to_udata(udata, &(qp->ip->offset), - sizeof(qp->ip->offset)); - if (err) { - ret = ERR_PTR(err); - goto bail_ip; - } - } - } - - spin_lock(&dev->n_qps_lock); - if (dev->n_qps_allocated == ib_ipath_max_qps) { - spin_unlock(&dev->n_qps_lock); - ret = ERR_PTR(-ENOMEM); - goto bail_ip; - } - - dev->n_qps_allocated++; - spin_unlock(&dev->n_qps_lock); - - if (qp->ip) { - spin_lock_irq(&dev->pending_lock); - list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); - } - - ret = &qp->ibqp; - goto bail; - -bail_ip: - if (qp->ip) - kref_put(&qp->ip->ref, ipath_release_mmap_info); - else - vfree(qp->r_rq.wq); - ipath_free_qp(&dev->qp_table, qp); - free_qpn(&dev->qp_table, qp->ibqp.qp_num); -bail_sg_list: - kfree(qp->r_ud_sg_list); -bail_qp: - kfree(qp); -bail_swq: - vfree(swq); -bail: - return ret; -} - -/** - * ipath_destroy_qp - destroy a queue pair - * @ibqp: the queue pair to destroy - * - * Returns 0 on success. - * - * Note that this can be called while the QP is actively sending or - * receiving! - */ -int ipath_destroy_qp(struct ib_qp *ibqp) -{ - struct ipath_qp *qp = to_iqp(ibqp); - struct ipath_ibdev *dev = to_idev(ibqp->device); - - /* Make sure HW and driver activity is stopped. 
*/ - spin_lock_irq(&qp->s_lock); - if (qp->state != IB_QPS_RESET) { - qp->state = IB_QPS_RESET; - spin_lock(&dev->pending_lock); - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - if (!list_empty(&qp->piowait)) - list_del_init(&qp->piowait); - spin_unlock(&dev->pending_lock); - qp->s_flags &= ~IPATH_S_ANY_WAIT; - spin_unlock_irq(&qp->s_lock); - /* Stop the sending tasklet */ - tasklet_kill(&qp->s_task); - wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); - } else - spin_unlock_irq(&qp->s_lock); - - ipath_free_qp(&dev->qp_table, qp); - - if (qp->s_tx) { - atomic_dec(&qp->refcount); - if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) - kfree(qp->s_tx->txreq.map_addr); - spin_lock_irq(&dev->pending_lock); - list_add(&qp->s_tx->txreq.list, &dev->txreq_free); - spin_unlock_irq(&dev->pending_lock); - qp->s_tx = NULL; - } - - wait_event(qp->wait, !atomic_read(&qp->refcount)); - - /* all user's cleaned up, mark it available */ - free_qpn(&dev->qp_table, qp->ibqp.qp_num); - spin_lock(&dev->n_qps_lock); - dev->n_qps_allocated--; - spin_unlock(&dev->n_qps_lock); - - if (qp->ip) - kref_put(&qp->ip->ref, ipath_release_mmap_info); - else - vfree(qp->r_rq.wq); - kfree(qp->r_ud_sg_list); - vfree(qp->s_wq); - kfree(qp); - return 0; -} - -/** - * ipath_init_qp_table - initialize the QP table for a device - * @idev: the device who's QP table we're initializing - * @size: the size of the QP table - * - * Returns 0 on success, otherwise returns an errno. - */ -int ipath_init_qp_table(struct ipath_ibdev *idev, int size) -{ - int i; - int ret; - - idev->qp_table.last = 1; /* QPN 0 and 1 are special. 
*/ - idev->qp_table.max = size; - idev->qp_table.nmaps = 1; - idev->qp_table.table = kzalloc(size * sizeof(*idev->qp_table.table), - GFP_KERNEL); - if (idev->qp_table.table == NULL) { - ret = -ENOMEM; - goto bail; - } - - for (i = 0; i < ARRAY_SIZE(idev->qp_table.map); i++) { - atomic_set(&idev->qp_table.map[i].n_free, BITS_PER_PAGE); - idev->qp_table.map[i].page = NULL; - } - - ret = 0; - -bail: - return ret; -} - -/** - * ipath_get_credit - flush the send work queue of a QP - * @qp: the qp who's send work queue to flush - * @aeth: the Acknowledge Extended Transport Header - * - * The QP s_lock should be held. - */ -void ipath_get_credit(struct ipath_qp *qp, u32 aeth) -{ - u32 credit = (aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK; - - /* - * If the credit is invalid, we can send - * as many packets as we like. Otherwise, we have to - * honor the credit field. - */ - if (credit == IPATH_AETH_CREDIT_INVAL) - qp->s_lsn = (u32) -1; - else if (qp->s_lsn != (u32) -1) { - /* Compute new LSN (i.e., MSN + credit) */ - credit = (aeth + credit_table[credit]) & IPATH_MSN_MASK; - if (ipath_cmp24(credit, qp->s_lsn) > 0) - qp->s_lsn = credit; - } - - /* Restart sending if it was blocked due to lack of credits. */ - if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) && - qp->s_cur != qp->s_head && - (qp->s_lsn == (u32) -1 || - ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn, - qp->s_lsn + 1) <= 0)) - ipath_schedule_send(qp); -} diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c deleted file mode 100644 index 79b3dbc97..000000000 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ /dev/null @@ -1,1969 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/io.h> - -#include "ipath_verbs.h" -#include "ipath_kernel.h" - -/* cut down ridiculously long IB macro names */ -#define OP(x) IB_OPCODE_RC_##x - -static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe, - u32 psn, u32 pmtu) -{ - u32 len; - - len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu; - ss->sge = wqe->sg_list[0]; - ss->sg_list = wqe->sg_list + 1; - ss->num_sge = wqe->wr.num_sge; - ipath_skip_sge(ss, len); - return wqe->length - len; -} - -/** - * ipath_init_restart- initialize the qp->s_sge after a restart - * @qp: the QP who's SGE we're restarting - * @wqe: the work queue to initialize the QP's SGE from - * - * The QP s_lock should be held and interrupts disabled. 
- */ -static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) -{ - struct ipath_ibdev *dev; - - qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, - ib_mtu_enum_to_int(qp->path_mtu)); - dev = to_idev(qp->ibqp.device); - spin_lock(&dev->pending_lock); - if (list_empty(&qp->timerwait)) - list_add_tail(&qp->timerwait, - &dev->pending[dev->pending_index]); - spin_unlock(&dev->pending_lock); -} - -/** - * ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read) - * @qp: a pointer to the QP - * @ohdr: a pointer to the IB header being constructed - * @pmtu: the path MTU - * - * Return 1 if constructed; otherwise, return 0. - * Note that we are in the responder's side of the QP context. - * Note the QP s_lock must be held. - */ -static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp, - struct ipath_other_headers *ohdr, u32 pmtu) -{ - struct ipath_ack_entry *e; - u32 hwords; - u32 len; - u32 bth0; - u32 bth2; - - /* Don't send an ACK if we aren't supposed to. */ - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) - goto bail; - - /* header size in 32-bit words LRH+BTH = (8+12)/4. */ - hwords = 5; - - switch (qp->s_ack_state) { - case OP(RDMA_READ_RESPONSE_LAST): - case OP(RDMA_READ_RESPONSE_ONLY): - case OP(ATOMIC_ACKNOWLEDGE): - /* - * We can increment the tail pointer now that the last - * response has been sent instead of only being - * constructed. - */ - if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC) - qp->s_tail_ack_queue = 0; - /* FALLTHROUGH */ - case OP(SEND_ONLY): - case OP(ACKNOWLEDGE): - /* Check for no next entry in the queue. 
*/ - if (qp->r_head_ack_queue == qp->s_tail_ack_queue) { - if (qp->s_flags & IPATH_S_ACK_PENDING) - goto normal; - qp->s_ack_state = OP(ACKNOWLEDGE); - goto bail; - } - - e = &qp->s_ack_queue[qp->s_tail_ack_queue]; - if (e->opcode == OP(RDMA_READ_REQUEST)) { - /* Copy SGE state in case we need to resend */ - qp->s_ack_rdma_sge = e->rdma_sge; - qp->s_cur_sge = &qp->s_ack_rdma_sge; - len = e->rdma_sge.sge.sge_length; - if (len > pmtu) { - len = pmtu; - qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); - } else { - qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); - e->sent = 1; - } - ohdr->u.aeth = ipath_compute_aeth(qp); - hwords++; - qp->s_ack_rdma_psn = e->psn; - bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; - } else { - /* COMPARE_SWAP or FETCH_ADD */ - qp->s_cur_sge = NULL; - len = 0; - qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); - ohdr->u.at.aeth = ipath_compute_aeth(qp); - ohdr->u.at.atomic_ack_eth[0] = - cpu_to_be32(e->atomic_data >> 32); - ohdr->u.at.atomic_ack_eth[1] = - cpu_to_be32(e->atomic_data); - hwords += sizeof(ohdr->u.at) / sizeof(u32); - bth2 = e->psn; - e->sent = 1; - } - bth0 = qp->s_ack_state << 24; - break; - - case OP(RDMA_READ_RESPONSE_FIRST): - qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); - /* FALLTHROUGH */ - case OP(RDMA_READ_RESPONSE_MIDDLE): - len = qp->s_ack_rdma_sge.sge.sge_length; - if (len > pmtu) - len = pmtu; - else { - ohdr->u.aeth = ipath_compute_aeth(qp); - hwords++; - qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); - qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1; - } - bth0 = qp->s_ack_state << 24; - bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; - break; - - default: - normal: - /* - * Send a regular ACK. - * Set the s_ack_state so we wait until after sending - * the ACK before setting s_ack_state to ACKNOWLEDGE - * (see above). 
- */ - qp->s_ack_state = OP(SEND_ONLY); - qp->s_flags &= ~IPATH_S_ACK_PENDING; - qp->s_cur_sge = NULL; - if (qp->s_nak_state) - ohdr->u.aeth = - cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | - (qp->s_nak_state << - IPATH_AETH_CREDIT_SHIFT)); - else - ohdr->u.aeth = ipath_compute_aeth(qp); - hwords++; - len = 0; - bth0 = OP(ACKNOWLEDGE) << 24; - bth2 = qp->s_ack_psn & IPATH_PSN_MASK; - } - qp->s_hdrwords = hwords; - qp->s_cur_size = len; - ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2); - return 1; - -bail: - return 0; -} - -/** - * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) - * @qp: a pointer to the QP - * - * Return 1 if constructed; otherwise, return 0. - */ -int ipath_make_rc_req(struct ipath_qp *qp) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - struct ipath_other_headers *ohdr; - struct ipath_sge_state *ss; - struct ipath_swqe *wqe; - u32 hwords; - u32 len; - u32 bth0; - u32 bth2; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); - char newreq; - unsigned long flags; - int ret = 0; - - ohdr = &qp->s_hdr.u.oth; - if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &qp->s_hdr.u.l.oth; - - /* - * The lock is needed to synchronize between the sending tasklet, - * the receive interrupt handler, and timeout resends. - */ - spin_lock_irqsave(&qp->s_lock, flags); - - /* Sending responses has higher priority over sending requests. */ - if ((qp->r_head_ack_queue != qp->s_tail_ack_queue || - (qp->s_flags & IPATH_S_ACK_PENDING) || - qp->s_ack_state != OP(ACKNOWLEDGE)) && - ipath_make_rc_ack(dev, qp, ohdr, pmtu)) - goto done; - - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { - if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) - goto bail; - /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) - goto bail; - /* If DMAs are in progress, we can't flush immediately. 
*/ - if (atomic_read(&qp->s_dma_busy)) { - qp->s_flags |= IPATH_S_WAIT_DMA; - goto bail; - } - wqe = get_swqe_ptr(qp, qp->s_last); - ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); - goto done; - } - - /* Leave BUSY set until RNR timeout. */ - if (qp->s_rnr_timeout) { - qp->s_flags |= IPATH_S_WAITING; - goto bail; - } - - /* header size in 32-bit words LRH+BTH = (8+12)/4. */ - hwords = 5; - bth0 = 1 << 22; /* Set M bit */ - - /* Send a request. */ - wqe = get_swqe_ptr(qp, qp->s_cur); - switch (qp->s_state) { - default: - if (!(ib_ipath_state_ops[qp->state] & - IPATH_PROCESS_NEXT_SEND_OK)) - goto bail; - /* - * Resend an old request or start a new one. - * - * We keep track of the current SWQE so that - * we don't reset the "furthest progress" state - * if we need to back up. - */ - newreq = 0; - if (qp->s_cur == qp->s_tail) { - /* Check if send work queue is empty. */ - if (qp->s_tail == qp->s_head) - goto bail; - /* - * If a fence is requested, wait for previous - * RDMA read and atomic operations to finish. - */ - if ((wqe->wr.send_flags & IB_SEND_FENCE) && - qp->s_num_rd_atomic) { - qp->s_flags |= IPATH_S_FENCE_PENDING; - goto bail; - } - wqe->psn = qp->s_next_psn; - newreq = 1; - } - /* - * Note that we have to be careful not to modify the - * original work request since we may need to resend - * it. - */ - len = wqe->length; - ss = &qp->s_sge; - bth2 = 0; - switch (wqe->wr.opcode) { - case IB_WR_SEND: - case IB_WR_SEND_WITH_IMM: - /* If no credit, return. 
*/ - if (qp->s_lsn != (u32) -1 && - ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { - qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT; - goto bail; - } - wqe->lpsn = wqe->psn; - if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; - qp->s_state = OP(SEND_FIRST); - len = pmtu; - break; - } - if (wqe->wr.opcode == IB_WR_SEND) - qp->s_state = OP(SEND_ONLY); - else { - qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE); - /* Immediate data comes after the BTH */ - ohdr->u.imm_data = wqe->wr.ex.imm_data; - hwords += 1; - } - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= 1 << 23; - bth2 = 1 << 31; /* Request ACK. */ - if (++qp->s_cur == qp->s_size) - qp->s_cur = 0; - break; - - case IB_WR_RDMA_WRITE: - if (newreq && qp->s_lsn != (u32) -1) - qp->s_lsn++; - /* FALLTHROUGH */ - case IB_WR_RDMA_WRITE_WITH_IMM: - /* If no credit, return. */ - if (qp->s_lsn != (u32) -1 && - ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { - qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT; - goto bail; - } - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); - ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); - ohdr->u.rc.reth.length = cpu_to_be32(len); - hwords += sizeof(struct ib_reth) / sizeof(u32); - wqe->lpsn = wqe->psn; - if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; - qp->s_state = OP(RDMA_WRITE_FIRST); - len = pmtu; - break; - } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE) - qp->s_state = OP(RDMA_WRITE_ONLY); - else { - qp->s_state = - OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); - /* Immediate data comes after RETH */ - ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; - hwords += 1; - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= 1 << 23; - } - bth2 = 1 << 31; /* Request ACK. */ - if (++qp->s_cur == qp->s_size) - qp->s_cur = 0; - break; - - case IB_WR_RDMA_READ: - /* - * Don't allow more operations to be started - * than the QP limits allow. 
- */ - if (newreq) { - if (qp->s_num_rd_atomic >= - qp->s_max_rd_atomic) { - qp->s_flags |= IPATH_S_RDMAR_PENDING; - goto bail; - } - qp->s_num_rd_atomic++; - if (qp->s_lsn != (u32) -1) - qp->s_lsn++; - /* - * Adjust s_next_psn to count the - * expected number of responses. - */ - if (len > pmtu) - qp->s_next_psn += (len - 1) / pmtu; - wqe->lpsn = qp->s_next_psn++; - } - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); - ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); - ohdr->u.rc.reth.length = cpu_to_be32(len); - qp->s_state = OP(RDMA_READ_REQUEST); - hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); - ss = NULL; - len = 0; - if (++qp->s_cur == qp->s_size) - qp->s_cur = 0; - break; - - case IB_WR_ATOMIC_CMP_AND_SWP: - case IB_WR_ATOMIC_FETCH_AND_ADD: - /* - * Don't allow more operations to be started - * than the QP limits allow. - */ - if (newreq) { - if (qp->s_num_rd_atomic >= - qp->s_max_rd_atomic) { - qp->s_flags |= IPATH_S_RDMAR_PENDING; - goto bail; - } - qp->s_num_rd_atomic++; - if (qp->s_lsn != (u32) -1) - qp->s_lsn++; - wqe->lpsn = wqe->psn; - } - if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - qp->s_state = OP(COMPARE_SWAP); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.swap); - ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); - } else { - qp->s_state = OP(FETCH_ADD); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); - ohdr->u.atomic_eth.compare_data = 0; - } - ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( - wqe->wr.wr.atomic.remote_addr >> 32); - ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( - wqe->wr.wr.atomic.remote_addr); - ohdr->u.atomic_eth.rkey = cpu_to_be32( - wqe->wr.wr.atomic.rkey); - hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); - ss = NULL; - len = 0; - if (++qp->s_cur == qp->s_size) - qp->s_cur = 0; - break; - - default: - goto bail; - } - qp->s_sge.sge = wqe->sg_list[0]; - qp->s_sge.sg_list = wqe->sg_list + 1; - 
qp->s_sge.num_sge = wqe->wr.num_sge; - qp->s_len = wqe->length; - if (newreq) { - qp->s_tail++; - if (qp->s_tail >= qp->s_size) - qp->s_tail = 0; - } - bth2 |= qp->s_psn & IPATH_PSN_MASK; - if (wqe->wr.opcode == IB_WR_RDMA_READ) - qp->s_psn = wqe->lpsn + 1; - else { - qp->s_psn++; - if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; - } - /* - * Put the QP on the pending list so lost ACKs will cause - * a retry. More than one request can be pending so the - * QP may already be on the dev->pending list. - */ - spin_lock(&dev->pending_lock); - if (list_empty(&qp->timerwait)) - list_add_tail(&qp->timerwait, - &dev->pending[dev->pending_index]); - spin_unlock(&dev->pending_lock); - break; - - case OP(RDMA_READ_RESPONSE_FIRST): - /* - * This case can only happen if a send is restarted. - * See ipath_restart_rc(). - */ - ipath_init_restart(qp, wqe); - /* FALLTHROUGH */ - case OP(SEND_FIRST): - qp->s_state = OP(SEND_MIDDLE); - /* FALLTHROUGH */ - case OP(SEND_MIDDLE): - bth2 = qp->s_psn++ & IPATH_PSN_MASK; - if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; - ss = &qp->s_sge; - len = qp->s_len; - if (len > pmtu) { - len = pmtu; - break; - } - if (wqe->wr.opcode == IB_WR_SEND) - qp->s_state = OP(SEND_LAST); - else { - qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); - /* Immediate data comes after the BTH */ - ohdr->u.imm_data = wqe->wr.ex.imm_data; - hwords += 1; - } - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= 1 << 23; - bth2 |= 1 << 31; /* Request ACK. */ - qp->s_cur++; - if (qp->s_cur >= qp->s_size) - qp->s_cur = 0; - break; - - case OP(RDMA_READ_RESPONSE_LAST): - /* - * This case can only happen if a RDMA write is restarted. - * See ipath_restart_rc(). 
- */ - ipath_init_restart(qp, wqe); - /* FALLTHROUGH */ - case OP(RDMA_WRITE_FIRST): - qp->s_state = OP(RDMA_WRITE_MIDDLE); - /* FALLTHROUGH */ - case OP(RDMA_WRITE_MIDDLE): - bth2 = qp->s_psn++ & IPATH_PSN_MASK; - if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; - ss = &qp->s_sge; - len = qp->s_len; - if (len > pmtu) { - len = pmtu; - break; - } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE) - qp->s_state = OP(RDMA_WRITE_LAST); - else { - qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); - /* Immediate data comes after the BTH */ - ohdr->u.imm_data = wqe->wr.ex.imm_data; - hwords += 1; - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= 1 << 23; - } - bth2 |= 1 << 31; /* Request ACK. */ - qp->s_cur++; - if (qp->s_cur >= qp->s_size) - qp->s_cur = 0; - break; - - case OP(RDMA_READ_RESPONSE_MIDDLE): - /* - * This case can only happen if a RDMA read is restarted. - * See ipath_restart_rc(). - */ - ipath_init_restart(qp, wqe); - len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu; - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len); - ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); - ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); - qp->s_state = OP(RDMA_READ_REQUEST); - hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); - bth2 = qp->s_psn & IPATH_PSN_MASK; - qp->s_psn = wqe->lpsn + 1; - ss = NULL; - len = 0; - qp->s_cur++; - if (qp->s_cur == qp->s_size) - qp->s_cur = 0; - break; - } - if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) - bth2 |= 1 << 31; /* Request ACK. 
*/ - qp->s_len -= len; - qp->s_hdrwords = hwords; - qp->s_cur_sge = ss; - qp->s_cur_size = len; - ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2); -done: - ret = 1; - goto unlock; - -bail: - qp->s_flags &= ~IPATH_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); - return ret; -} - -/** - * send_rc_ack - Construct an ACK packet and send it - * @qp: a pointer to the QP - * - * This is called from ipath_rc_rcv() and only uses the receive - * side QP state. - * Note that RDMA reads and atomics are handled in the - * send side QP state and tasklet. - */ -static void send_rc_ack(struct ipath_qp *qp) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - struct ipath_devdata *dd; - u16 lrh0; - u32 bth0; - u32 hwords; - u32 __iomem *piobuf; - struct ipath_ib_header hdr; - struct ipath_other_headers *ohdr; - unsigned long flags; - - spin_lock_irqsave(&qp->s_lock, flags); - - /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ - if (qp->r_head_ack_queue != qp->s_tail_ack_queue || - (qp->s_flags & IPATH_S_ACK_PENDING) || - qp->s_ack_state != OP(ACKNOWLEDGE)) - goto queue_ack; - - spin_unlock_irqrestore(&qp->s_lock, flags); - - /* Don't try to send ACKs if the link isn't ACTIVE */ - dd = dev->dd; - if (!(dd->ipath_flags & IPATH_LINKACTIVE)) - goto done; - - piobuf = ipath_getpiobuf(dd, 0, NULL); - if (!piobuf) { - /* - * We are out of PIO buffers at the moment. - * Pass responsibility for sending the ACK to the - * send tasklet so that when a PIO buffer becomes - * available, the ACK is sent ahead of other outgoing - * packets. - */ - spin_lock_irqsave(&qp->s_lock, flags); - goto queue_ack; - } - - /* Construct the header. */ - ohdr = &hdr.u.oth; - lrh0 = IPATH_LRH_BTH; - /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. 
*/ - hwords = 6; - if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { - hwords += ipath_make_grh(dev, &hdr.u.l.grh, - &qp->remote_ah_attr.grh, - hwords, 0); - ohdr = &hdr.u.l.oth; - lrh0 = IPATH_LRH_GRH; - } - /* read pkey_index w/o lock (its atomic) */ - bth0 = ipath_get_pkey(dd, qp->s_pkey_index) | - (OP(ACKNOWLEDGE) << 24) | (1 << 22); - if (qp->r_nak_state) - ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | - (qp->r_nak_state << - IPATH_AETH_CREDIT_SHIFT)); - else - ohdr->u.aeth = ipath_compute_aeth(qp); - lrh0 |= qp->remote_ah_attr.sl << 4; - hdr.lrh[0] = cpu_to_be16(lrh0); - hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); - hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); - hdr.lrh[3] = cpu_to_be16(dd->ipath_lid | - qp->remote_ah_attr.src_path_bits); - ohdr->bth[0] = cpu_to_be32(bth0); - ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); - ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK); - - writeq(hwords + 1, piobuf); - - if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) { - u32 *hdrp = (u32 *) &hdr; - - ipath_flush_wc(); - __iowrite32_copy(piobuf + 2, hdrp, hwords - 1); - ipath_flush_wc(); - __raw_writel(hdrp[hwords - 1], piobuf + hwords + 1); - } else - __iowrite32_copy(piobuf + 2, (u32 *) &hdr, hwords); - - ipath_flush_wc(); - - dev->n_unicast_xmit++; - goto done; - -queue_ack: - if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) { - dev->n_rc_qacks++; - qp->s_flags |= IPATH_S_ACK_PENDING; - qp->s_nak_state = qp->r_nak_state; - qp->s_ack_psn = qp->r_ack_psn; - - /* Schedule the send tasklet. */ - ipath_schedule_send(qp); - } - spin_unlock_irqrestore(&qp->s_lock, flags); -done: - return; -} - -/** - * reset_psn - reset the QP state to send starting from PSN - * @qp: the QP - * @psn: the packet sequence number to restart at - * - * This is called from ipath_rc_rcv() to process an incoming RC ACK - * for the given QP. - * Called at interrupt level with the QP s_lock held. 
- */ -static void reset_psn(struct ipath_qp *qp, u32 psn) -{ - u32 n = qp->s_last; - struct ipath_swqe *wqe = get_swqe_ptr(qp, n); - u32 opcode; - - qp->s_cur = n; - - /* - * If we are starting the request from the beginning, - * let the normal send code handle initialization. - */ - if (ipath_cmp24(psn, wqe->psn) <= 0) { - qp->s_state = OP(SEND_LAST); - goto done; - } - - /* Find the work request opcode corresponding to the given PSN. */ - opcode = wqe->wr.opcode; - for (;;) { - int diff; - - if (++n == qp->s_size) - n = 0; - if (n == qp->s_tail) - break; - wqe = get_swqe_ptr(qp, n); - diff = ipath_cmp24(psn, wqe->psn); - if (diff < 0) - break; - qp->s_cur = n; - /* - * If we are starting the request from the beginning, - * let the normal send code handle initialization. - */ - if (diff == 0) { - qp->s_state = OP(SEND_LAST); - goto done; - } - opcode = wqe->wr.opcode; - } - - /* - * Set the state to restart in the middle of a request. - * Don't change the s_sge, s_cur_sge, or s_cur_size. - * See ipath_make_rc_req(). - */ - switch (opcode) { - case IB_WR_SEND: - case IB_WR_SEND_WITH_IMM: - qp->s_state = OP(RDMA_READ_RESPONSE_FIRST); - break; - - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - qp->s_state = OP(RDMA_READ_RESPONSE_LAST); - break; - - case IB_WR_RDMA_READ: - qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE); - break; - - default: - /* - * This case shouldn't happen since its only - * one PSN per req. - */ - qp->s_state = OP(SEND_LAST); - } -done: - qp->s_psn = psn; -} - -/** - * ipath_restart_rc - back up requester to resend the last un-ACKed request - * @qp: the QP to restart - * @psn: packet sequence number for the request - * @wc: the work completion request - * - * The QP s_lock should be held and interrupts disabled. 
- */ -void ipath_restart_rc(struct ipath_qp *qp, u32 psn) -{ - struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); - struct ipath_ibdev *dev; - - if (qp->s_retry == 0) { - ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); - ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); - goto bail; - } - qp->s_retry--; - - /* - * Remove the QP from the timeout queue. - * Note: it may already have been removed by ipath_ib_timer(). - */ - dev = to_idev(qp->ibqp.device); - spin_lock(&dev->pending_lock); - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - if (!list_empty(&qp->piowait)) - list_del_init(&qp->piowait); - spin_unlock(&dev->pending_lock); - - if (wqe->wr.opcode == IB_WR_RDMA_READ) - dev->n_rc_resends++; - else - dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK; - - reset_psn(qp, psn); - ipath_schedule_send(qp); - -bail: - return; -} - -static inline void update_last_psn(struct ipath_qp *qp, u32 psn) -{ - qp->s_last_psn = psn; -} - -/** - * do_rc_ack - process an incoming RC ACK - * @qp: the QP the ACK came in on - * @psn: the packet sequence number of the ACK - * @opcode: the opcode of the request that resulted in the ACK - * - * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK - * for the given QP. - * Called at interrupt level with the QP s_lock held and interrupts disabled. - * Returns 1 if OK, 0 if current operation should be aborted (NAK). - */ -static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, - u64 val) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - struct ib_wc wc; - enum ib_wc_status status; - struct ipath_swqe *wqe; - int ret = 0; - u32 ack_psn; - int diff; - - /* - * Remove the QP from the timeout queue (or RNR timeout queue). - * If ipath_ib_timer() has already removed it, - * it's OK since we hold the QP s_lock and ipath_restart_rc() - * just won't find anything to restart if we ACK everything. 
- */ - spin_lock(&dev->pending_lock); - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - spin_unlock(&dev->pending_lock); - - /* - * Note that NAKs implicitly ACK outstanding SEND and RDMA write - * requests and implicitly NAK RDMA read and atomic requests issued - * before the NAK'ed request. The MSN won't include the NAK'ed - * request but will include an ACK'ed request(s). - */ - ack_psn = psn; - if (aeth >> 29) - ack_psn--; - wqe = get_swqe_ptr(qp, qp->s_last); - - /* - * The MSN might be for a later WQE than the PSN indicates so - * only complete WQEs that the PSN finishes. - */ - while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) { - /* - * RDMA_READ_RESPONSE_ONLY is a special case since - * we want to generate completion events for everything - * before the RDMA read, copy the data, then generate - * the completion for the read. - */ - if (wqe->wr.opcode == IB_WR_RDMA_READ && - opcode == OP(RDMA_READ_RESPONSE_ONLY) && - diff == 0) { - ret = 1; - goto bail; - } - /* - * If this request is a RDMA read or atomic, and the ACK is - * for a later operation, this ACK NAKs the RDMA read or - * atomic. In other words, only a RDMA_READ_LAST or ONLY - * can ACK a RDMA read and likewise for atomic ops. Note - * that the NAK case can only happen if relaxed ordering is - * used and requests are sent after an RDMA read or atomic - * is sent but before the response is received. - */ - if ((wqe->wr.opcode == IB_WR_RDMA_READ && - (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) || - ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || - wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && - (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) { - /* - * The last valid PSN seen is the previous - * request's. - */ - update_last_psn(qp, wqe->psn - 1); - /* Retry this request. */ - ipath_restart_rc(qp, wqe->psn); - /* - * No need to process the ACK/NAK since we are - * restarting an earlier request. 
- */ - goto bail; - } - if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || - wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) - *(u64 *) wqe->sg_list[0].vaddr = val; - if (qp->s_num_rd_atomic && - (wqe->wr.opcode == IB_WR_RDMA_READ || - wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || - wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { - qp->s_num_rd_atomic--; - /* Restart sending task if fence is complete */ - if (((qp->s_flags & IPATH_S_FENCE_PENDING) && - !qp->s_num_rd_atomic) || - qp->s_flags & IPATH_S_RDMAR_PENDING) - ipath_schedule_send(qp); - } - /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof wc); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - wc.src_qp = qp->remote_qpn; - wc.slid = qp->remote_ah_attr.dlid; - wc.sl = qp->remote_ah_attr.sl; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); - } - qp->s_retry = qp->s_retry_cnt; - /* - * If we are completing a request which is in the process of - * being resent, we can stop resending it since we know the - * responder has already seen it. - */ - if (qp->s_last == qp->s_cur) { - if (++qp->s_cur >= qp->s_size) - qp->s_cur = 0; - qp->s_last = qp->s_cur; - if (qp->s_last == qp->s_tail) - break; - wqe = get_swqe_ptr(qp, qp->s_cur); - qp->s_state = OP(SEND_LAST); - qp->s_psn = wqe->psn; - } else { - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur) - qp->s_draining = 0; - if (qp->s_last == qp->s_tail) - break; - wqe = get_swqe_ptr(qp, qp->s_last); - } - } - - switch (aeth >> 29) { - case 0: /* ACK */ - dev->n_rc_acks++; - /* If this is a partial ACK, reset the retransmit timer. 
*/ - if (qp->s_last != qp->s_tail) { - spin_lock(&dev->pending_lock); - if (list_empty(&qp->timerwait)) - list_add_tail(&qp->timerwait, - &dev->pending[dev->pending_index]); - spin_unlock(&dev->pending_lock); - /* - * If we get a partial ACK for a resent operation, - * we can stop resending the earlier packets and - * continue with the next packet the receiver wants. - */ - if (ipath_cmp24(qp->s_psn, psn) <= 0) { - reset_psn(qp, psn + 1); - ipath_schedule_send(qp); - } - } else if (ipath_cmp24(qp->s_psn, psn) <= 0) { - qp->s_state = OP(SEND_LAST); - qp->s_psn = psn + 1; - } - ipath_get_credit(qp, aeth); - qp->s_rnr_retry = qp->s_rnr_retry_cnt; - qp->s_retry = qp->s_retry_cnt; - update_last_psn(qp, psn); - ret = 1; - goto bail; - - case 1: /* RNR NAK */ - dev->n_rnr_naks++; - if (qp->s_last == qp->s_tail) - goto bail; - if (qp->s_rnr_retry == 0) { - status = IB_WC_RNR_RETRY_EXC_ERR; - goto class_b; - } - if (qp->s_rnr_retry_cnt < 7) - qp->s_rnr_retry--; - - /* The last valid PSN is the previous PSN. */ - update_last_psn(qp, psn - 1); - - if (wqe->wr.opcode == IB_WR_RDMA_READ) - dev->n_rc_resends++; - else - dev->n_rc_resends += - (qp->s_psn - psn) & IPATH_PSN_MASK; - - reset_psn(qp, psn); - - qp->s_rnr_timeout = - ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) & - IPATH_AETH_CREDIT_MASK]; - ipath_insert_rnr_queue(qp); - ipath_schedule_send(qp); - goto bail; - - case 3: /* NAK */ - if (qp->s_last == qp->s_tail) - goto bail; - /* The last valid PSN is the previous PSN. */ - update_last_psn(qp, psn - 1); - switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) & - IPATH_AETH_CREDIT_MASK) { - case 0: /* PSN sequence error */ - dev->n_seq_naks++; - /* - * Back up to the responder's expected PSN. - * Note that we might get a NAK in the middle of an - * RDMA READ response which terminates the RDMA - * READ. 
- */ - ipath_restart_rc(qp, psn); - break; - - case 1: /* Invalid Request */ - status = IB_WC_REM_INV_REQ_ERR; - dev->n_other_naks++; - goto class_b; - - case 2: /* Remote Access Error */ - status = IB_WC_REM_ACCESS_ERR; - dev->n_other_naks++; - goto class_b; - - case 3: /* Remote Operation Error */ - status = IB_WC_REM_OP_ERR; - dev->n_other_naks++; - class_b: - ipath_send_complete(qp, wqe, status); - ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); - break; - - default: - /* Ignore other reserved NAK error codes */ - goto reserved; - } - qp->s_rnr_retry = qp->s_rnr_retry_cnt; - goto bail; - - default: /* 2: reserved */ - reserved: - /* Ignore reserved NAK codes. */ - goto bail; - } - -bail: - return ret; -} - -/** - * ipath_rc_rcv_resp - process an incoming RC response packet - * @dev: the device this packet came in on - * @ohdr: the other headers for this packet - * @data: the packet data - * @tlen: the packet length - * @qp: the QP for this packet - * @opcode: the opcode for this packet - * @psn: the packet sequence number for this packet - * @hdrsize: the header length - * @pmtu: the path MTU - * @header_in_data: true if part of the header data is in the data buffer - * - * This is called from ipath_rc_rcv() to process an incoming RC response - * packet for the given QP. - * Called at interrupt level. - */ -static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, - struct ipath_other_headers *ohdr, - void *data, u32 tlen, - struct ipath_qp *qp, - u32 opcode, - u32 psn, u32 hdrsize, u32 pmtu, - int header_in_data) -{ - struct ipath_swqe *wqe; - enum ib_wc_status status; - unsigned long flags; - int diff; - u32 pad; - u32 aeth; - u64 val; - - spin_lock_irqsave(&qp->s_lock, flags); - - /* Double check we can process this now that we hold the s_lock. */ - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) - goto ack_done; - - /* Ignore invalid responses. */ - if (ipath_cmp24(psn, qp->s_next_psn) >= 0) - goto ack_done; - - /* Ignore duplicate responses. 
*/ - diff = ipath_cmp24(psn, qp->s_last_psn); - if (unlikely(diff <= 0)) { - /* Update credits for "ghost" ACKs */ - if (diff == 0 && opcode == OP(ACKNOWLEDGE)) { - if (!header_in_data) - aeth = be32_to_cpu(ohdr->u.aeth); - else { - aeth = be32_to_cpu(((__be32 *) data)[0]); - data += sizeof(__be32); - } - if ((aeth >> 29) == 0) - ipath_get_credit(qp, aeth); - } - goto ack_done; - } - - if (unlikely(qp->s_last == qp->s_tail)) - goto ack_done; - wqe = get_swqe_ptr(qp, qp->s_last); - status = IB_WC_SUCCESS; - - switch (opcode) { - case OP(ACKNOWLEDGE): - case OP(ATOMIC_ACKNOWLEDGE): - case OP(RDMA_READ_RESPONSE_FIRST): - if (!header_in_data) - aeth = be32_to_cpu(ohdr->u.aeth); - else { - aeth = be32_to_cpu(((__be32 *) data)[0]); - data += sizeof(__be32); - } - if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { - if (!header_in_data) { - __be32 *p = ohdr->u.at.atomic_ack_eth; - - val = ((u64) be32_to_cpu(p[0]) << 32) | - be32_to_cpu(p[1]); - } else - val = be64_to_cpu(((__be64 *) data)[0]); - } else - val = 0; - if (!do_rc_ack(qp, aeth, psn, opcode, val) || - opcode != OP(RDMA_READ_RESPONSE_FIRST)) - goto ack_done; - hdrsize += 4; - wqe = get_swqe_ptr(qp, qp->s_last); - if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) - goto ack_op_err; - qp->r_flags &= ~IPATH_R_RDMAR_SEQ; - /* - * If this is a response to a resent RDMA read, we - * have to be careful to copy the data to the right - * location. 
- */ - qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, - wqe, psn, pmtu); - goto read_middle; - - case OP(RDMA_READ_RESPONSE_MIDDLE): - /* no AETH, no ACK */ - if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { - dev->n_rdma_seq++; - if (qp->r_flags & IPATH_R_RDMAR_SEQ) - goto ack_done; - qp->r_flags |= IPATH_R_RDMAR_SEQ; - ipath_restart_rc(qp, qp->s_last_psn + 1); - goto ack_done; - } - if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) - goto ack_op_err; - read_middle: - if (unlikely(tlen != (hdrsize + pmtu + 4))) - goto ack_len_err; - if (unlikely(pmtu >= qp->s_rdma_read_len)) - goto ack_len_err; - - /* We got a response so update the timeout. */ - spin_lock(&dev->pending_lock); - if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait)) - list_move_tail(&qp->timerwait, - &dev->pending[dev->pending_index]); - spin_unlock(&dev->pending_lock); - - if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE)) - qp->s_retry = qp->s_retry_cnt; - - /* - * Update the RDMA receive state but do the copy w/o - * holding the locks and blocking interrupts. - */ - qp->s_rdma_read_len -= pmtu; - update_last_psn(qp, psn); - spin_unlock_irqrestore(&qp->s_lock, flags); - ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu); - goto bail; - - case OP(RDMA_READ_RESPONSE_ONLY): - if (!header_in_data) - aeth = be32_to_cpu(ohdr->u.aeth); - else - aeth = be32_to_cpu(((__be32 *) data)[0]); - if (!do_rc_ack(qp, aeth, psn, opcode, 0)) - goto ack_done; - /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - /* - * Check that the data size is >= 0 && <= pmtu. - * Remember to account for the AETH header (4) and - * ICRC (4). - */ - if (unlikely(tlen < (hdrsize + pad + 8))) - goto ack_len_err; - /* - * If this is a response to a resent RDMA read, we - * have to be careful to copy the data to the right - * location. 
- */ - wqe = get_swqe_ptr(qp, qp->s_last); - qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, - wqe, psn, pmtu); - goto read_last; - - case OP(RDMA_READ_RESPONSE_LAST): - /* ACKs READ req. */ - if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { - dev->n_rdma_seq++; - if (qp->r_flags & IPATH_R_RDMAR_SEQ) - goto ack_done; - qp->r_flags |= IPATH_R_RDMAR_SEQ; - ipath_restart_rc(qp, qp->s_last_psn + 1); - goto ack_done; - } - if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) - goto ack_op_err; - /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - /* - * Check that the data size is >= 1 && <= pmtu. - * Remember to account for the AETH header (4) and - * ICRC (4). - */ - if (unlikely(tlen <= (hdrsize + pad + 8))) - goto ack_len_err; - read_last: - tlen -= hdrsize + pad + 8; - if (unlikely(tlen != qp->s_rdma_read_len)) - goto ack_len_err; - if (!header_in_data) - aeth = be32_to_cpu(ohdr->u.aeth); - else { - aeth = be32_to_cpu(((__be32 *) data)[0]); - data += sizeof(__be32); - } - ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen); - (void) do_rc_ack(qp, aeth, psn, - OP(RDMA_READ_RESPONSE_LAST), 0); - goto ack_done; - } - -ack_op_err: - status = IB_WC_LOC_QP_OP_ERR; - goto ack_err; - -ack_len_err: - status = IB_WC_LOC_LEN_ERR; -ack_err: - ipath_send_complete(qp, wqe, status); - ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); -ack_done: - spin_unlock_irqrestore(&qp->s_lock, flags); -bail: - return; -} - -/** - * ipath_rc_rcv_error - process an incoming duplicate or error RC packet - * @dev: the device this packet came in on - * @ohdr: the other headers for this packet - * @data: the packet data - * @qp: the QP for this packet - * @opcode: the opcode for this packet - * @psn: the packet sequence number for this packet - * @diff: the difference between the PSN and the expected PSN - * @header_in_data: true if part of the header data is in the data buffer - * - * This is called from ipath_rc_rcv() to process an unexpected - * 
incoming RC packet for the given QP. - * Called at interrupt level. - * Return 1 if no more processing is needed; otherwise return 0 to - * schedule a response to be sent. - */ -static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, - struct ipath_other_headers *ohdr, - void *data, - struct ipath_qp *qp, - u32 opcode, - u32 psn, - int diff, - int header_in_data) -{ - struct ipath_ack_entry *e; - u8 i, prev; - int old_req; - unsigned long flags; - - if (diff > 0) { - /* - * Packet sequence error. - * A NAK will ACK earlier sends and RDMA writes. - * Don't queue the NAK if we already sent one. - */ - if (!qp->r_nak_state) { - qp->r_nak_state = IB_NAK_PSN_ERROR; - /* Use the expected PSN. */ - qp->r_ack_psn = qp->r_psn; - goto send_ack; - } - goto done; - } - - /* - * Handle a duplicate request. Don't re-execute SEND, RDMA - * write or atomic op. Don't NAK errors, just silently drop - * the duplicate request. Note that r_sge, r_len, and - * r_rcv_len may be in use so don't modify them. - * - * We are supposed to ACK the earliest duplicate PSN but we - * can coalesce an outstanding duplicate ACK. We have to - * send the earliest so that RDMA reads can be restarted at - * the requester's expected PSN. - * - * First, find where this duplicate PSN falls within the - * ACKs previously sent. - */ - psn &= IPATH_PSN_MASK; - e = NULL; - old_req = 1; - - spin_lock_irqsave(&qp->s_lock, flags); - /* Double check we can process this now that we hold the s_lock. 
*/ - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) - goto unlock_done; - - for (i = qp->r_head_ack_queue; ; i = prev) { - if (i == qp->s_tail_ack_queue) - old_req = 0; - if (i) - prev = i - 1; - else - prev = IPATH_MAX_RDMA_ATOMIC; - if (prev == qp->r_head_ack_queue) { - e = NULL; - break; - } - e = &qp->s_ack_queue[prev]; - if (!e->opcode) { - e = NULL; - break; - } - if (ipath_cmp24(psn, e->psn) >= 0) { - if (prev == qp->s_tail_ack_queue) - old_req = 0; - break; - } - } - switch (opcode) { - case OP(RDMA_READ_REQUEST): { - struct ib_reth *reth; - u32 offset; - u32 len; - - /* - * If we didn't find the RDMA read request in the ack queue, - * or the send tasklet is already backed up to send an - * earlier entry, we can ignore this request. - */ - if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req) - goto unlock_done; - /* RETH comes after BTH */ - if (!header_in_data) - reth = &ohdr->u.rc.reth; - else { - reth = (struct ib_reth *)data; - data += sizeof(*reth); - } - /* - * Address range must be a subset of the original - * request and start on pmtu boundaries. - * We reuse the old ack_queue slot since the requester - * should not back up and request an earlier PSN for the - * same request. 
- */ - offset = ((psn - e->psn) & IPATH_PSN_MASK) * - ib_mtu_enum_to_int(qp->path_mtu); - len = be32_to_cpu(reth->length); - if (unlikely(offset + len > e->rdma_sge.sge.sge_length)) - goto unlock_done; - if (len != 0) { - u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); - int ok; - - ok = ipath_rkey_ok(qp, &e->rdma_sge, - len, vaddr, rkey, - IB_ACCESS_REMOTE_READ); - if (unlikely(!ok)) - goto unlock_done; - } else { - e->rdma_sge.sg_list = NULL; - e->rdma_sge.num_sge = 0; - e->rdma_sge.sge.mr = NULL; - e->rdma_sge.sge.vaddr = NULL; - e->rdma_sge.sge.length = 0; - e->rdma_sge.sge.sge_length = 0; - } - e->psn = psn; - qp->s_ack_state = OP(ACKNOWLEDGE); - qp->s_tail_ack_queue = prev; - break; - } - - case OP(COMPARE_SWAP): - case OP(FETCH_ADD): { - /* - * If we didn't find the atomic request in the ack queue - * or the send tasklet is already backed up to send an - * earlier entry, we can ignore this request. - */ - if (!e || e->opcode != (u8) opcode || old_req) - goto unlock_done; - qp->s_ack_state = OP(ACKNOWLEDGE); - qp->s_tail_ack_queue = prev; - break; - } - - default: - if (old_req) - goto unlock_done; - /* - * Resend the most recent ACK if this request is - * after all the previous RDMA reads and atomics. - */ - if (i == qp->r_head_ack_queue) { - spin_unlock_irqrestore(&qp->s_lock, flags); - qp->r_nak_state = 0; - qp->r_ack_psn = qp->r_psn - 1; - goto send_ack; - } - /* - * Try to send a simple ACK to work around a Mellanox bug - * which doesn't accept a RDMA read response or atomic - * response as an ACK for earlier SENDs or RDMA writes. - */ - if (qp->r_head_ack_queue == qp->s_tail_ack_queue && - !(qp->s_flags & IPATH_S_ACK_PENDING) && - qp->s_ack_state == OP(ACKNOWLEDGE)) { - spin_unlock_irqrestore(&qp->s_lock, flags); - qp->r_nak_state = 0; - qp->r_ack_psn = qp->s_ack_queue[i].psn - 1; - goto send_ack; - } - /* - * Resend the RDMA read or atomic op which - * ACKs this duplicate request. 
- */ - qp->s_ack_state = OP(ACKNOWLEDGE); - qp->s_tail_ack_queue = i; - break; - } - qp->r_nak_state = 0; - ipath_schedule_send(qp); - -unlock_done: - spin_unlock_irqrestore(&qp->s_lock, flags); -done: - return 1; - -send_ack: - return 0; -} - -void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) -{ - unsigned long flags; - int lastwqe; - - spin_lock_irqsave(&qp->s_lock, flags); - lastwqe = ipath_error_qp(qp, err); - spin_unlock_irqrestore(&qp->s_lock, flags); - - if (lastwqe) { - struct ib_event ev; - - ev.device = qp->ibqp.device; - ev.element.qp = &qp->ibqp; - ev.event = IB_EVENT_QP_LAST_WQE_REACHED; - qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); - } -} - -static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n) -{ - unsigned next; - - next = n + 1; - if (next > IPATH_MAX_RDMA_ATOMIC) - next = 0; - if (n == qp->s_tail_ack_queue) { - qp->s_tail_ack_queue = next; - qp->s_ack_state = OP(ACKNOWLEDGE); - } -} - -/** - * ipath_rc_rcv - process an incoming RC packet - * @dev: the device this packet came in on - * @hdr: the header of this packet - * @has_grh: true if the header has a GRH - * @data: the packet data - * @tlen: the packet length - * @qp: the QP for this packet - * - * This is called from ipath_qp_rcv() to process an incoming RC packet - * for the given QP. - * Called at interrupt level. - */ -void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, - int has_grh, void *data, u32 tlen, struct ipath_qp *qp) -{ - struct ipath_other_headers *ohdr; - u32 opcode; - u32 hdrsize; - u32 psn; - u32 pad; - struct ib_wc wc; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); - int diff; - struct ib_reth *reth; - int header_in_data; - unsigned long flags; - - /* Validate the SLID. See Ch. 
9.6.1.5 */ - if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) - goto done; - - /* Check for GRH */ - if (!has_grh) { - ohdr = &hdr->u.oth; - hdrsize = 8 + 12; /* LRH + BTH */ - psn = be32_to_cpu(ohdr->bth[2]); - header_in_data = 0; - } else { - ohdr = &hdr->u.l.oth; - hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */ - /* - * The header with GRH is 60 bytes and the core driver sets - * the eager header buffer size to 56 bytes so the last 4 - * bytes of the BTH header (PSN) is in the data buffer. - */ - header_in_data = dev->dd->ipath_rcvhdrentsize == 16; - if (header_in_data) { - psn = be32_to_cpu(((__be32 *) data)[0]); - data += sizeof(__be32); - } else - psn = be32_to_cpu(ohdr->bth[2]); - } - - /* - * Process responses (ACKs) before anything else. Note that the - * packet sequence number will be for something in the send work - * queue rather than the expected receive packet sequence number. - * In other words, this QP is the requester. - */ - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && - opcode <= OP(ATOMIC_ACKNOWLEDGE)) { - ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn, - hdrsize, pmtu, header_in_data); - goto done; - } - - /* Compute 24 bits worth of difference. */ - diff = ipath_cmp24(psn, qp->r_psn); - if (unlikely(diff)) { - if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode, - psn, diff, header_in_data)) - goto done; - goto send_ack; - } - - /* Check for opcode sequence errors. 
*/ - switch (qp->r_state) { - case OP(SEND_FIRST): - case OP(SEND_MIDDLE): - if (opcode == OP(SEND_MIDDLE) || - opcode == OP(SEND_LAST) || - opcode == OP(SEND_LAST_WITH_IMMEDIATE)) - break; - goto nack_inv; - - case OP(RDMA_WRITE_FIRST): - case OP(RDMA_WRITE_MIDDLE): - if (opcode == OP(RDMA_WRITE_MIDDLE) || - opcode == OP(RDMA_WRITE_LAST) || - opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) - break; - goto nack_inv; - - default: - if (opcode == OP(SEND_MIDDLE) || - opcode == OP(SEND_LAST) || - opcode == OP(SEND_LAST_WITH_IMMEDIATE) || - opcode == OP(RDMA_WRITE_MIDDLE) || - opcode == OP(RDMA_WRITE_LAST) || - opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) - goto nack_inv; - /* - * Note that it is up to the requester to not send a new - * RDMA read or atomic operation before receiving an ACK - * for the previous operation. - */ - break; - } - - memset(&wc, 0, sizeof wc); - - /* OK, process the packet. */ - switch (opcode) { - case OP(SEND_FIRST): - if (!ipath_get_rwqe(qp, 0)) - goto rnr_nak; - qp->r_rcv_len = 0; - /* FALLTHROUGH */ - case OP(SEND_MIDDLE): - case OP(RDMA_WRITE_MIDDLE): - send_middle: - /* Check for invalid length PMTU or posted rwqe len. 
*/ - if (unlikely(tlen != (hdrsize + pmtu + 4))) - goto nack_inv; - qp->r_rcv_len += pmtu; - if (unlikely(qp->r_rcv_len > qp->r_len)) - goto nack_inv; - ipath_copy_sge(&qp->r_sge, data, pmtu); - break; - - case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): - /* consume RWQE */ - if (!ipath_get_rwqe(qp, 1)) - goto rnr_nak; - goto send_last_imm; - - case OP(SEND_ONLY): - case OP(SEND_ONLY_WITH_IMMEDIATE): - if (!ipath_get_rwqe(qp, 0)) - goto rnr_nak; - qp->r_rcv_len = 0; - if (opcode == OP(SEND_ONLY)) - goto send_last; - /* FALLTHROUGH */ - case OP(SEND_LAST_WITH_IMMEDIATE): - send_last_imm: - if (header_in_data) { - wc.ex.imm_data = *(__be32 *) data; - data += sizeof(__be32); - } else { - /* Immediate data comes after BTH */ - wc.ex.imm_data = ohdr->u.imm_data; - } - hdrsize += 4; - wc.wc_flags = IB_WC_WITH_IMM; - /* FALLTHROUGH */ - case OP(SEND_LAST): - case OP(RDMA_WRITE_LAST): - send_last: - /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - /* Check for invalid length. */ - /* XXX LAST len should be >= 1 */ - if (unlikely(tlen < (hdrsize + pad + 4))) - goto nack_inv; - /* Don't count the CRC. */ - tlen -= (hdrsize + pad + 4); - wc.byte_len = tlen + qp->r_rcv_len; - if (unlikely(wc.byte_len > qp->r_len)) - goto nack_inv; - ipath_copy_sge(&qp->r_sge, data, tlen); - qp->r_msn++; - if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) - break; - wc.wr_id = qp->r_wr_id; - wc.status = IB_WC_SUCCESS; - if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) || - opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) - wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; - else - wc.opcode = IB_WC_RECV; - wc.qp = &qp->ibqp; - wc.src_qp = qp->remote_qpn; - wc.slid = qp->remote_ah_attr.dlid; - wc.sl = qp->remote_ah_attr.sl; - /* Signal completion event if the solicited bit is set. 
*/ - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, - (ohdr->bth[0] & - cpu_to_be32(1 << 23)) != 0); - break; - - case OP(RDMA_WRITE_FIRST): - case OP(RDMA_WRITE_ONLY): - case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_WRITE))) - goto nack_inv; - /* consume RWQE */ - /* RETH comes after BTH */ - if (!header_in_data) - reth = &ohdr->u.rc.reth; - else { - reth = (struct ib_reth *)data; - data += sizeof(*reth); - } - hdrsize += sizeof(*reth); - qp->r_len = be32_to_cpu(reth->length); - qp->r_rcv_len = 0; - if (qp->r_len != 0) { - u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); - int ok; - - /* Check rkey & NAK */ - ok = ipath_rkey_ok(qp, &qp->r_sge, - qp->r_len, vaddr, rkey, - IB_ACCESS_REMOTE_WRITE); - if (unlikely(!ok)) - goto nack_acc; - } else { - qp->r_sge.sg_list = NULL; - qp->r_sge.sge.mr = NULL; - qp->r_sge.sge.vaddr = NULL; - qp->r_sge.sge.length = 0; - qp->r_sge.sge.sge_length = 0; - } - if (opcode == OP(RDMA_WRITE_FIRST)) - goto send_middle; - else if (opcode == OP(RDMA_WRITE_ONLY)) - goto send_last; - if (!ipath_get_rwqe(qp, 1)) - goto rnr_nak; - goto send_last_imm; - - case OP(RDMA_READ_REQUEST): { - struct ipath_ack_entry *e; - u32 len; - u8 next; - - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_READ))) - goto nack_inv; - next = qp->r_head_ack_queue + 1; - if (next > IPATH_MAX_RDMA_ATOMIC) - next = 0; - spin_lock_irqsave(&qp->s_lock, flags); - /* Double check we can process this while holding the s_lock. 
*/ - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) - goto unlock; - if (unlikely(next == qp->s_tail_ack_queue)) { - if (!qp->s_ack_queue[next].sent) - goto nack_inv_unlck; - ipath_update_ack_queue(qp, next); - } - e = &qp->s_ack_queue[qp->r_head_ack_queue]; - /* RETH comes after BTH */ - if (!header_in_data) - reth = &ohdr->u.rc.reth; - else { - reth = (struct ib_reth *)data; - data += sizeof(*reth); - } - len = be32_to_cpu(reth->length); - if (len) { - u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); - int ok; - - /* Check rkey & NAK */ - ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr, - rkey, IB_ACCESS_REMOTE_READ); - if (unlikely(!ok)) - goto nack_acc_unlck; - /* - * Update the next expected PSN. We add 1 later - * below, so only add the remainder here. - */ - if (len > pmtu) - qp->r_psn += (len - 1) / pmtu; - } else { - e->rdma_sge.sg_list = NULL; - e->rdma_sge.num_sge = 0; - e->rdma_sge.sge.mr = NULL; - e->rdma_sge.sge.vaddr = NULL; - e->rdma_sge.sge.length = 0; - e->rdma_sge.sge.sge_length = 0; - } - e->opcode = opcode; - e->sent = 0; - e->psn = psn; - /* - * We need to increment the MSN here instead of when we - * finish sending the result since a duplicate request would - * increment it more than once. - */ - qp->r_msn++; - qp->r_psn++; - qp->r_state = opcode; - qp->r_nak_state = 0; - qp->r_head_ack_queue = next; - - /* Schedule the send tasklet. */ - ipath_schedule_send(qp); - - goto unlock; - } - - case OP(COMPARE_SWAP): - case OP(FETCH_ADD): { - struct ib_atomic_eth *ateth; - struct ipath_ack_entry *e; - u64 vaddr; - atomic64_t *maddr; - u64 sdata; - u32 rkey; - u8 next; - - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_ATOMIC))) - goto nack_inv; - next = qp->r_head_ack_queue + 1; - if (next > IPATH_MAX_RDMA_ATOMIC) - next = 0; - spin_lock_irqsave(&qp->s_lock, flags); - /* Double check we can process this while holding the s_lock. 
*/ - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) - goto unlock; - if (unlikely(next == qp->s_tail_ack_queue)) { - if (!qp->s_ack_queue[next].sent) - goto nack_inv_unlck; - ipath_update_ack_queue(qp, next); - } - if (!header_in_data) - ateth = &ohdr->u.atomic_eth; - else - ateth = (struct ib_atomic_eth *)data; - vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | - be32_to_cpu(ateth->vaddr[1]); - if (unlikely(vaddr & (sizeof(u64) - 1))) - goto nack_inv_unlck; - rkey = be32_to_cpu(ateth->rkey); - /* Check rkey & NAK */ - if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, - sizeof(u64), vaddr, rkey, - IB_ACCESS_REMOTE_ATOMIC))) - goto nack_acc_unlck; - /* Perform atomic OP and save result. */ - maddr = (atomic64_t *) qp->r_sge.sge.vaddr; - sdata = be64_to_cpu(ateth->swap_data); - e = &qp->s_ack_queue[qp->r_head_ack_queue]; - e->atomic_data = (opcode == OP(FETCH_ADD)) ? - (u64) atomic64_add_return(sdata, maddr) - sdata : - (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, - be64_to_cpu(ateth->compare_data), - sdata); - e->opcode = opcode; - e->sent = 0; - e->psn = psn & IPATH_PSN_MASK; - qp->r_msn++; - qp->r_psn++; - qp->r_state = opcode; - qp->r_nak_state = 0; - qp->r_head_ack_queue = next; - - /* Schedule the send tasklet. */ - ipath_schedule_send(qp); - - goto unlock; - } - - default: - /* NAK unknown opcodes. */ - goto nack_inv; - } - qp->r_psn++; - qp->r_state = opcode; - qp->r_ack_psn = psn; - qp->r_nak_state = 0; - /* Send an ACK if requested or required. 
*/ - if (psn & (1 << 31)) - goto send_ack; - goto done; - -rnr_nak: - qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; - qp->r_ack_psn = qp->r_psn; - goto send_ack; - -nack_inv_unlck: - spin_unlock_irqrestore(&qp->s_lock, flags); -nack_inv: - ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR); - qp->r_nak_state = IB_NAK_INVALID_REQUEST; - qp->r_ack_psn = qp->r_psn; - goto send_ack; - -nack_acc_unlck: - spin_unlock_irqrestore(&qp->s_lock, flags); -nack_acc: - ipath_rc_error(qp, IB_WC_LOC_PROT_ERR); - qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; - qp->r_ack_psn = qp->r_psn; -send_ack: - send_rc_ack(qp); - goto done; - -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); -done: - return; -} diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h deleted file mode 100644 index 8f44d0cf3..000000000 --- a/drivers/infiniband/hw/ipath/ipath_registers.h +++ /dev/null @@ -1,512 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _IPATH_REGISTERS_H -#define _IPATH_REGISTERS_H - -/* - * This file should only be included by kernel source, and by the diags. It - * defines the registers, and their contents, for InfiniPath chips. - */ - -/* - * These are the InfiniPath register and buffer bit definitions, - * that are visible to software, and needed only by the kernel - * and diag code. A few, that are visible to protocol and user - * code are in ipath_common.h. Some bits are specific - * to a given chip implementation, and have been moved to the - * chip-specific source file - */ - -/* kr_revision bits */ -#define INFINIPATH_R_CHIPREVMINOR_MASK 0xFF -#define INFINIPATH_R_CHIPREVMINOR_SHIFT 0 -#define INFINIPATH_R_CHIPREVMAJOR_MASK 0xFF -#define INFINIPATH_R_CHIPREVMAJOR_SHIFT 8 -#define INFINIPATH_R_ARCH_MASK 0xFF -#define INFINIPATH_R_ARCH_SHIFT 16 -#define INFINIPATH_R_SOFTWARE_MASK 0xFF -#define INFINIPATH_R_SOFTWARE_SHIFT 24 -#define INFINIPATH_R_BOARDID_MASK 0xFF -#define INFINIPATH_R_BOARDID_SHIFT 32 - -/* kr_control bits */ -#define INFINIPATH_C_FREEZEMODE 0x00000002 -#define INFINIPATH_C_LINKENABLE 0x00000004 - -/* kr_sendctrl bits */ -#define INFINIPATH_S_DISARMPIOBUF_SHIFT 16 -#define INFINIPATH_S_UPDTHRESH_SHIFT 24 -#define INFINIPATH_S_UPDTHRESH_MASK 0x1f - -#define IPATH_S_ABORT 0 -#define IPATH_S_PIOINTBUFAVAIL 1 -#define IPATH_S_PIOBUFAVAILUPD 2 -#define IPATH_S_PIOENABLE 3 -#define IPATH_S_SDMAINTENABLE 9 -#define IPATH_S_SDMASINGLEDESCRIPTOR 10 -#define IPATH_S_SDMAENABLE 11 
-#define IPATH_S_SDMAHALT 12 -#define IPATH_S_DISARM 31 - -#define INFINIPATH_S_ABORT (1U << IPATH_S_ABORT) -#define INFINIPATH_S_PIOINTBUFAVAIL (1U << IPATH_S_PIOINTBUFAVAIL) -#define INFINIPATH_S_PIOBUFAVAILUPD (1U << IPATH_S_PIOBUFAVAILUPD) -#define INFINIPATH_S_PIOENABLE (1U << IPATH_S_PIOENABLE) -#define INFINIPATH_S_SDMAINTENABLE (1U << IPATH_S_SDMAINTENABLE) -#define INFINIPATH_S_SDMASINGLEDESCRIPTOR \ - (1U << IPATH_S_SDMASINGLEDESCRIPTOR) -#define INFINIPATH_S_SDMAENABLE (1U << IPATH_S_SDMAENABLE) -#define INFINIPATH_S_SDMAHALT (1U << IPATH_S_SDMAHALT) -#define INFINIPATH_S_DISARM (1U << IPATH_S_DISARM) - -/* kr_rcvctrl bits that are the same on multiple chips */ -#define INFINIPATH_R_PORTENABLE_SHIFT 0 -#define INFINIPATH_R_QPMAP_ENABLE (1ULL << 38) - -/* kr_intstatus, kr_intclear, kr_intmask bits */ -#define INFINIPATH_I_SDMAINT 0x8000000000000000ULL -#define INFINIPATH_I_SDMADISABLED 0x4000000000000000ULL -#define INFINIPATH_I_ERROR 0x0000000080000000ULL -#define INFINIPATH_I_SPIOSENT 0x0000000040000000ULL -#define INFINIPATH_I_SPIOBUFAVAIL 0x0000000020000000ULL -#define INFINIPATH_I_GPIO 0x0000000010000000ULL -#define INFINIPATH_I_JINT 0x0000000004000000ULL - -/* kr_errorstatus, kr_errorclear, kr_errormask bits */ -#define INFINIPATH_E_RFORMATERR 0x0000000000000001ULL -#define INFINIPATH_E_RVCRC 0x0000000000000002ULL -#define INFINIPATH_E_RICRC 0x0000000000000004ULL -#define INFINIPATH_E_RMINPKTLEN 0x0000000000000008ULL -#define INFINIPATH_E_RMAXPKTLEN 0x0000000000000010ULL -#define INFINIPATH_E_RLONGPKTLEN 0x0000000000000020ULL -#define INFINIPATH_E_RSHORTPKTLEN 0x0000000000000040ULL -#define INFINIPATH_E_RUNEXPCHAR 0x0000000000000080ULL -#define INFINIPATH_E_RUNSUPVL 0x0000000000000100ULL -#define INFINIPATH_E_REBP 0x0000000000000200ULL -#define INFINIPATH_E_RIBFLOW 0x0000000000000400ULL -#define INFINIPATH_E_RBADVERSION 0x0000000000000800ULL -#define INFINIPATH_E_RRCVEGRFULL 0x0000000000001000ULL -#define INFINIPATH_E_RRCVHDRFULL 
0x0000000000002000ULL -#define INFINIPATH_E_RBADTID 0x0000000000004000ULL -#define INFINIPATH_E_RHDRLEN 0x0000000000008000ULL -#define INFINIPATH_E_RHDR 0x0000000000010000ULL -#define INFINIPATH_E_RIBLOSTLINK 0x0000000000020000ULL -#define INFINIPATH_E_SENDSPECIALTRIGGER 0x0000000008000000ULL -#define INFINIPATH_E_SDMADISABLED 0x0000000010000000ULL -#define INFINIPATH_E_SMINPKTLEN 0x0000000020000000ULL -#define INFINIPATH_E_SMAXPKTLEN 0x0000000040000000ULL -#define INFINIPATH_E_SUNDERRUN 0x0000000080000000ULL -#define INFINIPATH_E_SPKTLEN 0x0000000100000000ULL -#define INFINIPATH_E_SDROPPEDSMPPKT 0x0000000200000000ULL -#define INFINIPATH_E_SDROPPEDDATAPKT 0x0000000400000000ULL -#define INFINIPATH_E_SPIOARMLAUNCH 0x0000000800000000ULL -#define INFINIPATH_E_SUNEXPERRPKTNUM 0x0000001000000000ULL -#define INFINIPATH_E_SUNSUPVL 0x0000002000000000ULL -#define INFINIPATH_E_SENDBUFMISUSE 0x0000004000000000ULL -#define INFINIPATH_E_SDMAGENMISMATCH 0x0000008000000000ULL -#define INFINIPATH_E_SDMAOUTOFBOUND 0x0000010000000000ULL -#define INFINIPATH_E_SDMATAILOUTOFBOUND 0x0000020000000000ULL -#define INFINIPATH_E_SDMABASE 0x0000040000000000ULL -#define INFINIPATH_E_SDMA1STDESC 0x0000080000000000ULL -#define INFINIPATH_E_SDMARPYTAG 0x0000100000000000ULL -#define INFINIPATH_E_SDMADWEN 0x0000200000000000ULL -#define INFINIPATH_E_SDMAMISSINGDW 0x0000400000000000ULL -#define INFINIPATH_E_SDMAUNEXPDATA 0x0000800000000000ULL -#define INFINIPATH_E_IBSTATUSCHANGED 0x0001000000000000ULL -#define INFINIPATH_E_INVALIDADDR 0x0002000000000000ULL -#define INFINIPATH_E_RESET 0x0004000000000000ULL -#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL -#define INFINIPATH_E_SDMADESCADDRMISALIGN 0x0010000000000000ULL -#define INFINIPATH_E_INVALIDEEPCMD 0x0020000000000000ULL - -/* - * this is used to print "common" packet errors only when the - * __IPATH_ERRPKTDBG bit is set in ipath_debug. 
- */ -#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \ - | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \ - | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \ - | INFINIPATH_E_REBP ) - -/* Convenience for decoding Send DMA errors */ -#define INFINIPATH_E_SDMAERRS ( \ - INFINIPATH_E_SDMAGENMISMATCH | INFINIPATH_E_SDMAOUTOFBOUND | \ - INFINIPATH_E_SDMATAILOUTOFBOUND | INFINIPATH_E_SDMABASE | \ - INFINIPATH_E_SDMA1STDESC | INFINIPATH_E_SDMARPYTAG | \ - INFINIPATH_E_SDMADWEN | INFINIPATH_E_SDMAMISSINGDW | \ - INFINIPATH_E_SDMAUNEXPDATA | \ - INFINIPATH_E_SDMADESCADDRMISALIGN | \ - INFINIPATH_E_SDMADISABLED | \ - INFINIPATH_E_SENDBUFMISUSE) - -/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ -/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo - * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: expTID, 3: eagerTID - * bit 4: flag buffer, 5: datainfo, 6: header info */ -#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL -#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40 -#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44 -#define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL -#define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL -/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */ -#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF 0x1ULL -#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC 0x2ULL -#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL -/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */ -#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x04ULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL -/* waldo specific -- find the rest in 
ipath_6110.c */ -#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL -/* 6120/7220 specific -- find the rest in ipath_6120.c and ipath_7220.c */ -#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL - -/* kr_hwdiagctrl bits */ -#define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL -#define INFINIPATH_DC_FORCETXEMEMPARITYERR_SHIFT 40 -#define INFINIPATH_DC_FORCERXEMEMPARITYERR_MASK 0x7FULL -#define INFINIPATH_DC_FORCERXEMEMPARITYERR_SHIFT 44 -#define INFINIPATH_DC_FORCERXDSYNCMEMPARITYERR 0x0000000400000000ULL -#define INFINIPATH_DC_COUNTERDISABLE 0x1000000000000000ULL -#define INFINIPATH_DC_COUNTERWREN 0x2000000000000000ULL -#define INFINIPATH_DC_FORCEIBCBUSTOSPCPARITYERR 0x4000000000000000ULL -#define INFINIPATH_DC_FORCEIBCBUSFRSPCPARITYERR 0x8000000000000000ULL - -/* kr_ibcctrl bits */ -#define INFINIPATH_IBCC_FLOWCTRLPERIOD_MASK 0xFFULL -#define INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT 0 -#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_MASK 0xFFULL -#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT 8 -#define INFINIPATH_IBCC_LINKINITCMD_MASK 0x3ULL -#define INFINIPATH_IBCC_LINKINITCMD_DISABLE 1 -/* cycle through TS1/TS2 till OK */ -#define INFINIPATH_IBCC_LINKINITCMD_POLL 2 -/* wait for TS1, then go on */ -#define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3 -#define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16 -#define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL -#define INFINIPATH_IBCC_LINKCMD_DOWN 1 /* move to 0x11 */ -#define INFINIPATH_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */ -#define INFINIPATH_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */ -#define INFINIPATH_IBCC_LINKCMD_SHIFT 18 -#define INFINIPATH_IBCC_MAXPKTLEN_MASK 0x7FFULL -#define INFINIPATH_IBCC_MAXPKTLEN_SHIFT 20 -#define INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK 0xFULL -#define INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT 32 -#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK 0xFULL -#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT 36 -#define INFINIPATH_IBCC_CREDITSCALE_MASK 0x7ULL -#define INFINIPATH_IBCC_CREDITSCALE_SHIFT 40 -#define 
INFINIPATH_IBCC_LOOPBACK 0x8000000000000000ULL -#define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL - -/* kr_ibcstatus bits */ -#define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0 -#define INFINIPATH_IBCS_LINKSTATE_MASK 0x7 - -#define INFINIPATH_IBCS_TXREADY 0x40000000 -#define INFINIPATH_IBCS_TXCREDITOK 0x80000000 -/* link training states (shift by - INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) */ -#define INFINIPATH_IBCS_LT_STATE_DISABLED 0x00 -#define INFINIPATH_IBCS_LT_STATE_LINKUP 0x01 -#define INFINIPATH_IBCS_LT_STATE_POLLACTIVE 0x02 -#define INFINIPATH_IBCS_LT_STATE_POLLQUIET 0x03 -#define INFINIPATH_IBCS_LT_STATE_SLEEPDELAY 0x04 -#define INFINIPATH_IBCS_LT_STATE_SLEEPQUIET 0x05 -#define INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE 0x08 -#define INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG 0x09 -#define INFINIPATH_IBCS_LT_STATE_CFGWAITRMT 0x0a -#define INFINIPATH_IBCS_LT_STATE_CFGIDLE 0x0b -#define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN 0x0c -#define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT 0x0e -#define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE 0x0f -/* link state machine states (shift by ibcs_ls_shift) */ -#define INFINIPATH_IBCS_L_STATE_DOWN 0x0 -#define INFINIPATH_IBCS_L_STATE_INIT 0x1 -#define INFINIPATH_IBCS_L_STATE_ARM 0x2 -#define INFINIPATH_IBCS_L_STATE_ACTIVE 0x3 -#define INFINIPATH_IBCS_L_STATE_ACT_DEFER 0x4 - - -/* kr_extstatus bits */ -#define INFINIPATH_EXTS_SERDESPLLLOCK 0x1 -#define INFINIPATH_EXTS_GPIOIN_MASK 0xFFFFULL -#define INFINIPATH_EXTS_GPIOIN_SHIFT 48 - -/* kr_extctrl bits */ -#define INFINIPATH_EXTC_GPIOINVERT_MASK 0xFFFFULL -#define INFINIPATH_EXTC_GPIOINVERT_SHIFT 32 -#define INFINIPATH_EXTC_GPIOOE_MASK 0xFFFFULL -#define INFINIPATH_EXTC_GPIOOE_SHIFT 48 -#define INFINIPATH_EXTC_SERDESENABLE 0x80000000ULL -#define INFINIPATH_EXTC_SERDESCONNECT 0x40000000ULL -#define INFINIPATH_EXTC_SERDESENTRUNKING 0x20000000ULL -#define INFINIPATH_EXTC_SERDESDISRXFIFO 0x10000000ULL -#define INFINIPATH_EXTC_SERDESENPLPBK1 0x08000000ULL -#define 
INFINIPATH_EXTC_SERDESENPLPBK2 0x04000000ULL -#define INFINIPATH_EXTC_SERDESENENCDEC 0x02000000ULL -#define INFINIPATH_EXTC_LED1SECPORT_ON 0x00000020ULL -#define INFINIPATH_EXTC_LED2SECPORT_ON 0x00000010ULL -#define INFINIPATH_EXTC_LED1PRIPORT_ON 0x00000008ULL -#define INFINIPATH_EXTC_LED2PRIPORT_ON 0x00000004ULL -#define INFINIPATH_EXTC_LEDGBLOK_ON 0x00000002ULL -#define INFINIPATH_EXTC_LEDGBLERR_OFF 0x00000001ULL - -/* kr_partitionkey bits */ -#define INFINIPATH_PKEY_SIZE 16 -#define INFINIPATH_PKEY_MASK 0xFFFF -#define INFINIPATH_PKEY_DEFAULT_PKEY 0xFFFF - -/* kr_serdesconfig0 bits */ -#define INFINIPATH_SERDC0_RESET_MASK 0xfULL /* overal reset bits */ -#define INFINIPATH_SERDC0_RESET_PLL 0x10000000ULL /* pll reset */ -/* tx idle enables (per lane) */ -#define INFINIPATH_SERDC0_TXIDLE 0xF000ULL -/* rx detect enables (per lane) */ -#define INFINIPATH_SERDC0_RXDETECT_EN 0xF0000ULL -/* L1 Power down; use with RXDETECT, Otherwise not used on IB side */ -#define INFINIPATH_SERDC0_L1PWR_DN 0xF0ULL - -/* common kr_xgxsconfig bits (or safe in all, even if not implemented) */ -#define INFINIPATH_XGXS_RX_POL_SHIFT 19 -#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL - - -/* - * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our - * PIO send buffers. This is well beyond anything currently - * defined in the InfiniBand spec. - */ -#define IPATH_PIO_MAXIBHDR 128 - -typedef u64 ipath_err_t; - -/* The following change with the type of device, so - * need to be part of the ipath_devdata struct, or - * we could have problems plugging in devices of - * different types (e.g. one HT, one PCIE) - * in one system, to be managed by one driver. - * On the other hand, this file is may also be included - * by other code, so leave the declarations here - * temporarily. Minor footprint issue if common-model - * linker used, none if C89+ linker used. 
- */ - -/* mask of defined bits for various registers */ -extern u64 infinipath_i_bitsextant; -extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant; - -/* masks that are different in various chips, or only exist in some chips */ -extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask; - -/* - * These are the infinipath general register numbers (not offsets). - * The kernel registers are used directly, those beyond the kernel - * registers are calculated from one of the base registers. The use of - * an integer type doesn't allow type-checking as thorough as, say, - * an enum but allows for better hiding of chip differences. - */ -typedef const u16 ipath_kreg, /* infinipath general registers */ - ipath_creg, /* infinipath counter registers */ - ipath_sreg; /* kernel-only, infinipath send registers */ - -/* - * These are the chip registers common to all infinipath chips, and - * used both by the kernel and the diagnostics or other user code. - * They are all implemented such that 64 bit accesses work. - * Some implement no more than 32 bits. Because 64 bit reads - * require 2 HT cmds on opteron, we access those with 32 bit - * reads for efficiency (they are written as 64 bits, since - * the extra 32 bits are nearly free on writes, and it slightly reduces - * complexity). The rest are all accessed as 64 bits. 
- */ -struct ipath_kregs { - /* These are the 32 bit group */ - ipath_kreg kr_control; - ipath_kreg kr_counterregbase; - ipath_kreg kr_intmask; - ipath_kreg kr_intstatus; - ipath_kreg kr_pagealign; - ipath_kreg kr_portcnt; - ipath_kreg kr_rcvtidbase; - ipath_kreg kr_rcvtidcnt; - ipath_kreg kr_rcvegrbase; - ipath_kreg kr_rcvegrcnt; - ipath_kreg kr_scratch; - ipath_kreg kr_sendctrl; - ipath_kreg kr_sendpiobufbase; - ipath_kreg kr_sendpiobufcnt; - ipath_kreg kr_sendpiosize; - ipath_kreg kr_sendregbase; - ipath_kreg kr_userregbase; - /* These are the 64 bit group */ - ipath_kreg kr_debugport; - ipath_kreg kr_debugportselect; - ipath_kreg kr_errorclear; - ipath_kreg kr_errormask; - ipath_kreg kr_errorstatus; - ipath_kreg kr_extctrl; - ipath_kreg kr_extstatus; - ipath_kreg kr_gpio_clear; - ipath_kreg kr_gpio_mask; - ipath_kreg kr_gpio_out; - ipath_kreg kr_gpio_status; - ipath_kreg kr_hwdiagctrl; - ipath_kreg kr_hwerrclear; - ipath_kreg kr_hwerrmask; - ipath_kreg kr_hwerrstatus; - ipath_kreg kr_ibcctrl; - ipath_kreg kr_ibcstatus; - ipath_kreg kr_intblocked; - ipath_kreg kr_intclear; - ipath_kreg kr_interruptconfig; - ipath_kreg kr_mdio; - ipath_kreg kr_partitionkey; - ipath_kreg kr_rcvbthqp; - ipath_kreg kr_rcvbufbase; - ipath_kreg kr_rcvbufsize; - ipath_kreg kr_rcvctrl; - ipath_kreg kr_rcvhdrcnt; - ipath_kreg kr_rcvhdrentsize; - ipath_kreg kr_rcvhdrsize; - ipath_kreg kr_rcvintmembase; - ipath_kreg kr_rcvintmemsize; - ipath_kreg kr_revision; - ipath_kreg kr_sendbuffererror; - ipath_kreg kr_sendpioavailaddr; - ipath_kreg kr_serdesconfig0; - ipath_kreg kr_serdesconfig1; - ipath_kreg kr_serdesstatus; - ipath_kreg kr_txintmembase; - ipath_kreg kr_txintmemsize; - ipath_kreg kr_xgxsconfig; - ipath_kreg kr_ibpllcfg; - /* use these two (and the following N ports) only with - * ipath_k*_kreg64_port(); not *kreg64() */ - ipath_kreg kr_rcvhdraddr; - ipath_kreg kr_rcvhdrtailaddr; - - /* remaining registers are not present on all types of infinipath - chips */ - ipath_kreg 
kr_rcvpktledcnt; - ipath_kreg kr_pcierbuftestreg0; - ipath_kreg kr_pcierbuftestreg1; - ipath_kreg kr_pcieq0serdesconfig0; - ipath_kreg kr_pcieq0serdesconfig1; - ipath_kreg kr_pcieq0serdesstatus; - ipath_kreg kr_pcieq1serdesconfig0; - ipath_kreg kr_pcieq1serdesconfig1; - ipath_kreg kr_pcieq1serdesstatus; - ipath_kreg kr_hrtbt_guid; - ipath_kreg kr_ibcddrctrl; - ipath_kreg kr_ibcddrstatus; - ipath_kreg kr_jintreload; - - /* send dma related regs */ - ipath_kreg kr_senddmabase; - ipath_kreg kr_senddmalengen; - ipath_kreg kr_senddmatail; - ipath_kreg kr_senddmahead; - ipath_kreg kr_senddmaheadaddr; - ipath_kreg kr_senddmabufmask0; - ipath_kreg kr_senddmabufmask1; - ipath_kreg kr_senddmabufmask2; - ipath_kreg kr_senddmastatus; - - /* SerDes related regs (IBA7220-only) */ - ipath_kreg kr_ibserdesctrl; - ipath_kreg kr_ib_epbacc; - ipath_kreg kr_ib_epbtrans; - ipath_kreg kr_pcie_epbacc; - ipath_kreg kr_pcie_epbtrans; - ipath_kreg kr_ib_ddsrxeq; -}; - -struct ipath_cregs { - ipath_creg cr_badformatcnt; - ipath_creg cr_erricrccnt; - ipath_creg cr_errlinkcnt; - ipath_creg cr_errlpcrccnt; - ipath_creg cr_errpkey; - ipath_creg cr_errrcvflowctrlcnt; - ipath_creg cr_err_rlencnt; - ipath_creg cr_errslencnt; - ipath_creg cr_errtidfull; - ipath_creg cr_errtidvalid; - ipath_creg cr_errvcrccnt; - ipath_creg cr_ibstatuschange; - ipath_creg cr_intcnt; - ipath_creg cr_invalidrlencnt; - ipath_creg cr_invalidslencnt; - ipath_creg cr_lbflowstallcnt; - ipath_creg cr_iblinkdowncnt; - ipath_creg cr_iblinkerrrecovcnt; - ipath_creg cr_ibsymbolerrcnt; - ipath_creg cr_pktrcvcnt; - ipath_creg cr_pktrcvflowctrlcnt; - ipath_creg cr_pktsendcnt; - ipath_creg cr_pktsendflowcnt; - ipath_creg cr_portovflcnt; - ipath_creg cr_rcvebpcnt; - ipath_creg cr_rcvovflcnt; - ipath_creg cr_rxdroppktcnt; - ipath_creg cr_senddropped; - ipath_creg cr_sendstallcnt; - ipath_creg cr_sendunderruncnt; - ipath_creg cr_unsupvlcnt; - ipath_creg cr_wordrcvcnt; - ipath_creg cr_wordsendcnt; - ipath_creg cr_vl15droppedpktcnt; - 
ipath_creg cr_rxotherlocalphyerrcnt; - ipath_creg cr_excessbufferovflcnt; - ipath_creg cr_locallinkintegrityerrcnt; - ipath_creg cr_rxvlerrcnt; - ipath_creg cr_rxdlidfltrcnt; - ipath_creg cr_psstat; - ipath_creg cr_psstart; - ipath_creg cr_psinterval; - ipath_creg cr_psrcvdatacount; - ipath_creg cr_psrcvpktscount; - ipath_creg cr_psxmitdatacount; - ipath_creg cr_psxmitpktscount; - ipath_creg cr_psxmitwaitcount; -}; - -#endif /* _IPATH_REGISTERS_H */ diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c deleted file mode 100644 index 1f95bbaf7..000000000 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ /dev/null @@ -1,734 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/sched.h> -#include <linux/spinlock.h> - -#include "ipath_verbs.h" -#include "ipath_kernel.h" - -/* - * Convert the AETH RNR timeout code into the number of milliseconds. - */ -const u32 ib_ipath_rnr_table[32] = { - 656, /* 0 */ - 1, /* 1 */ - 1, /* 2 */ - 1, /* 3 */ - 1, /* 4 */ - 1, /* 5 */ - 1, /* 6 */ - 1, /* 7 */ - 1, /* 8 */ - 1, /* 9 */ - 1, /* A */ - 1, /* B */ - 1, /* C */ - 1, /* D */ - 2, /* E */ - 2, /* F */ - 3, /* 10 */ - 4, /* 11 */ - 6, /* 12 */ - 8, /* 13 */ - 11, /* 14 */ - 16, /* 15 */ - 21, /* 16 */ - 31, /* 17 */ - 41, /* 18 */ - 62, /* 19 */ - 82, /* 1A */ - 123, /* 1B */ - 164, /* 1C */ - 246, /* 1D */ - 328, /* 1E */ - 492 /* 1F */ -}; - -/** - * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device - * @qp: the QP - * - * Called with the QP s_lock held and interrupts disabled. - * XXX Use a simple list for now. We might need a priority - * queue if we have lots of QPs waiting for RNR timeouts - * but that should be rare. 
- */ -void ipath_insert_rnr_queue(struct ipath_qp *qp) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - - /* We already did a spin_lock_irqsave(), so just use spin_lock */ - spin_lock(&dev->pending_lock); - if (list_empty(&dev->rnrwait)) - list_add(&qp->timerwait, &dev->rnrwait); - else { - struct list_head *l = &dev->rnrwait; - struct ipath_qp *nqp = list_entry(l->next, struct ipath_qp, - timerwait); - - while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) { - qp->s_rnr_timeout -= nqp->s_rnr_timeout; - l = l->next; - if (l->next == &dev->rnrwait) { - nqp = NULL; - break; - } - nqp = list_entry(l->next, struct ipath_qp, - timerwait); - } - if (nqp) - nqp->s_rnr_timeout -= qp->s_rnr_timeout; - list_add(&qp->timerwait, l); - } - spin_unlock(&dev->pending_lock); -} - -/** - * ipath_init_sge - Validate a RWQE and fill in the SGE state - * @qp: the QP - * - * Return 1 if OK. - */ -int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, - u32 *lengthp, struct ipath_sge_state *ss) -{ - int i, j, ret; - struct ib_wc wc; - - *lengthp = 0; - for (i = j = 0; i < wqe->num_sge; i++) { - if (wqe->sg_list[i].length == 0) - continue; - /* Check LKEY */ - if (!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge, - &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) - goto bad_lkey; - *lengthp += wqe->sg_list[i].length; - j++; - } - ss->num_sge = j; - ret = 1; - goto bail; - -bad_lkey: - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr_id; - wc.status = IB_WC_LOC_PROT_ERR; - wc.opcode = IB_WC_RECV; - wc.qp = &qp->ibqp; - /* Signal solicited completion event. */ - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); - ret = 0; -bail: - return ret; -} - -/** - * ipath_get_rwqe - copy the next RWQE into the QP's RWQE - * @qp: the QP - * @wr_id_only: update qp->r_wr_id only, not qp->r_sge - * - * Return 0 if no RWQE is available, otherwise return 1. - * - * Can be called from interrupt level. 
- */ -int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) -{ - unsigned long flags; - struct ipath_rq *rq; - struct ipath_rwq *wq; - struct ipath_srq *srq; - struct ipath_rwqe *wqe; - void (*handler)(struct ib_event *, void *); - u32 tail; - int ret; - - if (qp->ibqp.srq) { - srq = to_isrq(qp->ibqp.srq); - handler = srq->ibsrq.event_handler; - rq = &srq->rq; - } else { - srq = NULL; - handler = NULL; - rq = &qp->r_rq; - } - - spin_lock_irqsave(&rq->lock, flags); - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { - ret = 0; - goto unlock; - } - - wq = rq->wq; - tail = wq->tail; - /* Validate tail before using it since it is user writable. */ - if (tail >= rq->size) - tail = 0; - do { - if (unlikely(tail == wq->head)) { - ret = 0; - goto unlock; - } - /* Make sure entry is read after head index is read. */ - smp_rmb(); - wqe = get_rwqe_ptr(rq, tail); - if (++tail >= rq->size) - tail = 0; - if (wr_id_only) - break; - qp->r_sge.sg_list = qp->r_sg_list; - } while (!ipath_init_sge(qp, wqe, &qp->r_len, &qp->r_sge)); - qp->r_wr_id = wqe->wr_id; - wq->tail = tail; - - ret = 1; - set_bit(IPATH_R_WRID_VALID, &qp->r_aflags); - if (handler) { - u32 n; - - /* - * validate head pointer value and compute - * the number of remaining WQEs. - */ - n = wq->head; - if (n >= rq->size) - n = 0; - if (n < tail) - n += rq->size - tail; - else - n -= tail; - if (n < srq->limit) { - struct ib_event ev; - - srq->limit = 0; - spin_unlock_irqrestore(&rq->lock, flags); - ev.device = qp->ibqp.device; - ev.element.srq = qp->ibqp.srq; - ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - handler(&ev, srq->ibsrq.srq_context); - goto bail; - } - } -unlock: - spin_unlock_irqrestore(&rq->lock, flags); -bail: - return ret; -} - -/** - * ipath_ruc_loopback - handle UC and RC lookback requests - * @sqp: the sending QP - * - * This is called from ipath_do_send() to - * forward a WQE addressed to the same HCA. 
- * Note that although we are single threaded due to the tasklet, we still - * have to protect against post_send(). We don't have to worry about - * receive interrupts since this is a connected protocol and all packets - * will pass through here. - */ -static void ipath_ruc_loopback(struct ipath_qp *sqp) -{ - struct ipath_ibdev *dev = to_idev(sqp->ibqp.device); - struct ipath_qp *qp; - struct ipath_swqe *wqe; - struct ipath_sge *sge; - unsigned long flags; - struct ib_wc wc; - u64 sdata; - atomic64_t *maddr; - enum ib_wc_status send_status; - - /* - * Note that we check the responder QP state after - * checking the requester's state. - */ - qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); - - spin_lock_irqsave(&sqp->s_lock, flags); - - /* Return if we are already busy processing a work request. */ - if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || - !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) - goto unlock; - - sqp->s_flags |= IPATH_S_BUSY; - -again: - if (sqp->s_last == sqp->s_head) - goto clr_busy; - wqe = get_swqe_ptr(sqp, sqp->s_last); - - /* Return if it is not OK to start a new work reqeust. */ - if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { - if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND)) - goto clr_busy; - /* We are in the error state, flush the work request. */ - send_status = IB_WC_WR_FLUSH_ERR; - goto flush_send; - } - - /* - * We can rely on the entry not changing without the s_lock - * being held until we update s_last. - * We increment s_cur to indicate s_last is in progress. - */ - if (sqp->s_last == sqp->s_cur) { - if (++sqp->s_cur >= sqp->s_size) - sqp->s_cur = 0; - } - spin_unlock_irqrestore(&sqp->s_lock, flags); - - if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { - dev->n_pkt_drops++; - /* - * For RC, the requester would timeout and retry so - * shortcut the timeouts and just signal too many retries. 
- */ - if (sqp->ibqp.qp_type == IB_QPT_RC) - send_status = IB_WC_RETRY_EXC_ERR; - else - send_status = IB_WC_SUCCESS; - goto serr; - } - - memset(&wc, 0, sizeof wc); - send_status = IB_WC_SUCCESS; - - sqp->s_sge.sge = wqe->sg_list[0]; - sqp->s_sge.sg_list = wqe->sg_list + 1; - sqp->s_sge.num_sge = wqe->wr.num_sge; - sqp->s_len = wqe->length; - switch (wqe->wr.opcode) { - case IB_WR_SEND_WITH_IMM: - wc.wc_flags = IB_WC_WITH_IMM; - wc.ex.imm_data = wqe->wr.ex.imm_data; - /* FALLTHROUGH */ - case IB_WR_SEND: - if (!ipath_get_rwqe(qp, 0)) - goto rnr_nak; - break; - - case IB_WR_RDMA_WRITE_WITH_IMM: - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) - goto inv_err; - wc.wc_flags = IB_WC_WITH_IMM; - wc.ex.imm_data = wqe->wr.ex.imm_data; - if (!ipath_get_rwqe(qp, 1)) - goto rnr_nak; - /* FALLTHROUGH */ - case IB_WR_RDMA_WRITE: - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) - goto inv_err; - if (wqe->length == 0) - break; - if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length, - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, - IB_ACCESS_REMOTE_WRITE))) - goto acc_err; - break; - - case IB_WR_RDMA_READ: - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) - goto inv_err; - if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, - IB_ACCESS_REMOTE_READ))) - goto acc_err; - qp->r_sge.sge = wqe->sg_list[0]; - qp->r_sge.sg_list = wqe->sg_list + 1; - qp->r_sge.num_sge = wqe->wr.num_sge; - break; - - case IB_WR_ATOMIC_CMP_AND_SWP: - case IB_WR_ATOMIC_FETCH_AND_ADD: - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) - goto inv_err; - if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), - wqe->wr.wr.atomic.remote_addr, - wqe->wr.wr.atomic.rkey, - IB_ACCESS_REMOTE_ATOMIC))) - goto acc_err; - /* Perform atomic OP and save result. 
*/ - maddr = (atomic64_t *) qp->r_sge.sge.vaddr; - sdata = wqe->wr.wr.atomic.compare_add; - *(u64 *) sqp->s_sge.sge.vaddr = - (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? - (u64) atomic64_add_return(sdata, maddr) - sdata : - (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, - sdata, wqe->wr.wr.atomic.swap); - goto send_comp; - - default: - send_status = IB_WC_LOC_QP_OP_ERR; - goto serr; - } - - sge = &sqp->s_sge.sge; - while (sqp->s_len) { - u32 len = sqp->s_len; - - if (len > sge->length) - len = sge->length; - if (len > sge->sge_length) - len = sge->sge_length; - BUG_ON(len == 0); - ipath_copy_sge(&qp->r_sge, sge->vaddr, len); - sge->vaddr += len; - sge->length -= len; - sge->sge_length -= len; - if (sge->sge_length == 0) { - if (--sqp->s_sge.num_sge) - *sge = *sqp->s_sge.sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { - if (++sge->n >= IPATH_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - break; - sge->n = 0; - } - sge->vaddr = - sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = - sge->mr->map[sge->m]->segs[sge->n].length; - } - sqp->s_len -= len; - } - - if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) - goto send_comp; - - if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) - wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; - else - wc.opcode = IB_WC_RECV; - wc.wr_id = qp->r_wr_id; - wc.status = IB_WC_SUCCESS; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - wc.src_qp = qp->remote_qpn; - wc.slid = qp->remote_ah_attr.dlid; - wc.sl = qp->remote_ah_attr.sl; - wc.port_num = 1; - /* Signal completion event if the solicited bit is set. 
*/ - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, - wqe->wr.send_flags & IB_SEND_SOLICITED); - -send_comp: - spin_lock_irqsave(&sqp->s_lock, flags); -flush_send: - sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; - ipath_send_complete(sqp, wqe, send_status); - goto again; - -rnr_nak: - /* Handle RNR NAK */ - if (qp->ibqp.qp_type == IB_QPT_UC) - goto send_comp; - /* - * Note: we don't need the s_lock held since the BUSY flag - * makes this single threaded. - */ - if (sqp->s_rnr_retry == 0) { - send_status = IB_WC_RNR_RETRY_EXC_ERR; - goto serr; - } - if (sqp->s_rnr_retry_cnt < 7) - sqp->s_rnr_retry--; - spin_lock_irqsave(&sqp->s_lock, flags); - if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK)) - goto clr_busy; - sqp->s_flags |= IPATH_S_WAITING; - dev->n_rnr_naks++; - sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer]; - ipath_insert_rnr_queue(sqp); - goto clr_busy; - -inv_err: - send_status = IB_WC_REM_INV_REQ_ERR; - wc.status = IB_WC_LOC_QP_OP_ERR; - goto err; - -acc_err: - send_status = IB_WC_REM_ACCESS_ERR; - wc.status = IB_WC_LOC_PROT_ERR; -err: - /* responder goes to error state */ - ipath_rc_error(qp, wc.status); - -serr: - spin_lock_irqsave(&sqp->s_lock, flags); - ipath_send_complete(sqp, wqe, send_status); - if (sqp->ibqp.qp_type == IB_QPT_RC) { - int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR); - - sqp->s_flags &= ~IPATH_S_BUSY; - spin_unlock_irqrestore(&sqp->s_lock, flags); - if (lastwqe) { - struct ib_event ev; - - ev.device = sqp->ibqp.device; - ev.element.qp = &sqp->ibqp; - ev.event = IB_EVENT_QP_LAST_WQE_REACHED; - sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context); - } - goto done; - } -clr_busy: - sqp->s_flags &= ~IPATH_S_BUSY; -unlock: - spin_unlock_irqrestore(&sqp->s_lock, flags); -done: - if (qp && atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); -} - -static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp) -{ - if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) || - qp->ibqp.qp_type == IB_QPT_SMI) { - 
unsigned long flags; - - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - } -} - -/** - * ipath_no_bufs_available - tell the layer driver we need buffers - * @qp: the QP that caused the problem - * @dev: the device we ran out of buffers on - * - * Called when we run out of PIO buffers. - * If we are now in the error state, return zero to flush the - * send work request. - */ -static int ipath_no_bufs_available(struct ipath_qp *qp, - struct ipath_ibdev *dev) -{ - unsigned long flags; - int ret = 1; - - /* - * Note that as soon as want_buffer() is called and - * possibly before it returns, ipath_ib_piobufavail() - * could be called. Therefore, put QP on the piowait list before - * enabling the PIO avail interrupt. - */ - spin_lock_irqsave(&qp->s_lock, flags); - if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { - dev->n_piowait++; - qp->s_flags |= IPATH_S_WAITING; - qp->s_flags &= ~IPATH_S_BUSY; - spin_lock(&dev->pending_lock); - if (list_empty(&qp->piowait)) - list_add_tail(&qp->piowait, &dev->piowait); - spin_unlock(&dev->pending_lock); - } else - ret = 0; - spin_unlock_irqrestore(&qp->s_lock, flags); - if (ret) - want_buffer(dev->dd, qp); - return ret; -} - -/** - * ipath_make_grh - construct a GRH header - * @dev: a pointer to the ipath device - * @hdr: a pointer to the GRH header being constructed - * @grh: the global route address to send to - * @hwords: the number of 32 bit words of header being sent - * @nwords: the number of 32 bit words of data being sent - * - * Return the size of the header in 32 bit words. 
- */ -u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr, - struct ib_global_route *grh, u32 hwords, u32 nwords) -{ - hdr->version_tclass_flow = - cpu_to_be32((6 << 28) | - (grh->traffic_class << 20) | - grh->flow_label); - hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2); - /* next_hdr is defined by C8-7 in ch. 8.4.1 */ - hdr->next_hdr = 0x1B; - hdr->hop_limit = grh->hop_limit; - /* The SGID is 32-bit aligned. */ - hdr->sgid.global.subnet_prefix = dev->gid_prefix; - hdr->sgid.global.interface_id = dev->dd->ipath_guid; - hdr->dgid = grh->dgid; - - /* GRH header size in 32-bit words. */ - return sizeof(struct ib_grh) / sizeof(u32); -} - -void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp, - struct ipath_other_headers *ohdr, - u32 bth0, u32 bth2) -{ - u16 lrh0; - u32 nwords; - u32 extra_bytes; - - /* Construct the header. */ - extra_bytes = -qp->s_cur_size & 3; - nwords = (qp->s_cur_size + extra_bytes) >> 2; - lrh0 = IPATH_LRH_BTH; - if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { - qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh, - &qp->remote_ah_attr.grh, - qp->s_hdrwords, nwords); - lrh0 = IPATH_LRH_GRH; - } - lrh0 |= qp->remote_ah_attr.sl << 4; - qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); - qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); - qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid | - qp->remote_ah_attr.src_path_bits); - bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index); - bth0 |= extra_bytes << 20; - ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22)); - ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); - ohdr->bth[2] = cpu_to_be32(bth2); -} - -/** - * ipath_do_send - perform a send on a QP - * @data: contains a pointer to the QP - * - * Process entries in the send work queue until credit or queue is - * exhausted. Only allow one CPU to send a packet per QP (tasklet). 
- * Otherwise, two threads could send packets out of order. - */ -void ipath_do_send(unsigned long data) -{ - struct ipath_qp *qp = (struct ipath_qp *)data; - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - int (*make_req)(struct ipath_qp *qp); - unsigned long flags; - - if ((qp->ibqp.qp_type == IB_QPT_RC || - qp->ibqp.qp_type == IB_QPT_UC) && - qp->remote_ah_attr.dlid == dev->dd->ipath_lid) { - ipath_ruc_loopback(qp); - goto bail; - } - - if (qp->ibqp.qp_type == IB_QPT_RC) - make_req = ipath_make_rc_req; - else if (qp->ibqp.qp_type == IB_QPT_UC) - make_req = ipath_make_uc_req; - else - make_req = ipath_make_ud_req; - - spin_lock_irqsave(&qp->s_lock, flags); - - /* Return if we are already busy processing a work request. */ - if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || - !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) { - spin_unlock_irqrestore(&qp->s_lock, flags); - goto bail; - } - - qp->s_flags |= IPATH_S_BUSY; - - spin_unlock_irqrestore(&qp->s_lock, flags); - -again: - /* Check for a constructed packet to be sent. */ - if (qp->s_hdrwords != 0) { - /* - * If no PIO bufs are available, return. An interrupt will - * call ipath_ib_piobufavail() when one is available. - */ - if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, - qp->s_cur_sge, qp->s_cur_size)) { - if (ipath_no_bufs_available(qp, dev)) - goto bail; - } - dev->n_unicast_xmit++; - /* Record that we sent the packet and s_hdr is empty. */ - qp->s_hdrwords = 0; - } - - if (make_req(qp)) - goto again; - -bail:; -} - -/* - * This should be called with s_lock held. - */ -void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, - enum ib_wc_status status) -{ - u32 old_last, last; - - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) - return; - - /* See ch. 
11.2.4.1 and 10.7.3.1 */ - if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED) || - status != IB_WC_SUCCESS) { - struct ib_wc wc; - - memset(&wc, 0, sizeof wc); - wc.wr_id = wqe->wr.wr_id; - wc.status = status; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.qp = &qp->ibqp; - if (status == IB_WC_SUCCESS) - wc.byte_len = wqe->length; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, - status != IB_WC_SUCCESS); - } - - old_last = last = qp->s_last; - if (++last >= qp->s_size) - last = 0; - qp->s_last = last; - if (qp->s_cur == old_last) - qp->s_cur = last; - if (qp->s_tail == old_last) - qp->s_tail = last; - if (qp->state == IB_QPS_SQD && last == qp->s_cur) - qp->s_draining = 0; -} diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c deleted file mode 100644 index 17a517766..000000000 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ /dev/null @@ -1,818 +0,0 @@ -/* - * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/spinlock.h> -#include <linux/gfp.h> - -#include "ipath_kernel.h" -#include "ipath_verbs.h" -#include "ipath_common.h" - -#define SDMA_DESCQ_SZ PAGE_SIZE /* 256 entries per 4KB page */ - -static void vl15_watchdog_enq(struct ipath_devdata *dd) -{ - /* ipath_sdma_lock must already be held */ - if (atomic_inc_return(&dd->ipath_sdma_vl15_count) == 1) { - unsigned long interval = (HZ + 19) / 20; - dd->ipath_sdma_vl15_timer.expires = jiffies + interval; - add_timer(&dd->ipath_sdma_vl15_timer); - } -} - -static void vl15_watchdog_deq(struct ipath_devdata *dd) -{ - /* ipath_sdma_lock must already be held */ - if (atomic_dec_return(&dd->ipath_sdma_vl15_count) != 0) { - unsigned long interval = (HZ + 19) / 20; - mod_timer(&dd->ipath_sdma_vl15_timer, jiffies + interval); - } else { - del_timer(&dd->ipath_sdma_vl15_timer); - } -} - -static void vl15_watchdog_timeout(unsigned long opaque) -{ - struct ipath_devdata *dd = (struct ipath_devdata *)opaque; - - if (atomic_read(&dd->ipath_sdma_vl15_count) != 0) { - ipath_dbg("vl15 watchdog timeout - clearing\n"); - ipath_cancel_sends(dd, 1); - ipath_hol_down(dd); - } else { - ipath_dbg("vl15 watchdog timeout - " - "condition already cleared\n"); - } -} - -static void unmap_desc(struct ipath_devdata *dd, unsigned head) -{ - __le64 *descqp = &dd->ipath_sdma_descq[head].qw[0]; - u64 desc[2]; - dma_addr_t addr; - size_t len; - - desc[0] = le64_to_cpu(descqp[0]); - desc[1] = le64_to_cpu(descqp[1]); - - addr = (desc[1] << 32) | 
(desc[0] >> 32); - len = (desc[0] >> 14) & (0x7ffULL << 2); - dma_unmap_single(&dd->pcidev->dev, addr, len, DMA_TO_DEVICE); -} - -/* - * ipath_sdma_lock should be locked before calling this. - */ -int ipath_sdma_make_progress(struct ipath_devdata *dd) -{ - struct list_head *lp = NULL; - struct ipath_sdma_txreq *txp = NULL; - u16 dmahead; - u16 start_idx = 0; - int progress = 0; - - if (!list_empty(&dd->ipath_sdma_activelist)) { - lp = dd->ipath_sdma_activelist.next; - txp = list_entry(lp, struct ipath_sdma_txreq, list); - start_idx = txp->start_idx; - } - - /* - * Read the SDMA head register in order to know that the - * interrupt clear has been written to the chip. - * Otherwise, we may not get an interrupt for the last - * descriptor in the queue. - */ - dmahead = (u16)ipath_read_kreg32(dd, dd->ipath_kregs->kr_senddmahead); - /* sanity check return value for error handling (chip reset, etc.) */ - if (dmahead >= dd->ipath_sdma_descq_cnt) - goto done; - - while (dd->ipath_sdma_descq_head != dmahead) { - if (txp && txp->flags & IPATH_SDMA_TXREQ_F_FREEDESC && - dd->ipath_sdma_descq_head == start_idx) { - unmap_desc(dd, dd->ipath_sdma_descq_head); - start_idx++; - if (start_idx == dd->ipath_sdma_descq_cnt) - start_idx = 0; - } - - /* increment free count and head */ - dd->ipath_sdma_descq_removed++; - if (++dd->ipath_sdma_descq_head == dd->ipath_sdma_descq_cnt) - dd->ipath_sdma_descq_head = 0; - - if (txp && txp->next_descq_idx == dd->ipath_sdma_descq_head) { - /* move to notify list */ - if (txp->flags & IPATH_SDMA_TXREQ_F_VL15) - vl15_watchdog_deq(dd); - list_move_tail(lp, &dd->ipath_sdma_notifylist); - if (!list_empty(&dd->ipath_sdma_activelist)) { - lp = dd->ipath_sdma_activelist.next; - txp = list_entry(lp, struct ipath_sdma_txreq, - list); - start_idx = txp->start_idx; - } else { - lp = NULL; - txp = NULL; - } - } - progress = 1; - } - - if (progress) - tasklet_hi_schedule(&dd->ipath_sdma_notify_task); - -done: - return progress; -} - -static void 
ipath_sdma_notify(struct ipath_devdata *dd, struct list_head *list) -{ - struct ipath_sdma_txreq *txp, *txp_next; - - list_for_each_entry_safe(txp, txp_next, list, list) { - list_del_init(&txp->list); - - if (txp->callback) - (*txp->callback)(txp->callback_cookie, - txp->callback_status); - } -} - -static void sdma_notify_taskbody(struct ipath_devdata *dd) -{ - unsigned long flags; - struct list_head list; - - INIT_LIST_HEAD(&list); - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - - list_splice_init(&dd->ipath_sdma_notifylist, &list); - - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - - ipath_sdma_notify(dd, &list); - - /* - * The IB verbs layer needs to see the callback before getting - * the call to ipath_ib_piobufavail() because the callback - * handles releasing resources the next send will need. - * Otherwise, we could do these calls in - * ipath_sdma_make_progress(). - */ - ipath_ib_piobufavail(dd->verbs_dev); -} - -static void sdma_notify_task(unsigned long opaque) -{ - struct ipath_devdata *dd = (struct ipath_devdata *)opaque; - - if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) - sdma_notify_taskbody(dd); -} - -static void dump_sdma_state(struct ipath_devdata *dd) -{ - unsigned long reg; - - reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmastatus); - ipath_cdbg(VERBOSE, "kr_senddmastatus: 0x%016lx\n", reg); - - reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendctrl); - ipath_cdbg(VERBOSE, "kr_sendctrl: 0x%016lx\n", reg); - - reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask0); - ipath_cdbg(VERBOSE, "kr_senddmabufmask0: 0x%016lx\n", reg); - - reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask1); - ipath_cdbg(VERBOSE, "kr_senddmabufmask1: 0x%016lx\n", reg); - - reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask2); - ipath_cdbg(VERBOSE, "kr_senddmabufmask2: 0x%016lx\n", reg); - - reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail); - ipath_cdbg(VERBOSE, "kr_senddmatail: 0x%016lx\n", 
reg); - - reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead); - ipath_cdbg(VERBOSE, "kr_senddmahead: 0x%016lx\n", reg); -} - -static void sdma_abort_task(unsigned long opaque) -{ - struct ipath_devdata *dd = (struct ipath_devdata *) opaque; - u64 status; - unsigned long flags; - - if (test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) - return; - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - - status = dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK; - - /* nothing to do */ - if (status == IPATH_SDMA_ABORT_NONE) - goto unlock; - - /* ipath_sdma_abort() is done, waiting for interrupt */ - if (status == IPATH_SDMA_ABORT_DISARMED) { - if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout)) - goto resched_noprint; - /* give up, intr got lost somewhere */ - ipath_dbg("give up waiting for SDMADISABLED intr\n"); - __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); - status = IPATH_SDMA_ABORT_ABORTED; - } - - /* everything is stopped, time to clean up and restart */ - if (status == IPATH_SDMA_ABORT_ABORTED) { - struct ipath_sdma_txreq *txp, *txpnext; - u64 hwstatus; - int notify = 0; - - hwstatus = ipath_read_kreg64(dd, - dd->ipath_kregs->kr_senddmastatus); - - if ((hwstatus & (IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG | - IPATH_SDMA_STATUS_ABORT_IN_PROG | - IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE)) || - !(hwstatus & IPATH_SDMA_STATUS_SCB_EMPTY)) { - if (dd->ipath_sdma_reset_wait > 0) { - /* not done shutting down sdma */ - --dd->ipath_sdma_reset_wait; - goto resched; - } - ipath_cdbg(VERBOSE, "gave up waiting for quiescent " - "status after SDMA reset, continuing\n"); - dump_sdma_state(dd); - } - - /* dequeue all "sent" requests */ - list_for_each_entry_safe(txp, txpnext, - &dd->ipath_sdma_activelist, list) { - txp->callback_status = IPATH_SDMA_TXREQ_S_ABORTED; - if (txp->flags & IPATH_SDMA_TXREQ_F_VL15) - vl15_watchdog_deq(dd); - list_move_tail(&txp->list, &dd->ipath_sdma_notifylist); - notify = 1; - } - if (notify) - 
tasklet_hi_schedule(&dd->ipath_sdma_notify_task); - - /* reset our notion of head and tail */ - dd->ipath_sdma_descq_tail = 0; - dd->ipath_sdma_descq_head = 0; - dd->ipath_sdma_head_dma[0] = 0; - dd->ipath_sdma_generation = 0; - dd->ipath_sdma_descq_removed = dd->ipath_sdma_descq_added; - - /* Reset SendDmaLenGen */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, - (u64) dd->ipath_sdma_descq_cnt | (1ULL << 18)); - - /* done with sdma state for a bit */ - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - - /* - * Don't restart sdma here (with the exception - * below). Wait until link is up to ACTIVE. VL15 MADs - * used to bring the link up use PIO, and multiple link - * transitions otherwise cause the sdma engine to be - * stopped and started multiple times. - * The disable is done here, including the shadow, - * so the state is kept consistent. - * See ipath_restart_sdma() for the actual starting - * of sdma. - */ - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - - /* make sure I see next message */ - dd->ipath_sdma_abort_jiffies = 0; - - /* - * Not everything that takes SDMA offline is a link - * status change. If the link was up, restart SDMA. 
- */ - if (dd->ipath_flags & IPATH_LINKACTIVE) - ipath_restart_sdma(dd); - - goto done; - } - -resched: - /* - * for now, keep spinning - * JAG - this is bad to just have default be a loop without - * state change - */ - if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) { - ipath_dbg("looping with status 0x%08lx\n", - dd->ipath_sdma_status); - dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ; - } -resched_noprint: - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) - tasklet_hi_schedule(&dd->ipath_sdma_abort_task); - return; - -unlock: - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); -done: - return; -} - -/* - * This is called from interrupt context. - */ -void ipath_sdma_intr(struct ipath_devdata *dd) -{ - unsigned long flags; - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - - (void) ipath_sdma_make_progress(dd); - - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); -} - -static int alloc_sdma(struct ipath_devdata *dd) -{ - int ret = 0; - - /* Allocate memory for SendDMA descriptor FIFO */ - dd->ipath_sdma_descq = dma_alloc_coherent(&dd->pcidev->dev, - SDMA_DESCQ_SZ, &dd->ipath_sdma_descq_phys, GFP_KERNEL); - - if (!dd->ipath_sdma_descq) { - ipath_dev_err(dd, "failed to allocate SendDMA descriptor " - "FIFO memory\n"); - ret = -ENOMEM; - goto done; - } - - dd->ipath_sdma_descq_cnt = - SDMA_DESCQ_SZ / sizeof(struct ipath_sdma_desc); - - /* Allocate memory for DMA of head register to memory */ - dd->ipath_sdma_head_dma = dma_alloc_coherent(&dd->pcidev->dev, - PAGE_SIZE, &dd->ipath_sdma_head_phys, GFP_KERNEL); - if (!dd->ipath_sdma_head_dma) { - ipath_dev_err(dd, "failed to allocate SendDMA head memory\n"); - ret = -ENOMEM; - goto cleanup_descq; - } - dd->ipath_sdma_head_dma[0] = 0; - - init_timer(&dd->ipath_sdma_vl15_timer); - dd->ipath_sdma_vl15_timer.function = vl15_watchdog_timeout; - dd->ipath_sdma_vl15_timer.data = (unsigned long)dd; - atomic_set(&dd->ipath_sdma_vl15_count, 0); 
- - goto done; - -cleanup_descq: - dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ, - (void *)dd->ipath_sdma_descq, dd->ipath_sdma_descq_phys); - dd->ipath_sdma_descq = NULL; - dd->ipath_sdma_descq_phys = 0; -done: - return ret; -} - -int setup_sdma(struct ipath_devdata *dd) -{ - int ret = 0; - unsigned i, n; - u64 tmp64; - u64 senddmabufmask[3] = { 0 }; - unsigned long flags; - - ret = alloc_sdma(dd); - if (ret) - goto done; - - if (!dd->ipath_sdma_descq) { - ipath_dev_err(dd, "SendDMA memory not allocated\n"); - goto done; - } - - /* - * Set initial status as if we had been up, then gone down. - * This lets initial start on transition to ACTIVE be the - * same as restart after link flap. - */ - dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED; - dd->ipath_sdma_abort_jiffies = 0; - dd->ipath_sdma_generation = 0; - dd->ipath_sdma_descq_tail = 0; - dd->ipath_sdma_descq_head = 0; - dd->ipath_sdma_descq_removed = 0; - dd->ipath_sdma_descq_added = 0; - - /* Set SendDmaBase */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase, - dd->ipath_sdma_descq_phys); - /* Set SendDmaLenGen */ - tmp64 = dd->ipath_sdma_descq_cnt; - tmp64 |= 1<<18; /* enable generation checking */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, tmp64); - /* Set SendDmaTail */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, - dd->ipath_sdma_descq_tail); - /* Set SendDmaHeadAddr */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, - dd->ipath_sdma_head_phys); - - /* - * Reserve all the former "kernel" piobufs, using high number range - * so we get as many 4K buffers as possible - */ - n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; - i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved; - ipath_chg_pioavailkernel(dd, i, n - i , 0); - for (; i < n; ++i) { - unsigned word = i / 64; - unsigned bit = i & 63; - BUG_ON(word >= 3); - senddmabufmask[word] |= 1ULL << bit; - } - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, - senddmabufmask[0]); - 
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, - senddmabufmask[1]); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2, - senddmabufmask[2]); - - INIT_LIST_HEAD(&dd->ipath_sdma_activelist); - INIT_LIST_HEAD(&dd->ipath_sdma_notifylist); - - tasklet_init(&dd->ipath_sdma_notify_task, sdma_notify_task, - (unsigned long) dd); - tasklet_init(&dd->ipath_sdma_abort_task, sdma_abort_task, - (unsigned long) dd); - - /* - * No use to turn on SDMA here, as link is probably not ACTIVE - * Just mark it RUNNING and enable the interrupt, and let the - * ipath_restart_sdma() on link transition to ACTIVE actually - * enable it. - */ - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl |= INFINIPATH_S_SDMAINTENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - __set_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - -done: - return ret; -} - -void teardown_sdma(struct ipath_devdata *dd) -{ - struct ipath_sdma_txreq *txp, *txpnext; - unsigned long flags; - dma_addr_t sdma_head_phys = 0; - dma_addr_t sdma_descq_phys = 0; - void *sdma_descq = NULL; - void *sdma_head_dma = NULL; - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - __clear_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status); - __set_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); - __set_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status); - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - - tasklet_kill(&dd->ipath_sdma_abort_task); - tasklet_kill(&dd->ipath_sdma_notify_task); - - /* turn off sdma */ - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - - spin_lock_irqsave(&dd->ipath_sdma_lock, 
flags); - /* dequeue all "sent" requests */ - list_for_each_entry_safe(txp, txpnext, &dd->ipath_sdma_activelist, - list) { - txp->callback_status = IPATH_SDMA_TXREQ_S_SHUTDOWN; - if (txp->flags & IPATH_SDMA_TXREQ_F_VL15) - vl15_watchdog_deq(dd); - list_move_tail(&txp->list, &dd->ipath_sdma_notifylist); - } - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - - sdma_notify_taskbody(dd); - - del_timer_sync(&dd->ipath_sdma_vl15_timer); - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - - dd->ipath_sdma_abort_jiffies = 0; - - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase, 0); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, 0); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, 0); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, 0); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, 0); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, 0); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2, 0); - - if (dd->ipath_sdma_head_dma) { - sdma_head_dma = (void *) dd->ipath_sdma_head_dma; - sdma_head_phys = dd->ipath_sdma_head_phys; - dd->ipath_sdma_head_dma = NULL; - dd->ipath_sdma_head_phys = 0; - } - - if (dd->ipath_sdma_descq) { - sdma_descq = dd->ipath_sdma_descq; - sdma_descq_phys = dd->ipath_sdma_descq_phys; - dd->ipath_sdma_descq = NULL; - dd->ipath_sdma_descq_phys = 0; - } - - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - - if (sdma_head_dma) - dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, - sdma_head_dma, sdma_head_phys); - - if (sdma_descq) - dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ, - sdma_descq, sdma_descq_phys); -} - -/* - * [Re]start SDMA, if we use it, and it's not already OK. - * This is called on transition to link ACTIVE, either the first or - * subsequent times. 
- */ -void ipath_restart_sdma(struct ipath_devdata *dd) -{ - unsigned long flags; - int needed = 1; - - if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) - goto bail; - - /* - * First, make sure we should, which is to say, - * check that we are "RUNNING" (not in teardown) - * and not "SHUTDOWN" - */ - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - if (!test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status) - || test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) - needed = 0; - else { - __clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); - __clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status); - __clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); - } - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - if (!needed) { - ipath_dbg("invalid attempt to restart SDMA, status 0x%08lx\n", - dd->ipath_sdma_status); - goto bail; - } - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - /* - * First clear, just to be safe. Enable is only done - * in chip on 0->1 transition - */ - dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - - /* notify upper layers */ - ipath_ib_piobufavail(dd->verbs_dev); - -bail: - return; -} - -static inline void make_sdma_desc(struct ipath_devdata *dd, - u64 *sdmadesc, u64 addr, u64 dwlen, u64 dwoffset) -{ - WARN_ON(addr & 3); - /* SDmaPhyAddr[47:32] */ - sdmadesc[1] = addr >> 32; - /* SDmaPhyAddr[31:0] */ - sdmadesc[0] = (addr & 0xfffffffcULL) << 32; - /* SDmaGeneration[1:0] */ - sdmadesc[0] |= (dd->ipath_sdma_generation & 3ULL) << 30; - /* SDmaDwordCount[10:0] */ - sdmadesc[0] |= (dwlen & 0x7ffULL) << 16; - /* SDmaBufOffset[12:2] */ - sdmadesc[0] |= dwoffset & 0x7ffULL; 
-} - -/* - * This function queues one IB packet onto the send DMA queue per call. - * The caller is responsible for checking: - * 1) The number of send DMA descriptor entries is less than the size of - * the descriptor queue. - * 2) The IB SGE addresses and lengths are 32-bit aligned - * (except possibly the last SGE's length) - * 3) The SGE addresses are suitable for passing to dma_map_single(). - */ -int ipath_sdma_verbs_send(struct ipath_devdata *dd, - struct ipath_sge_state *ss, u32 dwords, - struct ipath_verbs_txreq *tx) -{ - - unsigned long flags; - struct ipath_sge *sge; - int ret = 0; - u16 tail; - __le64 *descqp; - u64 sdmadesc[2]; - u32 dwoffset; - dma_addr_t addr; - - if ((tx->map_len + (dwords<<2)) > dd->ipath_ibmaxlen) { - ipath_dbg("packet size %X > ibmax %X, fail\n", - tx->map_len + (dwords<<2), dd->ipath_ibmaxlen); - ret = -EMSGSIZE; - goto fail; - } - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - -retry: - if (unlikely(test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status))) { - ret = -EBUSY; - goto unlock; - } - - if (tx->txreq.sg_count > ipath_sdma_descq_freecnt(dd)) { - if (ipath_sdma_make_progress(dd)) - goto retry; - ret = -ENOBUFS; - goto unlock; - } - - addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr, - tx->map_len, DMA_TO_DEVICE); - if (dma_mapping_error(&dd->pcidev->dev, addr)) - goto ioerr; - - dwoffset = tx->map_len >> 2; - make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0); - - /* SDmaFirstDesc */ - sdmadesc[0] |= 1ULL << 12; - if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF) - sdmadesc[0] |= 1ULL << 14; /* SDmaUseLargeBuf */ - - /* write to the descq */ - tail = dd->ipath_sdma_descq_tail; - descqp = &dd->ipath_sdma_descq[tail].qw[0]; - *descqp++ = cpu_to_le64(sdmadesc[0]); - *descqp++ = cpu_to_le64(sdmadesc[1]); - - if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEDESC) - tx->txreq.start_idx = tail; - - /* increment the tail */ - if (++tail == dd->ipath_sdma_descq_cnt) { - tail = 0; - descqp = 
&dd->ipath_sdma_descq[0].qw[0]; - ++dd->ipath_sdma_generation; - } - - sge = &ss->sge; - while (dwords) { - u32 dw; - u32 len; - - len = dwords << 2; - if (len > sge->length) - len = sge->length; - if (len > sge->sge_length) - len = sge->sge_length; - BUG_ON(len == 0); - dw = (len + 3) >> 2; - addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2, - DMA_TO_DEVICE); - if (dma_mapping_error(&dd->pcidev->dev, addr)) - goto unmap; - make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset); - /* SDmaUseLargeBuf has to be set in every descriptor */ - if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF) - sdmadesc[0] |= 1ULL << 14; - /* write to the descq */ - *descqp++ = cpu_to_le64(sdmadesc[0]); - *descqp++ = cpu_to_le64(sdmadesc[1]); - - /* increment the tail */ - if (++tail == dd->ipath_sdma_descq_cnt) { - tail = 0; - descqp = &dd->ipath_sdma_descq[0].qw[0]; - ++dd->ipath_sdma_generation; - } - sge->vaddr += len; - sge->length -= len; - sge->sge_length -= len; - if (sge->sge_length == 0) { - if (--ss->num_sge) - *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { - if (++sge->n >= IPATH_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - break; - sge->n = 0; - } - sge->vaddr = - sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = - sge->mr->map[sge->m]->segs[sge->n].length; - } - - dwoffset += dw; - dwords -= dw; - } - - if (!tail) - descqp = &dd->ipath_sdma_descq[dd->ipath_sdma_descq_cnt].qw[0]; - descqp -= 2; - /* SDmaLastDesc */ - descqp[0] |= cpu_to_le64(1ULL << 11); - if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_INTREQ) { - /* SDmaIntReq */ - descqp[0] |= cpu_to_le64(1ULL << 15); - } - - /* Commit writes to memory and advance the tail on the chip */ - wmb(); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail); - - tx->txreq.next_descq_idx = tail; - tx->txreq.callback_status = IPATH_SDMA_TXREQ_S_OK; - dd->ipath_sdma_descq_tail = tail; - dd->ipath_sdma_descq_added += tx->txreq.sg_count; - list_add_tail(&tx->txreq.list, 
&dd->ipath_sdma_activelist); - if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15) - vl15_watchdog_enq(dd); - goto unlock; - -unmap: - while (tail != dd->ipath_sdma_descq_tail) { - if (!tail) - tail = dd->ipath_sdma_descq_cnt - 1; - else - tail--; - unmap_desc(dd, tail); - } -ioerr: - ret = -EIO; -unlock: - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); -fail: - return ret; -} diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c deleted file mode 100644 index 26271984b..000000000 --- a/drivers/infiniband/hw/ipath/ipath_srq.c +++ /dev/null @@ -1,380 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/err.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> - -#include "ipath_verbs.h" - -/** - * ipath_post_srq_receive - post a receive on a shared receive queue - * @ibsrq: the SRQ to post the receive on - * @wr: the list of work requests to post - * @bad_wr: the first WR to cause a problem is put here - * - * This may be called from interrupt context. - */ -int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) -{ - struct ipath_srq *srq = to_isrq(ibsrq); - struct ipath_rwq *wq; - unsigned long flags; - int ret; - - for (; wr; wr = wr->next) { - struct ipath_rwqe *wqe; - u32 next; - int i; - - if ((unsigned) wr->num_sge > srq->rq.max_sge) { - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - - spin_lock_irqsave(&srq->rq.lock, flags); - wq = srq->rq.wq; - next = wq->head + 1; - if (next >= srq->rq.size) - next = 0; - if (next == wq->tail) { - spin_unlock_irqrestore(&srq->rq.lock, flags); - *bad_wr = wr; - ret = -ENOMEM; - goto bail; - } - - wqe = get_rwqe_ptr(&srq->rq, wq->head); - wqe->wr_id = wr->wr_id; - wqe->num_sge = wr->num_sge; - for (i = 0; i < wr->num_sge; i++) - wqe->sg_list[i] = wr->sg_list[i]; - /* Make sure queue entry is written before the head index. 
*/ - smp_wmb(); - wq->head = next; - spin_unlock_irqrestore(&srq->rq.lock, flags); - } - ret = 0; - -bail: - return ret; -} - -/** - * ipath_create_srq - create a shared receive queue - * @ibpd: the protection domain of the SRQ to create - * @srq_init_attr: the attributes of the SRQ - * @udata: data from libipathverbs when creating a user SRQ - */ -struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata) -{ - struct ipath_ibdev *dev = to_idev(ibpd->device); - struct ipath_srq *srq; - u32 sz; - struct ib_srq *ret; - - if (srq_init_attr->srq_type != IB_SRQT_BASIC) { - ret = ERR_PTR(-ENOSYS); - goto done; - } - - if (srq_init_attr->attr.max_wr == 0) { - ret = ERR_PTR(-EINVAL); - goto done; - } - - if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) || - (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) { - ret = ERR_PTR(-EINVAL); - goto done; - } - - srq = kmalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) { - ret = ERR_PTR(-ENOMEM); - goto done; - } - - /* - * Need to use vmalloc() if we want to support large #s of entries. - */ - srq->rq.size = srq_init_attr->attr.max_wr + 1; - srq->rq.max_sge = srq_init_attr->attr.max_sge; - sz = sizeof(struct ib_sge) * srq->rq.max_sge + - sizeof(struct ipath_rwqe); - srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz); - if (!srq->rq.wq) { - ret = ERR_PTR(-ENOMEM); - goto bail_srq; - } - - /* - * Return the address of the RWQ as the offset to mmap. - * See ipath_mmap() for details. 
- */ - if (udata && udata->outlen >= sizeof(__u64)) { - int err; - u32 s = sizeof(struct ipath_rwq) + srq->rq.size * sz; - - srq->ip = - ipath_create_mmap_info(dev, s, - ibpd->uobject->context, - srq->rq.wq); - if (!srq->ip) { - ret = ERR_PTR(-ENOMEM); - goto bail_wq; - } - - err = ib_copy_to_udata(udata, &srq->ip->offset, - sizeof(srq->ip->offset)); - if (err) { - ret = ERR_PTR(err); - goto bail_ip; - } - } else - srq->ip = NULL; - - /* - * ib_create_srq() will initialize srq->ibsrq. - */ - spin_lock_init(&srq->rq.lock); - srq->rq.wq->head = 0; - srq->rq.wq->tail = 0; - srq->limit = srq_init_attr->attr.srq_limit; - - spin_lock(&dev->n_srqs_lock); - if (dev->n_srqs_allocated == ib_ipath_max_srqs) { - spin_unlock(&dev->n_srqs_lock); - ret = ERR_PTR(-ENOMEM); - goto bail_ip; - } - - dev->n_srqs_allocated++; - spin_unlock(&dev->n_srqs_lock); - - if (srq->ip) { - spin_lock_irq(&dev->pending_lock); - list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); - } - - ret = &srq->ibsrq; - goto done; - -bail_ip: - kfree(srq->ip); -bail_wq: - vfree(srq->rq.wq); -bail_srq: - kfree(srq); -done: - return ret; -} - -/** - * ipath_modify_srq - modify a shared receive queue - * @ibsrq: the SRQ to modify - * @attr: the new attributes of the SRQ - * @attr_mask: indicates which attributes to modify - * @udata: user data for ipathverbs.so - */ -int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask, - struct ib_udata *udata) -{ - struct ipath_srq *srq = to_isrq(ibsrq); - struct ipath_rwq *wq; - int ret = 0; - - if (attr_mask & IB_SRQ_MAX_WR) { - struct ipath_rwq *owq; - struct ipath_rwqe *p; - u32 sz, size, n, head, tail; - - /* Check that the requested sizes are below the limits. */ - if ((attr->max_wr > ib_ipath_max_srq_wrs) || - ((attr_mask & IB_SRQ_LIMIT) ? 
- attr->srq_limit : srq->limit) > attr->max_wr) { - ret = -EINVAL; - goto bail; - } - - sz = sizeof(struct ipath_rwqe) + - srq->rq.max_sge * sizeof(struct ib_sge); - size = attr->max_wr + 1; - wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz); - if (!wq) { - ret = -ENOMEM; - goto bail; - } - - /* Check that we can write the offset to mmap. */ - if (udata && udata->inlen >= sizeof(__u64)) { - __u64 offset_addr; - __u64 offset = 0; - - ret = ib_copy_from_udata(&offset_addr, udata, - sizeof(offset_addr)); - if (ret) - goto bail_free; - udata->outbuf = - (void __user *) (unsigned long) offset_addr; - ret = ib_copy_to_udata(udata, &offset, - sizeof(offset)); - if (ret) - goto bail_free; - } - - spin_lock_irq(&srq->rq.lock); - /* - * validate head pointer value and compute - * the number of remaining WQEs. - */ - owq = srq->rq.wq; - head = owq->head; - if (head >= srq->rq.size) - head = 0; - tail = owq->tail; - if (tail >= srq->rq.size) - tail = 0; - n = head; - if (n < tail) - n += srq->rq.size - tail; - else - n -= tail; - if (size <= n) { - ret = -EINVAL; - goto bail_unlock; - } - n = 0; - p = wq->wq; - while (tail != head) { - struct ipath_rwqe *wqe; - int i; - - wqe = get_rwqe_ptr(&srq->rq, tail); - p->wr_id = wqe->wr_id; - p->num_sge = wqe->num_sge; - for (i = 0; i < wqe->num_sge; i++) - p->sg_list[i] = wqe->sg_list[i]; - n++; - p = (struct ipath_rwqe *)((char *) p + sz); - if (++tail >= srq->rq.size) - tail = 0; - } - srq->rq.wq = wq; - srq->rq.size = size; - wq->head = n; - wq->tail = 0; - if (attr_mask & IB_SRQ_LIMIT) - srq->limit = attr->srq_limit; - spin_unlock_irq(&srq->rq.lock); - - vfree(owq); - - if (srq->ip) { - struct ipath_mmap_info *ip = srq->ip; - struct ipath_ibdev *dev = to_idev(srq->ibsrq.device); - u32 s = sizeof(struct ipath_rwq) + size * sz; - - ipath_update_mmap_info(dev, ip, s, wq); - - /* - * Return the offset to mmap. - * See ipath_mmap() for details. 
- */ - if (udata && udata->inlen >= sizeof(__u64)) { - ret = ib_copy_to_udata(udata, &ip->offset, - sizeof(ip->offset)); - if (ret) - goto bail; - } - - spin_lock_irq(&dev->pending_lock); - if (list_empty(&ip->pending_mmaps)) - list_add(&ip->pending_mmaps, - &dev->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); - } - } else if (attr_mask & IB_SRQ_LIMIT) { - spin_lock_irq(&srq->rq.lock); - if (attr->srq_limit >= srq->rq.size) - ret = -EINVAL; - else - srq->limit = attr->srq_limit; - spin_unlock_irq(&srq->rq.lock); - } - goto bail; - -bail_unlock: - spin_unlock_irq(&srq->rq.lock); -bail_free: - vfree(wq); -bail: - return ret; -} - -int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) -{ - struct ipath_srq *srq = to_isrq(ibsrq); - - attr->max_wr = srq->rq.size - 1; - attr->max_sge = srq->rq.max_sge; - attr->srq_limit = srq->limit; - return 0; -} - -/** - * ipath_destroy_srq - destroy a shared receive queue - * @ibsrq: the SRQ to destroy - */ -int ipath_destroy_srq(struct ib_srq *ibsrq) -{ - struct ipath_srq *srq = to_isrq(ibsrq); - struct ipath_ibdev *dev = to_idev(ibsrq->device); - - spin_lock(&dev->n_srqs_lock); - dev->n_srqs_allocated--; - spin_unlock(&dev->n_srqs_lock); - if (srq->ip) - kref_put(&srq->ip->ref, ipath_release_mmap_info); - else - vfree(srq->rq.wq); - kfree(srq); - - return 0; -} diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c deleted file mode 100644 index f63e143e3..000000000 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ /dev/null @@ -1,347 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ipath_kernel.h" - -struct infinipath_stats ipath_stats; - -/** - * ipath_snap_cntr - snapshot a chip counter - * @dd: the infinipath device - * @creg: the counter to snapshot - * - * called from add_timer and user counter read calls, to deal with - * counters that wrap in "human time". The words sent and received, and - * the packets sent and received are all that we worry about. For now, - * at least, we don't worry about error counters, because if they wrap - * that quickly, we probably don't care. We may eventually just make this - * handle all the counters. word counters can wrap in about 20 seconds - * of full bandwidth traffic, packet counters in a few hours. 
- */ - -u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg) -{ - u32 val, reg64 = 0; - u64 val64; - unsigned long t0, t1; - u64 ret; - - t0 = jiffies; - /* If fast increment counters are only 32 bits, snapshot them, - * and maintain them as 64bit values in the driver */ - if (!(dd->ipath_flags & IPATH_32BITCOUNTERS) && - (creg == dd->ipath_cregs->cr_wordsendcnt || - creg == dd->ipath_cregs->cr_wordrcvcnt || - creg == dd->ipath_cregs->cr_pktsendcnt || - creg == dd->ipath_cregs->cr_pktrcvcnt)) { - val64 = ipath_read_creg(dd, creg); - val = val64 == ~0ULL ? ~0U : 0; - reg64 = 1; - } else /* val64 just to keep gcc quiet... */ - val64 = val = ipath_read_creg32(dd, creg); - /* - * See if a second has passed. This is just a way to detect things - * that are quite broken. Normally this should take just a few - * cycles (the check is for long enough that we don't care if we get - * pre-empted.) An Opteron HT O read timeout is 4 seconds with - * normal NB values - */ - t1 = jiffies; - if (time_before(t0 + HZ, t1) && val == -1) { - ipath_dev_err(dd, "Error! 
Read counter 0x%x timed out\n", - creg); - ret = 0ULL; - goto bail; - } - if (reg64) { - ret = val64; - goto bail; - } - - if (creg == dd->ipath_cregs->cr_wordsendcnt) { - if (val != dd->ipath_lastsword) { - dd->ipath_sword += val - dd->ipath_lastsword; - dd->ipath_lastsword = val; - } - val64 = dd->ipath_sword; - } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) { - if (val != dd->ipath_lastrword) { - dd->ipath_rword += val - dd->ipath_lastrword; - dd->ipath_lastrword = val; - } - val64 = dd->ipath_rword; - } else if (creg == dd->ipath_cregs->cr_pktsendcnt) { - if (val != dd->ipath_lastspkts) { - dd->ipath_spkts += val - dd->ipath_lastspkts; - dd->ipath_lastspkts = val; - } - val64 = dd->ipath_spkts; - } else if (creg == dd->ipath_cregs->cr_pktrcvcnt) { - if (val != dd->ipath_lastrpkts) { - dd->ipath_rpkts += val - dd->ipath_lastrpkts; - dd->ipath_lastrpkts = val; - } - val64 = dd->ipath_rpkts; - } else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) { - if (dd->ibdeltainprog) - val64 -= val64 - dd->ibsymsnap; - val64 -= dd->ibsymdelta; - } else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) { - if (dd->ibdeltainprog) - val64 -= val64 - dd->iblnkerrsnap; - val64 -= dd->iblnkerrdelta; - } else - val64 = (u64) val; - - ret = val64; - -bail: - return ret; -} - -/** - * ipath_qcheck - print delta of egrfull/hdrqfull errors for kernel ports - * @dd: the infinipath device - * - * print the delta of egrfull/hdrqfull errors for kernel ports no more than - * every 5 seconds. User processes are printed at close, but kernel doesn't - * close, so... 
Separate routine so may call from other places someday, and - * so function name when printed by _IPATH_INFO is meaningfull - */ -static void ipath_qcheck(struct ipath_devdata *dd) -{ - static u64 last_tot_hdrqfull; - struct ipath_portdata *pd = dd->ipath_pd[0]; - size_t blen = 0; - char buf[128]; - u32 hdrqtail; - - *buf = 0; - if (pd->port_hdrqfull != dd->ipath_p0_hdrqfull) { - blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u", - pd->port_hdrqfull - - dd->ipath_p0_hdrqfull); - dd->ipath_p0_hdrqfull = pd->port_hdrqfull; - } - if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) { - blen += snprintf(buf + blen, sizeof buf - blen, - "%srcvegrfull %llu", - blen ? ", " : "", - (unsigned long long) - (ipath_stats.sps_etidfull - - dd->ipath_last_tidfull)); - dd->ipath_last_tidfull = ipath_stats.sps_etidfull; - } - - /* - * this is actually the number of hdrq full interrupts, not actual - * events, but at the moment that's mostly what I'm interested in. - * Actual count, etc. is in the counters, if needed. For production - * users this won't ordinarily be printed. - */ - - if ((ipath_debug & (__IPATH_PKTDBG | __IPATH_DBG)) && - ipath_stats.sps_hdrqfull != last_tot_hdrqfull) { - blen += snprintf(buf + blen, sizeof buf - blen, - "%shdrqfull %llu (all ports)", - blen ? ", " : "", - (unsigned long long) - (ipath_stats.sps_hdrqfull - - last_tot_hdrqfull)); - last_tot_hdrqfull = ipath_stats.sps_hdrqfull; - } - if (blen) - ipath_dbg("%s\n", buf); - - hdrqtail = ipath_get_hdrqtail(pd); - if (pd->port_head != hdrqtail) { - if (dd->ipath_lastport0rcv_cnt == - ipath_stats.sps_port0pkts) { - ipath_cdbg(PKT, "missing rcv interrupts? 
" - "port0 hd=%x tl=%x; port0pkts %llx; write" - " hd (w/intr)\n", - pd->port_head, hdrqtail, - (unsigned long long) - ipath_stats.sps_port0pkts); - ipath_write_ureg(dd, ur_rcvhdrhead, hdrqtail | - dd->ipath_rhdrhead_intr_off, pd->port_port); - } - dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts; - } -} - -static void ipath_chk_errormask(struct ipath_devdata *dd) -{ - static u32 fixed; - u32 ctrl; - unsigned long errormask; - unsigned long hwerrs; - - if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED)) - return; - - errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask); - - if (errormask == dd->ipath_errormask) - return; - fixed++; - - hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); - ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); - - ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, - dd->ipath_errormask); - - if ((hwerrs & dd->ipath_hwerrmask) || - (ctrl & INFINIPATH_C_FREEZEMODE)) { - /* force re-interrupt of pending events, just in case */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); - dev_info(&dd->pcidev->dev, - "errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n", - fixed, errormask, (unsigned long)dd->ipath_errormask, - ctrl, hwerrs); - } else - ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n", - fixed, errormask, - (unsigned long)dd->ipath_errormask); -} - - -/** - * ipath_get_faststats - get word counters from chip before they overflow - * @opaque - contains a pointer to the infinipath device ipath_devdata - * - * called from add_timer - */ -void ipath_get_faststats(unsigned long opaque) -{ - struct ipath_devdata *dd = (struct ipath_devdata *) opaque; - int i; - static unsigned cnt; - unsigned long flags; - u64 traffic_wds; - - /* - * don't access the chip while running diags, or memory diags can - * fail - */ - if (!dd->ipath_kregbase || 
!(dd->ipath_flags & IPATH_INITTED) || - ipath_diag_inuse) - /* but re-arm the timer, for diags case; won't hurt other */ - goto done; - - /* - * We now try to maintain a "active timer", based on traffic - * exceeding a threshold, so we need to check the word-counts - * even if they are 64-bit. - */ - traffic_wds = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); - spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); - traffic_wds -= dd->ipath_traffic_wds; - dd->ipath_traffic_wds += traffic_wds; - if (traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD) - atomic_add(5, &dd->ipath_active_time); /* S/B #define */ - spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); - - if (dd->ipath_flags & IPATH_32BITCOUNTERS) { - ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); - ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); - } - - ipath_qcheck(dd); - - /* - * deal with repeat error suppression. Doesn't really matter if - * last error was almost a full interval ago, or just a few usecs - * ago; still won't get more than 2 per interval. We may want - * longer intervals for this eventually, could do with mod, counter - * or separate timer. Also see code in ipath_handle_errors() and - * ipath_handle_hwerrors(). - */ - - if (dd->ipath_lasterror) - dd->ipath_lasterror = 0; - if (dd->ipath_lasthwerror) - dd->ipath_lasthwerror = 0; - if (dd->ipath_maskederrs - && time_after(jiffies, dd->ipath_unmasktime)) { - char ebuf[256]; - int iserr; - iserr = ipath_decode_err(dd, ebuf, sizeof ebuf, - dd->ipath_maskederrs); - if (dd->ipath_maskederrs & - ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | - INFINIPATH_E_PKTERRS)) - ipath_dev_err(dd, "Re-enabling masked errors " - "(%s)\n", ebuf); - else { - /* - * rcvegrfull and rcvhdrqfull are "normal", for some - * types of processes (mostly benchmarks) that send - * huge numbers of messages, while not processing - * them. So only complain about these at debug - * level. 
- */ - if (iserr) - ipath_dbg( - "Re-enabling queue full errors (%s)\n", - ebuf); - else - ipath_cdbg(ERRPKT, "Re-enabling packet" - " problem interrupt (%s)\n", ebuf); - } - - /* re-enable masked errors */ - dd->ipath_errormask |= dd->ipath_maskederrs; - ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, - dd->ipath_errormask); - dd->ipath_maskederrs = 0; - } - - /* limit qfull messages to ~one per minute per port */ - if ((++cnt & 0x10)) { - for (i = (int) dd->ipath_cfgports; --i >= 0; ) { - struct ipath_portdata *pd = dd->ipath_pd[i]; - - if (pd && pd->port_lastrcvhdrqtail != -1) - pd->port_lastrcvhdrqtail = -1; - } - } - - ipath_chk_errormask(dd); -done: - mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5); -} diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c deleted file mode 100644 index 75558f33f..000000000 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ /dev/null @@ -1,1238 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/ctype.h> -#include <linux/stat.h> - -#include "ipath_kernel.h" -#include "ipath_verbs.h" -#include "ipath_common.h" - -/** - * ipath_parse_ushort - parse an unsigned short value in an arbitrary base - * @str: the string containing the number - * @valp: where to put the result - * - * returns the number of bytes consumed, or negative value on error - */ -int ipath_parse_ushort(const char *str, unsigned short *valp) -{ - unsigned long val; - char *end; - int ret; - - if (!isdigit(str[0])) { - ret = -EINVAL; - goto bail; - } - - val = simple_strtoul(str, &end, 0); - - if (val > 0xffff) { - ret = -EINVAL; - goto bail; - } - - *valp = val; - - ret = end + 1 - str; - if (ret == 0) - ret = -EINVAL; - -bail: - return ret; -} - -static ssize_t show_version(struct device_driver *dev, char *buf) -{ - /* The string printed here is already newline-terminated. 
*/ - return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version); -} - -static ssize_t show_num_units(struct device_driver *dev, char *buf) -{ - return scnprintf(buf, PAGE_SIZE, "%d\n", - ipath_count_units(NULL, NULL, NULL)); -} - -static ssize_t show_status(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - ssize_t ret; - - if (!dd->ipath_statusp) { - ret = -EINVAL; - goto bail; - } - - ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n", - (unsigned long long) *(dd->ipath_statusp)); - -bail: - return ret; -} - -static const char *ipath_status_str[] = { - "Initted", - "Disabled", - "Admin_Disabled", - "", /* This used to be the old "OIB_SMA" status. */ - "", /* This used to be the old "SMA" status. */ - "Present", - "IB_link_up", - "IB_configured", - "NoIBcable", - "Fatal_Hardware_Error", - NULL, -}; - -static ssize_t show_status_str(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int i, any; - u64 s; - ssize_t ret; - - if (!dd->ipath_statusp) { - ret = -EINVAL; - goto bail; - } - - s = *(dd->ipath_statusp); - *buf = '\0'; - for (any = i = 0; s && ipath_status_str[i]; i++) { - if (s & 1) { - if (any && strlcat(buf, " ", PAGE_SIZE) >= - PAGE_SIZE) - /* overflow */ - break; - if (strlcat(buf, ipath_status_str[i], - PAGE_SIZE) >= PAGE_SIZE) - break; - any = 1; - } - s >>= 1; - } - if (any) - strlcat(buf, "\n", PAGE_SIZE); - - ret = strlen(buf); - -bail: - return ret; -} - -static ssize_t show_boardversion(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - /* The string printed here is already newline-terminated. 
*/ - return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion); -} - -static ssize_t show_localbus_info(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_lbus_info); -} - -static ssize_t show_lmc(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_lmc); -} - -static ssize_t store_lmc(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - u16 lmc = 0; - int ret; - - ret = ipath_parse_ushort(buf, &lmc); - if (ret < 0) - goto invalid; - - if (lmc > 7) { - ret = -EINVAL; - goto invalid; - } - - ipath_set_lid(dd, dd->ipath_lid, lmc); - - goto bail; -invalid: - ipath_dev_err(dd, "attempt to set invalid LMC %u\n", lmc); -bail: - return ret; -} - -static ssize_t show_lid(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_lid); -} - -static ssize_t store_lid(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - u16 lid = 0; - int ret; - - ret = ipath_parse_ushort(buf, &lid); - if (ret < 0) - goto invalid; - - if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) { - ret = -EINVAL; - goto invalid; - } - - ipath_set_lid(dd, lid, dd->ipath_lmc); - - goto bail; -invalid: - ipath_dev_err(dd, "attempt to set invalid LID 0x%x\n", lid); -bail: - return ret; -} - -static ssize_t show_mlid(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_mlid); -} - -static 
ssize_t store_mlid(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - u16 mlid; - int ret; - - ret = ipath_parse_ushort(buf, &mlid); - if (ret < 0 || mlid < IPATH_MULTICAST_LID_BASE) - goto invalid; - - dd->ipath_mlid = mlid; - - goto bail; -invalid: - ipath_dev_err(dd, "attempt to set invalid MLID\n"); -bail: - return ret; -} - -static ssize_t show_guid(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - u8 *guid; - - guid = (u8 *) & (dd->ipath_guid); - - return scnprintf(buf, PAGE_SIZE, - "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n", - guid[0], guid[1], guid[2], guid[3], - guid[4], guid[5], guid[6], guid[7]); -} - -static ssize_t store_guid(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - ssize_t ret; - unsigned short guid[8]; - __be64 new_guid; - u8 *ng; - int i; - - if (sscanf(buf, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx", - &guid[0], &guid[1], &guid[2], &guid[3], - &guid[4], &guid[5], &guid[6], &guid[7]) != 8) - goto invalid; - - ng = (u8 *) &new_guid; - - for (i = 0; i < 8; i++) { - if (guid[i] > 0xff) - goto invalid; - ng[i] = guid[i]; - } - - if (new_guid == 0) - goto invalid; - - dd->ipath_guid = new_guid; - dd->ipath_nguid = 1; - if (dd->verbs_dev) - dd->verbs_dev->ibdev.node_guid = new_guid; - - ret = strlen(buf); - goto bail; - -invalid: - ipath_dev_err(dd, "attempt to set invalid GUID\n"); - ret = -EINVAL; - -bail: - return ret; -} - -static ssize_t show_nguid(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid); -} - -static ssize_t show_nports(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - - /* Return the 
number of user ports available. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1); -} - -static ssize_t show_serial(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - - buf[sizeof dd->ipath_serial] = '\0'; - memcpy(buf, dd->ipath_serial, sizeof dd->ipath_serial); - strcat(buf, "\n"); - return strlen(buf); -} - -static ssize_t show_unit(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_unit); -} - -static ssize_t show_jint_max_packets(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - - return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_max_packets); -} - -static ssize_t store_jint_max_packets(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - u16 v = 0; - int ret; - - ret = ipath_parse_ushort(buf, &v); - if (ret < 0) - ipath_dev_err(dd, "invalid jint_max_packets.\n"); - else - dd->ipath_f_config_jint(dd, dd->ipath_jint_idle_ticks, v); - - return ret; -} - -static ssize_t show_jint_idle_ticks(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - - return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_idle_ticks); -} - -static ssize_t store_jint_idle_ticks(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - u16 v = 0; - int ret; - - ret = ipath_parse_ushort(buf, &v); - if (ret < 0) - ipath_dev_err(dd, "invalid jint_idle_ticks.\n"); - else - dd->ipath_f_config_jint(dd, v, dd->ipath_jint_max_packets); - - return ret; -} - -#define DEVICE_COUNTER(name, attr) \ - static ssize_t show_counter_##name(struct device *dev, \ - struct 
device_attribute *attr, \ - char *buf) \ - { \ - struct ipath_devdata *dd = dev_get_drvdata(dev); \ - return scnprintf(\ - buf, PAGE_SIZE, "%llu\n", (unsigned long long) \ - ipath_snap_cntr( \ - dd, offsetof(struct infinipath_counters, \ - attr) / sizeof(u64))); \ - } \ - static DEVICE_ATTR(name, S_IRUGO, show_counter_##name, NULL); - -DEVICE_COUNTER(ib_link_downeds, IBLinkDownedCnt); -DEVICE_COUNTER(ib_link_err_recoveries, IBLinkErrRecoveryCnt); -DEVICE_COUNTER(ib_status_changes, IBStatusChangeCnt); -DEVICE_COUNTER(ib_symbol_errs, IBSymbolErrCnt); -DEVICE_COUNTER(lb_flow_stalls, LBFlowStallCnt); -DEVICE_COUNTER(lb_ints, LBIntCnt); -DEVICE_COUNTER(rx_bad_formats, RxBadFormatCnt); -DEVICE_COUNTER(rx_buf_ovfls, RxBufOvflCnt); -DEVICE_COUNTER(rx_data_pkts, RxDataPktCnt); -DEVICE_COUNTER(rx_dropped_pkts, RxDroppedPktCnt); -DEVICE_COUNTER(rx_dwords, RxDwordCnt); -DEVICE_COUNTER(rx_ebps, RxEBPCnt); -DEVICE_COUNTER(rx_flow_ctrl_errs, RxFlowCtrlErrCnt); -DEVICE_COUNTER(rx_flow_pkts, RxFlowPktCnt); -DEVICE_COUNTER(rx_icrc_errs, RxICRCErrCnt); -DEVICE_COUNTER(rx_len_errs, RxLenErrCnt); -DEVICE_COUNTER(rx_link_problems, RxLinkProblemCnt); -DEVICE_COUNTER(rx_lpcrc_errs, RxLPCRCErrCnt); -DEVICE_COUNTER(rx_max_min_len_errs, RxMaxMinLenErrCnt); -DEVICE_COUNTER(rx_p0_hdr_egr_ovfls, RxP0HdrEgrOvflCnt); -DEVICE_COUNTER(rx_p1_hdr_egr_ovfls, RxP1HdrEgrOvflCnt); -DEVICE_COUNTER(rx_p2_hdr_egr_ovfls, RxP2HdrEgrOvflCnt); -DEVICE_COUNTER(rx_p3_hdr_egr_ovfls, RxP3HdrEgrOvflCnt); -DEVICE_COUNTER(rx_p4_hdr_egr_ovfls, RxP4HdrEgrOvflCnt); -DEVICE_COUNTER(rx_p5_hdr_egr_ovfls, RxP5HdrEgrOvflCnt); -DEVICE_COUNTER(rx_p6_hdr_egr_ovfls, RxP6HdrEgrOvflCnt); -DEVICE_COUNTER(rx_p7_hdr_egr_ovfls, RxP7HdrEgrOvflCnt); -DEVICE_COUNTER(rx_p8_hdr_egr_ovfls, RxP8HdrEgrOvflCnt); -DEVICE_COUNTER(rx_pkey_mismatches, RxPKeyMismatchCnt); -DEVICE_COUNTER(rx_tid_full_errs, RxTIDFullErrCnt); -DEVICE_COUNTER(rx_tid_valid_errs, RxTIDValidErrCnt); -DEVICE_COUNTER(rx_vcrc_errs, RxVCRCErrCnt); -DEVICE_COUNTER(tx_data_pkts, 
TxDataPktCnt); -DEVICE_COUNTER(tx_dropped_pkts, TxDroppedPktCnt); -DEVICE_COUNTER(tx_dwords, TxDwordCnt); -DEVICE_COUNTER(tx_flow_pkts, TxFlowPktCnt); -DEVICE_COUNTER(tx_flow_stalls, TxFlowStallCnt); -DEVICE_COUNTER(tx_len_errs, TxLenErrCnt); -DEVICE_COUNTER(tx_max_min_len_errs, TxMaxMinLenErrCnt); -DEVICE_COUNTER(tx_underruns, TxUnderrunCnt); -DEVICE_COUNTER(tx_unsup_vl_errs, TxUnsupVLErrCnt); - -static struct attribute *dev_counter_attributes[] = { - &dev_attr_ib_link_downeds.attr, - &dev_attr_ib_link_err_recoveries.attr, - &dev_attr_ib_status_changes.attr, - &dev_attr_ib_symbol_errs.attr, - &dev_attr_lb_flow_stalls.attr, - &dev_attr_lb_ints.attr, - &dev_attr_rx_bad_formats.attr, - &dev_attr_rx_buf_ovfls.attr, - &dev_attr_rx_data_pkts.attr, - &dev_attr_rx_dropped_pkts.attr, - &dev_attr_rx_dwords.attr, - &dev_attr_rx_ebps.attr, - &dev_attr_rx_flow_ctrl_errs.attr, - &dev_attr_rx_flow_pkts.attr, - &dev_attr_rx_icrc_errs.attr, - &dev_attr_rx_len_errs.attr, - &dev_attr_rx_link_problems.attr, - &dev_attr_rx_lpcrc_errs.attr, - &dev_attr_rx_max_min_len_errs.attr, - &dev_attr_rx_p0_hdr_egr_ovfls.attr, - &dev_attr_rx_p1_hdr_egr_ovfls.attr, - &dev_attr_rx_p2_hdr_egr_ovfls.attr, - &dev_attr_rx_p3_hdr_egr_ovfls.attr, - &dev_attr_rx_p4_hdr_egr_ovfls.attr, - &dev_attr_rx_p5_hdr_egr_ovfls.attr, - &dev_attr_rx_p6_hdr_egr_ovfls.attr, - &dev_attr_rx_p7_hdr_egr_ovfls.attr, - &dev_attr_rx_p8_hdr_egr_ovfls.attr, - &dev_attr_rx_pkey_mismatches.attr, - &dev_attr_rx_tid_full_errs.attr, - &dev_attr_rx_tid_valid_errs.attr, - &dev_attr_rx_vcrc_errs.attr, - &dev_attr_tx_data_pkts.attr, - &dev_attr_tx_dropped_pkts.attr, - &dev_attr_tx_dwords.attr, - &dev_attr_tx_flow_pkts.attr, - &dev_attr_tx_flow_stalls.attr, - &dev_attr_tx_len_errs.attr, - &dev_attr_tx_max_min_len_errs.attr, - &dev_attr_tx_underruns.attr, - &dev_attr_tx_unsup_vl_errs.attr, - NULL -}; - -static struct attribute_group dev_counter_attr_group = { - .name = "counters", - .attrs = dev_counter_attributes -}; - -static ssize_t 
store_reset(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - - if (count < 5 || memcmp(buf, "reset", 5)) { - ret = -EINVAL; - goto bail; - } - - if (dd->ipath_flags & IPATH_DISABLED) { - /* - * post-reset init would re-enable interrupts, etc. - * so don't allow reset on disabled devices. Not - * perfect error, but about the best choice. - */ - dev_info(dev,"Unit %d is disabled, can't reset\n", - dd->ipath_unit); - ret = -EINVAL; - goto bail; - } - ret = ipath_reset_device(dd->ipath_unit); -bail: - return ret<0 ? ret : count; -} - -static ssize_t store_link_state(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret, r; - u16 state; - - ret = ipath_parse_ushort(buf, &state); - if (ret < 0) - goto invalid; - - r = ipath_set_linkstate(dd, state); - if (r < 0) { - ret = r; - goto bail; - } - - goto bail; -invalid: - ipath_dev_err(dd, "attempt to set invalid link state\n"); -bail: - return ret; -} - -static ssize_t show_mtu(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_ibmtu); -} - -static ssize_t store_mtu(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - ssize_t ret; - u16 mtu = 0; - int r; - - ret = ipath_parse_ushort(buf, &mtu); - if (ret < 0) - goto invalid; - - r = ipath_set_mtu(dd, mtu); - if (r < 0) - ret = r; - - goto bail; -invalid: - ipath_dev_err(dd, "attempt to set invalid MTU\n"); -bail: - return ret; -} - -static ssize_t show_enabled(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - return scnprintf(buf, PAGE_SIZE, "%u\n", - (dd->ipath_flags & IPATH_DISABLED) ? 
0 : 1); -} - -static ssize_t store_enabled(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - ssize_t ret; - u16 enable = 0; - - ret = ipath_parse_ushort(buf, &enable); - if (ret < 0) { - ipath_dev_err(dd, "attempt to use non-numeric on enable\n"); - goto bail; - } - - if (enable) { - if (!(dd->ipath_flags & IPATH_DISABLED)) - goto bail; - - dev_info(dev, "Enabling unit %d\n", dd->ipath_unit); - /* same as post-reset */ - ret = ipath_init_chip(dd, 1); - if (ret) - ipath_dev_err(dd, "Failed to enable unit %d\n", - dd->ipath_unit); - else { - dd->ipath_flags &= ~IPATH_DISABLED; - *dd->ipath_statusp &= ~IPATH_STATUS_ADMIN_DISABLED; - } - } - else if (!(dd->ipath_flags & IPATH_DISABLED)) { - dev_info(dev, "Disabling unit %d\n", dd->ipath_unit); - ipath_shutdown_device(dd); - dd->ipath_flags |= IPATH_DISABLED; - *dd->ipath_statusp |= IPATH_STATUS_ADMIN_DISABLED; - } - -bail: - return ret; -} - -static ssize_t store_rx_pol_inv(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret, r; - u16 val; - - ret = ipath_parse_ushort(buf, &val); - if (ret < 0) - goto invalid; - - r = ipath_set_rx_pol_inv(dd, val); - if (r < 0) { - ret = r; - goto bail; - } - - goto bail; -invalid: - ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n"); -bail: - return ret; -} - -static ssize_t store_led_override(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - u16 val; - - ret = ipath_parse_ushort(buf, &val); - if (ret > 0) - ipath_set_led_override(dd, val); - else - ipath_dev_err(dd, "attempt to set invalid LED override\n"); - return ret; -} - -static ssize_t show_logged_errs(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); 
- int idx, count; - - /* force consistency with actual EEPROM */ - if (ipath_update_eeprom_log(dd) != 0) - return -ENXIO; - - count = 0; - for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) { - count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c", - dd->ipath_eep_st_errs[idx], - idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' '); - } - - return count; -} - -/* - * New sysfs entries to control various IB config. These all turn into - * accesses via ipath_f_get/set_ib_cfg. - * - * Get/Set heartbeat enable. Or of 1=enabled, 2=auto - */ -static ssize_t show_hrtbt_enb(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - - ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_HRTBT); - if (ret >= 0) - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; -} - -static ssize_t store_hrtbt_enb(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret, r; - u16 val; - - ret = ipath_parse_ushort(buf, &val); - if (ret >= 0 && val > 3) - ret = -EINVAL; - if (ret < 0) { - ipath_dev_err(dd, "attempt to set invalid Heartbeat enable\n"); - goto bail; - } - - /* - * Set the "intentional" heartbeat enable per either of - * "Enable" and "Auto", as these are normally set together. - * This bit is consulted when leaving loopback mode, - * because entering loopback mode overrides it and automatically - * disables heartbeat. - */ - r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, val); - if (r < 0) - ret = r; - else if (val == IPATH_IB_HRTBT_OFF) - dd->ipath_flags |= IPATH_NO_HRTBT; - else - dd->ipath_flags &= ~IPATH_NO_HRTBT; - -bail: - return ret; -} - -/* - * Get/Set Link-widths enabled. 
Or of 1=1x, 2=4x (this is human/IB centric, - * _not_ the particular encoding of any given chip) - */ -static ssize_t show_lwid_enb(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - - ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB); - if (ret >= 0) - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; -} - -static ssize_t store_lwid_enb(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret, r; - u16 val; - - ret = ipath_parse_ushort(buf, &val); - if (ret >= 0 && (val == 0 || val > 3)) - ret = -EINVAL; - if (ret < 0) { - ipath_dev_err(dd, - "attempt to set invalid Link Width (enable)\n"); - goto bail; - } - - r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, val); - if (r < 0) - ret = r; - -bail: - return ret; -} - -/* Get current link width */ -static ssize_t show_lwid(struct device *dev, - struct device_attribute *attr, - char *buf) - -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - - ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID); - if (ret >= 0) - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; -} - -/* - * Get/Set Link-speeds enabled. Or of 1=SDR 2=DDR. 
- */ -static ssize_t show_spd_enb(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - - ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB); - if (ret >= 0) - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; -} - -static ssize_t store_spd_enb(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret, r; - u16 val; - - ret = ipath_parse_ushort(buf, &val); - if (ret >= 0 && (val == 0 || val > (IPATH_IB_SDR | IPATH_IB_DDR))) - ret = -EINVAL; - if (ret < 0) { - ipath_dev_err(dd, - "attempt to set invalid Link Speed (enable)\n"); - goto bail; - } - - r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, val); - if (r < 0) - ret = r; - -bail: - return ret; -} - -/* Get current link speed */ -static ssize_t show_spd(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - - ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD); - if (ret >= 0) - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; -} - -/* - * Get/Set RX polarity-invert enable. 0=no, 1=yes. 
- */ -static ssize_t show_rx_polinv_enb(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - - ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB); - if (ret >= 0) - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; -} - -static ssize_t store_rx_polinv_enb(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret, r; - u16 val; - - ret = ipath_parse_ushort(buf, &val); - if (ret >= 0 && val > 1) { - ipath_dev_err(dd, - "attempt to set invalid Rx Polarity (enable)\n"); - ret = -EINVAL; - goto bail; - } - - r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB, val); - if (r < 0) - ret = r; - -bail: - return ret; -} - -/* - * Get/Set RX lane-reversal enable. 0=no, 1=yes. - */ -static ssize_t show_lanerev_enb(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - - ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB); - if (ret >= 0) - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; -} - -static ssize_t store_lanerev_enb(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret, r; - u16 val; - - ret = ipath_parse_ushort(buf, &val); - if (ret >= 0 && val > 1) { - ret = -EINVAL; - ipath_dev_err(dd, - "attempt to set invalid Lane reversal (enable)\n"); - goto bail; - } - - r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB, val); - if (r < 0) - ret = r; - -bail: - return ret; -} - -static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL); -static DRIVER_ATTR(version, S_IRUGO, show_version, NULL); - -static struct attribute *driver_attributes[] = { - &driver_attr_num_units.attr, - &driver_attr_version.attr, - NULL -}; - -static struct attribute_group driver_attr_group = { - .attrs = 
driver_attributes -}; - -static ssize_t store_tempsense(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret, stat; - u16 val; - - ret = ipath_parse_ushort(buf, &val); - if (ret <= 0) { - ipath_dev_err(dd, "attempt to set invalid tempsense config\n"); - goto bail; - } - /* If anything but the highest limit, enable T_CRIT_A "interrupt" */ - stat = ipath_tempsense_write(dd, 9, (val == 0x7f7f) ? 0x80 : 0); - if (stat) { - ipath_dev_err(dd, "Unable to set tempsense config\n"); - ret = -1; - goto bail; - } - stat = ipath_tempsense_write(dd, 0xB, (u8) (val & 0xFF)); - if (stat) { - ipath_dev_err(dd, "Unable to set local Tcrit\n"); - ret = -1; - goto bail; - } - stat = ipath_tempsense_write(dd, 0xD, (u8) (val >> 8)); - if (stat) { - ipath_dev_err(dd, "Unable to set remote Tcrit\n"); - ret = -1; - goto bail; - } - -bail: - return ret; -} - -/* - * dump tempsense regs. in decimal, to ease shell-scripts. 
- */ -static ssize_t show_tempsense(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - int idx; - u8 regvals[8]; - - ret = -ENXIO; - for (idx = 0; idx < 8; ++idx) { - if (idx == 6) - continue; - ret = ipath_tempsense_read(dd, idx); - if (ret < 0) - break; - regvals[idx] = ret; - } - if (idx == 8) - ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n", - *(signed char *)(regvals), - *(signed char *)(regvals + 1), - regvals[2], regvals[3], - *(signed char *)(regvals + 5), - *(signed char *)(regvals + 7)); - return ret; -} - -const struct attribute_group *ipath_driver_attr_groups[] = { - &driver_attr_group, - NULL, -}; - -static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid); -static DEVICE_ATTR(lmc, S_IWUSR | S_IRUGO, show_lmc, store_lmc); -static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid); -static DEVICE_ATTR(link_state, S_IWUSR, NULL, store_link_state); -static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid); -static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu); -static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled); -static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL); -static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL); -static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset); -static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); -static DEVICE_ATTR(status, S_IRUGO, show_status, NULL); -static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL); -static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); -static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL); -static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv); -static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override); -static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL); -static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL); -static DEVICE_ATTR(jint_max_packets, S_IWUSR | S_IRUGO, - 
show_jint_max_packets, store_jint_max_packets); -static DEVICE_ATTR(jint_idle_ticks, S_IWUSR | S_IRUGO, - show_jint_idle_ticks, store_jint_idle_ticks); -static DEVICE_ATTR(tempsense, S_IWUSR | S_IRUGO, - show_tempsense, store_tempsense); - -static struct attribute *dev_attributes[] = { - &dev_attr_guid.attr, - &dev_attr_lmc.attr, - &dev_attr_lid.attr, - &dev_attr_link_state.attr, - &dev_attr_mlid.attr, - &dev_attr_mtu.attr, - &dev_attr_nguid.attr, - &dev_attr_nports.attr, - &dev_attr_serial.attr, - &dev_attr_status.attr, - &dev_attr_status_str.attr, - &dev_attr_boardversion.attr, - &dev_attr_unit.attr, - &dev_attr_enabled.attr, - &dev_attr_rx_pol_inv.attr, - &dev_attr_led_override.attr, - &dev_attr_logged_errors.attr, - &dev_attr_tempsense.attr, - &dev_attr_localbus_info.attr, - NULL -}; - -static struct attribute_group dev_attr_group = { - .attrs = dev_attributes -}; - -static DEVICE_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb, - store_hrtbt_enb); -static DEVICE_ATTR(link_width_enable, S_IWUSR | S_IRUGO, show_lwid_enb, - store_lwid_enb); -static DEVICE_ATTR(link_width, S_IRUGO, show_lwid, NULL); -static DEVICE_ATTR(link_speed_enable, S_IWUSR | S_IRUGO, show_spd_enb, - store_spd_enb); -static DEVICE_ATTR(link_speed, S_IRUGO, show_spd, NULL); -static DEVICE_ATTR(rx_pol_inv_enable, S_IWUSR | S_IRUGO, show_rx_polinv_enb, - store_rx_polinv_enb); -static DEVICE_ATTR(rx_lane_rev_enable, S_IWUSR | S_IRUGO, show_lanerev_enb, - store_lanerev_enb); - -static struct attribute *dev_ibcfg_attributes[] = { - &dev_attr_hrtbt_enable.attr, - &dev_attr_link_width_enable.attr, - &dev_attr_link_width.attr, - &dev_attr_link_speed_enable.attr, - &dev_attr_link_speed.attr, - &dev_attr_rx_pol_inv_enable.attr, - &dev_attr_rx_lane_rev_enable.attr, - NULL -}; - -static struct attribute_group dev_ibcfg_attr_group = { - .attrs = dev_ibcfg_attributes -}; - -/** - * ipath_expose_reset - create a device reset file - * @dev: the device structure - * - * Only expose a file that lets us 
reset the device after someone - * enters diag mode. A device reset is quite likely to crash the - * machine entirely, so we don't want to normally make it - * available. - * - * Called with ipath_mutex held. - */ -int ipath_expose_reset(struct device *dev) -{ - static int exposed; - int ret; - - if (!exposed) { - ret = device_create_file(dev, &dev_attr_reset); - exposed = 1; - } - else - ret = 0; - - return ret; -} - -int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd) -{ - int ret; - - ret = sysfs_create_group(&dev->kobj, &dev_attr_group); - if (ret) - goto bail; - - ret = sysfs_create_group(&dev->kobj, &dev_counter_attr_group); - if (ret) - goto bail_attrs; - - if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) { - ret = device_create_file(dev, &dev_attr_jint_idle_ticks); - if (ret) - goto bail_counter; - ret = device_create_file(dev, &dev_attr_jint_max_packets); - if (ret) - goto bail_idle; - - ret = sysfs_create_group(&dev->kobj, &dev_ibcfg_attr_group); - if (ret) - goto bail_max; - } - - return 0; - -bail_max: - device_remove_file(dev, &dev_attr_jint_max_packets); -bail_idle: - device_remove_file(dev, &dev_attr_jint_idle_ticks); -bail_counter: - sysfs_remove_group(&dev->kobj, &dev_counter_attr_group); -bail_attrs: - sysfs_remove_group(&dev->kobj, &dev_attr_group); -bail: - return ret; -} - -void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd) -{ - sysfs_remove_group(&dev->kobj, &dev_counter_attr_group); - - if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) { - sysfs_remove_group(&dev->kobj, &dev_ibcfg_attr_group); - device_remove_file(dev, &dev_attr_jint_idle_ticks); - device_remove_file(dev, &dev_attr_jint_max_packets); - } - - sysfs_remove_group(&dev->kobj, &dev_attr_group); - - device_remove_file(dev, &dev_attr_reset); -} diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c deleted file mode 100644 index 22e60998f..000000000 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ 
/dev/null @@ -1,547 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ipath_verbs.h" -#include "ipath_kernel.h" - -/* cut down ridiculously long IB macro names */ -#define OP(x) IB_OPCODE_UC_##x - -/** - * ipath_make_uc_req - construct a request packet (SEND, RDMA write) - * @qp: a pointer to the QP - * - * Return 1 if constructed; otherwise, return 0. 
- */ -int ipath_make_uc_req(struct ipath_qp *qp) -{ - struct ipath_other_headers *ohdr; - struct ipath_swqe *wqe; - unsigned long flags; - u32 hwords; - u32 bth0; - u32 len; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); - int ret = 0; - - spin_lock_irqsave(&qp->s_lock, flags); - - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { - if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) - goto bail; - /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) - goto bail; - /* If DMAs are in progress, we can't flush immediately. */ - if (atomic_read(&qp->s_dma_busy)) { - qp->s_flags |= IPATH_S_WAIT_DMA; - goto bail; - } - wqe = get_swqe_ptr(qp, qp->s_last); - ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); - goto done; - } - - ohdr = &qp->s_hdr.u.oth; - if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &qp->s_hdr.u.l.oth; - - /* header size in 32-bit words LRH+BTH = (8+12)/4. */ - hwords = 5; - bth0 = 1 << 22; /* Set M bit */ - - /* Get the next send request. */ - wqe = get_swqe_ptr(qp, qp->s_cur); - qp->s_wqe = NULL; - switch (qp->s_state) { - default: - if (!(ib_ipath_state_ops[qp->state] & - IPATH_PROCESS_NEXT_SEND_OK)) - goto bail; - /* Check if send work queue is empty. */ - if (qp->s_cur == qp->s_head) - goto bail; - /* - * Start a new request. 
- */ - qp->s_psn = wqe->psn = qp->s_next_psn; - qp->s_sge.sge = wqe->sg_list[0]; - qp->s_sge.sg_list = wqe->sg_list + 1; - qp->s_sge.num_sge = wqe->wr.num_sge; - qp->s_len = len = wqe->length; - switch (wqe->wr.opcode) { - case IB_WR_SEND: - case IB_WR_SEND_WITH_IMM: - if (len > pmtu) { - qp->s_state = OP(SEND_FIRST); - len = pmtu; - break; - } - if (wqe->wr.opcode == IB_WR_SEND) - qp->s_state = OP(SEND_ONLY); - else { - qp->s_state = - OP(SEND_ONLY_WITH_IMMEDIATE); - /* Immediate data comes after the BTH */ - ohdr->u.imm_data = wqe->wr.ex.imm_data; - hwords += 1; - } - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= 1 << 23; - qp->s_wqe = wqe; - if (++qp->s_cur >= qp->s_size) - qp->s_cur = 0; - break; - - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); - ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); - ohdr->u.rc.reth.length = cpu_to_be32(len); - hwords += sizeof(struct ib_reth) / 4; - if (len > pmtu) { - qp->s_state = OP(RDMA_WRITE_FIRST); - len = pmtu; - break; - } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE) - qp->s_state = OP(RDMA_WRITE_ONLY); - else { - qp->s_state = - OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); - /* Immediate data comes after the RETH */ - ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; - hwords += 1; - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= 1 << 23; - } - qp->s_wqe = wqe; - if (++qp->s_cur >= qp->s_size) - qp->s_cur = 0; - break; - - default: - goto bail; - } - break; - - case OP(SEND_FIRST): - qp->s_state = OP(SEND_MIDDLE); - /* FALLTHROUGH */ - case OP(SEND_MIDDLE): - len = qp->s_len; - if (len > pmtu) { - len = pmtu; - break; - } - if (wqe->wr.opcode == IB_WR_SEND) - qp->s_state = OP(SEND_LAST); - else { - qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); - /* Immediate data comes after the BTH */ - ohdr->u.imm_data = wqe->wr.ex.imm_data; - hwords += 1; - } - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= 1 << 23; - qp->s_wqe = wqe; - if 
(++qp->s_cur >= qp->s_size) - qp->s_cur = 0; - break; - - case OP(RDMA_WRITE_FIRST): - qp->s_state = OP(RDMA_WRITE_MIDDLE); - /* FALLTHROUGH */ - case OP(RDMA_WRITE_MIDDLE): - len = qp->s_len; - if (len > pmtu) { - len = pmtu; - break; - } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE) - qp->s_state = OP(RDMA_WRITE_LAST); - else { - qp->s_state = - OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); - /* Immediate data comes after the BTH */ - ohdr->u.imm_data = wqe->wr.ex.imm_data; - hwords += 1; - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= 1 << 23; - } - qp->s_wqe = wqe; - if (++qp->s_cur >= qp->s_size) - qp->s_cur = 0; - break; - } - qp->s_len -= len; - qp->s_hdrwords = hwords; - qp->s_cur_sge = &qp->s_sge; - qp->s_cur_size = len; - ipath_make_ruc_header(to_idev(qp->ibqp.device), - qp, ohdr, bth0 | (qp->s_state << 24), - qp->s_next_psn++ & IPATH_PSN_MASK); -done: - ret = 1; - goto unlock; - -bail: - qp->s_flags &= ~IPATH_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); - return ret; -} - -/** - * ipath_uc_rcv - handle an incoming UC packet - * @dev: the device the packet came in on - * @hdr: the header of the packet - * @has_grh: true if the packet has a GRH - * @data: the packet data - * @tlen: the length of the packet - * @qp: the QP for this packet. - * - * This is called from ipath_qp_rcv() to process an incoming UC packet - * for the given QP. - * Called at interrupt level. - */ -void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, - int has_grh, void *data, u32 tlen, struct ipath_qp *qp) -{ - struct ipath_other_headers *ohdr; - int opcode; - u32 hdrsize; - u32 psn; - u32 pad; - struct ib_wc wc; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); - struct ib_reth *reth; - int header_in_data; - - /* Validate the SLID. See Ch. 
9.6.1.5 */ - if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) - goto done; - - /* Check for GRH */ - if (!has_grh) { - ohdr = &hdr->u.oth; - hdrsize = 8 + 12; /* LRH + BTH */ - psn = be32_to_cpu(ohdr->bth[2]); - header_in_data = 0; - } else { - ohdr = &hdr->u.l.oth; - hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */ - /* - * The header with GRH is 60 bytes and the - * core driver sets the eager header buffer - * size to 56 bytes so the last 4 bytes of - * the BTH header (PSN) is in the data buffer. - */ - header_in_data = dev->dd->ipath_rcvhdrentsize == 16; - if (header_in_data) { - psn = be32_to_cpu(((__be32 *) data)[0]); - data += sizeof(__be32); - } else - psn = be32_to_cpu(ohdr->bth[2]); - } - /* - * The opcode is in the low byte when its in network order - * (top byte when in host order). - */ - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - - memset(&wc, 0, sizeof wc); - - /* Compare the PSN verses the expected PSN. */ - if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) { - /* - * Handle a sequence error. - * Silently drop any current message. - */ - qp->r_psn = psn; - inv: - qp->r_state = OP(SEND_LAST); - switch (opcode) { - case OP(SEND_FIRST): - case OP(SEND_ONLY): - case OP(SEND_ONLY_WITH_IMMEDIATE): - goto send_first; - - case OP(RDMA_WRITE_FIRST): - case OP(RDMA_WRITE_ONLY): - case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): - goto rdma_first; - - default: - dev->n_pkt_drops++; - goto done; - } - } - - /* Check for opcode sequence errors. 
*/ - switch (qp->r_state) { - case OP(SEND_FIRST): - case OP(SEND_MIDDLE): - if (opcode == OP(SEND_MIDDLE) || - opcode == OP(SEND_LAST) || - opcode == OP(SEND_LAST_WITH_IMMEDIATE)) - break; - goto inv; - - case OP(RDMA_WRITE_FIRST): - case OP(RDMA_WRITE_MIDDLE): - if (opcode == OP(RDMA_WRITE_MIDDLE) || - opcode == OP(RDMA_WRITE_LAST) || - opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) - break; - goto inv; - - default: - if (opcode == OP(SEND_FIRST) || - opcode == OP(SEND_ONLY) || - opcode == OP(SEND_ONLY_WITH_IMMEDIATE) || - opcode == OP(RDMA_WRITE_FIRST) || - opcode == OP(RDMA_WRITE_ONLY) || - opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) - break; - goto inv; - } - - /* OK, process the packet. */ - switch (opcode) { - case OP(SEND_FIRST): - case OP(SEND_ONLY): - case OP(SEND_ONLY_WITH_IMMEDIATE): - send_first: - if (qp->r_flags & IPATH_R_REUSE_SGE) { - qp->r_flags &= ~IPATH_R_REUSE_SGE; - qp->r_sge = qp->s_rdma_read_sge; - } else if (!ipath_get_rwqe(qp, 0)) { - dev->n_pkt_drops++; - goto done; - } - /* Save the WQE so we can reuse it in case of an error. */ - qp->s_rdma_read_sge = qp->r_sge; - qp->r_rcv_len = 0; - if (opcode == OP(SEND_ONLY)) - goto send_last; - else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE)) - goto send_last_imm; - /* FALLTHROUGH */ - case OP(SEND_MIDDLE): - /* Check for invalid length PMTU or posted rwqe len. 
*/ - if (unlikely(tlen != (hdrsize + pmtu + 4))) { - qp->r_flags |= IPATH_R_REUSE_SGE; - dev->n_pkt_drops++; - goto done; - } - qp->r_rcv_len += pmtu; - if (unlikely(qp->r_rcv_len > qp->r_len)) { - qp->r_flags |= IPATH_R_REUSE_SGE; - dev->n_pkt_drops++; - goto done; - } - ipath_copy_sge(&qp->r_sge, data, pmtu); - break; - - case OP(SEND_LAST_WITH_IMMEDIATE): - send_last_imm: - if (header_in_data) { - wc.ex.imm_data = *(__be32 *) data; - data += sizeof(__be32); - } else { - /* Immediate data comes after BTH */ - wc.ex.imm_data = ohdr->u.imm_data; - } - hdrsize += 4; - wc.wc_flags = IB_WC_WITH_IMM; - /* FALLTHROUGH */ - case OP(SEND_LAST): - send_last: - /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - /* Check for invalid length. */ - /* XXX LAST len should be >= 1 */ - if (unlikely(tlen < (hdrsize + pad + 4))) { - qp->r_flags |= IPATH_R_REUSE_SGE; - dev->n_pkt_drops++; - goto done; - } - /* Don't count the CRC. */ - tlen -= (hdrsize + pad + 4); - wc.byte_len = tlen + qp->r_rcv_len; - if (unlikely(wc.byte_len > qp->r_len)) { - qp->r_flags |= IPATH_R_REUSE_SGE; - dev->n_pkt_drops++; - goto done; - } - wc.opcode = IB_WC_RECV; - last_imm: - ipath_copy_sge(&qp->r_sge, data, tlen); - wc.wr_id = qp->r_wr_id; - wc.status = IB_WC_SUCCESS; - wc.qp = &qp->ibqp; - wc.src_qp = qp->remote_qpn; - wc.slid = qp->remote_ah_attr.dlid; - wc.sl = qp->remote_ah_attr.sl; - /* Signal completion event if the solicited bit is set. 
*/ - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, - (ohdr->bth[0] & - cpu_to_be32(1 << 23)) != 0); - break; - - case OP(RDMA_WRITE_FIRST): - case OP(RDMA_WRITE_ONLY): - case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */ - rdma_first: - /* RETH comes after BTH */ - if (!header_in_data) - reth = &ohdr->u.rc.reth; - else { - reth = (struct ib_reth *)data; - data += sizeof(*reth); - } - hdrsize += sizeof(*reth); - qp->r_len = be32_to_cpu(reth->length); - qp->r_rcv_len = 0; - if (qp->r_len != 0) { - u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); - int ok; - - /* Check rkey */ - ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len, - vaddr, rkey, - IB_ACCESS_REMOTE_WRITE); - if (unlikely(!ok)) { - dev->n_pkt_drops++; - goto done; - } - } else { - qp->r_sge.sg_list = NULL; - qp->r_sge.sge.mr = NULL; - qp->r_sge.sge.vaddr = NULL; - qp->r_sge.sge.length = 0; - qp->r_sge.sge.sge_length = 0; - } - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_WRITE))) { - dev->n_pkt_drops++; - goto done; - } - if (opcode == OP(RDMA_WRITE_ONLY)) - goto rdma_last; - else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) - goto rdma_last_imm; - /* FALLTHROUGH */ - case OP(RDMA_WRITE_MIDDLE): - /* Check for invalid length PMTU or posted rwqe len. */ - if (unlikely(tlen != (hdrsize + pmtu + 4))) { - dev->n_pkt_drops++; - goto done; - } - qp->r_rcv_len += pmtu; - if (unlikely(qp->r_rcv_len > qp->r_len)) { - dev->n_pkt_drops++; - goto done; - } - ipath_copy_sge(&qp->r_sge, data, pmtu); - break; - - case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): - rdma_last_imm: - if (header_in_data) { - wc.ex.imm_data = *(__be32 *) data; - data += sizeof(__be32); - } else { - /* Immediate data comes after BTH */ - wc.ex.imm_data = ohdr->u.imm_data; - } - hdrsize += 4; - wc.wc_flags = IB_WC_WITH_IMM; - - /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - /* Check for invalid length. 
*/ - /* XXX LAST len should be >= 1 */ - if (unlikely(tlen < (hdrsize + pad + 4))) { - dev->n_pkt_drops++; - goto done; - } - /* Don't count the CRC. */ - tlen -= (hdrsize + pad + 4); - if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) { - dev->n_pkt_drops++; - goto done; - } - if (qp->r_flags & IPATH_R_REUSE_SGE) - qp->r_flags &= ~IPATH_R_REUSE_SGE; - else if (!ipath_get_rwqe(qp, 1)) { - dev->n_pkt_drops++; - goto done; - } - wc.byte_len = qp->r_len; - wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; - goto last_imm; - - case OP(RDMA_WRITE_LAST): - rdma_last: - /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - /* Check for invalid length. */ - /* XXX LAST len should be >= 1 */ - if (unlikely(tlen < (hdrsize + pad + 4))) { - dev->n_pkt_drops++; - goto done; - } - /* Don't count the CRC. */ - tlen -= (hdrsize + pad + 4); - if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) { - dev->n_pkt_drops++; - goto done; - } - ipath_copy_sge(&qp->r_sge, data, tlen); - break; - - default: - /* Drop packet for unknown opcodes. */ - dev->n_pkt_drops++; - goto done; - } - qp->r_psn++; - qp->r_state = opcode; -done: - return; -} diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c deleted file mode 100644 index e8a2a9152..000000000 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ /dev/null @@ -1,580 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/sched.h> -#include <rdma/ib_smi.h> - -#include "ipath_verbs.h" -#include "ipath_kernel.h" - -/** - * ipath_ud_loopback - handle send on loopback QPs - * @sqp: the sending QP - * @swqe: the send work request - * - * This is called from ipath_make_ud_req() to forward a WQE addressed - * to the same HCA. - * Note that the receive interrupt handler may be calling ipath_ud_rcv() - * while this is being called. 
- */ -static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) -{ - struct ipath_ibdev *dev = to_idev(sqp->ibqp.device); - struct ipath_qp *qp; - struct ib_ah_attr *ah_attr; - unsigned long flags; - struct ipath_rq *rq; - struct ipath_srq *srq; - struct ipath_sge_state rsge; - struct ipath_sge *sge; - struct ipath_rwq *wq; - struct ipath_rwqe *wqe; - void (*handler)(struct ib_event *, void *); - struct ib_wc wc; - u32 tail; - u32 rlen; - u32 length; - - qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn); - if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { - dev->n_pkt_drops++; - goto done; - } - - /* - * Check that the qkey matches (except for QP0, see 9.6.1.4.1). - * Qkeys with the high order bit set mean use the - * qkey from the QP context instead of the WR (see 10.2.5). - */ - if (unlikely(qp->ibqp.qp_num && - ((int) swqe->wr.wr.ud.remote_qkey < 0 ? - sqp->qkey : swqe->wr.wr.ud.remote_qkey) != qp->qkey)) { - /* XXX OK to lose a count once in a while. */ - dev->qkey_violations++; - dev->n_pkt_drops++; - goto drop; - } - - /* - * A GRH is expected to precede the data even if not - * present on the wire. - */ - length = swqe->length; - memset(&wc, 0, sizeof wc); - wc.byte_len = length + sizeof(struct ib_grh); - - if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) { - wc.wc_flags = IB_WC_WITH_IMM; - wc.ex.imm_data = swqe->wr.ex.imm_data; - } - - /* - * This would be a lot simpler if we could call ipath_get_rwqe() - * but that uses state that the receive interrupt handler uses - * so we would need to lock out receive interrupts while doing - * local loopback. - */ - if (qp->ibqp.srq) { - srq = to_isrq(qp->ibqp.srq); - handler = srq->ibsrq.event_handler; - rq = &srq->rq; - } else { - srq = NULL; - handler = NULL; - rq = &qp->r_rq; - } - - /* - * Get the next work request entry to find where to put the data. 
- * Note that it is safe to drop the lock after changing rq->tail - * since ipath_post_receive() won't fill the empty slot. - */ - spin_lock_irqsave(&rq->lock, flags); - wq = rq->wq; - tail = wq->tail; - /* Validate tail before using it since it is user writable. */ - if (tail >= rq->size) - tail = 0; - if (unlikely(tail == wq->head)) { - spin_unlock_irqrestore(&rq->lock, flags); - dev->n_pkt_drops++; - goto drop; - } - wqe = get_rwqe_ptr(rq, tail); - rsge.sg_list = qp->r_ud_sg_list; - if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) { - spin_unlock_irqrestore(&rq->lock, flags); - dev->n_pkt_drops++; - goto drop; - } - /* Silently drop packets which are too big. */ - if (wc.byte_len > rlen) { - spin_unlock_irqrestore(&rq->lock, flags); - dev->n_pkt_drops++; - goto drop; - } - if (++tail >= rq->size) - tail = 0; - wq->tail = tail; - wc.wr_id = wqe->wr_id; - if (handler) { - u32 n; - - /* - * validate head pointer value and compute - * the number of remaining WQEs. - */ - n = wq->head; - if (n >= rq->size) - n = 0; - if (n < tail) - n += rq->size - tail; - else - n -= tail; - if (n < srq->limit) { - struct ib_event ev; - - srq->limit = 0; - spin_unlock_irqrestore(&rq->lock, flags); - ev.device = qp->ibqp.device; - ev.element.srq = qp->ibqp.srq; - ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - handler(&ev, srq->ibsrq.srq_context); - } else - spin_unlock_irqrestore(&rq->lock, flags); - } else - spin_unlock_irqrestore(&rq->lock, flags); - - ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr; - if (ah_attr->ah_flags & IB_AH_GRH) { - ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh)); - wc.wc_flags |= IB_WC_GRH; - } else - ipath_skip_sge(&rsge, sizeof(struct ib_grh)); - sge = swqe->sg_list; - while (length) { - u32 len = sge->length; - - if (len > length) - len = length; - if (len > sge->sge_length) - len = sge->sge_length; - BUG_ON(len == 0); - ipath_copy_sge(&rsge, sge->vaddr, len); - sge->vaddr += len; - sge->length -= len; - sge->sge_length -= len; - if (sge->sge_length == 0) { 
- if (--swqe->wr.num_sge) - sge++; - } else if (sge->length == 0 && sge->mr != NULL) { - if (++sge->n >= IPATH_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - break; - sge->n = 0; - } - sge->vaddr = - sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = - sge->mr->map[sge->m]->segs[sge->n].length; - } - length -= len; - } - wc.status = IB_WC_SUCCESS; - wc.opcode = IB_WC_RECV; - wc.qp = &qp->ibqp; - wc.src_qp = sqp->ibqp.qp_num; - /* XXX do we know which pkey matched? Only needed for GSI. */ - wc.pkey_index = 0; - wc.slid = dev->dd->ipath_lid | - (ah_attr->src_path_bits & - ((1 << dev->dd->ipath_lmc) - 1)); - wc.sl = ah_attr->sl; - wc.dlid_path_bits = - ah_attr->dlid & ((1 << dev->dd->ipath_lmc) - 1); - wc.port_num = 1; - /* Signal completion event if the solicited bit is set. */ - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, - swqe->wr.send_flags & IB_SEND_SOLICITED); -drop: - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); -done:; -} - -/** - * ipath_make_ud_req - construct a UD request packet - * @qp: the QP - * - * Return 1 if constructed; otherwise, return 0. - */ -int ipath_make_ud_req(struct ipath_qp *qp) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - struct ipath_other_headers *ohdr; - struct ib_ah_attr *ah_attr; - struct ipath_swqe *wqe; - unsigned long flags; - u32 nwords; - u32 extra_bytes; - u32 bth0; - u16 lrh0; - u16 lid; - int ret = 0; - int next_cur; - - spin_lock_irqsave(&qp->s_lock, flags); - - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { - if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) - goto bail; - /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) - goto bail; - /* If DMAs are in progress, we can't flush immediately. 
*/ - if (atomic_read(&qp->s_dma_busy)) { - qp->s_flags |= IPATH_S_WAIT_DMA; - goto bail; - } - wqe = get_swqe_ptr(qp, qp->s_last); - ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); - goto done; - } - - if (qp->s_cur == qp->s_head) - goto bail; - - wqe = get_swqe_ptr(qp, qp->s_cur); - next_cur = qp->s_cur + 1; - if (next_cur >= qp->s_size) - next_cur = 0; - - /* Construct the header. */ - ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; - if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) { - if (ah_attr->dlid != IPATH_PERMISSIVE_LID) - dev->n_multicast_xmit++; - else - dev->n_unicast_xmit++; - } else { - dev->n_unicast_xmit++; - lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1); - if (unlikely(lid == dev->dd->ipath_lid)) { - /* - * If DMAs are in progress, we can't generate - * a completion for the loopback packet since - * it would be out of order. - * XXX Instead of waiting, we could queue a - * zero length descriptor so we get a callback. - */ - if (atomic_read(&qp->s_dma_busy)) { - qp->s_flags |= IPATH_S_WAIT_DMA; - goto bail; - } - qp->s_cur = next_cur; - spin_unlock_irqrestore(&qp->s_lock, flags); - ipath_ud_loopback(qp, wqe); - spin_lock_irqsave(&qp->s_lock, flags); - ipath_send_complete(qp, wqe, IB_WC_SUCCESS); - goto done; - } - } - - qp->s_cur = next_cur; - extra_bytes = -wqe->length & 3; - nwords = (wqe->length + extra_bytes) >> 2; - - /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */ - qp->s_hdrwords = 7; - qp->s_cur_size = wqe->length; - qp->s_cur_sge = &qp->s_sge; - qp->s_dmult = ah_attr->static_rate; - qp->s_wqe = wqe; - qp->s_sge.sge = wqe->sg_list[0]; - qp->s_sge.sg_list = wqe->sg_list + 1; - qp->s_sge.num_sge = wqe->wr.num_sge; - - if (ah_attr->ah_flags & IB_AH_GRH) { - /* Header size in 32-bit words. */ - qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh, - &ah_attr->grh, - qp->s_hdrwords, nwords); - lrh0 = IPATH_LRH_GRH; - ohdr = &qp->s_hdr.u.l.oth; - /* - * Don't worry about sending to locally attached multicast - * QPs. 
It is unspecified by the spec. what happens. - */ - } else { - /* Header size in 32-bit words. */ - lrh0 = IPATH_LRH_BTH; - ohdr = &qp->s_hdr.u.oth; - } - if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { - qp->s_hdrwords++; - ohdr->u.ud.imm_data = wqe->wr.ex.imm_data; - bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24; - } else - bth0 = IB_OPCODE_UD_SEND_ONLY << 24; - lrh0 |= ah_attr->sl << 4; - if (qp->ibqp.qp_type == IB_QPT_SMI) - lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */ - qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); - qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ - qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + - SIZE_OF_CRC); - lid = dev->dd->ipath_lid; - if (lid) { - lid |= ah_attr->src_path_bits & - ((1 << dev->dd->ipath_lmc) - 1); - qp->s_hdr.lrh[3] = cpu_to_be16(lid); - } else - qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE; - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= 1 << 23; - bth0 |= extra_bytes << 20; - bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY : - ipath_get_pkey(dev->dd, qp->s_pkey_index); - ohdr->bth[0] = cpu_to_be32(bth0); - /* - * Use the multicast QP if the destination LID is a multicast LID. - */ - ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && - ah_attr->dlid != IPATH_PERMISSIVE_LID ? - cpu_to_be32(IPATH_MULTICAST_QPN) : - cpu_to_be32(wqe->wr.wr.ud.remote_qpn); - ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK); - /* - * Qkeys with the high order bit set mean use the - * qkey from the QP context instead of the WR (see 10.2.5). - */ - ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ? 
- qp->qkey : wqe->wr.wr.ud.remote_qkey); - ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); - -done: - ret = 1; - goto unlock; - -bail: - qp->s_flags &= ~IPATH_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); - return ret; -} - -/** - * ipath_ud_rcv - receive an incoming UD packet - * @dev: the device the packet came in on - * @hdr: the packet header - * @has_grh: true if the packet has a GRH - * @data: the packet data - * @tlen: the packet length - * @qp: the QP the packet came on - * - * This is called from ipath_qp_rcv() to process an incoming UD packet - * for the given QP. - * Called at interrupt level. - */ -void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, - int has_grh, void *data, u32 tlen, struct ipath_qp *qp) -{ - struct ipath_other_headers *ohdr; - int opcode; - u32 hdrsize; - u32 pad; - struct ib_wc wc; - u32 qkey; - u32 src_qp; - u16 dlid; - int header_in_data; - - /* Check for GRH */ - if (!has_grh) { - ohdr = &hdr->u.oth; - hdrsize = 8 + 12 + 8; /* LRH + BTH + DETH */ - qkey = be32_to_cpu(ohdr->u.ud.deth[0]); - src_qp = be32_to_cpu(ohdr->u.ud.deth[1]); - header_in_data = 0; - } else { - ohdr = &hdr->u.l.oth; - hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */ - /* - * The header with GRH is 68 bytes and the core driver sets - * the eager header buffer size to 56 bytes so the last 12 - * bytes of the IB header is in the data buffer. - */ - header_in_data = dev->dd->ipath_rcvhdrentsize == 16; - if (header_in_data) { - qkey = be32_to_cpu(((__be32 *) data)[1]); - src_qp = be32_to_cpu(((__be32 *) data)[2]); - data += 12; - } else { - qkey = be32_to_cpu(ohdr->u.ud.deth[0]); - src_qp = be32_to_cpu(ohdr->u.ud.deth[1]); - } - } - src_qp &= IPATH_QPN_MASK; - - /* - * Check that the permissive LID is only used on QP0 - * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1). 
- */ - if (qp->ibqp.qp_num) { - if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE || - hdr->lrh[3] == IB_LID_PERMISSIVE)) { - dev->n_pkt_drops++; - goto bail; - } - if (unlikely(qkey != qp->qkey)) { - /* XXX OK to lose a count once in a while. */ - dev->qkey_violations++; - dev->n_pkt_drops++; - goto bail; - } - } else if (hdr->lrh[1] == IB_LID_PERMISSIVE || - hdr->lrh[3] == IB_LID_PERMISSIVE) { - struct ib_smp *smp = (struct ib_smp *) data; - - if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - dev->n_pkt_drops++; - goto bail; - } - } - - /* - * The opcode is in the low byte when its in network order - * (top byte when in host order). - */ - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - if (qp->ibqp.qp_num > 1 && - opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { - if (header_in_data) { - wc.ex.imm_data = *(__be32 *) data; - data += sizeof(__be32); - } else - wc.ex.imm_data = ohdr->u.ud.imm_data; - wc.wc_flags = IB_WC_WITH_IMM; - hdrsize += sizeof(u32); - } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { - wc.ex.imm_data = 0; - wc.wc_flags = 0; - } else { - dev->n_pkt_drops++; - goto bail; - } - - /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - if (unlikely(tlen < (hdrsize + pad + 4))) { - /* Drop incomplete packets. */ - dev->n_pkt_drops++; - goto bail; - } - tlen -= hdrsize + pad + 4; - - /* Drop invalid MAD packets (see 13.5.3.1). */ - if (unlikely((qp->ibqp.qp_num == 0 && - (tlen != 256 || - (be16_to_cpu(hdr->lrh[0]) >> 12) != 15)) || - (qp->ibqp.qp_num == 1 && - (tlen != 256 || - (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))) { - dev->n_pkt_drops++; - goto bail; - } - - /* - * A GRH is expected to precede the data even if not - * present on the wire. - */ - wc.byte_len = tlen + sizeof(struct ib_grh); - - /* - * Get the next work request entry to find where to put the data. 
- */ - if (qp->r_flags & IPATH_R_REUSE_SGE) - qp->r_flags &= ~IPATH_R_REUSE_SGE; - else if (!ipath_get_rwqe(qp, 0)) { - /* - * Count VL15 packets dropped due to no receive buffer. - * Otherwise, count them as buffer overruns since usually, - * the HW will be able to receive packets even if there are - * no QPs with posted receive buffers. - */ - if (qp->ibqp.qp_num == 0) - dev->n_vl15_dropped++; - else - dev->rcv_errors++; - goto bail; - } - /* Silently drop packets which are too big. */ - if (wc.byte_len > qp->r_len) { - qp->r_flags |= IPATH_R_REUSE_SGE; - dev->n_pkt_drops++; - goto bail; - } - if (has_grh) { - ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh, - sizeof(struct ib_grh)); - wc.wc_flags |= IB_WC_GRH; - } else - ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); - ipath_copy_sge(&qp->r_sge, data, - wc.byte_len - sizeof(struct ib_grh)); - if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) - goto bail; - wc.wr_id = qp->r_wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = IB_WC_RECV; - wc.vendor_err = 0; - wc.qp = &qp->ibqp; - wc.src_qp = src_qp; - /* XXX do we know which pkey matched? Only needed for GSI. */ - wc.pkey_index = 0; - wc.slid = be16_to_cpu(hdr->lrh[3]); - wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF; - dlid = be16_to_cpu(hdr->lrh[1]); - /* - * Save the LMC lower bits if the destination LID is a unicast LID. - */ - wc.dlid_path_bits = dlid >= IPATH_MULTICAST_LID_BASE ? 0 : - dlid & ((1 << dev->dd->ipath_lmc) - 1); - wc.port_num = 1; - /* Signal completion event if the solicited bit is set. */ - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, - (ohdr->bth[0] & - cpu_to_be32(1 << 23)) != 0); - -bail:; -} diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c deleted file mode 100644 index 1da1252dc..000000000 --- a/drivers/infiniband/hw/ipath/ipath_user_pages.c +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. 
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <linux/mm.h> -#include <linux/device.h> -#include <linux/slab.h> -#include <linux/sched.h> - -#include "ipath_kernel.h" - -static void __ipath_release_user_pages(struct page **p, size_t num_pages, - int dirty) -{ - size_t i; - - for (i = 0; i < num_pages; i++) { - ipath_cdbg(MM, "%lu/%lu put_page %p\n", (unsigned long) i, - (unsigned long) num_pages, p[i]); - if (dirty) - set_page_dirty_lock(p[i]); - put_page(p[i]); - } -} - -/* call with current->mm->mmap_sem held */ -static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages, - struct page **p) -{ - unsigned long lock_limit; - size_t got; - int ret; - - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (num_pages > lock_limit) { - ret = -ENOMEM; - goto bail; - } - - ipath_cdbg(VERBOSE, "pin %lx pages from vaddr %lx\n", - (unsigned long) num_pages, start_page); - - for (got = 0; got < num_pages; got += ret) { - ret = get_user_pages(current, current->mm, - start_page + got * PAGE_SIZE, - num_pages - got, 1, 1, - p + got, NULL); - if (ret < 0) - goto bail_release; - } - - current->mm->pinned_vm += num_pages; - - ret = 0; - goto bail; - -bail_release: - __ipath_release_user_pages(p, got, 0); -bail: - return ret; -} - -/** - * ipath_map_page - a safety wrapper around pci_map_page() - * - * A dma_addr of all 0's is interpreted by the chip as "disabled". - * Unfortunately, it can also be a valid dma_addr returned on some - * architectures. - * - * The powerpc iommu assigns dma_addrs in ascending order, so we don't - * have to bother with retries or mapping a dummy page to insure we - * don't just get the same mapping again. - * - * I'm sure we won't be so lucky with other iommu's, so FIXME. 
- */ -dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page, - unsigned long offset, size_t size, int direction) -{ - dma_addr_t phys; - - phys = pci_map_page(hwdev, page, offset, size, direction); - - if (phys == 0) { - pci_unmap_page(hwdev, phys, size, direction); - phys = pci_map_page(hwdev, page, offset, size, direction); - /* - * FIXME: If we get 0 again, we should keep this page, - * map another, then free the 0 page. - */ - } - - return phys; -} - -/** - * ipath_map_single - a safety wrapper around pci_map_single() - * - * Same idea as ipath_map_page(). - */ -dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size, - int direction) -{ - dma_addr_t phys; - - phys = pci_map_single(hwdev, ptr, size, direction); - - if (phys == 0) { - pci_unmap_single(hwdev, phys, size, direction); - phys = pci_map_single(hwdev, ptr, size, direction); - /* - * FIXME: If we get 0 again, we should keep this page, - * map another, then free the 0 page. - */ - } - - return phys; -} - -/** - * ipath_get_user_pages - lock user pages into memory - * @start_page: the start page - * @num_pages: the number of pages - * @p: the output page structures - * - * This function takes a given start page (page aligned user virtual - * address) and pins it and the following specified number of pages. For - * now, num_pages is always 1, but that will probably change at some point - * (because caller is doing expected sends on a single virtually contiguous - * buffer, so we can do all pages at once). 
- */ -int ipath_get_user_pages(unsigned long start_page, size_t num_pages, - struct page **p) -{ - int ret; - - down_write(¤t->mm->mmap_sem); - - ret = __ipath_get_user_pages(start_page, num_pages, p); - - up_write(¤t->mm->mmap_sem); - - return ret; -} - -void ipath_release_user_pages(struct page **p, size_t num_pages) -{ - down_write(¤t->mm->mmap_sem); - - __ipath_release_user_pages(p, num_pages, 1); - - current->mm->pinned_vm -= num_pages; - - up_write(¤t->mm->mmap_sem); -} - -struct ipath_user_pages_work { - struct work_struct work; - struct mm_struct *mm; - unsigned long num_pages; -}; - -static void user_pages_account(struct work_struct *_work) -{ - struct ipath_user_pages_work *work = - container_of(_work, struct ipath_user_pages_work, work); - - down_write(&work->mm->mmap_sem); - work->mm->pinned_vm -= work->num_pages; - up_write(&work->mm->mmap_sem); - mmput(work->mm); - kfree(work); -} - -void ipath_release_user_pages_on_close(struct page **p, size_t num_pages) -{ - struct ipath_user_pages_work *work; - struct mm_struct *mm; - - __ipath_release_user_pages(p, num_pages, 1); - - mm = get_task_mm(current); - if (!mm) - return; - - work = kmalloc(sizeof(*work), GFP_KERNEL); - if (!work) - goto bail_mm; - - INIT_WORK(&work->work, user_pages_account); - work->mm = mm; - work->num_pages = num_pages; - - queue_work(ib_wq, &work->work); - return; - -bail_mm: - mmput(mm); - return; -} diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c deleted file mode 100644 index cc04b7ba3..000000000 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c +++ /dev/null @@ -1,875 +0,0 @@ -/* - * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <linux/mm.h> -#include <linux/types.h> -#include <linux/device.h> -#include <linux/dmapool.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/list.h> -#include <linux/highmem.h> -#include <linux/io.h> -#include <linux/uio.h> -#include <linux/rbtree.h> -#include <linux/spinlock.h> -#include <linux/delay.h> - -#include "ipath_kernel.h" -#include "ipath_user_sdma.h" - -/* minimum size of header */ -#define IPATH_USER_SDMA_MIN_HEADER_LENGTH 64 -/* expected size of headers (for dma_pool) */ -#define IPATH_USER_SDMA_EXP_HEADER_LENGTH 64 -/* length mask in PBC (lower 11 bits) */ -#define IPATH_PBC_LENGTH_MASK ((1 << 11) - 1) - -struct ipath_user_sdma_pkt { - u8 naddr; /* dimension of addr (1..3) ... 
*/ - u32 counter; /* sdma pkts queued counter for this entry */ - u64 added; /* global descq number of entries */ - - struct { - u32 offset; /* offset for kvaddr, addr */ - u32 length; /* length in page */ - u8 put_page; /* should we put_page? */ - u8 dma_mapped; /* is page dma_mapped? */ - struct page *page; /* may be NULL (coherent mem) */ - void *kvaddr; /* FIXME: only for pio hack */ - dma_addr_t addr; - } addr[4]; /* max pages, any more and we coalesce */ - struct list_head list; /* list element */ -}; - -struct ipath_user_sdma_queue { - /* - * pkts sent to dma engine are queued on this - * list head. the type of the elements of this - * list are struct ipath_user_sdma_pkt... - */ - struct list_head sent; - - /* headers with expected length are allocated from here... */ - char header_cache_name[64]; - struct dma_pool *header_cache; - - /* packets are allocated from the slab cache... */ - char pkt_slab_name[64]; - struct kmem_cache *pkt_slab; - - /* as packets go on the queued queue, they are counted... */ - u32 counter; - u32 sent_counter; - - /* dma page table */ - struct rb_root dma_pages_root; - - /* protect everything above... 
*/ - struct mutex lock; -}; - -struct ipath_user_sdma_queue * -ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport) -{ - struct ipath_user_sdma_queue *pq = - kmalloc(sizeof(struct ipath_user_sdma_queue), GFP_KERNEL); - - if (!pq) - goto done; - - pq->counter = 0; - pq->sent_counter = 0; - INIT_LIST_HEAD(&pq->sent); - - mutex_init(&pq->lock); - - snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name), - "ipath-user-sdma-pkts-%u-%02u.%02u", unit, port, sport); - pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name, - sizeof(struct ipath_user_sdma_pkt), - 0, 0, NULL); - - if (!pq->pkt_slab) - goto err_kfree; - - snprintf(pq->header_cache_name, sizeof(pq->header_cache_name), - "ipath-user-sdma-headers-%u-%02u.%02u", unit, port, sport); - pq->header_cache = dma_pool_create(pq->header_cache_name, - dev, - IPATH_USER_SDMA_EXP_HEADER_LENGTH, - 4, 0); - if (!pq->header_cache) - goto err_slab; - - pq->dma_pages_root = RB_ROOT; - - goto done; - -err_slab: - kmem_cache_destroy(pq->pkt_slab); -err_kfree: - kfree(pq); - pq = NULL; - -done: - return pq; -} - -static void ipath_user_sdma_init_frag(struct ipath_user_sdma_pkt *pkt, - int i, size_t offset, size_t len, - int put_page, int dma_mapped, - struct page *page, - void *kvaddr, dma_addr_t dma_addr) -{ - pkt->addr[i].offset = offset; - pkt->addr[i].length = len; - pkt->addr[i].put_page = put_page; - pkt->addr[i].dma_mapped = dma_mapped; - pkt->addr[i].page = page; - pkt->addr[i].kvaddr = kvaddr; - pkt->addr[i].addr = dma_addr; -} - -static void ipath_user_sdma_init_header(struct ipath_user_sdma_pkt *pkt, - u32 counter, size_t offset, - size_t len, int dma_mapped, - struct page *page, - void *kvaddr, dma_addr_t dma_addr) -{ - pkt->naddr = 1; - pkt->counter = counter; - ipath_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page, - kvaddr, dma_addr); -} - -/* we've too many pages in the iovec, coalesce to a single page */ -static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd, - struct 
ipath_user_sdma_pkt *pkt, - const struct iovec *iov, - unsigned long niov) { - int ret = 0; - struct page *page = alloc_page(GFP_KERNEL); - void *mpage_save; - char *mpage; - int i; - int len = 0; - dma_addr_t dma_addr; - - if (!page) { - ret = -ENOMEM; - goto done; - } - - mpage = kmap(page); - mpage_save = mpage; - for (i = 0; i < niov; i++) { - int cfur; - - cfur = copy_from_user(mpage, - iov[i].iov_base, iov[i].iov_len); - if (cfur) { - ret = -EFAULT; - goto free_unmap; - } - - mpage += iov[i].iov_len; - len += iov[i].iov_len; - } - - dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, - DMA_TO_DEVICE); - if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { - ret = -ENOMEM; - goto free_unmap; - } - - ipath_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save, - dma_addr); - pkt->naddr = 2; - - goto done; - -free_unmap: - kunmap(page); - __free_page(page); -done: - return ret; -} - -/* how many pages in this iovec element? */ -static int ipath_user_sdma_num_pages(const struct iovec *iov) -{ - const unsigned long addr = (unsigned long) iov->iov_base; - const unsigned long len = iov->iov_len; - const unsigned long spage = addr & PAGE_MASK; - const unsigned long epage = (addr + len - 1) & PAGE_MASK; - - return 1 + ((epage - spage) >> PAGE_SHIFT); -} - -/* truncate length to page boundary */ -static int ipath_user_sdma_page_length(unsigned long addr, unsigned long len) -{ - const unsigned long offset = addr & ~PAGE_MASK; - - return ((offset + len) > PAGE_SIZE) ? 
(PAGE_SIZE - offset) : len; -} - -static void ipath_user_sdma_free_pkt_frag(struct device *dev, - struct ipath_user_sdma_queue *pq, - struct ipath_user_sdma_pkt *pkt, - int frag) -{ - const int i = frag; - - if (pkt->addr[i].page) { - if (pkt->addr[i].dma_mapped) - dma_unmap_page(dev, - pkt->addr[i].addr, - pkt->addr[i].length, - DMA_TO_DEVICE); - - if (pkt->addr[i].kvaddr) - kunmap(pkt->addr[i].page); - - if (pkt->addr[i].put_page) - put_page(pkt->addr[i].page); - else - __free_page(pkt->addr[i].page); - } else if (pkt->addr[i].kvaddr) - /* free coherent mem from cache... */ - dma_pool_free(pq->header_cache, - pkt->addr[i].kvaddr, pkt->addr[i].addr); -} - -/* return number of pages pinned... */ -static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd, - struct ipath_user_sdma_pkt *pkt, - unsigned long addr, int tlen, int npages) -{ - struct page *pages[2]; - int j; - int ret; - - ret = get_user_pages_fast(addr, npages, 0, pages); - if (ret != npages) { - int i; - - for (i = 0; i < ret; i++) - put_page(pages[i]); - - ret = -ENOMEM; - goto done; - } - - for (j = 0; j < npages; j++) { - /* map the pages... 
*/ - const int flen = - ipath_user_sdma_page_length(addr, tlen); - dma_addr_t dma_addr = - dma_map_page(&dd->pcidev->dev, - pages[j], 0, flen, DMA_TO_DEVICE); - unsigned long fofs = addr & ~PAGE_MASK; - - if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { - ret = -ENOMEM; - goto done; - } - - ipath_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1, - pages[j], kmap(pages[j]), - dma_addr); - - pkt->naddr++; - addr += flen; - tlen -= flen; - } - -done: - return ret; -} - -static int ipath_user_sdma_pin_pkt(const struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq, - struct ipath_user_sdma_pkt *pkt, - const struct iovec *iov, - unsigned long niov) -{ - int ret = 0; - unsigned long idx; - - for (idx = 0; idx < niov; idx++) { - const int npages = ipath_user_sdma_num_pages(iov + idx); - const unsigned long addr = (unsigned long) iov[idx].iov_base; - - ret = ipath_user_sdma_pin_pages(dd, pkt, - addr, iov[idx].iov_len, - npages); - if (ret < 0) - goto free_pkt; - } - - goto done; - -free_pkt: - for (idx = 0; idx < pkt->naddr; idx++) - ipath_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx); - -done: - return ret; -} - -static int ipath_user_sdma_init_payload(const struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq, - struct ipath_user_sdma_pkt *pkt, - const struct iovec *iov, - unsigned long niov, int npages) -{ - int ret = 0; - - if (npages >= ARRAY_SIZE(pkt->addr)) - ret = ipath_user_sdma_coalesce(dd, pkt, iov, niov); - else - ret = ipath_user_sdma_pin_pkt(dd, pq, pkt, iov, niov); - - return ret; -} - -/* free a packet list -- return counter value of last packet */ -static void ipath_user_sdma_free_pkt_list(struct device *dev, - struct ipath_user_sdma_queue *pq, - struct list_head *list) -{ - struct ipath_user_sdma_pkt *pkt, *pkt_next; - - list_for_each_entry_safe(pkt, pkt_next, list, list) { - int i; - - for (i = 0; i < pkt->naddr; i++) - ipath_user_sdma_free_pkt_frag(dev, pq, pkt, i); - - kmem_cache_free(pq->pkt_slab, pkt); - } -} - -/* - 
* copy headers, coalesce etc -- pq->lock must be held - * - * we queue all the packets to list, returning the - * number of bytes total. list must be empty initially, - * as, if there is an error we clean it... - */ -static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq, - struct list_head *list, - const struct iovec *iov, - unsigned long niov, - int maxpkts) -{ - unsigned long idx = 0; - int ret = 0; - int npkts = 0; - struct page *page = NULL; - __le32 *pbc; - dma_addr_t dma_addr; - struct ipath_user_sdma_pkt *pkt = NULL; - size_t len; - size_t nw; - u32 counter = pq->counter; - int dma_mapped = 0; - - while (idx < niov && npkts < maxpkts) { - const unsigned long addr = (unsigned long) iov[idx].iov_base; - const unsigned long idx_save = idx; - unsigned pktnw; - unsigned pktnwc; - int nfrags = 0; - int npages = 0; - int cfur; - - dma_mapped = 0; - len = iov[idx].iov_len; - nw = len >> 2; - page = NULL; - - pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL); - if (!pkt) { - ret = -ENOMEM; - goto free_list; - } - - if (len < IPATH_USER_SDMA_MIN_HEADER_LENGTH || - len > PAGE_SIZE || len & 3 || addr & 3) { - ret = -EINVAL; - goto free_pkt; - } - - if (len == IPATH_USER_SDMA_EXP_HEADER_LENGTH) - pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL, - &dma_addr); - else - pbc = NULL; - - if (!pbc) { - page = alloc_page(GFP_KERNEL); - if (!page) { - ret = -ENOMEM; - goto free_pkt; - } - pbc = kmap(page); - } - - cfur = copy_from_user(pbc, iov[idx].iov_base, len); - if (cfur) { - ret = -EFAULT; - goto free_pbc; - } - - /* - * this assignment is a bit strange. it's because the - * the pbc counts the number of 32 bit words in the full - * packet _except_ the first word of the pbc itself... - */ - pktnwc = nw - 1; - - /* - * pktnw computation yields the number of 32 bit words - * that the caller has indicated in the PBC. 
note that - * this is one less than the total number of words that - * goes to the send DMA engine as the first 32 bit word - * of the PBC itself is not counted. Armed with this count, - * we can verify that the packet is consistent with the - * iovec lengths. - */ - pktnw = le32_to_cpu(*pbc) & IPATH_PBC_LENGTH_MASK; - if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) { - ret = -EINVAL; - goto free_pbc; - } - - - idx++; - while (pktnwc < pktnw && idx < niov) { - const size_t slen = iov[idx].iov_len; - const unsigned long faddr = - (unsigned long) iov[idx].iov_base; - - if (slen & 3 || faddr & 3 || !slen || - slen > PAGE_SIZE) { - ret = -EINVAL; - goto free_pbc; - } - - npages++; - if ((faddr & PAGE_MASK) != - ((faddr + slen - 1) & PAGE_MASK)) - npages++; - - pktnwc += slen >> 2; - idx++; - nfrags++; - } - - if (pktnwc != pktnw) { - ret = -EINVAL; - goto free_pbc; - } - - if (page) { - dma_addr = dma_map_page(&dd->pcidev->dev, - page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { - ret = -ENOMEM; - goto free_pbc; - } - - dma_mapped = 1; - } - - ipath_user_sdma_init_header(pkt, counter, 0, len, dma_mapped, - page, pbc, dma_addr); - - if (nfrags) { - ret = ipath_user_sdma_init_payload(dd, pq, pkt, - iov + idx_save + 1, - nfrags, npages); - if (ret < 0) - goto free_pbc_dma; - } - - counter++; - npkts++; - - list_add_tail(&pkt->list, list); - } - - ret = idx; - goto done; - -free_pbc_dma: - if (dma_mapped) - dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE); -free_pbc: - if (page) { - kunmap(page); - __free_page(page); - } else - dma_pool_free(pq->header_cache, pbc, dma_addr); -free_pkt: - kmem_cache_free(pq->pkt_slab, pkt); -free_list: - ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list); -done: - return ret; -} - -static void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue *pq, - u32 c) -{ - pq->sent_counter = c; -} - -/* try to clean out queue -- needs pq->lock */ -static int 
ipath_user_sdma_queue_clean(const struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq) -{ - struct list_head free_list; - struct ipath_user_sdma_pkt *pkt; - struct ipath_user_sdma_pkt *pkt_prev; - int ret = 0; - - INIT_LIST_HEAD(&free_list); - - list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) { - s64 descd = dd->ipath_sdma_descq_removed - pkt->added; - - if (descd < 0) - break; - - list_move_tail(&pkt->list, &free_list); - - /* one more packet cleaned */ - ret++; - } - - if (!list_empty(&free_list)) { - u32 counter; - - pkt = list_entry(free_list.prev, - struct ipath_user_sdma_pkt, list); - counter = pkt->counter; - - ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list); - ipath_user_sdma_set_complete_counter(pq, counter); - } - - return ret; -} - -void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq) -{ - if (!pq) - return; - - kmem_cache_destroy(pq->pkt_slab); - dma_pool_destroy(pq->header_cache); - kfree(pq); -} - -/* clean descriptor queue, returns > 0 if some elements cleaned */ -static int ipath_user_sdma_hwqueue_clean(struct ipath_devdata *dd) -{ - int ret; - unsigned long flags; - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - ret = ipath_sdma_make_progress(dd); - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - - return ret; -} - -/* we're in close, drain packets so that we can cleanup successfully... 
*/ -void ipath_user_sdma_queue_drain(struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq) -{ - int i; - - if (!pq) - return; - - for (i = 0; i < 100; i++) { - mutex_lock(&pq->lock); - if (list_empty(&pq->sent)) { - mutex_unlock(&pq->lock); - break; - } - ipath_user_sdma_hwqueue_clean(dd); - ipath_user_sdma_queue_clean(dd, pq); - mutex_unlock(&pq->lock); - msleep(10); - } - - if (!list_empty(&pq->sent)) { - struct list_head free_list; - - printk(KERN_INFO "drain: lists not empty: forcing!\n"); - INIT_LIST_HEAD(&free_list); - mutex_lock(&pq->lock); - list_splice_init(&pq->sent, &free_list); - ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list); - mutex_unlock(&pq->lock); - } -} - -static inline __le64 ipath_sdma_make_desc0(struct ipath_devdata *dd, - u64 addr, u64 dwlen, u64 dwoffset) -{ - return cpu_to_le64(/* SDmaPhyAddr[31:0] */ - ((addr & 0xfffffffcULL) << 32) | - /* SDmaGeneration[1:0] */ - ((dd->ipath_sdma_generation & 3ULL) << 30) | - /* SDmaDwordCount[10:0] */ - ((dwlen & 0x7ffULL) << 16) | - /* SDmaBufOffset[12:2] */ - (dwoffset & 0x7ffULL)); -} - -static inline __le64 ipath_sdma_make_first_desc0(__le64 descq) -{ - return descq | cpu_to_le64(1ULL << 12); -} - -static inline __le64 ipath_sdma_make_last_desc0(__le64 descq) -{ - /* last */ /* dma head */ - return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13); -} - -static inline __le64 ipath_sdma_make_desc1(u64 addr) -{ - /* SDmaPhyAddr[47:32] */ - return cpu_to_le64(addr >> 32); -} - -static void ipath_user_sdma_send_frag(struct ipath_devdata *dd, - struct ipath_user_sdma_pkt *pkt, int idx, - unsigned ofs, u16 tail) -{ - const u64 addr = (u64) pkt->addr[idx].addr + - (u64) pkt->addr[idx].offset; - const u64 dwlen = (u64) pkt->addr[idx].length / 4; - __le64 *descqp; - __le64 descq0; - - descqp = &dd->ipath_sdma_descq[tail].qw[0]; - - descq0 = ipath_sdma_make_desc0(dd, addr, dwlen, ofs); - if (idx == 0) - descq0 = ipath_sdma_make_first_desc0(descq0); - if (idx == pkt->naddr - 1) - descq0 = 
ipath_sdma_make_last_desc0(descq0); - - descqp[0] = descq0; - descqp[1] = ipath_sdma_make_desc1(addr); -} - -/* pq->lock must be held, get packets on the wire... */ -static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq, - struct list_head *pktlist) -{ - int ret = 0; - unsigned long flags; - u16 tail; - - if (list_empty(pktlist)) - return 0; - - if (unlikely(!(dd->ipath_flags & IPATH_LINKACTIVE))) - return -ECOMM; - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - - if (unlikely(dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK)) { - ret = -ECOMM; - goto unlock; - } - - tail = dd->ipath_sdma_descq_tail; - while (!list_empty(pktlist)) { - struct ipath_user_sdma_pkt *pkt = - list_entry(pktlist->next, struct ipath_user_sdma_pkt, - list); - int i; - unsigned ofs = 0; - u16 dtail = tail; - - if (pkt->naddr > ipath_sdma_descq_freecnt(dd)) - goto unlock_check_tail; - - for (i = 0; i < pkt->naddr; i++) { - ipath_user_sdma_send_frag(dd, pkt, i, ofs, tail); - ofs += pkt->addr[i].length >> 2; - - if (++tail == dd->ipath_sdma_descq_cnt) { - tail = 0; - ++dd->ipath_sdma_generation; - } - } - - if ((ofs<<2) > dd->ipath_ibmaxlen) { - ipath_dbg("packet size %X > ibmax %X, fail\n", - ofs<<2, dd->ipath_ibmaxlen); - ret = -EMSGSIZE; - goto unlock; - } - - /* - * if the packet is >= 2KB mtu equivalent, we have to use - * the large buffers, and have to mark each descriptor as - * part of a large buffer packet. 
- */ - if (ofs >= IPATH_SMALLBUF_DWORDS) { - for (i = 0; i < pkt->naddr; i++) { - dd->ipath_sdma_descq[dtail].qw[0] |= - cpu_to_le64(1ULL << 14); - if (++dtail == dd->ipath_sdma_descq_cnt) - dtail = 0; - } - } - - dd->ipath_sdma_descq_added += pkt->naddr; - pkt->added = dd->ipath_sdma_descq_added; - list_move_tail(&pkt->list, &pq->sent); - ret++; - } - -unlock_check_tail: - /* advance the tail on the chip if necessary */ - if (dd->ipath_sdma_descq_tail != tail) { - wmb(); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail); - dd->ipath_sdma_descq_tail = tail; - } - -unlock: - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - - return ret; -} - -int ipath_user_sdma_writev(struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq, - const struct iovec *iov, - unsigned long dim) -{ - int ret = 0; - struct list_head list; - int npkts = 0; - - INIT_LIST_HEAD(&list); - - mutex_lock(&pq->lock); - - if (dd->ipath_sdma_descq_added != dd->ipath_sdma_descq_removed) { - ipath_user_sdma_hwqueue_clean(dd); - ipath_user_sdma_queue_clean(dd, pq); - } - - while (dim) { - const int mxp = 8; - - ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp); - if (ret <= 0) - goto done_unlock; - else { - dim -= ret; - iov += ret; - } - - /* force packets onto the sdma hw queue... */ - if (!list_empty(&list)) { - /* - * lazily clean hw queue. the 4 is a guess of about - * how many sdma descriptors a packet will take (it - * doesn't have to be perfect). - */ - if (ipath_sdma_descq_freecnt(dd) < ret * 4) { - ipath_user_sdma_hwqueue_clean(dd); - ipath_user_sdma_queue_clean(dd, pq); - } - - ret = ipath_user_sdma_push_pkts(dd, pq, &list); - if (ret < 0) - goto done_unlock; - else { - npkts += ret; - pq->counter += ret; - - if (!list_empty(&list)) - goto done_unlock; - } - } - } - -done_unlock: - if (!list_empty(&list)) - ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list); - mutex_unlock(&pq->lock); - - return (ret < 0) ? 
ret : npkts; -} - -int ipath_user_sdma_make_progress(struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq) -{ - int ret = 0; - - mutex_lock(&pq->lock); - ipath_user_sdma_hwqueue_clean(dd); - ret = ipath_user_sdma_queue_clean(dd, pq); - mutex_unlock(&pq->lock); - - return ret; -} - -u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq) -{ - return pq->sent_counter; -} - -u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq) -{ - return pq->counter; -} - diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.h b/drivers/infiniband/hw/ipath/ipath_user_sdma.h deleted file mode 100644 index fc76316c4..000000000 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <linux/device.h> - -struct ipath_user_sdma_queue; - -struct ipath_user_sdma_queue * -ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport); -void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq); - -int ipath_user_sdma_writev(struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq, - const struct iovec *iov, - unsigned long dim); - -int ipath_user_sdma_make_progress(struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq); - -void ipath_user_sdma_queue_drain(struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq); - -u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq); -u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c deleted file mode 100644 index 30ba49c4a..000000000 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ /dev/null @@ -1,2364 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <rdma/ib_mad.h> -#include <rdma/ib_user_verbs.h> -#include <linux/io.h> -#include <linux/slab.h> -#include <linux/module.h> -#include <linux/utsname.h> -#include <linux/rculist.h> - -#include "ipath_kernel.h" -#include "ipath_verbs.h" -#include "ipath_common.h" - -static unsigned int ib_ipath_qp_table_size = 251; -module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO); -MODULE_PARM_DESC(qp_table_size, "QP table size"); - -unsigned int ib_ipath_lkey_table_size = 12; -module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint, - S_IRUGO); -MODULE_PARM_DESC(lkey_table_size, - "LKEY table size in bits (2^n, 1 <= n <= 23)"); - -static unsigned int ib_ipath_max_pds = 0xFFFF; -module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_pds, - "Maximum number of protection domains to support"); - -static unsigned int ib_ipath_max_ahs = 0xFFFF; -module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support"); - -unsigned int ib_ipath_max_cqes = 0x2FFFF; -module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_cqes, - "Maximum number of completion queue entries to support"); - -unsigned int ib_ipath_max_cqs = 0x1FFFF; -module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support"); - -unsigned int ib_ipath_max_qp_wrs = 0x3FFF; -module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint, - S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support"); - -unsigned int ib_ipath_max_qps = 16384; -module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support"); - -unsigned int ib_ipath_max_sges = 0x60; -module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_sges, 
"Maximum number of SGEs to support"); - -unsigned int ib_ipath_max_mcast_grps = 16384; -module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint, - S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_mcast_grps, - "Maximum number of multicast groups to support"); - -unsigned int ib_ipath_max_mcast_qp_attached = 16; -module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached, - uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_mcast_qp_attached, - "Maximum number of attached QPs to support"); - -unsigned int ib_ipath_max_srqs = 1024; -module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support"); - -unsigned int ib_ipath_max_srq_sges = 128; -module_param_named(max_srq_sges, ib_ipath_max_srq_sges, - uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support"); - -unsigned int ib_ipath_max_srq_wrs = 0x1FFFF; -module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs, - uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support"); - -static unsigned int ib_ipath_disable_sma; -module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(disable_sma, "Disable the SMA"); - -/* - * Note that it is OK to post send work requests in the SQE and ERR - * states; ipath_do_send() will process them and generate error - * completions as per IB 1.2 C10-96. 
- */ -const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { - [IB_QPS_RESET] = 0, - [IB_QPS_INIT] = IPATH_POST_RECV_OK, - [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, - [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | - IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK | - IPATH_PROCESS_NEXT_SEND_OK, - [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | - IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, - [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | - IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, - [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV | - IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, -}; - -struct ipath_ucontext { - struct ib_ucontext ibucontext; -}; - -static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext - *ibucontext) -{ - return container_of(ibucontext, struct ipath_ucontext, ibucontext); -} - -/* - * Translate ib_wr_opcode into ib_wc_opcode. - */ -const enum ib_wc_opcode ib_ipath_wc_opcode[] = { - [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, - [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, - [IB_WR_SEND] = IB_WC_SEND, - [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, - [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, - [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, - [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD -}; - -/* - * System image GUID. 
- */ -static __be64 sys_image_guid; - -/** - * ipath_copy_sge - copy data to SGE memory - * @ss: the SGE state - * @data: the data to copy - * @length: the length of the data - */ -void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length) -{ - struct ipath_sge *sge = &ss->sge; - - while (length) { - u32 len = sge->length; - - if (len > length) - len = length; - if (len > sge->sge_length) - len = sge->sge_length; - BUG_ON(len == 0); - memcpy(sge->vaddr, data, len); - sge->vaddr += len; - sge->length -= len; - sge->sge_length -= len; - if (sge->sge_length == 0) { - if (--ss->num_sge) - *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { - if (++sge->n >= IPATH_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - break; - sge->n = 0; - } - sge->vaddr = - sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = - sge->mr->map[sge->m]->segs[sge->n].length; - } - data += len; - length -= len; - } -} - -/** - * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func - * @ss: the SGE state - * @length: the number of bytes to skip - */ -void ipath_skip_sge(struct ipath_sge_state *ss, u32 length) -{ - struct ipath_sge *sge = &ss->sge; - - while (length) { - u32 len = sge->length; - - if (len > length) - len = length; - if (len > sge->sge_length) - len = sge->sge_length; - BUG_ON(len == 0); - sge->vaddr += len; - sge->length -= len; - sge->sge_length -= len; - if (sge->sge_length == 0) { - if (--ss->num_sge) - *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { - if (++sge->n >= IPATH_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - break; - sge->n = 0; - } - sge->vaddr = - sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = - sge->mr->map[sge->m]->segs[sge->n].length; - } - length -= len; - } -} - -/* - * Count the number of DMA descriptors needed to send length bytes of data. - * Don't modify the ipath_sge_state to get the count. - * Return zero if any of the segments is not aligned. 
- */ -static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length) -{ - struct ipath_sge *sg_list = ss->sg_list; - struct ipath_sge sge = ss->sge; - u8 num_sge = ss->num_sge; - u32 ndesc = 1; /* count the header */ - - while (length) { - u32 len = sge.length; - - if (len > length) - len = length; - if (len > sge.sge_length) - len = sge.sge_length; - BUG_ON(len == 0); - if (((long) sge.vaddr & (sizeof(u32) - 1)) || - (len != length && (len & (sizeof(u32) - 1)))) { - ndesc = 0; - break; - } - ndesc++; - sge.vaddr += len; - sge.length -= len; - sge.sge_length -= len; - if (sge.sge_length == 0) { - if (--num_sge) - sge = *sg_list++; - } else if (sge.length == 0 && sge.mr != NULL) { - if (++sge.n >= IPATH_SEGSZ) { - if (++sge.m >= sge.mr->mapsz) - break; - sge.n = 0; - } - sge.vaddr = - sge.mr->map[sge.m]->segs[sge.n].vaddr; - sge.length = - sge.mr->map[sge.m]->segs[sge.n].length; - } - length -= len; - } - return ndesc; -} - -/* - * Copy from the SGEs to the data buffer. - */ -static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss, - u32 length) -{ - struct ipath_sge *sge = &ss->sge; - - while (length) { - u32 len = sge->length; - - if (len > length) - len = length; - if (len > sge->sge_length) - len = sge->sge_length; - BUG_ON(len == 0); - memcpy(data, sge->vaddr, len); - sge->vaddr += len; - sge->length -= len; - sge->sge_length -= len; - if (sge->sge_length == 0) { - if (--ss->num_sge) - *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { - if (++sge->n >= IPATH_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - break; - sge->n = 0; - } - sge->vaddr = - sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = - sge->mr->map[sge->m]->segs[sge->n].length; - } - data += len; - length -= len; - } -} - -/** - * ipath_post_one_send - post one RC, UC, or UD send work request - * @qp: the QP to post on - * @wr: the work request to send - */ -static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) -{ - struct 
ipath_swqe *wqe; - u32 next; - int i; - int j; - int acc; - int ret; - unsigned long flags; - struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd; - - spin_lock_irqsave(&qp->s_lock, flags); - - if (qp->ibqp.qp_type != IB_QPT_SMI && - !(dd->ipath_flags & IPATH_LINKACTIVE)) { - ret = -ENETDOWN; - goto bail; - } - - /* Check that state is OK to post send. */ - if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) - goto bail_inval; - - /* IB spec says that num_sge == 0 is OK. */ - if (wr->num_sge > qp->s_max_sge) - goto bail_inval; - - /* - * Don't allow RDMA reads or atomic operations on UC or - * undefined operations. - * Make sure buffer is large enough to hold the result for atomics. - */ - if (qp->ibqp.qp_type == IB_QPT_UC) { - if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) - goto bail_inval; - } else if (qp->ibqp.qp_type == IB_QPT_UD) { - /* Check UD opcode */ - if (wr->opcode != IB_WR_SEND && - wr->opcode != IB_WR_SEND_WITH_IMM) - goto bail_inval; - /* Check UD destination address PD */ - if (qp->ibqp.pd != wr->wr.ud.ah->pd) - goto bail_inval; - } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) - goto bail_inval; - else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && - (wr->num_sge == 0 || - wr->sg_list[0].length < sizeof(u64) || - wr->sg_list[0].addr & (sizeof(u64) - 1))) - goto bail_inval; - else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) - goto bail_inval; - - next = qp->s_head + 1; - if (next >= qp->s_size) - next = 0; - if (next == qp->s_last) { - ret = -ENOMEM; - goto bail; - } - - wqe = get_swqe_ptr(qp, qp->s_head); - wqe->wr = *wr; - wqe->length = 0; - if (wr->num_sge) { - acc = wr->opcode >= IB_WR_RDMA_READ ? 
- IB_ACCESS_LOCAL_WRITE : 0; - for (i = 0, j = 0; i < wr->num_sge; i++) { - u32 length = wr->sg_list[i].length; - int ok; - - if (length == 0) - continue; - ok = ipath_lkey_ok(qp, &wqe->sg_list[j], - &wr->sg_list[i], acc); - if (!ok) - goto bail_inval; - wqe->length += length; - j++; - } - wqe->wr.num_sge = j; - } - if (qp->ibqp.qp_type == IB_QPT_UC || - qp->ibqp.qp_type == IB_QPT_RC) { - if (wqe->length > 0x80000000U) - goto bail_inval; - } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu) - goto bail_inval; - wqe->ssn = qp->s_ssn++; - qp->s_head = next; - - ret = 0; - goto bail; - -bail_inval: - ret = -EINVAL; -bail: - spin_unlock_irqrestore(&qp->s_lock, flags); - return ret; -} - -/** - * ipath_post_send - post a send on a QP - * @ibqp: the QP to post the send on - * @wr: the list of work requests to post - * @bad_wr: the first bad WR is put here - * - * This may be called from interrupt context. - */ -static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) -{ - struct ipath_qp *qp = to_iqp(ibqp); - int err = 0; - - for (; wr; wr = wr->next) { - err = ipath_post_one_send(qp, wr); - if (err) { - *bad_wr = wr; - goto bail; - } - } - - /* Try to do the send work in the caller's context. */ - ipath_do_send((unsigned long) qp); - -bail: - return err; -} - -/** - * ipath_post_receive - post a receive on a QP - * @ibqp: the QP to post the receive on - * @wr: the WR to post - * @bad_wr: the first bad WR is put here - * - * This may be called from interrupt context. - */ -static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) -{ - struct ipath_qp *qp = to_iqp(ibqp); - struct ipath_rwq *wq = qp->r_rq.wq; - unsigned long flags; - int ret; - - /* Check that state is OK to post receive. 
*/ - if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) { - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - - for (; wr; wr = wr->next) { - struct ipath_rwqe *wqe; - u32 next; - int i; - - if ((unsigned) wr->num_sge > qp->r_rq.max_sge) { - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - - spin_lock_irqsave(&qp->r_rq.lock, flags); - next = wq->head + 1; - if (next >= qp->r_rq.size) - next = 0; - if (next == wq->tail) { - spin_unlock_irqrestore(&qp->r_rq.lock, flags); - *bad_wr = wr; - ret = -ENOMEM; - goto bail; - } - - wqe = get_rwqe_ptr(&qp->r_rq, wq->head); - wqe->wr_id = wr->wr_id; - wqe->num_sge = wr->num_sge; - for (i = 0; i < wr->num_sge; i++) - wqe->sg_list[i] = wr->sg_list[i]; - /* Make sure queue entry is written before the head index. */ - smp_wmb(); - wq->head = next; - spin_unlock_irqrestore(&qp->r_rq.lock, flags); - } - ret = 0; - -bail: - return ret; -} - -/** - * ipath_qp_rcv - processing an incoming packet on a QP - * @dev: the device the packet came on - * @hdr: the packet header - * @has_grh: true if the packet has a GRH - * @data: the packet data - * @tlen: the packet length - * @qp: the QP the packet came on - * - * This is called from ipath_ib_rcv() to process an incoming packet - * for the given QP. - * Called at interrupt level. - */ -static void ipath_qp_rcv(struct ipath_ibdev *dev, - struct ipath_ib_header *hdr, int has_grh, - void *data, u32 tlen, struct ipath_qp *qp) -{ - /* Check for valid receive state. 
*/ - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { - dev->n_pkt_drops++; - return; - } - - switch (qp->ibqp.qp_type) { - case IB_QPT_SMI: - case IB_QPT_GSI: - if (ib_ipath_disable_sma) - break; - /* FALLTHROUGH */ - case IB_QPT_UD: - ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp); - break; - - case IB_QPT_RC: - ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp); - break; - - case IB_QPT_UC: - ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp); - break; - - default: - break; - } -} - -/** - * ipath_ib_rcv - process an incoming packet - * @arg: the device pointer - * @rhdr: the header of the packet - * @data: the packet data - * @tlen: the packet length - * - * This is called from ipath_kreceive() to process an incoming packet at - * interrupt level. Tlen is the length of the header + data + CRC in bytes. - */ -void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data, - u32 tlen) -{ - struct ipath_ib_header *hdr = rhdr; - struct ipath_other_headers *ohdr; - struct ipath_qp *qp; - u32 qp_num; - int lnh; - u8 opcode; - u16 lid; - - if (unlikely(dev == NULL)) - goto bail; - - if (unlikely(tlen < 24)) { /* LRH+BTH+CRC */ - dev->rcv_errors++; - goto bail; - } - - /* Check for a valid destination LID (see ch. 7.11.1). */ - lid = be16_to_cpu(hdr->lrh[1]); - if (lid < IPATH_MULTICAST_LID_BASE) { - lid &= ~((1 << dev->dd->ipath_lmc) - 1); - if (unlikely(lid != dev->dd->ipath_lid)) { - dev->rcv_errors++; - goto bail; - } - } - - /* Check for GRH */ - lnh = be16_to_cpu(hdr->lrh[0]) & 3; - if (lnh == IPATH_LRH_BTH) - ohdr = &hdr->u.oth; - else if (lnh == IPATH_LRH_GRH) - ohdr = &hdr->u.l.oth; - else { - dev->rcv_errors++; - goto bail; - } - - opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f; - dev->opstats[opcode].n_bytes += tlen; - dev->opstats[opcode].n_packets++; - - /* Get the destination QP number. 
*/ - qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK; - if (qp_num == IPATH_MULTICAST_QPN) { - struct ipath_mcast *mcast; - struct ipath_mcast_qp *p; - - if (lnh != IPATH_LRH_GRH) { - dev->n_pkt_drops++; - goto bail; - } - mcast = ipath_mcast_find(&hdr->u.l.grh.dgid); - if (mcast == NULL) { - dev->n_pkt_drops++; - goto bail; - } - dev->n_multicast_rcv++; - list_for_each_entry_rcu(p, &mcast->qp_list, list) - ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp); - /* - * Notify ipath_multicast_detach() if it is waiting for us - * to finish. - */ - if (atomic_dec_return(&mcast->refcount) <= 1) - wake_up(&mcast->wait); - } else { - qp = ipath_lookup_qpn(&dev->qp_table, qp_num); - if (qp) { - dev->n_unicast_rcv++; - ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data, - tlen, qp); - /* - * Notify ipath_destroy_qp() if it is waiting - * for us to finish. - */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - } else - dev->n_pkt_drops++; - } - -bail:; -} - -/** - * ipath_ib_timer - verbs timer - * @arg: the device pointer - * - * This is called from ipath_do_rcv_timer() at interrupt level to check for - * QPs which need retransmits and to collect performance numbers. - */ -static void ipath_ib_timer(struct ipath_ibdev *dev) -{ - struct ipath_qp *resend = NULL; - struct ipath_qp *rnr = NULL; - struct list_head *last; - struct ipath_qp *qp; - unsigned long flags; - - if (dev == NULL) - return; - - spin_lock_irqsave(&dev->pending_lock, flags); - /* Start filling the next pending queue. */ - if (++dev->pending_index >= ARRAY_SIZE(dev->pending)) - dev->pending_index = 0; - /* Save any requests still in the new queue, they have timed out. 
*/ - last = &dev->pending[dev->pending_index]; - while (!list_empty(last)) { - qp = list_entry(last->next, struct ipath_qp, timerwait); - list_del_init(&qp->timerwait); - qp->timer_next = resend; - resend = qp; - atomic_inc(&qp->refcount); - } - last = &dev->rnrwait; - if (!list_empty(last)) { - qp = list_entry(last->next, struct ipath_qp, timerwait); - if (--qp->s_rnr_timeout == 0) { - do { - list_del_init(&qp->timerwait); - qp->timer_next = rnr; - rnr = qp; - atomic_inc(&qp->refcount); - if (list_empty(last)) - break; - qp = list_entry(last->next, struct ipath_qp, - timerwait); - } while (qp->s_rnr_timeout == 0); - } - } - /* - * We should only be in the started state if pma_sample_start != 0 - */ - if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED && - --dev->pma_sample_start == 0) { - dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING; - ipath_snapshot_counters(dev->dd, &dev->ipath_sword, - &dev->ipath_rword, - &dev->ipath_spkts, - &dev->ipath_rpkts, - &dev->ipath_xmit_wait); - } - if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) { - if (dev->pma_sample_interval == 0) { - u64 ta, tb, tc, td, te; - - dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE; - ipath_snapshot_counters(dev->dd, &ta, &tb, - &tc, &td, &te); - - dev->ipath_sword = ta - dev->ipath_sword; - dev->ipath_rword = tb - dev->ipath_rword; - dev->ipath_spkts = tc - dev->ipath_spkts; - dev->ipath_rpkts = td - dev->ipath_rpkts; - dev->ipath_xmit_wait = te - dev->ipath_xmit_wait; - } - else - dev->pma_sample_interval--; - } - spin_unlock_irqrestore(&dev->pending_lock, flags); - - /* XXX What if timer fires again while this is running? 
*/ - while (resend != NULL) { - qp = resend; - resend = qp->timer_next; - - spin_lock_irqsave(&qp->s_lock, flags); - if (qp->s_last != qp->s_tail && - ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { - dev->n_timeouts++; - ipath_restart_rc(qp, qp->s_last_psn + 1); - } - spin_unlock_irqrestore(&qp->s_lock, flags); - - /* Notify ipath_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - } - while (rnr != NULL) { - qp = rnr; - rnr = qp->timer_next; - - spin_lock_irqsave(&qp->s_lock, flags); - if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) - ipath_schedule_send(qp); - spin_unlock_irqrestore(&qp->s_lock, flags); - - /* Notify ipath_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - } -} - -static void update_sge(struct ipath_sge_state *ss, u32 length) -{ - struct ipath_sge *sge = &ss->sge; - - sge->vaddr += length; - sge->length -= length; - sge->sge_length -= length; - if (sge->sge_length == 0) { - if (--ss->num_sge) - *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { - if (++sge->n >= IPATH_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - return; - sge->n = 0; - } - sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = sge->mr->map[sge->m]->segs[sge->n].length; - } -} - -#ifdef __LITTLE_ENDIAN -static inline u32 get_upper_bits(u32 data, u32 shift) -{ - return data >> shift; -} - -static inline u32 set_upper_bits(u32 data, u32 shift) -{ - return data << shift; -} - -static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) -{ - data <<= ((sizeof(u32) - n) * BITS_PER_BYTE); - data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE); - return data; -} -#else -static inline u32 get_upper_bits(u32 data, u32 shift) -{ - return data << shift; -} - -static inline u32 set_upper_bits(u32 data, u32 shift) -{ - return data >> shift; -} - -static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) -{ - data >>= ((sizeof(u32) - 
n) * BITS_PER_BYTE); - data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE); - return data; -} -#endif - -static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, - u32 length, unsigned flush_wc) -{ - u32 extra = 0; - u32 data = 0; - u32 last; - - while (1) { - u32 len = ss->sge.length; - u32 off; - - if (len > length) - len = length; - if (len > ss->sge.sge_length) - len = ss->sge.sge_length; - BUG_ON(len == 0); - /* If the source address is not aligned, try to align it. */ - off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); - if (off) { - u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr & - ~(sizeof(u32) - 1)); - u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE); - u32 y; - - y = sizeof(u32) - off; - if (len > y) - len = y; - if (len + extra >= sizeof(u32)) { - data |= set_upper_bits(v, extra * - BITS_PER_BYTE); - len = sizeof(u32) - extra; - if (len == length) { - last = data; - break; - } - __raw_writel(data, piobuf); - piobuf++; - extra = 0; - data = 0; - } else { - /* Clear unused upper bytes */ - data |= clear_upper_bytes(v, len, extra); - if (len == length) { - last = data; - break; - } - extra += len; - } - } else if (extra) { - /* Source address is aligned. */ - u32 *addr = (u32 *) ss->sge.vaddr; - int shift = extra * BITS_PER_BYTE; - int ushift = 32 - shift; - u32 l = len; - - while (l >= sizeof(u32)) { - u32 v = *addr; - - data |= set_upper_bits(v, shift); - __raw_writel(data, piobuf); - data = get_upper_bits(v, ushift); - piobuf++; - addr++; - l -= sizeof(u32); - } - /* - * We still have 'extra' number of bytes leftover. 
- */ - if (l) { - u32 v = *addr; - - if (l + extra >= sizeof(u32)) { - data |= set_upper_bits(v, shift); - len -= l + extra - sizeof(u32); - if (len == length) { - last = data; - break; - } - __raw_writel(data, piobuf); - piobuf++; - extra = 0; - data = 0; - } else { - /* Clear unused upper bytes */ - data |= clear_upper_bytes(v, l, - extra); - if (len == length) { - last = data; - break; - } - extra += l; - } - } else if (len == length) { - last = data; - break; - } - } else if (len == length) { - u32 w; - - /* - * Need to round up for the last dword in the - * packet. - */ - w = (len + 3) >> 2; - __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1); - piobuf += w - 1; - last = ((u32 *) ss->sge.vaddr)[w - 1]; - break; - } else { - u32 w = len >> 2; - - __iowrite32_copy(piobuf, ss->sge.vaddr, w); - piobuf += w; - - extra = len & (sizeof(u32) - 1); - if (extra) { - u32 v = ((u32 *) ss->sge.vaddr)[w]; - - /* Clear unused upper bytes */ - data = clear_upper_bytes(v, extra, 0); - } - } - update_sge(ss, len); - length -= len; - } - /* Update address before sending packet. */ - update_sge(ss, length); - if (flush_wc) { - /* must flush early everything before trigger word */ - ipath_flush_wc(); - __raw_writel(last, piobuf); - /* be sure trigger word is written */ - ipath_flush_wc(); - } else - __raw_writel(last, piobuf); -} - -/* - * Convert IB rate to delay multiplier. 
- */ -unsigned ipath_ib_rate_to_mult(enum ib_rate rate) -{ - switch (rate) { - case IB_RATE_2_5_GBPS: return 8; - case IB_RATE_5_GBPS: return 4; - case IB_RATE_10_GBPS: return 2; - case IB_RATE_20_GBPS: return 1; - default: return 0; - } -} - -/* - * Convert delay multiplier to IB rate - */ -static enum ib_rate ipath_mult_to_ib_rate(unsigned mult) -{ - switch (mult) { - case 8: return IB_RATE_2_5_GBPS; - case 4: return IB_RATE_5_GBPS; - case 2: return IB_RATE_10_GBPS; - case 1: return IB_RATE_20_GBPS; - default: return IB_RATE_PORT_CURRENT; - } -} - -static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev) -{ - struct ipath_verbs_txreq *tx = NULL; - unsigned long flags; - - spin_lock_irqsave(&dev->pending_lock, flags); - if (!list_empty(&dev->txreq_free)) { - struct list_head *l = dev->txreq_free.next; - - list_del(l); - tx = list_entry(l, struct ipath_verbs_txreq, txreq.list); - } - spin_unlock_irqrestore(&dev->pending_lock, flags); - return tx; -} - -static inline void put_txreq(struct ipath_ibdev *dev, - struct ipath_verbs_txreq *tx) -{ - unsigned long flags; - - spin_lock_irqsave(&dev->pending_lock, flags); - list_add(&tx->txreq.list, &dev->txreq_free); - spin_unlock_irqrestore(&dev->pending_lock, flags); -} - -static void sdma_complete(void *cookie, int status) -{ - struct ipath_verbs_txreq *tx = cookie; - struct ipath_qp *qp = tx->qp; - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - unsigned long flags; - enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? 
- IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; - - if (atomic_dec_and_test(&qp->s_dma_busy)) { - spin_lock_irqsave(&qp->s_lock, flags); - if (tx->wqe) - ipath_send_complete(qp, tx->wqe, ibs); - if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && - qp->s_last != qp->s_head) || - (qp->s_flags & IPATH_S_WAIT_DMA)) - ipath_schedule_send(qp); - spin_unlock_irqrestore(&qp->s_lock, flags); - wake_up(&qp->wait_dma); - } else if (tx->wqe) { - spin_lock_irqsave(&qp->s_lock, flags); - ipath_send_complete(qp, tx->wqe, ibs); - spin_unlock_irqrestore(&qp->s_lock, flags); - } - - if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) - kfree(tx->txreq.map_addr); - put_txreq(dev, tx); - - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); -} - -static void decrement_dma_busy(struct ipath_qp *qp) -{ - unsigned long flags; - - if (atomic_dec_and_test(&qp->s_dma_busy)) { - spin_lock_irqsave(&qp->s_lock, flags); - if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && - qp->s_last != qp->s_head) || - (qp->s_flags & IPATH_S_WAIT_DMA)) - ipath_schedule_send(qp); - spin_unlock_irqrestore(&qp->s_lock, flags); - wake_up(&qp->wait_dma); - } -} - -/* - * Compute the number of clock cycles of delay before sending the next packet. - * The multipliers reflect the number of clocks for the fastest rate so - * one tick at 4xDDR is 8 ticks at 1xSDR. - * If the destination port will take longer to receive a packet than - * the outgoing link can send it, we need to delay sending the next packet - * by the difference in time it takes the receiver to receive and the sender - * to send this packet. - * Note that this delay is always correct for UC and RC but not always - * optimal for UD. For UD, the destination HCA can be different for each - * packet, in which case, we could send packets to a different destination - * while "waiting" for the delay. The overhead for doing this without - * HW support is more than just paying the cost of delaying some packets - * unnecessarily. 
- */ -static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult) -{ - return (rcv_mult > snd_mult) ? - (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0; -} - -static int ipath_verbs_send_dma(struct ipath_qp *qp, - struct ipath_ib_header *hdr, u32 hdrwords, - struct ipath_sge_state *ss, u32 len, - u32 plen, u32 dwords) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - struct ipath_devdata *dd = dev->dd; - struct ipath_verbs_txreq *tx; - u32 *piobuf; - u32 control; - u32 ndesc; - int ret; - - tx = qp->s_tx; - if (tx) { - qp->s_tx = NULL; - /* resend previously constructed packet */ - atomic_inc(&qp->s_dma_busy); - ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx); - if (ret) { - qp->s_tx = tx; - decrement_dma_busy(qp); - } - goto bail; - } - - tx = get_txreq(dev); - if (!tx) { - ret = -EBUSY; - goto bail; - } - - /* - * Get the saved delay count we computed for the previous packet - * and save the delay count for this packet to be used next time - * we get here. - */ - control = qp->s_pkt_delay; - qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult); - - tx->qp = qp; - atomic_inc(&qp->refcount); - tx->wqe = qp->s_wqe; - tx->txreq.callback = sdma_complete; - tx->txreq.callback_cookie = tx; - tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST | - IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC; - if (plen + 1 >= IPATH_SMALLBUF_DWORDS) - tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF; - - /* VL15 packets bypass credit check */ - if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) { - control |= 1ULL << 31; - tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15; - } - - if (len) { - /* - * Don't try to DMA if it takes more descriptors than - * the queue holds. 
- */ - ndesc = ipath_count_sge(ss, len); - if (ndesc >= dd->ipath_sdma_descq_cnt) - ndesc = 0; - } else - ndesc = 1; - if (ndesc) { - tx->hdr.pbc[0] = cpu_to_le32(plen); - tx->hdr.pbc[1] = cpu_to_le32(control); - memcpy(&tx->hdr.hdr, hdr, hdrwords << 2); - tx->txreq.sg_count = ndesc; - tx->map_len = (hdrwords + 2) << 2; - tx->txreq.map_addr = &tx->hdr; - atomic_inc(&qp->s_dma_busy); - ret = ipath_sdma_verbs_send(dd, ss, dwords, tx); - if (ret) { - /* save ss and length in dwords */ - tx->ss = ss; - tx->len = dwords; - qp->s_tx = tx; - decrement_dma_busy(qp); - } - goto bail; - } - - /* Allocate a buffer and copy the header and payload to it. */ - tx->map_len = (plen + 1) << 2; - piobuf = kmalloc(tx->map_len, GFP_ATOMIC); - if (unlikely(piobuf == NULL)) { - ret = -EBUSY; - goto err_tx; - } - tx->txreq.map_addr = piobuf; - tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF; - tx->txreq.sg_count = 1; - - *piobuf++ = (__force u32) cpu_to_le32(plen); - *piobuf++ = (__force u32) cpu_to_le32(control); - memcpy(piobuf, hdr, hdrwords << 2); - ipath_copy_from_sge(piobuf + hdrwords, ss, len); - - atomic_inc(&qp->s_dma_busy); - ret = ipath_sdma_verbs_send(dd, NULL, 0, tx); - /* - * If we couldn't queue the DMA request, save the info - * and try again later rather than destroying the - * buffer and undoing the side effects of the copy. 
- */ - if (ret) { - tx->ss = NULL; - tx->len = 0; - qp->s_tx = tx; - decrement_dma_busy(qp); - } - dev->n_unaligned++; - goto bail; - -err_tx: - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - put_txreq(dev, tx); -bail: - return ret; -} - -static int ipath_verbs_send_pio(struct ipath_qp *qp, - struct ipath_ib_header *ibhdr, u32 hdrwords, - struct ipath_sge_state *ss, u32 len, - u32 plen, u32 dwords) -{ - struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd; - u32 *hdr = (u32 *) ibhdr; - u32 __iomem *piobuf; - unsigned flush_wc; - u32 control; - int ret; - unsigned long flags; - - piobuf = ipath_getpiobuf(dd, plen, NULL); - if (unlikely(piobuf == NULL)) { - ret = -EBUSY; - goto bail; - } - - /* - * Get the saved delay count we computed for the previous packet - * and save the delay count for this packet to be used next time - * we get here. - */ - control = qp->s_pkt_delay; - qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult); - - /* VL15 packets bypass credit check */ - if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15) - control |= 1ULL << 31; - - /* - * Write the length to the control qword plus any needed flags. - * We have to flush after the PBC for correctness on some cpus - * or WC buffer can be written out of order. - */ - writeq(((u64) control << 32) | plen, piobuf); - piobuf += 2; - - flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC; - if (len == 0) { - /* - * If there is just the header portion, must flush before - * writing last word of header for correctness, and after - * the last header word (trigger word). 
- */ - if (flush_wc) { - ipath_flush_wc(); - __iowrite32_copy(piobuf, hdr, hdrwords - 1); - ipath_flush_wc(); - __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); - ipath_flush_wc(); - } else - __iowrite32_copy(piobuf, hdr, hdrwords); - goto done; - } - - if (flush_wc) - ipath_flush_wc(); - __iowrite32_copy(piobuf, hdr, hdrwords); - piobuf += hdrwords; - - /* The common case is aligned and contained in one segment. */ - if (likely(ss->num_sge == 1 && len <= ss->sge.length && - !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { - u32 *addr = (u32 *) ss->sge.vaddr; - - /* Update address before sending packet. */ - update_sge(ss, len); - if (flush_wc) { - __iowrite32_copy(piobuf, addr, dwords - 1); - /* must flush early everything before trigger word */ - ipath_flush_wc(); - __raw_writel(addr[dwords - 1], piobuf + dwords - 1); - /* be sure trigger word is written */ - ipath_flush_wc(); - } else - __iowrite32_copy(piobuf, addr, dwords); - goto done; - } - copy_io(piobuf, ss, len, flush_wc); -done: - if (qp->s_wqe) { - spin_lock_irqsave(&qp->s_lock, flags); - ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); - spin_unlock_irqrestore(&qp->s_lock, flags); - } - ret = 0; -bail: - return ret; -} - -/** - * ipath_verbs_send - send a packet - * @qp: the QP to send on - * @hdr: the packet header - * @hdrwords: the number of 32-bit words in the header - * @ss: the SGE to send - * @len: the length of the packet in bytes - */ -int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr, - u32 hdrwords, struct ipath_sge_state *ss, u32 len) -{ - struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd; - u32 plen; - int ret; - u32 dwords = (len + 3) >> 2; - - /* - * Calculate the send buffer trigger address. - * The +1 counts for the pbc control dword following the pbc length. 
- */ - plen = hdrwords + dwords + 1; - - /* - * VL15 packets (IB_QPT_SMI) will always use PIO, so we - * can defer SDMA restart until link goes ACTIVE without - * worrying about just how we got there. - */ - if (qp->ibqp.qp_type == IB_QPT_SMI || - !(dd->ipath_flags & IPATH_HAS_SEND_DMA)) - ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, - plen, dwords); - else - ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len, - plen, dwords); - - return ret; -} - -int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords, - u64 *rwords, u64 *spkts, u64 *rpkts, - u64 *xmit_wait) -{ - int ret; - - if (!(dd->ipath_flags & IPATH_INITTED)) { - /* no hardware, freeze, etc. */ - ret = -EINVAL; - goto bail; - } - *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); - *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); - *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); - *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); - *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt); - - ret = 0; - -bail: - return ret; -} - -/** - * ipath_get_counters - get various chip counters - * @dd: the infinipath device - * @cntrs: counters are placed here - * - * Return the counters needed by recv_pma_get_portcounters(). - */ -int ipath_get_counters(struct ipath_devdata *dd, - struct ipath_verbs_counters *cntrs) -{ - struct ipath_cregs const *crp = dd->ipath_cregs; - int ret; - - if (!(dd->ipath_flags & IPATH_INITTED)) { - /* no hardware, freeze, etc. */ - ret = -EINVAL; - goto bail; - } - cntrs->symbol_error_counter = - ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt); - cntrs->link_error_recovery_counter = - ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt); - /* - * The link downed counter counts when the other side downs the - * connection. We add in the number of times we downed the link - * due to local link integrity errors to compensate. 
- */ - cntrs->link_downed_counter = - ipath_snap_cntr(dd, crp->cr_iblinkdowncnt); - cntrs->port_rcv_errors = - ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) + - ipath_snap_cntr(dd, crp->cr_rcvovflcnt) + - ipath_snap_cntr(dd, crp->cr_portovflcnt) + - ipath_snap_cntr(dd, crp->cr_err_rlencnt) + - ipath_snap_cntr(dd, crp->cr_invalidrlencnt) + - ipath_snap_cntr(dd, crp->cr_errlinkcnt) + - ipath_snap_cntr(dd, crp->cr_erricrccnt) + - ipath_snap_cntr(dd, crp->cr_errvcrccnt) + - ipath_snap_cntr(dd, crp->cr_errlpcrccnt) + - ipath_snap_cntr(dd, crp->cr_badformatcnt) + - dd->ipath_rxfc_unsupvl_errs; - if (crp->cr_rxotherlocalphyerrcnt) - cntrs->port_rcv_errors += - ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt); - if (crp->cr_rxvlerrcnt) - cntrs->port_rcv_errors += - ipath_snap_cntr(dd, crp->cr_rxvlerrcnt); - cntrs->port_rcv_remphys_errors = - ipath_snap_cntr(dd, crp->cr_rcvebpcnt); - cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt); - cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt); - cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt); - cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt); - cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt); - cntrs->local_link_integrity_errors = - crp->cr_locallinkintegrityerrcnt ? - ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) : - ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ? - dd->ipath_lli_errs : dd->ipath_lli_errors); - cntrs->excessive_buffer_overrun_errors = - crp->cr_excessbufferovflcnt ? - ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) : - dd->ipath_overrun_thresh_errs; - cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ? 
- ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0; - - ret = 0; - -bail: - return ret; -} - -/** - * ipath_ib_piobufavail - callback when a PIO buffer is available - * @arg: the device pointer - * - * This is called from ipath_intr() at interrupt level when a PIO buffer is - * available after ipath_verbs_send() returned an error that no buffers were - * available. Return 1 if we consumed all the PIO buffers and we still have - * QPs waiting for buffers (for now, just restart the send tasklet and - * return zero). - */ -int ipath_ib_piobufavail(struct ipath_ibdev *dev) -{ - struct list_head *list; - struct ipath_qp *qplist; - struct ipath_qp *qp; - unsigned long flags; - - if (dev == NULL) - goto bail; - - list = &dev->piowait; - qplist = NULL; - - spin_lock_irqsave(&dev->pending_lock, flags); - while (!list_empty(list)) { - qp = list_entry(list->next, struct ipath_qp, piowait); - list_del_init(&qp->piowait); - qp->pio_next = qplist; - qplist = qp; - atomic_inc(&qp->refcount); - } - spin_unlock_irqrestore(&dev->pending_lock, flags); - - while (qplist != NULL) { - qp = qplist; - qplist = qp->pio_next; - - spin_lock_irqsave(&qp->s_lock, flags); - if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) - ipath_schedule_send(qp); - spin_unlock_irqrestore(&qp->s_lock, flags); - - /* Notify ipath_destroy_qp() if it is waiting. 
*/ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - } - -bail: - return 0; -} - -static int ipath_query_device(struct ib_device *ibdev, struct ib_device_attr *props, - struct ib_udata *uhw) -{ - struct ipath_ibdev *dev = to_idev(ibdev); - - if (uhw->inlen || uhw->outlen) - return -EINVAL; - - memset(props, 0, sizeof(*props)); - - props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | - IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | - IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | - IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; - props->page_size_cap = PAGE_SIZE; - props->vendor_id = - IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3; - props->vendor_part_id = dev->dd->ipath_deviceid; - props->hw_ver = dev->dd->ipath_pcirev; - - props->sys_image_guid = dev->sys_image_guid; - - props->max_mr_size = ~0ull; - props->max_qp = ib_ipath_max_qps; - props->max_qp_wr = ib_ipath_max_qp_wrs; - props->max_sge = ib_ipath_max_sges; - props->max_cq = ib_ipath_max_cqs; - props->max_ah = ib_ipath_max_ahs; - props->max_cqe = ib_ipath_max_cqes; - props->max_mr = dev->lk_table.max; - props->max_fmr = dev->lk_table.max; - props->max_map_per_fmr = 32767; - props->max_pd = ib_ipath_max_pds; - props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC; - props->max_qp_init_rd_atom = 255; - /* props->max_res_rd_atom */ - props->max_srq = ib_ipath_max_srqs; - props->max_srq_wr = ib_ipath_max_srq_wrs; - props->max_srq_sge = ib_ipath_max_srq_sges; - /* props->local_ca_ack_delay */ - props->atomic_cap = IB_ATOMIC_GLOB; - props->max_pkeys = ipath_get_npkeys(dev->dd); - props->max_mcast_grp = ib_ipath_max_mcast_grps; - props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached; - props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * - props->max_mcast_grp; - - return 0; -} - -const u8 ipath_cvt_physportstate[32] = { - [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED, - [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP, - 
[INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL, - [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL, - [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP, - [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP, - [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] = - IB_PHYSPORTSTATE_CFG_TRAIN, - [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] = - IB_PHYSPORTSTATE_CFG_TRAIN, - [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] = - IB_PHYSPORTSTATE_CFG_TRAIN, - [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN, - [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] = - IB_PHYSPORTSTATE_LINK_ERR_RECOVER, - [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] = - IB_PHYSPORTSTATE_LINK_ERR_RECOVER, - [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = - IB_PHYSPORTSTATE_LINK_ERR_RECOVER, - [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN, - [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN, - [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN, - [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN, - [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN, - [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN, - [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN, - [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN -}; - -u32 ipath_get_cr_errpkey(struct ipath_devdata *dd) -{ - return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey); -} - -static int ipath_query_port(struct ib_device *ibdev, - u8 port, struct ib_port_attr *props) -{ - struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_devdata *dd = dev->dd; - enum ib_mtu mtu; - u16 lid = dd->ipath_lid; - u64 ibcstat; - - memset(props, 0, sizeof(*props)); - props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE); - props->lmc = dd->ipath_lmc; - props->sm_lid = dev->sm_lid; - props->sm_sl = dev->sm_sl; - ibcstat = dd->ipath_lastibcstat; - /* map LinkState to IB portinfo values. 
*/ - props->state = ipath_ib_linkstate(dd, ibcstat) + 1; - - /* See phys_state_show() */ - props->phys_state = /* MEA: assumes shift == 0 */ - ipath_cvt_physportstate[dd->ipath_lastibcstat & - dd->ibcs_lts_mask]; - props->port_cap_flags = dev->port_cap_flags; - props->gid_tbl_len = 1; - props->max_msg_sz = 0x80000000; - props->pkey_tbl_len = ipath_get_npkeys(dd); - props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) - - dev->z_pkey_violations; - props->qkey_viol_cntr = dev->qkey_violations; - props->active_width = dd->ipath_link_width_active; - /* See rate_show() */ - props->active_speed = dd->ipath_link_speed_active; - props->max_vl_num = 1; /* VLCap = VL0 */ - props->init_type_reply = 0; - - props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048; - switch (dd->ipath_ibmtu) { - case 4096: - mtu = IB_MTU_4096; - break; - case 2048: - mtu = IB_MTU_2048; - break; - case 1024: - mtu = IB_MTU_1024; - break; - case 512: - mtu = IB_MTU_512; - break; - case 256: - mtu = IB_MTU_256; - break; - default: - mtu = IB_MTU_2048; - } - props->active_mtu = mtu; - props->subnet_timeout = dev->subnet_timeout; - - return 0; -} - -static int ipath_modify_device(struct ib_device *device, - int device_modify_mask, - struct ib_device_modify *device_modify) -{ - int ret; - - if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | - IB_DEVICE_MODIFY_NODE_DESC)) { - ret = -EOPNOTSUPP; - goto bail; - } - - if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) - memcpy(device->node_desc, device_modify->node_desc, 64); - - if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) - to_idev(device)->sys_image_guid = - cpu_to_be64(device_modify->sys_image_guid); - - ret = 0; - -bail: - return ret; -} - -static int ipath_modify_port(struct ib_device *ibdev, - u8 port, int port_modify_mask, - struct ib_port_modify *props) -{ - struct ipath_ibdev *dev = to_idev(ibdev); - - dev->port_cap_flags |= props->set_port_cap_mask; - dev->port_cap_flags &= ~props->clr_port_cap_mask; - if (port_modify_mask 
& IB_PORT_SHUTDOWN) - ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN); - if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR) - dev->qkey_violations = 0; - return 0; -} - -static int ipath_query_gid(struct ib_device *ibdev, u8 port, - int index, union ib_gid *gid) -{ - struct ipath_ibdev *dev = to_idev(ibdev); - int ret; - - if (index >= 1) { - ret = -EINVAL; - goto bail; - } - gid->global.subnet_prefix = dev->gid_prefix; - gid->global.interface_id = dev->dd->ipath_guid; - - ret = 0; - -bail: - return ret; -} - -static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) -{ - struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_pd *pd; - struct ib_pd *ret; - - /* - * This is actually totally arbitrary. Some correctness tests - * assume there's a maximum number of PDs that can be allocated. - * We don't actually have this limit, but we fail the test if - * we allow allocations of more than we report for this value. - */ - - pd = kmalloc(sizeof *pd, GFP_KERNEL); - if (!pd) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - spin_lock(&dev->n_pds_lock); - if (dev->n_pds_allocated == ib_ipath_max_pds) { - spin_unlock(&dev->n_pds_lock); - kfree(pd); - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - dev->n_pds_allocated++; - spin_unlock(&dev->n_pds_lock); - - /* ib_alloc_pd() will initialize pd->ibpd. */ - pd->user = udata != NULL; - - ret = &pd->ibpd; - -bail: - return ret; -} - -static int ipath_dealloc_pd(struct ib_pd *ibpd) -{ - struct ipath_pd *pd = to_ipd(ibpd); - struct ipath_ibdev *dev = to_idev(ibpd->device); - - spin_lock(&dev->n_pds_lock); - dev->n_pds_allocated--; - spin_unlock(&dev->n_pds_lock); - - kfree(pd); - - return 0; -} - -/** - * ipath_create_ah - create an address handle - * @pd: the protection domain - * @ah_attr: the attributes of the AH - * - * This may be called from interrupt context. 
- */ -static struct ib_ah *ipath_create_ah(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) -{ - struct ipath_ah *ah; - struct ib_ah *ret; - struct ipath_ibdev *dev = to_idev(pd->device); - unsigned long flags; - - /* A multicast address requires a GRH (see ch. 8.4.1). */ - if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && - ah_attr->dlid != IPATH_PERMISSIVE_LID && - !(ah_attr->ah_flags & IB_AH_GRH)) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - - if (ah_attr->dlid == 0) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - - if (ah_attr->port_num < 1 || - ah_attr->port_num > pd->device->phys_port_cnt) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - - ah = kmalloc(sizeof *ah, GFP_ATOMIC); - if (!ah) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - spin_lock_irqsave(&dev->n_ahs_lock, flags); - if (dev->n_ahs_allocated == ib_ipath_max_ahs) { - spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - kfree(ah); - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - dev->n_ahs_allocated++; - spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - - /* ib_create_ah() will initialize ah->ibah. */ - ah->attr = *ah_attr; - ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate); - - ret = &ah->ibah; - -bail: - return ret; -} - -/** - * ipath_destroy_ah - destroy an address handle - * @ibah: the AH to destroy - * - * This may be called from interrupt context. 
- */ -static int ipath_destroy_ah(struct ib_ah *ibah) -{ - struct ipath_ibdev *dev = to_idev(ibah->device); - struct ipath_ah *ah = to_iah(ibah); - unsigned long flags; - - spin_lock_irqsave(&dev->n_ahs_lock, flags); - dev->n_ahs_allocated--; - spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - - kfree(ah); - - return 0; -} - -static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) -{ - struct ipath_ah *ah = to_iah(ibah); - - *ah_attr = ah->attr; - ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate); - - return 0; -} - -/** - * ipath_get_npkeys - return the size of the PKEY table for port 0 - * @dd: the infinipath device - */ -unsigned ipath_get_npkeys(struct ipath_devdata *dd) -{ - return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys); -} - -/** - * ipath_get_pkey - return the indexed PKEY from the port PKEY table - * @dd: the infinipath device - * @index: the PKEY index - */ -unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index) -{ - unsigned ret; - - /* always a kernel port, no locking needed */ - if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys)) - ret = 0; - else - ret = dd->ipath_pd[0]->port_pkeys[index]; - - return ret; -} - -static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey) -{ - struct ipath_ibdev *dev = to_idev(ibdev); - int ret; - - if (index >= ipath_get_npkeys(dev->dd)) { - ret = -EINVAL; - goto bail; - } - - *pkey = ipath_get_pkey(dev->dd, index); - ret = 0; - -bail: - return ret; -} - -/** - * ipath_alloc_ucontext - allocate a ucontest - * @ibdev: the infiniband device - * @udata: not used by the InfiniPath driver - */ - -static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) -{ - struct ipath_ucontext *context; - struct ib_ucontext *ret; - - context = kmalloc(sizeof *context, GFP_KERNEL); - if (!context) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - ret = &context->ibucontext; - -bail: - return ret; -} - -static int 
ipath_dealloc_ucontext(struct ib_ucontext *context) -{ - kfree(to_iucontext(context)); - return 0; -} - -static int ipath_verbs_register_sysfs(struct ib_device *dev); - -static void __verbs_timer(unsigned long arg) -{ - struct ipath_devdata *dd = (struct ipath_devdata *) arg; - - /* Handle verbs layer timeouts. */ - ipath_ib_timer(dd->verbs_dev); - - mod_timer(&dd->verbs_timer, jiffies + 1); -} - -static int enable_timer(struct ipath_devdata *dd) -{ - /* - * Early chips had a design flaw where the chip and kernel idea - * of the tail register don't always agree, and therefore we won't - * get an interrupt on the next packet received. - * If the board supports per packet receive interrupts, use it. - * Otherwise, the timer function periodically checks for packets - * to cover this case. - * Either way, the timer is needed for verbs layer related - * processing. - */ - if (dd->ipath_flags & IPATH_GPIO_INTR) { - ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect, - 0x2074076542310ULL); - /* Enable GPIO bit 2 interrupt */ - dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT); - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, - dd->ipath_gpio_mask); - } - - init_timer(&dd->verbs_timer); - dd->verbs_timer.function = __verbs_timer; - dd->verbs_timer.data = (unsigned long)dd; - dd->verbs_timer.expires = jiffies + 1; - add_timer(&dd->verbs_timer); - - return 0; -} - -static int disable_timer(struct ipath_devdata *dd) -{ - /* Disable GPIO bit 2 interrupt */ - if (dd->ipath_flags & IPATH_GPIO_INTR) { - /* Disable GPIO bit 2 interrupt */ - dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT)); - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, - dd->ipath_gpio_mask); - /* - * We might want to undo changes to debugportselect, - * but how? 
- */ - } - - del_timer_sync(&dd->verbs_timer); - - return 0; -} - -static int ipath_port_immutable(struct ib_device *ibdev, u8 port_num, - struct ib_port_immutable *immutable) -{ - struct ib_port_attr attr; - int err; - - err = ipath_query_port(ibdev, port_num, &attr); - if (err) - return err; - - immutable->pkey_tbl_len = attr.pkey_tbl_len; - immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; - immutable->max_mad_size = IB_MGMT_MAD_SIZE; - - return 0; -} - -/** - * ipath_register_ib_device - register our device with the infiniband core - * @dd: the device data structure - * Return the allocated ipath_ibdev pointer or NULL on error. - */ -int ipath_register_ib_device(struct ipath_devdata *dd) -{ - struct ipath_verbs_counters cntrs; - struct ipath_ibdev *idev; - struct ib_device *dev; - struct ipath_verbs_txreq *tx; - unsigned i; - int ret; - - idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev); - if (idev == NULL) { - ret = -ENOMEM; - goto bail; - } - - dev = &idev->ibdev; - - if (dd->ipath_sdma_descq_cnt) { - tx = kmalloc(dd->ipath_sdma_descq_cnt * sizeof *tx, - GFP_KERNEL); - if (tx == NULL) { - ret = -ENOMEM; - goto err_tx; - } - } else - tx = NULL; - idev->txreq_bufs = tx; - - /* Only need to initialize non-zero fields. */ - spin_lock_init(&idev->n_pds_lock); - spin_lock_init(&idev->n_ahs_lock); - spin_lock_init(&idev->n_cqs_lock); - spin_lock_init(&idev->n_qps_lock); - spin_lock_init(&idev->n_srqs_lock); - spin_lock_init(&idev->n_mcast_grps_lock); - - spin_lock_init(&idev->qp_table.lock); - spin_lock_init(&idev->lk_table.lock); - idev->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE); - /* Set the prefix to the default value (see ch. 4.1.1) */ - idev->gid_prefix = cpu_to_be64(0xfe80000000000000ULL); - - ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size); - if (ret) - goto err_qp; - - /* - * The top ib_ipath_lkey_table_size bits are used to index the - * table. 
The lower 8 bits can be owned by the user (copied from - * the LKEY). The remaining bits act as a generation number or tag. - */ - idev->lk_table.max = 1 << ib_ipath_lkey_table_size; - idev->lk_table.table = kzalloc(idev->lk_table.max * - sizeof(*idev->lk_table.table), - GFP_KERNEL); - if (idev->lk_table.table == NULL) { - ret = -ENOMEM; - goto err_lk; - } - INIT_LIST_HEAD(&idev->pending_mmaps); - spin_lock_init(&idev->pending_lock); - idev->mmap_offset = PAGE_SIZE; - spin_lock_init(&idev->mmap_offset_lock); - INIT_LIST_HEAD(&idev->pending[0]); - INIT_LIST_HEAD(&idev->pending[1]); - INIT_LIST_HEAD(&idev->pending[2]); - INIT_LIST_HEAD(&idev->piowait); - INIT_LIST_HEAD(&idev->rnrwait); - INIT_LIST_HEAD(&idev->txreq_free); - idev->pending_index = 0; - idev->port_cap_flags = - IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP; - if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY) - idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP; - idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; - idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; - idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; - idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS; - idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT; - - /* Snapshot current HW counters to "clear" them. 
*/ - ipath_get_counters(dd, &cntrs); - idev->z_symbol_error_counter = cntrs.symbol_error_counter; - idev->z_link_error_recovery_counter = - cntrs.link_error_recovery_counter; - idev->z_link_downed_counter = cntrs.link_downed_counter; - idev->z_port_rcv_errors = cntrs.port_rcv_errors; - idev->z_port_rcv_remphys_errors = - cntrs.port_rcv_remphys_errors; - idev->z_port_xmit_discards = cntrs.port_xmit_discards; - idev->z_port_xmit_data = cntrs.port_xmit_data; - idev->z_port_rcv_data = cntrs.port_rcv_data; - idev->z_port_xmit_packets = cntrs.port_xmit_packets; - idev->z_port_rcv_packets = cntrs.port_rcv_packets; - idev->z_local_link_integrity_errors = - cntrs.local_link_integrity_errors; - idev->z_excessive_buffer_overrun_errors = - cntrs.excessive_buffer_overrun_errors; - idev->z_vl15_dropped = cntrs.vl15_dropped; - - for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++) - list_add(&tx->txreq.list, &idev->txreq_free); - - /* - * The system image GUID is supposed to be the same for all - * IB HCAs in a single system but since there can be other - * device types in the system, we can't be sure this is unique. 
- */ - if (!sys_image_guid) - sys_image_guid = dd->ipath_guid; - idev->sys_image_guid = sys_image_guid; - idev->ib_unit = dd->ipath_unit; - idev->dd = dd; - - strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX); - dev->owner = THIS_MODULE; - dev->node_guid = dd->ipath_guid; - dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION; - dev->uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_QUERY_AH) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); - dev->node_type = RDMA_NODE_IB_CA; - dev->phys_port_cnt = 1; - dev->num_comp_vectors = 1; - dev->dma_device = &dd->pcidev->dev; - dev->query_device = ipath_query_device; - dev->modify_device = ipath_modify_device; - dev->query_port = ipath_query_port; - dev->modify_port = ipath_modify_port; - dev->query_pkey = ipath_query_pkey; - dev->query_gid = ipath_query_gid; - dev->alloc_ucontext = 
ipath_alloc_ucontext; - dev->dealloc_ucontext = ipath_dealloc_ucontext; - dev->alloc_pd = ipath_alloc_pd; - dev->dealloc_pd = ipath_dealloc_pd; - dev->create_ah = ipath_create_ah; - dev->destroy_ah = ipath_destroy_ah; - dev->query_ah = ipath_query_ah; - dev->create_srq = ipath_create_srq; - dev->modify_srq = ipath_modify_srq; - dev->query_srq = ipath_query_srq; - dev->destroy_srq = ipath_destroy_srq; - dev->create_qp = ipath_create_qp; - dev->modify_qp = ipath_modify_qp; - dev->query_qp = ipath_query_qp; - dev->destroy_qp = ipath_destroy_qp; - dev->post_send = ipath_post_send; - dev->post_recv = ipath_post_receive; - dev->post_srq_recv = ipath_post_srq_receive; - dev->create_cq = ipath_create_cq; - dev->destroy_cq = ipath_destroy_cq; - dev->resize_cq = ipath_resize_cq; - dev->poll_cq = ipath_poll_cq; - dev->req_notify_cq = ipath_req_notify_cq; - dev->get_dma_mr = ipath_get_dma_mr; - dev->reg_phys_mr = ipath_reg_phys_mr; - dev->reg_user_mr = ipath_reg_user_mr; - dev->dereg_mr = ipath_dereg_mr; - dev->alloc_fmr = ipath_alloc_fmr; - dev->map_phys_fmr = ipath_map_phys_fmr; - dev->unmap_fmr = ipath_unmap_fmr; - dev->dealloc_fmr = ipath_dealloc_fmr; - dev->attach_mcast = ipath_multicast_attach; - dev->detach_mcast = ipath_multicast_detach; - dev->process_mad = ipath_process_mad; - dev->mmap = ipath_mmap; - dev->dma_ops = &ipath_dma_mapping_ops; - dev->get_port_immutable = ipath_port_immutable; - - snprintf(dev->node_desc, sizeof(dev->node_desc), - IPATH_IDSTR " %s", init_utsname()->nodename); - - ret = ib_register_device(dev, NULL); - if (ret) - goto err_reg; - - ret = ipath_verbs_register_sysfs(dev); - if (ret) - goto err_class; - - enable_timer(dd); - - goto bail; - -err_class: - ib_unregister_device(dev); -err_reg: - kfree(idev->lk_table.table); -err_lk: - kfree(idev->qp_table.table); -err_qp: - kfree(idev->txreq_bufs); -err_tx: - ib_dealloc_device(dev); - ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret); - idev = NULL; - -bail: - dd->verbs_dev = idev; - return 
ret; -} - -void ipath_unregister_ib_device(struct ipath_ibdev *dev) -{ - struct ib_device *ibdev = &dev->ibdev; - u32 qps_inuse; - - ib_unregister_device(ibdev); - - disable_timer(dev->dd); - - if (!list_empty(&dev->pending[0]) || - !list_empty(&dev->pending[1]) || - !list_empty(&dev->pending[2])) - ipath_dev_err(dev->dd, "pending list not empty!\n"); - if (!list_empty(&dev->piowait)) - ipath_dev_err(dev->dd, "piowait list not empty!\n"); - if (!list_empty(&dev->rnrwait)) - ipath_dev_err(dev->dd, "rnrwait list not empty!\n"); - if (!ipath_mcast_tree_empty()) - ipath_dev_err(dev->dd, "multicast table memory leak!\n"); - /* - * Note that ipath_unregister_ib_device() can be called before all - * the QPs are destroyed! - */ - qps_inuse = ipath_free_all_qps(&dev->qp_table); - if (qps_inuse) - ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n", - qps_inuse); - kfree(dev->qp_table.table); - kfree(dev->lk_table.table); - kfree(dev->txreq_bufs); - ib_dealloc_device(ibdev); -} - -static ssize_t show_rev(struct device *device, struct device_attribute *attr, - char *buf) -{ - struct ipath_ibdev *dev = - container_of(device, struct ipath_ibdev, ibdev.dev); - - return sprintf(buf, "%x\n", dev->dd->ipath_pcirev); -} - -static ssize_t show_hca(struct device *device, struct device_attribute *attr, - char *buf) -{ - struct ipath_ibdev *dev = - container_of(device, struct ipath_ibdev, ibdev.dev); - int ret; - - ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128); - if (ret < 0) - goto bail; - strcat(buf, "\n"); - ret = strlen(buf); - -bail: - return ret; -} - -static ssize_t show_stats(struct device *device, struct device_attribute *attr, - char *buf) -{ - struct ipath_ibdev *dev = - container_of(device, struct ipath_ibdev, ibdev.dev); - int i; - int len; - - len = sprintf(buf, - "RC resends %d\n" - "RC no QACK %d\n" - "RC ACKs %d\n" - "RC SEQ NAKs %d\n" - "RC RDMA seq %d\n" - "RC RNR NAKs %d\n" - "RC OTH NAKs %d\n" - "RC timeouts %d\n" - "RC RDMA dup %d\n" - "piobuf wait 
%d\n" - "unaligned %d\n" - "PKT drops %d\n" - "WQE errs %d\n", - dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, - dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, - dev->n_other_naks, dev->n_timeouts, - dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned, - dev->n_pkt_drops, dev->n_wqe_errs); - for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { - const struct ipath_opcode_stats *si = &dev->opstats[i]; - - if (!si->n_packets && !si->n_bytes) - continue; - len += sprintf(buf + len, "%02x %llu/%llu\n", i, - (unsigned long long) si->n_packets, - (unsigned long long) si->n_bytes); - } - return len; -} - -static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); -static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL); -static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL); - -static struct device_attribute *ipath_class_attributes[] = { - &dev_attr_hw_rev, - &dev_attr_hca_type, - &dev_attr_board_id, - &dev_attr_stats -}; - -static int ipath_verbs_register_sysfs(struct ib_device *dev) -{ - int i; - int ret; - - for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) { - ret = device_create_file(&dev->dev, - ipath_class_attributes[i]); - if (ret) - goto bail; - } - return 0; -bail: - for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) - device_remove_file(&dev->dev, ipath_class_attributes[i]); - return ret; -} diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h deleted file mode 100644 index ec167e545..000000000 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ /dev/null @@ -1,939 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef IPATH_VERBS_H -#define IPATH_VERBS_H - -#include <linux/types.h> -#include <linux/spinlock.h> -#include <linux/kernel.h> -#include <linux/interrupt.h> -#include <linux/kref.h> -#include <rdma/ib_pack.h> -#include <rdma/ib_user_verbs.h> - -#include "ipath_kernel.h" - -#define IPATH_MAX_RDMA_ATOMIC 4 - -#define QPN_MAX (1 << 24) -#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) - -/* - * Increment this value if any changes that break userspace ABI - * compatibility are made. - */ -#define IPATH_UVERBS_ABI_VERSION 2 - -/* - * Define an ib_cq_notify value that is not valid so we know when CQ - * notifications are armed. 
- */ -#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1) - -/* AETH NAK opcode values */ -#define IB_RNR_NAK 0x20 -#define IB_NAK_PSN_ERROR 0x60 -#define IB_NAK_INVALID_REQUEST 0x61 -#define IB_NAK_REMOTE_ACCESS_ERROR 0x62 -#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 -#define IB_NAK_INVALID_RD_REQUEST 0x64 - -/* Flags for checking QP state (see ib_ipath_state_ops[]) */ -#define IPATH_POST_SEND_OK 0x01 -#define IPATH_POST_RECV_OK 0x02 -#define IPATH_PROCESS_RECV_OK 0x04 -#define IPATH_PROCESS_SEND_OK 0x08 -#define IPATH_PROCESS_NEXT_SEND_OK 0x10 -#define IPATH_FLUSH_SEND 0x20 -#define IPATH_FLUSH_RECV 0x40 -#define IPATH_PROCESS_OR_FLUSH_SEND \ - (IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND) - -/* IB Performance Manager status values */ -#define IB_PMA_SAMPLE_STATUS_DONE 0x00 -#define IB_PMA_SAMPLE_STATUS_STARTED 0x01 -#define IB_PMA_SAMPLE_STATUS_RUNNING 0x02 - -/* Mandatory IB performance counter select values. */ -#define IB_PMA_PORT_XMIT_DATA cpu_to_be16(0x0001) -#define IB_PMA_PORT_RCV_DATA cpu_to_be16(0x0002) -#define IB_PMA_PORT_XMIT_PKTS cpu_to_be16(0x0003) -#define IB_PMA_PORT_RCV_PKTS cpu_to_be16(0x0004) -#define IB_PMA_PORT_XMIT_WAIT cpu_to_be16(0x0005) - -struct ib_reth { - __be64 vaddr; - __be32 rkey; - __be32 length; -} __attribute__ ((packed)); - -struct ib_atomic_eth { - __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ - __be32 rkey; - __be64 swap_data; - __be64 compare_data; -} __attribute__ ((packed)); - -struct ipath_other_headers { - __be32 bth[3]; - union { - struct { - __be32 deth[2]; - __be32 imm_data; - } ud; - struct { - struct ib_reth reth; - __be32 imm_data; - } rc; - struct { - __be32 aeth; - __be32 atomic_ack_eth[2]; - } at; - __be32 imm_data; - __be32 aeth; - struct ib_atomic_eth atomic_eth; - } u; -} __attribute__ ((packed)); - -/* - * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes - * long (72 w/ imm_data). Only the first 56 bytes of the IB header - * will be in the eager header buffer. 
The remaining 12 or 16 bytes - * are in the data buffer. - */ -struct ipath_ib_header { - __be16 lrh[4]; - union { - struct { - struct ib_grh grh; - struct ipath_other_headers oth; - } l; - struct ipath_other_headers oth; - } u; -} __attribute__ ((packed)); - -struct ipath_pio_header { - __le32 pbc[2]; - struct ipath_ib_header hdr; -} __attribute__ ((packed)); - -/* - * There is one struct ipath_mcast for each multicast GID. - * All attached QPs are then stored as a list of - * struct ipath_mcast_qp. - */ -struct ipath_mcast_qp { - struct list_head list; - struct ipath_qp *qp; -}; - -struct ipath_mcast { - struct rb_node rb_node; - union ib_gid mgid; - struct list_head qp_list; - wait_queue_head_t wait; - atomic_t refcount; - int n_attached; -}; - -/* Protection domain */ -struct ipath_pd { - struct ib_pd ibpd; - int user; /* non-zero if created from user space */ -}; - -/* Address Handle */ -struct ipath_ah { - struct ib_ah ibah; - struct ib_ah_attr attr; -}; - -/* - * This structure is used by ipath_mmap() to validate an offset - * when an mmap() request is made. The vm_area_struct then uses - * this as its vm_private_data. - */ -struct ipath_mmap_info { - struct list_head pending_mmaps; - struct ib_ucontext *context; - void *obj; - __u64 offset; - struct kref ref; - unsigned size; -}; - -/* - * This structure is used to contain the head pointer, tail pointer, - * and completion queue entries as a single memory allocation so - * it can be mmap'ed into user space. - */ -struct ipath_cq_wc { - u32 head; /* index of next entry to fill */ - u32 tail; /* index of next ib_poll_cq() entry */ - union { - /* these are actually size ibcq.cqe + 1 */ - struct ib_uverbs_wc uqueue[0]; - struct ib_wc kqueue[0]; - }; -}; - -/* - * The completion queue structure. 
- */ -struct ipath_cq { - struct ib_cq ibcq; - struct tasklet_struct comptask; - spinlock_t lock; - u8 notify; - u8 triggered; - struct ipath_cq_wc *queue; - struct ipath_mmap_info *ip; -}; - -/* - * A segment is a linear region of low physical memory. - * XXX Maybe we should use phys addr here and kmap()/kunmap(). - * Used by the verbs layer. - */ -struct ipath_seg { - void *vaddr; - size_t length; -}; - -/* The number of ipath_segs that fit in a page. */ -#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg)) - -struct ipath_segarray { - struct ipath_seg segs[IPATH_SEGSZ]; -}; - -struct ipath_mregion { - struct ib_pd *pd; /* shares refcnt of ibmr.pd */ - u64 user_base; /* User's address for this region */ - u64 iova; /* IB start address of this region */ - size_t length; - u32 lkey; - u32 offset; /* offset (bytes) to start of region */ - int access_flags; - u32 max_segs; /* number of ipath_segs in all the arrays */ - u32 mapsz; /* size of the map array */ - struct ipath_segarray *map[0]; /* the segments */ -}; - -/* - * These keep track of the copy progress within a memory region. - * Used by the verbs layer. - */ -struct ipath_sge { - struct ipath_mregion *mr; - void *vaddr; /* kernel virtual address of segment */ - u32 sge_length; /* length of the SGE */ - u32 length; /* remaining length of the segment */ - u16 m; /* current index: mr->map[m] */ - u16 n; /* current index: mr->map[m]->segs[n] */ -}; - -/* Memory region */ -struct ipath_mr { - struct ib_mr ibmr; - struct ib_umem *umem; - struct ipath_mregion mr; /* must be last */ -}; - -/* - * Send work request queue entry. - * The size of the sg_list is determined when the QP is created and stored - * in qp->s_max_sge. 
- */ -struct ipath_swqe { - struct ib_send_wr wr; /* don't use wr.sg_list */ - u32 psn; /* first packet sequence number */ - u32 lpsn; /* last packet sequence number */ - u32 ssn; /* send sequence number */ - u32 length; /* total length of data in sg_list */ - struct ipath_sge sg_list[0]; -}; - -/* - * Receive work request queue entry. - * The size of the sg_list is determined when the QP (or SRQ) is created - * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). - */ -struct ipath_rwqe { - u64 wr_id; - u8 num_sge; - struct ib_sge sg_list[0]; -}; - -/* - * This structure is used to contain the head pointer, tail pointer, - * and receive work queue entries as a single memory allocation so - * it can be mmap'ed into user space. - * Note that the wq array elements are variable size so you can't - * just index into the array to get the N'th element; - * use get_rwqe_ptr() instead. - */ -struct ipath_rwq { - u32 head; /* new work requests posted to the head */ - u32 tail; /* receives pull requests from here. */ - struct ipath_rwqe wq[0]; -}; - -struct ipath_rq { - struct ipath_rwq *wq; - spinlock_t lock; - u32 size; /* size of RWQE array */ - u8 max_sge; -}; - -struct ipath_srq { - struct ib_srq ibsrq; - struct ipath_rq rq; - struct ipath_mmap_info *ip; - /* send signal when number of RWQEs < limit */ - u32 limit; -}; - -struct ipath_sge_state { - struct ipath_sge *sg_list; /* next SGE to be used if any */ - struct ipath_sge sge; /* progress state for the current SGE */ - u8 num_sge; - u8 static_rate; -}; - -/* - * This structure holds the information that the send tasklet needs - * to send a RDMA read response or atomic operation. - */ -struct ipath_ack_entry { - u8 opcode; - u8 sent; - u32 psn; - union { - struct ipath_sge_state rdma_sge; - u64 atomic_data; - }; -}; - -/* - * Variables prefixed with s_ are for the requester (sender). - * Variables prefixed with r_ are for the responder (receiver). - * Variables prefixed with ack_ are for responder replies. 
- * - * Common variables are protected by both r_rq.lock and s_lock in that order - * which only happens in modify_qp() or changing the QP 'state'. - */ -struct ipath_qp { - struct ib_qp ibqp; - struct ipath_qp *next; /* link list for QPN hash table */ - struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */ - struct ipath_qp *pio_next; /* link for ipath_ib_piobufavail() */ - struct list_head piowait; /* link for wait PIO buf */ - struct list_head timerwait; /* link for waiting for timeouts */ - struct ib_ah_attr remote_ah_attr; - struct ipath_ib_header s_hdr; /* next packet header to send */ - atomic_t refcount; - wait_queue_head_t wait; - wait_queue_head_t wait_dma; - struct tasklet_struct s_task; - struct ipath_mmap_info *ip; - struct ipath_sge_state *s_cur_sge; - struct ipath_verbs_txreq *s_tx; - struct ipath_sge_state s_sge; /* current send request data */ - struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1]; - struct ipath_sge_state s_ack_rdma_sge; - struct ipath_sge_state s_rdma_read_sge; - struct ipath_sge_state r_sge; /* current receive data */ - spinlock_t s_lock; - atomic_t s_dma_busy; - u16 s_pkt_delay; - u16 s_hdrwords; /* size of s_hdr in 32 bit words */ - u32 s_cur_size; /* size of send packet in bytes */ - u32 s_len; /* total length of s_sge */ - u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ - u32 s_next_psn; /* PSN for next request */ - u32 s_last_psn; /* last response PSN processed */ - u32 s_psn; /* current packet sequence number */ - u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ - u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ - u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ - u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ - u64 r_wr_id; /* ID for current receive WQE */ - unsigned long r_aflags; - u32 r_len; /* total length of r_sge */ - u32 r_rcv_len; /* receive data len processed */ - u32 r_psn; /* expected rcv packet sequence number */ - u32 r_msn; /* message 
sequence number */ - u8 state; /* QP state */ - u8 s_state; /* opcode of last packet sent */ - u8 s_ack_state; /* opcode of packet to ACK */ - u8 s_nak_state; /* non-zero if NAK is pending */ - u8 r_state; /* opcode of last packet received */ - u8 r_nak_state; /* non-zero if NAK is pending */ - u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ - u8 r_flags; - u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ - u8 r_head_ack_queue; /* index into s_ack_queue[] */ - u8 qp_access_flags; - u8 s_max_sge; /* size of s_wq->sg_list */ - u8 s_retry_cnt; /* number of times to retry */ - u8 s_rnr_retry_cnt; - u8 s_retry; /* requester retry counter */ - u8 s_rnr_retry; /* requester RNR retry counter */ - u8 s_pkey_index; /* PKEY index to use */ - u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ - u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ - u8 s_tail_ack_queue; /* index into s_ack_queue[] */ - u8 s_flags; - u8 s_dmult; - u8 s_draining; - u8 timeout; /* Timeout for this QP */ - enum ib_mtu path_mtu; - u32 remote_qpn; - u32 qkey; /* QKEY for this QP (for UD or RD) */ - u32 s_size; /* send work queue size */ - u32 s_head; /* new entries added here */ - u32 s_tail; /* next entry to process */ - u32 s_cur; /* current work queue entry */ - u32 s_last; /* last un-ACK'ed entry */ - u32 s_ssn; /* SSN of tail entry */ - u32 s_lsn; /* limit sequence number (credit) */ - struct ipath_swqe *s_wq; /* send work queue */ - struct ipath_swqe *s_wqe; - struct ipath_sge *r_ud_sg_list; - struct ipath_rq r_rq; /* receive work queue */ - struct ipath_sge r_sg_list[0]; /* verified SGEs */ -}; - -/* - * Atomic bit definitions for r_aflags. - */ -#define IPATH_R_WRID_VALID 0 - -/* - * Bit definitions for r_flags. - */ -#define IPATH_R_REUSE_SGE 0x01 -#define IPATH_R_RDMAR_SEQ 0x02 - -/* - * Bit definitions for s_flags. 
- * - * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs - * before processing the next SWQE - * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs - * before processing the next SWQE - * IPATH_S_WAITING - waiting for RNR timeout or send buffer available. - * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE - * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating - * next send completion entry not via send DMA. - */ -#define IPATH_S_SIGNAL_REQ_WR 0x01 -#define IPATH_S_FENCE_PENDING 0x02 -#define IPATH_S_RDMAR_PENDING 0x04 -#define IPATH_S_ACK_PENDING 0x08 -#define IPATH_S_BUSY 0x10 -#define IPATH_S_WAITING 0x20 -#define IPATH_S_WAIT_SSN_CREDIT 0x40 -#define IPATH_S_WAIT_DMA 0x80 - -#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \ - IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA) - -#define IPATH_PSN_CREDIT 512 - -/* - * Since struct ipath_swqe is not a fixed size, we can't simply index into - * struct ipath_qp.s_wq. This function does the array index computation. - */ -static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp, - unsigned n) -{ - return (struct ipath_swqe *)((char *)qp->s_wq + - (sizeof(struct ipath_swqe) + - qp->s_max_sge * - sizeof(struct ipath_sge)) * n); -} - -/* - * Since struct ipath_rwqe is not a fixed size, we can't simply index into - * struct ipath_rwq.wq. This function does the array index computation. - */ -static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq, - unsigned n) -{ - return (struct ipath_rwqe *) - ((char *) rq->wq->wq + - (sizeof(struct ipath_rwqe) + - rq->max_sge * sizeof(struct ib_sge)) * n); -} - -/* - * QPN-map pages start out as NULL, they get allocated upon - * first use and are never deallocated. This way, - * large bitmaps are not allocated unless large numbers of QPs are used. 
- */
-struct qpn_map {
-	atomic_t n_free;
-	void *page;
-};
-
-/*
- * QP number bookkeeping: a table of QP pointers plus the QPN-map pages
- * that record which QP numbers are free.
- */
-struct ipath_qp_table {
-	spinlock_t lock;
-	u32 last;		/* last QP number allocated */
-	u32 max;		/* size of the hash table */
-	u32 nmaps;		/* size of the map table */
-	struct ipath_qp **table;
-	/* bit map of free numbers */
-	struct qpn_map map[QPNMAP_ENTRIES];
-};
-
-/*
- * Table of memory regions; presumably indexed by a value derived from the
- * LKEY/RKEY (see ipath_alloc_lkey()) -- TODO confirm against ipath_keys.c.
- */
-struct ipath_lkey_table {
-	spinlock_t lock;
-	u32 next;		/* next unused index (speeds search) */
-	u32 gen;		/* generation count */
-	u32 max;		/* size of the table */
-	struct ipath_mregion **table;
-};
-
-/* Packet/byte counter pair; struct ipath_ibdev keeps 128 of these in opstats[]. */
-struct ipath_opcode_stats {
-	u64 n_packets;		/* number of packets */
-	u64 n_bytes;		/* total number of bytes */
-};
-
-/*
- * Per-device verbs state.  Embeds the core struct ib_device (see to_idev())
- * and points back at the low-level struct ipath_devdata.
- */
-struct ipath_ibdev {
-	struct ib_device ibdev;
-	struct ipath_devdata *dd;
-	struct list_head pending_mmaps;
-	spinlock_t mmap_offset_lock;
-	u32 mmap_offset;
-	int ib_unit;		/* This is the device number */
-	u16 sm_lid;		/* in host order */
-	u8 sm_sl;
-	u8 mkeyprot;
-	/* non-zero when timer is set */
-	unsigned long mkey_lease_timeout;
-
-	/* The following fields are really per port. */
-	struct ipath_qp_table qp_table;
-	struct ipath_lkey_table lk_table;
-	struct list_head pending[3];	/* FIFO of QPs waiting for ACKs */
-	struct list_head piowait;	/* list for wait PIO buf */
-	struct list_head txreq_free;
-	void *txreq_bufs;
-	/* list of QPs waiting for RNR timer */
-	struct list_head rnrwait;
-	spinlock_t pending_lock;
-	__be64 sys_image_guid;	/* in network order */
-	__be64 gid_prefix;	/* in network order */
-	__be64 mkey;
-
-	/* Each n_*_allocated counter below is paired with its own spinlock. */
-	u32 n_pds_allocated;	/* number of PDs allocated for device */
-	spinlock_t n_pds_lock;
-	u32 n_ahs_allocated;	/* number of AHs allocated for device */
-	spinlock_t n_ahs_lock;
-	u32 n_cqs_allocated;	/* number of CQs allocated for device */
-	spinlock_t n_cqs_lock;
-	u32 n_qps_allocated;	/* number of QPs allocated for device */
-	spinlock_t n_qps_lock;
-	u32 n_srqs_allocated;	/* number of SRQs allocated for device */
-	spinlock_t n_srqs_lock;
-	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
-	spinlock_t n_mcast_grps_lock;
-
-	u64 ipath_sword;	/* total dwords sent (sample result) */
-	u64 ipath_rword;	/* total dwords received (sample result) */
-	u64 ipath_spkts;	/* total packets sent (sample result) */
-	u64 ipath_rpkts;	/* total packets received (sample result) */
-	/* # of ticks no data sent (sample result) */
-	u64 ipath_xmit_wait;
-	u64 rcv_errors;		/* # of packets with SW detected rcv errs */
-	u64 n_unicast_xmit;	/* total unicast packets sent */
-	u64 n_unicast_rcv;	/* total unicast packets received */
-	u64 n_multicast_xmit;	/* total multicast packets sent */
-	u64 n_multicast_rcv;	/* total multicast packets received */
-	u64 z_symbol_error_counter;		/* starting count for PMA */
-	u64 z_link_error_recovery_counter;	/* starting count for PMA */
-	u64 z_link_downed_counter;		/* starting count for PMA */
-	u64 z_port_rcv_errors;			/* starting count for PMA */
-	u64 z_port_rcv_remphys_errors;		/* starting count for PMA */
-	u64 z_port_xmit_discards;		/* starting count for PMA */
-	u64 z_port_xmit_data;			/* starting count for PMA */
-	u64 z_port_rcv_data;			/* starting count for PMA */
-	u64 z_port_xmit_packets;		/* starting count for PMA */
-	u64 z_port_rcv_packets;			/* starting count for PMA */
-	u32 z_pkey_violations;			/* starting count for PMA */
-	u32 z_local_link_integrity_errors;	/* starting count for PMA */
-	u32 z_excessive_buffer_overrun_errors;	/* starting count for PMA */
-	u32 z_vl15_dropped;			/* starting count for PMA */
-	u32 n_rc_resends;
-	u32 n_rc_acks;
-	u32 n_rc_qacks;
-	u32 n_seq_naks;
-	u32 n_rdma_seq;
-	u32 n_rnr_naks;
-	u32 n_other_naks;
-	u32 n_timeouts;
-	u32 n_pkt_drops;
-	u32 n_vl15_dropped;
-	u32 n_wqe_errs;
-	u32 n_rdma_dup_busy;
-	u32 n_piowait;
-	u32 n_unaligned;
-	u32 port_cap_flags;
-	u32 pma_sample_start;
-	u32 pma_sample_interval;
-	__be16 pma_counter_select[5];
-	u16 pma_tag;
-	u16 qkey_violations;
-	u16 mkey_violations;
-	u16 mkey_lease_period;
-	u16 pending_index;	/* which pending queue is active */
-	u8 pma_sample_status;
-	u8 subnet_timeout;
-	u8 vl_high_limit;
-	struct ipath_opcode_stats opstats[128];
-};
-
-/* Counter snapshot filled in by ipath_get_counters(). */
-struct ipath_verbs_counters {
-	u64 symbol_error_counter;
-	u64 link_error_recovery_counter;
-	u64 link_downed_counter;
-	u64 port_rcv_errors;
-	u64 port_rcv_remphys_errors;
-	u64 port_xmit_discards;
-	u64 port_xmit_data;
-	u64 port_rcv_data;
-	u64 port_xmit_packets;
-	u64 port_rcv_packets;
-	u32 local_link_integrity_errors;
-	u32 excessive_buffer_overrun_errors;
-	u32 vl15_dropped;
-};
-
-/*
- * One verbs send request; wraps a struct ipath_sdma_txreq together with
- * the QP, work queue entry and SGE state it belongs to.
- * NOTE(review): presumably these are the entries kept on
- * ipath_ibdev.txreq_free -- confirm against ipath_verbs.c.
- */
-struct ipath_verbs_txreq {
-	struct ipath_qp *qp;
-	struct ipath_swqe *wqe;
-	u32 map_len;
-	u32 len;
-	struct ipath_sge_state *ss;
-	struct ipath_pio_header hdr;
-	struct ipath_sdma_txreq txreq;
-};
-
-/*
- * The to_*() helpers below convert a pointer to a core IB object into a
- * pointer to the ipath structure that embeds it, using container_of().
- */
-static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
-{
-	return container_of(ibmr, struct ipath_mr, ibmr);
-}
-
-static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd)
-{
-	return container_of(ibpd, struct ipath_pd, ibpd);
-}
-
-static inline struct ipath_ah *to_iah(struct ib_ah *ibah)
-{
-	return container_of(ibah, struct ipath_ah, ibah);
-}
-
-static inline struct ipath_cq *to_icq(struct ib_cq *ibcq)
-{
-	return container_of(ibcq, struct ipath_cq, ibcq);
-}
-
-static inline struct ipath_srq *to_isrq(struct ib_srq *ibsrq)
-{
-	return container_of(ibsrq, struct ipath_srq, ibsrq);
-}
-
-static inline struct ipath_qp *to_iqp(struct ib_qp *ibqp)
-{
-	return container_of(ibqp, struct ipath_qp, ibqp);
-}
-
-static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
-{
-	return container_of(ibdev, struct ipath_ibdev, ibdev);
-}
-
-/*
- * Clear every IPATH_S_ANY_WAIT bit in qp->s_flags and, unless
- * IPATH_S_BUSY is set, schedule the QP's send tasklet (qp->s_task).
- *
- * This must be called with s_lock held.
- */
-static inline void ipath_schedule_send(struct ipath_qp *qp)
-{
-	if (qp->s_flags & IPATH_S_ANY_WAIT)
-		qp->s_flags &= ~IPATH_S_ANY_WAIT;
-	if (!(qp->s_flags & IPATH_S_BUSY))
-		tasklet_hi_schedule(&qp->s_task);
-}
-
-int ipath_process_mad(struct ib_device *ibdev,
-		      int mad_flags,
-		      u8 port_num,
-		      const struct ib_wc *in_wc,
-		      const struct ib_grh *in_grh,
-		      const struct ib_mad_hdr *in, size_t in_mad_size,
-		      struct ib_mad_hdr *out, size_t *out_mad_size,
-		      u16 *out_mad_pkey_index);
-
-/*
- * Compare the lower 24 bits of the two values.
- * Returns an integer <, ==, or > than zero.
- */
-static inline int ipath_cmp24(u32 a, u32 b)
-{
-	/*
-	 * Subtract and shift as unsigned so the arithmetic wraps modulo
-	 * 2^32 instead of overflowing a signed int or left-shifting a
-	 * negative value (both undefined in ISO C).  The shift by 8 drops
-	 * the upper 8 bits of the difference, so only the low 24 bits of
-	 * @a and @b affect the result, and moves bit 23 into the sign
-	 * position.  The single conversion to int yields the same bit
-	 * pattern the signed expression produced.
-	 */
-	return (int) ((a - b) << 8);
-}
-
-struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid);
-
-int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-			    u64 *rwords, u64 *spkts, u64 *rpkts,
-			    u64 *xmit_wait);
-
-int ipath_get_counters(struct ipath_devdata *dd,
-		       struct ipath_verbs_counters *cntrs);
-
-int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int ipath_mcast_tree_empty(void);
-
-__be32 ipath_compute_aeth(struct ipath_qp *qp);
-
-struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn);
-
-struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
-			      struct ib_qp_init_attr *init_attr,
-			      struct ib_udata *udata);
-
-int ipath_destroy_qp(struct ib_qp *ibqp);
-
-int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
-
-int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		    int attr_mask, struct ib_udata *udata);
-
-int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		   int attr_mask, struct ib_qp_init_attr *init_attr);
-
-unsigned ipath_free_all_qps(struct ipath_qp_table *qpt);
-
-int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
-
-void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
-
-unsigned ipath_ib_rate_to_mult(enum ib_rate rate);
-
-int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
-		     u32 hdrwords, struct ipath_sge_state *ss, u32 len);
-
-void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
-
-void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
-
-void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-
-void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn);
-
-void
ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err); - -int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr); - -void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, - int has_grh, void *data, u32 tlen, struct ipath_qp *qp); - -int ipath_alloc_lkey(struct ipath_lkey_table *rkt, - struct ipath_mregion *mr); - -void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey); - -int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, - struct ib_sge *sge, int acc); - -int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, - u32 len, u64 vaddr, u32 rkey, int acc); - -int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); - -struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata); - -int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask, - struct ib_udata *udata); - -int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); - -int ipath_destroy_srq(struct ib_srq *ibsrq); - -void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig); - -int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); - -struct ib_cq *ipath_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, - struct ib_udata *udata); - -int ipath_destroy_cq(struct ib_cq *ibcq); - -int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); - -int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); - -struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc); - -struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *buffer_list, - int num_phys_buf, int acc, u64 *iova_start); - -struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct ib_udata *udata); - -int ipath_dereg_mr(struct ib_mr 
*ibmr); - -struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags, - struct ib_fmr_attr *fmr_attr); - -int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list, - int list_len, u64 iova); - -int ipath_unmap_fmr(struct list_head *fmr_list); - -int ipath_dealloc_fmr(struct ib_fmr *ibfmr); - -void ipath_release_mmap_info(struct kref *ref); - -struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev, - u32 size, - struct ib_ucontext *context, - void *obj); - -void ipath_update_mmap_info(struct ipath_ibdev *dev, - struct ipath_mmap_info *ip, - u32 size, void *obj); - -int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); - -void ipath_insert_rnr_queue(struct ipath_qp *qp); - -int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, - u32 *lengthp, struct ipath_sge_state *ss); - -int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only); - -u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr, - struct ib_global_route *grh, u32 hwords, u32 nwords); - -void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp, - struct ipath_other_headers *ohdr, - u32 bth0, u32 bth2); - -void ipath_do_send(unsigned long data); - -void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, - enum ib_wc_status status); - -int ipath_make_rc_req(struct ipath_qp *qp); - -int ipath_make_uc_req(struct ipath_qp *qp); - -int ipath_make_ud_req(struct ipath_qp *qp); - -int ipath_register_ib_device(struct ipath_devdata *); - -void ipath_unregister_ib_device(struct ipath_ibdev *); - -void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32); - -int ipath_ib_piobufavail(struct ipath_ibdev *); - -unsigned ipath_get_npkeys(struct ipath_devdata *); - -u32 ipath_get_cr_errpkey(struct ipath_devdata *); - -unsigned ipath_get_pkey(struct ipath_devdata *, unsigned); - -extern const enum ib_wc_opcode ib_ipath_wc_opcode[]; - -/* - * Below converts HCA-specific LinkTrainingState to IB PhysPortState - * values. 
- */ -extern const u8 ipath_cvt_physportstate[]; -#define IB_PHYSPORTSTATE_SLEEP 1 -#define IB_PHYSPORTSTATE_POLL 2 -#define IB_PHYSPORTSTATE_DISABLED 3 -#define IB_PHYSPORTSTATE_CFG_TRAIN 4 -#define IB_PHYSPORTSTATE_LINKUP 5 -#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6 - -extern const int ib_ipath_state_ops[]; - -extern unsigned int ib_ipath_lkey_table_size; - -extern unsigned int ib_ipath_max_cqes; - -extern unsigned int ib_ipath_max_cqs; - -extern unsigned int ib_ipath_max_qp_wrs; - -extern unsigned int ib_ipath_max_qps; - -extern unsigned int ib_ipath_max_sges; - -extern unsigned int ib_ipath_max_mcast_grps; - -extern unsigned int ib_ipath_max_mcast_qp_attached; - -extern unsigned int ib_ipath_max_srqs; - -extern unsigned int ib_ipath_max_srq_sges; - -extern unsigned int ib_ipath_max_srq_wrs; - -extern const u32 ib_ipath_rnr_table[]; - -extern struct ib_dma_mapping_ops ipath_dma_mapping_ops; - -#endif /* IPATH_VERBS_H */ diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c deleted file mode 100644 index 6216ea923..000000000 --- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c +++ /dev/null @@ -1,364 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/rculist.h> -#include <linux/sched.h> -#include <linux/slab.h> - -#include "ipath_verbs.h" - -/* - * Global table of GID to attached QPs. - * The table is global to all ipath devices since a send from one QP/device - * needs to be locally routed to any locally attached QPs on the same - * or different device. - */ -static struct rb_root mcast_tree; -static DEFINE_SPINLOCK(mcast_lock); - -/** - * ipath_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct - * @qp: the QP to link - */ -static struct ipath_mcast_qp *ipath_mcast_qp_alloc(struct ipath_qp *qp) -{ - struct ipath_mcast_qp *mqp; - - mqp = kmalloc(sizeof *mqp, GFP_KERNEL); - if (!mqp) - goto bail; - - mqp->qp = qp; - atomic_inc(&qp->refcount); - -bail: - return mqp; -} - -static void ipath_mcast_qp_free(struct ipath_mcast_qp *mqp) -{ - struct ipath_qp *qp = mqp->qp; - - /* Notify ipath_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - - kfree(mqp); -} - -/** - * ipath_mcast_alloc - allocate the multicast GID structure - * @mgid: the multicast GID - * - * A list of QPs will be attached to this structure. 
- */ -static struct ipath_mcast *ipath_mcast_alloc(union ib_gid *mgid) -{ - struct ipath_mcast *mcast; - - mcast = kmalloc(sizeof *mcast, GFP_KERNEL); - if (!mcast) - goto bail; - - mcast->mgid = *mgid; - INIT_LIST_HEAD(&mcast->qp_list); - init_waitqueue_head(&mcast->wait); - atomic_set(&mcast->refcount, 0); - mcast->n_attached = 0; - -bail: - return mcast; -} - -static void ipath_mcast_free(struct ipath_mcast *mcast) -{ - struct ipath_mcast_qp *p, *tmp; - - list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) - ipath_mcast_qp_free(p); - - kfree(mcast); -} - -/** - * ipath_mcast_find - search the global table for the given multicast GID - * @mgid: the multicast GID to search for - * - * Returns NULL if not found. - * - * The caller is responsible for decrementing the reference count if found. - */ -struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid) -{ - struct rb_node *n; - unsigned long flags; - struct ipath_mcast *mcast; - - spin_lock_irqsave(&mcast_lock, flags); - n = mcast_tree.rb_node; - while (n) { - int ret; - - mcast = rb_entry(n, struct ipath_mcast, rb_node); - - ret = memcmp(mgid->raw, mcast->mgid.raw, - sizeof(union ib_gid)); - if (ret < 0) - n = n->rb_left; - else if (ret > 0) - n = n->rb_right; - else { - atomic_inc(&mcast->refcount); - spin_unlock_irqrestore(&mcast_lock, flags); - goto bail; - } - } - spin_unlock_irqrestore(&mcast_lock, flags); - - mcast = NULL; - -bail: - return mcast; -} - -/** - * ipath_mcast_add - insert mcast GID into table and attach QP struct - * @mcast: the mcast GID table - * @mqp: the QP to attach - * - * Return zero if both were added. Return EEXIST if the GID was already in - * the table but the QP was added. Return ESRCH if the QP was already - * attached and neither structure was added. 
- */ -static int ipath_mcast_add(struct ipath_ibdev *dev, - struct ipath_mcast *mcast, - struct ipath_mcast_qp *mqp) -{ - struct rb_node **n = &mcast_tree.rb_node; - struct rb_node *pn = NULL; - int ret; - - spin_lock_irq(&mcast_lock); - - while (*n) { - struct ipath_mcast *tmcast; - struct ipath_mcast_qp *p; - - pn = *n; - tmcast = rb_entry(pn, struct ipath_mcast, rb_node); - - ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw, - sizeof(union ib_gid)); - if (ret < 0) { - n = &pn->rb_left; - continue; - } - if (ret > 0) { - n = &pn->rb_right; - continue; - } - - /* Search the QP list to see if this is already there. */ - list_for_each_entry_rcu(p, &tmcast->qp_list, list) { - if (p->qp == mqp->qp) { - ret = ESRCH; - goto bail; - } - } - if (tmcast->n_attached == ib_ipath_max_mcast_qp_attached) { - ret = ENOMEM; - goto bail; - } - - tmcast->n_attached++; - - list_add_tail_rcu(&mqp->list, &tmcast->qp_list); - ret = EEXIST; - goto bail; - } - - spin_lock(&dev->n_mcast_grps_lock); - if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) { - spin_unlock(&dev->n_mcast_grps_lock); - ret = ENOMEM; - goto bail; - } - - dev->n_mcast_grps_allocated++; - spin_unlock(&dev->n_mcast_grps_lock); - - mcast->n_attached++; - - list_add_tail_rcu(&mqp->list, &mcast->qp_list); - - atomic_inc(&mcast->refcount); - rb_link_node(&mcast->rb_node, pn, n); - rb_insert_color(&mcast->rb_node, &mcast_tree); - - ret = 0; - -bail: - spin_unlock_irq(&mcast_lock); - - return ret; -} - -int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - struct ipath_qp *qp = to_iqp(ibqp); - struct ipath_ibdev *dev = to_idev(ibqp->device); - struct ipath_mcast *mcast; - struct ipath_mcast_qp *mqp; - int ret; - - /* - * Allocate data structures since its better to do this outside of - * spin locks and it will most likely be needed. 
- */ - mcast = ipath_mcast_alloc(gid); - if (mcast == NULL) { - ret = -ENOMEM; - goto bail; - } - mqp = ipath_mcast_qp_alloc(qp); - if (mqp == NULL) { - ipath_mcast_free(mcast); - ret = -ENOMEM; - goto bail; - } - switch (ipath_mcast_add(dev, mcast, mqp)) { - case ESRCH: - /* Neither was used: can't attach the same QP twice. */ - ipath_mcast_qp_free(mqp); - ipath_mcast_free(mcast); - ret = -EINVAL; - goto bail; - case EEXIST: /* The mcast wasn't used */ - ipath_mcast_free(mcast); - break; - case ENOMEM: - /* Exceeded the maximum number of mcast groups. */ - ipath_mcast_qp_free(mqp); - ipath_mcast_free(mcast); - ret = -ENOMEM; - goto bail; - default: - break; - } - - ret = 0; - -bail: - return ret; -} - -int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - struct ipath_qp *qp = to_iqp(ibqp); - struct ipath_ibdev *dev = to_idev(ibqp->device); - struct ipath_mcast *mcast = NULL; - struct ipath_mcast_qp *p, *tmp; - struct rb_node *n; - int last = 0; - int ret; - - spin_lock_irq(&mcast_lock); - - /* Find the GID in the mcast table. */ - n = mcast_tree.rb_node; - while (1) { - if (n == NULL) { - spin_unlock_irq(&mcast_lock); - ret = -EINVAL; - goto bail; - } - - mcast = rb_entry(n, struct ipath_mcast, rb_node); - ret = memcmp(gid->raw, mcast->mgid.raw, - sizeof(union ib_gid)); - if (ret < 0) - n = n->rb_left; - else if (ret > 0) - n = n->rb_right; - else - break; - } - - /* Search the QP list. */ - list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) { - if (p->qp != qp) - continue; - /* - * We found it, so remove it, but don't poison the forward - * link until we are sure there are no list walkers. - */ - list_del_rcu(&p->list); - mcast->n_attached--; - - /* If this was the last attached QP, remove the GID too. 
*/ - if (list_empty(&mcast->qp_list)) { - rb_erase(&mcast->rb_node, &mcast_tree); - last = 1; - } - break; - } - - spin_unlock_irq(&mcast_lock); - - if (p) { - /* - * Wait for any list walkers to finish before freeing the - * list element. - */ - wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); - ipath_mcast_qp_free(p); - } - if (last) { - atomic_dec(&mcast->refcount); - wait_event(mcast->wait, !atomic_read(&mcast->refcount)); - ipath_mcast_free(mcast); - spin_lock_irq(&dev->n_mcast_grps_lock); - dev->n_mcast_grps_allocated--; - spin_unlock_irq(&dev->n_mcast_grps_lock); - } - - ret = 0; - -bail: - return ret; -} - -int ipath_mcast_tree_empty(void) -{ - return mcast_tree.rb_node == NULL; -} diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c deleted file mode 100644 index 1a7e20a75..000000000 --- a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file is conditionally built on PowerPC only.  Otherwise weak symbol
- * versions of the functions exported from here are used.
- */
-
-#include "ipath_kernel.h"
-
-/**
- * ipath_enable_wc - enable write combining for MMIO writes to the device
- * @dd: infinipath device
- *
- * Nothing to do on PowerPC, so just return without error.
- *
- * Return: always 0; @dd is not examined.
- */
-int ipath_enable_wc(struct ipath_devdata *dd)
-{
-	return 0;
-}
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
deleted file mode 100644
index 7b6e4c843..000000000
--- a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file is conditionally built on x86_64 only.  Otherwise weak symbol
- * versions of the functions exported from here are used.
- */
-
-#include <linux/pci.h>
-#include <asm/processor.h>
-
-#include "ipath_kernel.h"
-
-/**
- * ipath_enable_wc - enable write combining for MMIO writes to the device
- * @dd: infinipath device
- *
- * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable
- * write combining.
- *
- * The PIO buffer range inside PCI resource 0 is rounded up to a power of
- * two and aligned, then handed to arch_phys_wc_add(); the returned cookie
- * is stored in dd->wc_cookie.
- *
- * Return: 0 on success (including the case where arch_phys_wc_add()
- * returns 0), -ENODEV if the range cannot be aligned inside the resource
- * or if arch_phys_wc_add() fails.
- */
-int ipath_enable_wc(struct ipath_devdata *dd)
-{
-	int ret = 0;
-	u64 pioaddr, piolen;
-	unsigned bits;
-	const unsigned long addr = pci_resource_start(dd->pcidev, 0);
-	const size_t len = pci_resource_len(dd->pcidev, 0);
-
-	/*
-	 * Set the PIO buffers to be WCCOMB, so we get HT bursts to the
-	 * chip.  Linux (possibly the hardware) requires it to be on a power
-	 * of 2 address matching the length (which has to be a power of 2).
-	 * For rev1, that means the base address, for rev2, it will be just
-	 * the PIO buffers themselves.
-	 * For chips with two sets of buffers, the calculations are
-	 * somewhat more complicated; we need to sum, and the piobufbase
-	 * register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
-	 * The buffers are still packed, so a single range covers both.
-	 */
-	if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */
-		unsigned long pio2kbase, pio4kbase;
-		pio2kbase = dd->ipath_piobufbase & 0xffffffffUL;
-		pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL;
-		/* Start at the lower base; extend to the end of the higher set. */
-		if (pio2kbase < pio4kbase) { /* all, for now */
-			pioaddr = addr + pio2kbase;
-			piolen = pio4kbase - pio2kbase +
-				dd->ipath_piobcnt4k * dd->ipath_4kalign;
-		} else {
-			pioaddr = addr + pio4kbase;
-			piolen = pio2kbase - pio4kbase +
-				dd->ipath_piobcnt2k * dd->ipath_palign;
-		}
-	} else {  /* single buffer size (2K, currently) */
-		pioaddr = addr + dd->ipath_piobufbase;
-		piolen = dd->ipath_piobcnt2k * dd->ipath_palign +
-			dd->ipath_piobcnt4k * dd->ipath_4kalign;
-	}
-
-	/*
-	 * Find the index of the lowest set bit of piolen.
-	 * NOTE(review): if piolen could be 0 this loop would run bits past
-	 * 63 -- presumably ruled out by chip init; confirm.
-	 */
-	for (bits = 0; !(piolen & (1ULL << bits)); bits++)
-		/* do nothing */ ;
-
-	/* Not a power of two: round up to the next one. */
-	if (piolen != (1ULL << bits)) {
-		piolen >>= bits;
-		/* advance bits to the index of the highest set bit */
-		while (piolen >>= 1)
-			bits++;
-		piolen = 1ULL << (bits + 1);
-	}
-	/* The base must be a multiple of the (power of two) length. */
-	if (pioaddr & (piolen - 1)) {
-		u64 atmp;
-		ipath_dbg("pioaddr %llx not on right boundary for size "
-			  "%llx, fixing\n",
-			  (unsigned long long) pioaddr,
-			  (unsigned long long) piolen);
-		/* round the base down to the alignment boundary */
-		atmp = pioaddr & ~(piolen - 1);
-		if (atmp < addr || (atmp + piolen) > (addr + len)) {
-			ipath_dev_err(dd, "No way to align address/size "
-				      "(%llx/%llx), no WC mtrr\n",
-				      (unsigned long long) atmp,
-				      (unsigned long long) piolen << 1);
-			ret = -ENODEV;
-		} else {
-			ipath_dbg("changing WC base from %llx to %llx, "
-				  "len from %llx to %llx\n",
-				  (unsigned long long) pioaddr,
-				  (unsigned long long) atmp,
-				  (unsigned long long) piolen,
-				  (unsigned long long) piolen << 1);
-			/* lower the base and double the length */
-			pioaddr = atmp;
-			piolen <<= 1;
-		}
-	}
-
-	if (!ret) {
-		dd->wc_cookie = arch_phys_wc_add(pioaddr, piolen);
-		if (dd->wc_cookie < 0) {
-			ipath_dev_err(dd, "Seting mtrr failed on PIO buffers\n");
-			ret = -ENODEV;
-		} else if (dd->wc_cookie == 0)
-			ipath_cdbg(VERBOSE, "Set mtrr for chip to WC not needed\n");
-		else
-			ipath_cdbg(VERBOSE, "Set mtrr for chip to WC\n");
-	}
-
-	return ret;
-}
-
-/**
- * ipath_disable_wc - disable write combining for MMIO writes to the device
- * @dd: infinipath device
- *
- * Passes the cookie saved in dd->wc_cookie by ipath_enable_wc() to
- * arch_phys_wc_del().
- */
-void ipath_disable_wc(struct ipath_devdata *dd)
-{
-	arch_phys_wc_del(dd->wc_cookie);
-} |