summaryrefslogtreecommitdiff
path: root/drivers/misc/mic/scif
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/mic/scif')
-rw-r--r--drivers/misc/mic/scif/Makefile15
-rw-r--r--drivers/misc/mic/scif/scif_api.c1276
-rw-r--r--drivers/misc/mic/scif/scif_debugfs.c85
-rw-r--r--drivers/misc/mic/scif/scif_epd.c353
-rw-r--r--drivers/misc/mic/scif/scif_epd.h160
-rw-r--r--drivers/misc/mic/scif/scif_fd.c303
-rw-r--r--drivers/misc/mic/scif/scif_main.c388
-rw-r--r--drivers/misc/mic/scif/scif_main.h254
-rw-r--r--drivers/misc/mic/scif/scif_map.h113
-rw-r--r--drivers/misc/mic/scif/scif_nm.c237
-rw-r--r--drivers/misc/mic/scif/scif_nodeqp.c1307
-rw-r--r--drivers/misc/mic/scif/scif_nodeqp.h183
-rw-r--r--drivers/misc/mic/scif/scif_peer_bus.c124
-rw-r--r--drivers/misc/mic/scif/scif_peer_bus.h65
-rw-r--r--drivers/misc/mic/scif/scif_ports.c124
-rw-r--r--drivers/misc/mic/scif/scif_rb.c249
-rw-r--r--drivers/misc/mic/scif/scif_rb.h100
17 files changed, 5336 insertions, 0 deletions
diff --git a/drivers/misc/mic/scif/Makefile b/drivers/misc/mic/scif/Makefile
new file mode 100644
index 000000000..bf10bb7e2
--- /dev/null
+++ b/drivers/misc/mic/scif/Makefile
@@ -0,0 +1,15 @@
+#
+# Makefile - SCIF driver: links the objects listed below into scif.o, built when CONFIG_SCIF is set.
+# Copyright(c) 2014, Intel Corporation.
+#
+obj-$(CONFIG_SCIF) += scif.o
+scif-objs := scif_main.o
+scif-objs += scif_peer_bus.o
+scif-objs += scif_ports.o
+scif-objs += scif_debugfs.o
+scif-objs += scif_fd.o
+scif-objs += scif_api.o
+scif-objs += scif_epd.o
+scif-objs += scif_rb.o
+scif-objs += scif_nodeqp.o
+scif-objs += scif_nm.o
diff --git a/drivers/misc/mic/scif/scif_api.c b/drivers/misc/mic/scif/scif_api.c
new file mode 100644
index 000000000..f39d3135a
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_api.c
@@ -0,0 +1,1276 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/scif.h>
+#include "scif_main.h"
+#include "scif_map.h"
+/*
+ * Printable names of the endpoint states. The dev_dbg() calls in this file
+ * index this table with ep->state.
+ * NOTE(review): the entry order presumably mirrors the endpoint state enum,
+ * which is declared outside this file - confirm before reordering.
+ */
+static const char * const scif_ep_states[] = {
+ "Unbound",
+ "Bound",
+ "Listening",
+ "Connected",
+ "Connecting",
+ "Mapping",
+ "Closing",
+ "Close Listening",
+ "Disconnected",
+ "Zombie"};
+/*
+ * Progress of a non-blocking connect, stored in ep->conn_async_state.
+ * The enumerators start at 1; __scif_connect() stores a plain 0 in the field
+ * when it sets up a blocking connect.
+ */
+enum conn_async_state {
+ ASYNC_CONN_IDLE = 1, /* ep setup for async connect */
+ ASYNC_CONN_INPROGRESS, /* async connect in progress */
+ ASYNC_CONN_FLUSH_WORK /* async work flush in progress */
+};
+
+/*
+ * scif_open - Allocate a new SCIF endpoint.
+ *
+ * Allocates the endpoint and its queue pair descriptor, initializes the
+ * endpoint spinlock and the send/receive mutexes, and leaves the endpoint
+ * in SCIFEP_UNBOUND state.
+ *
+ * Returns the new endpoint, or NULL if either allocation fails.
+ */
+scif_epd_t scif_open(void)
+{
+	struct scif_endpt *new_ep;
+
+	might_sleep();
+
+	new_ep = kzalloc(sizeof(*new_ep), GFP_KERNEL);
+	if (!new_ep)
+		return NULL;
+
+	new_ep->qp_info.qp = kzalloc(sizeof(*new_ep->qp_info.qp), GFP_KERNEL);
+	if (!new_ep->qp_info.qp) {
+		/* Nothing else has been set up yet, only the endpoint goes */
+		kfree(new_ep);
+		return NULL;
+	}
+
+	spin_lock_init(&new_ep->lock);
+	mutex_init(&new_ep->sendlock);
+	mutex_init(&new_ep->recvlock);
+	new_ep->state = SCIFEP_UNBOUND;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI open: ep %p success\n", new_ep);
+	return new_ep;
+}
+EXPORT_SYMBOL_GPL(scif_open);
+
+/*
+ * scif_disconnect_ep - Disconnects the endpoint if found
+ * @ep: The end point returned from scif_open()
+ *
+ * Wakes up blocked senders/receivers and cycles the send/recv mutexes, then
+ * unlinks @ep from scif_info.connected, sends SCIF_DISCNCT to the peer and,
+ * if the send succeeded, waits up to SCIF_NODE_ALIVE_TIMEOUT on ep->discon.
+ * The result of that wait is not checked.
+ *
+ * Returns @ep if it was found on the connected list. Returns NULL if it was
+ * not; in that case @ep is only unlinked from scif_info.disconnected (if it
+ * is there) and no message is sent.
+ */
+static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
+{
+ struct scifmsg msg;
+ struct scif_endpt *fep = NULL;
+ struct scif_endpt *tmpep;
+ struct list_head *pos, *tmpq;
+ int err;
+
+ /*
+ * Wake up any threads blocked in send()/recv() before closing
+ * out the connection. Grabbing and releasing the send/recv lock
+ * will ensure that any blocked senders/receivers have exited for
+ * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
+ * close. Ring 3 endpoints are not affected since close will not
+ * be called while there are IOCTLs executing.
+ */
+ wake_up_interruptible(&ep->sendwq);
+ wake_up_interruptible(&ep->recvwq);
+ mutex_lock(&ep->sendlock);
+ mutex_unlock(&ep->sendlock);
+ mutex_lock(&ep->recvlock);
+ mutex_unlock(&ep->recvlock);
+
+ /* Remove from the connected list */
+ mutex_lock(&scif_info.connlock);
+ list_for_each_safe(pos, tmpq, &scif_info.connected) {
+ tmpep = list_entry(pos, struct scif_endpt, list);
+ if (tmpep == ep) {
+ list_del(pos);
+ fep = tmpep;
+ spin_lock(&ep->lock); /* held until after the SCIF_DISCNCT send below */
+ break;
+ }
+ }
+
+ if (!fep) {
+ /*
+ * The other side has completed the disconnect before
+ * the end point can be removed from the list. Therefore
+ * the ep lock is not locked, traverse the disconnected
+ * list to find the endpoint and release the conn lock.
+ */
+ list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
+ tmpep = list_entry(pos, struct scif_endpt, list);
+ if (tmpep == ep) {
+ list_del(pos);
+ break;
+ }
+ }
+ mutex_unlock(&scif_info.connlock);
+ return NULL;
+ }
+
+ init_completion(&ep->discon);
+ msg.uop = SCIF_DISCNCT;
+ msg.src = ep->port;
+ msg.dst = ep->peer;
+ msg.payload[0] = (u64)ep;
+ msg.payload[1] = ep->remote_ep;
+
+ err = scif_nodeqp_send(ep->remote_dev, &msg);
+ spin_unlock(&ep->lock);
+ mutex_unlock(&scif_info.connlock);
+
+ if (!err)
+ /* Wait for the remote node to respond with SCIF_DISCNT_ACK */
+ wait_for_completion_timeout(&ep->discon,
+ SCIF_NODE_ALIVE_TIMEOUT);
+ return ep;
+}
+/*
+ * scif_close - Close an endpoint.
+ * @epd: The end point returned from scif_open()
+ *
+ * If a non-blocking connect is in progress, scif_info.conn_work is flushed
+ * first. The endpoint is then marked SCIFEP_CLOSING and cleaned up according
+ * to the state it was in before:
+ *  - zombie/disconnected: unlinked from scif_info.disconnected;
+ *  - unbound/bound/connecting: no extra work;
+ *  - mapping/connected/closing: scif_disconnect_ep();
+ *  - listening/close listening: unlinked from scif_info.listen, every
+ *    endpoint counted by ep->acceptcnt is torn down and moved to the zombie
+ *    list, and every queued connection request is answered with
+ *    SCIF_CNCT_REJ and freed.
+ * Finally scif_put_port() is called on the endpoint's port, the endpoint is
+ * torn down and added to the zombie list.
+ *
+ * Always returns 0.
+ */
+int scif_close(scif_epd_t epd)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct scif_endpt *tmpep;
+ struct list_head *pos, *tmpq;
+ enum scif_epd_state oldstate;
+ bool flush_conn;
+
+ dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
+ ep, scif_ep_states[ep->state]);
+ might_sleep();
+ spin_lock(&ep->lock);
+ flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
+ spin_unlock(&ep->lock);
+
+ if (flush_conn)
+ flush_work(&scif_info.conn_work);
+
+ spin_lock(&ep->lock);
+ oldstate = ep->state;
+
+ ep->state = SCIFEP_CLOSING;
+
+ switch (oldstate) {
+ case SCIFEP_ZOMBIE:
+ case SCIFEP_DISCONNECTED:
+ spin_unlock(&ep->lock);
+ /* Remove from the disconnected list */
+ mutex_lock(&scif_info.connlock);
+ list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
+ tmpep = list_entry(pos, struct scif_endpt, list);
+ if (tmpep == ep) {
+ list_del(pos);
+ break;
+ }
+ }
+ mutex_unlock(&scif_info.connlock);
+ break;
+ case SCIFEP_UNBOUND:
+ case SCIFEP_BOUND:
+ case SCIFEP_CONNECTING:
+ spin_unlock(&ep->lock);
+ break;
+ case SCIFEP_MAPPING:
+ case SCIFEP_CONNECTED:
+ case SCIFEP_CLOSING:
+ {
+ spin_unlock(&ep->lock);
+ scif_disconnect_ep(ep);
+ break;
+ }
+ case SCIFEP_LISTENING:
+ case SCIFEP_CLLISTEN:
+ {
+ struct scif_conreq *conreq;
+ struct scifmsg msg;
+ struct scif_endpt *aep;
+
+ spin_unlock(&ep->lock);
+ spin_lock(&scif_info.eplock);
+
+ /* remove from listen list */
+ list_for_each_safe(pos, tmpq, &scif_info.listen) {
+ tmpep = list_entry(pos, struct scif_endpt, list);
+ if (tmpep == ep)
+ list_del(pos);
+ }
+ /* Remove any dangling accepts */
+ while (ep->acceptcnt) {
+ aep = list_first_entry(&ep->li_accept,
+ struct scif_endpt, liacceptlist);
+ list_del(&aep->liacceptlist);
+ scif_put_port(aep->port.port);
+ list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
+ tmpep = list_entry(pos, struct scif_endpt,
+ miacceptlist);
+ if (tmpep == aep) {
+ list_del(pos);
+ break;
+ }
+ }
+ spin_unlock(&scif_info.eplock); /* dropped before taking the connlock mutex */
+ mutex_lock(&scif_info.connlock);
+ list_for_each_safe(pos, tmpq, &scif_info.connected) {
+ tmpep = list_entry(pos,
+ struct scif_endpt, list);
+ if (tmpep == aep) {
+ list_del(pos);
+ break;
+ }
+ }
+ list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
+ tmpep = list_entry(pos,
+ struct scif_endpt, list);
+ if (tmpep == aep) {
+ list_del(pos);
+ break;
+ }
+ }
+ mutex_unlock(&scif_info.connlock);
+ scif_teardown_ep(aep);
+ spin_lock(&scif_info.eplock);
+ scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
+ ep->acceptcnt--;
+ }
+
+ spin_lock(&ep->lock);
+ spin_unlock(&scif_info.eplock);
+
+ /* Remove and reject any pending connection requests. */
+ while (ep->conreqcnt) {
+ conreq = list_first_entry(&ep->conlist,
+ struct scif_conreq, list);
+ list_del(&conreq->list);
+
+ msg.uop = SCIF_CNCT_REJ;
+ msg.dst.node = conreq->msg.src.node;
+ msg.dst.port = conreq->msg.src.port;
+ msg.payload[0] = conreq->msg.payload[0];
+ msg.payload[1] = conreq->msg.payload[1];
+ /*
+ * No Error Handling on purpose for scif_nodeqp_send().
+ * If the remote node is lost we still want to free the
+ * connection requests on the self node.
+ */
+ scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
+ &msg);
+ ep->conreqcnt--;
+ kfree(conreq);
+ }
+
+ spin_unlock(&ep->lock);
+ /* If a kSCIF accept is waiting wake it up */
+ wake_up_interruptible(&ep->conwq);
+ break;
+ }
+ }
+ scif_put_port(ep->port.port);
+ scif_teardown_ep(ep);
+ scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(scif_close);
+
+/**
+ * __scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
+ * accept new connections.
+ * @epd: The end point returned from scif_open()
+ *
+ * Only an endpoint in SCIFEP_LISTENING state is touched: it is moved to
+ * SCIFEP_CLLISTEN and the waiters on its conwq are woken. Always returns 0.
+ */
+int __scif_flush(scif_epd_t epd)
+{
+	struct scif_endpt *lep = (struct scif_endpt *)epd;
+
+	if (lep->state == SCIFEP_LISTENING) {
+		lep->state = SCIFEP_CLLISTEN;
+
+		/* If an accept is waiting wake it up */
+		wake_up_interruptible(&lep->conwq);
+	}
+	return 0;
+}
+
+/*
+ * scif_bind - Bind an endpoint to a local port.
+ * @epd: The end point returned from scif_open()
+ * @pn: Requested port number, or 0 to have scif_get_new_port() pick one
+ *
+ * Returns the bound port number on success. Returns -EACCES when a non-zero
+ * port below SCIF_ADMIN_PORT_END is requested without CAP_SYS_ADMIN, -EINVAL
+ * when the endpoint is already bound or the requested port could not be
+ * reserved, -EISCONN when the endpoint is in any other state than unbound or
+ * bound, and -ENOSPC when no new port could be obtained.
+ */
+int scif_bind(scif_epd_t epd, u16 pn)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int reserved;
+	int rc = 0;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI bind: ep %p %s requested port number %d\n",
+		ep, scif_ep_states[ep->state], pn);
+
+	/*
+	 * Similar to IETF RFC 1700, SCIF ports below
+	 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
+	 * processes or by processes executed by privileged users.
+	 */
+	if (pn && pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	spin_lock(&ep->lock);
+	switch (ep->state) {
+	case SCIFEP_UNBOUND:
+		break;
+	case SCIFEP_BOUND:
+		rc = -EINVAL;
+		goto unlock;
+	default:
+		rc = -EISCONN;
+		goto unlock;
+	}
+
+	if (!pn) {
+		/* Caller has no preference, take any free port */
+		pn = scif_get_new_port();
+		if (!pn) {
+			rc = -ENOSPC;
+			goto unlock;
+		}
+	} else {
+		reserved = scif_rsrv_port(pn);
+		if (reserved != pn) {
+			rc = -EINVAL;
+			goto unlock;
+		}
+	}
+
+	ep->state = SCIFEP_BOUND;
+	ep->port.node = scif_info.nodeid;
+	ep->port.port = pn;
+	ep->conn_async_state = ASYNC_CONN_IDLE;
+	rc = pn;
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI bind: bound to port number %d\n", pn);
+unlock:
+	spin_unlock(&ep->lock);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(scif_bind);
+
+/*
+ * scif_listen - Put a bound endpoint into listening state.
+ * @epd: The end point returned from scif_open()
+ * @backlog: Value stored in ep->backlog
+ *
+ * Returns 0 on success, -EINVAL if the endpoint is zombie, closing, close
+ * listening, unbound or disconnected, and -EISCONN if it is already
+ * listening, connected, connecting or mapping.
+ */
+int scif_listen(scif_epd_t epd, int backlog)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int rc = 0;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
+
+	spin_lock(&ep->lock);
+	switch (ep->state) {
+	case SCIFEP_BOUND:
+		break;
+	case SCIFEP_ZOMBIE:
+	case SCIFEP_CLOSING:
+	case SCIFEP_CLLISTEN:
+	case SCIFEP_UNBOUND:
+	case SCIFEP_DISCONNECTED:
+		rc = -EINVAL;
+		break;
+	case SCIFEP_LISTENING:
+	case SCIFEP_CONNECTED:
+	case SCIFEP_CONNECTING:
+	case SCIFEP_MAPPING:
+		rc = -EISCONN;
+		break;
+	}
+	if (rc) {
+		spin_unlock(&ep->lock);
+		return rc;
+	}
+
+	ep->state = SCIFEP_LISTENING;
+	ep->backlog = backlog;
+
+	ep->conreqcnt = 0;
+	ep->acceptcnt = 0;
+	INIT_LIST_HEAD(&ep->conlist);
+	init_waitqueue_head(&ep->conwq);
+	INIT_LIST_HEAD(&ep->li_accept);
+	spin_unlock(&ep->lock);
+
+	/*
+	 * Listen status is complete so delete the qp information not needed
+	 * on a listen before placing on the list of listening ep's
+	 */
+	scif_teardown_ep(ep);
+	ep->qp_info.qp = NULL;
+
+	spin_lock(&scif_info.eplock);
+	list_add_tail(&ep->list, &scif_info.listen);
+	spin_unlock(&scif_info.eplock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(scif_listen);
+
+/*
+ ************************************************************************
+ * SCIF connection flow:
+ *
+ * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
+ * connections via a SCIF_CNCT_REQ message
+ * 2) A SCIF endpoint can initiate a SCIF connection by calling
+ * scif_connect(..) which calls scif_setup_qp_connect(..) which
+ * allocates the local qp for the endpoint ring buffer and then sends
+ * a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
+ * a SCIF_CNCT_REJ message
+ * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
+ * wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
+ * message otherwise
+ * 4) A thread blocked waiting for incoming connections allocates its local
+ * endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
+ * and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
+ * the node sends a SCIF_CNCT_REJ message
+ * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
+ * connecting endpoint is woken up as part of handling
+ * scif_cnctgnt_resp(..) following which it maps the remote endpoints'
+ * QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
+ * success or a SCIF_CNCT_GNTNACK message on failure and completes
+ * the scif_connect(..) API
+ * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
+ * in step 4 is woken up and completes the scif_accept(..) API
+ * 7) The SCIF connection is now established between the two SCIF endpoints.
+ *
+ * scif_conn_func() below runs steps 2 and 5 for the connecting endpoint. It
+ * returns 0 after moving the endpoint to SCIFEP_CONNECTED and adding it to
+ * scif_info.connected, -ECONNREFUSED if the endpoint is back in SCIFEP_BOUND
+ * after the wait (refused or timed out), or the error of the setup/send call
+ * that failed. The error paths past the QP setup call scif_cleanup_ep_qp().
+ *
+ * NOTE(review): if the wait ends with the endpoint in a state other than
+ * SCIFEP_MAPPING or SCIFEP_BOUND, err still holds the non-zero value returned
+ * by wait_event_timeout() and is returned unchanged - confirm this is
+ * what callers expect.
+ * NOTE(review): when either scif_get_peer_dev() call fails, ep->state is not
+ * reset to SCIFEP_BOUND as it is on the other error paths - confirm.
+ */
+static int scif_conn_func(struct scif_endpt *ep)
+{
+ int err = 0;
+ struct scifmsg msg;
+ struct device *spdev;
+
+ /* Initiate the first part of the endpoint QP setup */
+ err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
+ SCIF_ENDPT_QP_SIZE, ep->remote_dev);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s err %d qp_offset 0x%llx\n",
+ __func__, err, ep->qp_info.qp_offset);
+ ep->state = SCIFEP_BOUND;
+ goto connect_error_simple;
+ }
+
+ spdev = scif_get_peer_dev(ep->remote_dev);
+ if (IS_ERR(spdev)) {
+ err = PTR_ERR(spdev);
+ goto cleanup_qp;
+ }
+ /* Format connect message and send it */
+ msg.src = ep->port;
+ msg.dst = ep->conn_port;
+ msg.uop = SCIF_CNCT_REQ;
+ msg.payload[0] = (u64)ep;
+ msg.payload[1] = ep->qp_info.qp_offset;
+ err = _scif_nodeqp_send(ep->remote_dev, &msg);
+ if (err)
+ goto connect_error_dec;
+ scif_put_peer_dev(spdev);
+ /*
+ * Wait for the remote node to respond with SCIF_CNCT_GNT or
+ * SCIF_CNCT_REJ message.
+ */
+ err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
+ SCIF_NODE_ALIVE_TIMEOUT);
+ if (!err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d timeout\n", __func__, __LINE__);
+ ep->state = SCIFEP_BOUND;
+ }
+ spdev = scif_get_peer_dev(ep->remote_dev);
+ if (IS_ERR(spdev)) {
+ err = PTR_ERR(spdev);
+ goto cleanup_qp;
+ }
+ if (ep->state == SCIFEP_MAPPING) {
+ err = scif_setup_qp_connect_response(ep->remote_dev,
+ ep->qp_info.qp,
+ ep->qp_info.gnt_pld);
+ /*
+ * If the resource to map the queue are not available then
+ * we need to tell the other side to terminate the accept
+ */
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ msg.uop = SCIF_CNCT_GNTNACK;
+ msg.payload[0] = ep->remote_ep;
+ _scif_nodeqp_send(ep->remote_dev, &msg);
+ ep->state = SCIFEP_BOUND;
+ goto connect_error_dec;
+ }
+
+ msg.uop = SCIF_CNCT_GNTACK;
+ msg.payload[0] = ep->remote_ep;
+ err = _scif_nodeqp_send(ep->remote_dev, &msg);
+ if (err) {
+ ep->state = SCIFEP_BOUND;
+ goto connect_error_dec;
+ }
+ ep->state = SCIFEP_CONNECTED;
+ mutex_lock(&scif_info.connlock);
+ list_add_tail(&ep->list, &scif_info.connected);
+ mutex_unlock(&scif_info.connlock);
+ dev_dbg(&ep->remote_dev->sdev->dev,
+ "SCIFAPI connect: ep %p connected\n", ep);
+ } else if (ep->state == SCIFEP_BOUND) {
+ dev_dbg(&ep->remote_dev->sdev->dev,
+ "SCIFAPI connect: ep %p connection refused\n", ep);
+ err = -ECONNREFUSED;
+ goto connect_error_dec;
+ }
+ scif_put_peer_dev(spdev);
+ return err;
+connect_error_dec:
+ scif_put_peer_dev(spdev);
+cleanup_qp:
+ scif_cleanup_ep_qp(ep);
+connect_error_simple:
+ return err;
+}
+
+/*
+ * scif_conn_handler:
+ *
+ * Workqueue handler for servicing non-blocking SCIF connect.
+ *
+ * Pops endpoints off scif_info.nb_connect_list one at a time, runs
+ * scif_conn_func() on each and stores the result in ep->conn_err. Returns
+ * once the list is found empty.
+ */
+void scif_conn_handler(struct work_struct *work)
+{
+	struct scif_endpt *next;
+
+	for (;;) {
+		next = NULL;
+
+		spin_lock(&scif_info.nb_connect_lock);
+		if (!list_empty(&scif_info.nb_connect_list)) {
+			next = list_first_entry(&scif_info.nb_connect_list,
+						struct scif_endpt, conn_list);
+			list_del(&next->conn_list);
+		}
+		spin_unlock(&scif_info.nb_connect_lock);
+
+		if (!next)
+			break;
+
+		/* The connect itself runs without nb_connect_lock held */
+		next->conn_err = scif_conn_func(next);
+	}
+}
+/*
+ * __scif_connect - Connect an endpoint to a remote port.
+ * @epd: The end point returned from scif_open()
+ * @dst: Node and port to connect to
+ * @non_block: When true, queue the connect for scif_info.conn_work instead
+ * of running scif_conn_func() in the caller
+ *
+ * An unbound endpoint is first given a port from scif_get_new_port().
+ * For a blocking connect the result of scif_conn_func() is returned.
+ * For a non-blocking connect the endpoint is queued on
+ * scif_info.nb_connect_list, the work is scheduled and -EINPROGRESS is
+ * returned. A later call that finds conn_async_state at
+ * ASYNC_CONN_INPROGRESS (endpoint bound, connected or disconnected) flushes
+ * the work, returns the stored ep->conn_err and resets the state to
+ * ASYNC_CONN_IDLE.
+ *
+ * Other return values: -ENODEV when scif_dev is NULL or dst->node exceeds
+ * scif_info.maxid, the error from scif_get_peer_dev(), -EINVAL,
+ * -EOPNOTSUPP (listening endpoint), -EISCONN, -EINPROGRESS and -ENOSPC as
+ * set in the state switch below.
+ */
+int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ int err = 0;
+ struct scif_dev *remote_dev;
+ struct device *spdev;
+
+ dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
+ scif_ep_states[ep->state]);
+
+ if (!scif_dev || dst->node > scif_info.maxid)
+ return -ENODEV;
+
+ might_sleep();
+
+ remote_dev = &scif_dev[dst->node];
+ spdev = scif_get_peer_dev(remote_dev);
+ if (IS_ERR(spdev)) {
+ err = PTR_ERR(spdev);
+ return err;
+ }
+
+ spin_lock(&ep->lock);
+ switch (ep->state) {
+ case SCIFEP_ZOMBIE:
+ case SCIFEP_CLOSING:
+ err = -EINVAL;
+ break;
+ case SCIFEP_DISCONNECTED:
+ if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
+ ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
+ else
+ err = -EINVAL;
+ break;
+ case SCIFEP_LISTENING:
+ case SCIFEP_CLLISTEN:
+ err = -EOPNOTSUPP;
+ break;
+ case SCIFEP_CONNECTING:
+ case SCIFEP_MAPPING:
+ if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
+ err = -EINPROGRESS;
+ else
+ err = -EISCONN;
+ break;
+ case SCIFEP_CONNECTED:
+ if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
+ ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
+ else
+ err = -EISCONN;
+ break;
+ case SCIFEP_UNBOUND:
+ ep->port.port = scif_get_new_port();
+ if (!ep->port.port) {
+ err = -ENOSPC;
+ } else {
+ ep->port.node = scif_info.nodeid;
+ ep->conn_async_state = ASYNC_CONN_IDLE;
+ }
+ /* Fall through */
+ case SCIFEP_BOUND:
+ /*
+ * If a non-blocking connect has been already initiated
+ * (conn_async_state is either ASYNC_CONN_INPROGRESS or
+ * ASYNC_CONN_FLUSH_WORK), the end point could end up in
+ * SCIF_BOUND due to an error in the connection process
+ * (e.g., connection refused). If conn_async_state is
+ * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK
+ * so that the error status can be collected. If the state is
+ * already ASYNC_CONN_FLUSH_WORK - then set the error to
+ * EINPROGRESS since some other thread is waiting to collect
+ * error status.
+ */
+ if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+ ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
+ } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
+ err = -EINPROGRESS;
+ } else {
+ ep->conn_port = *dst;
+ init_waitqueue_head(&ep->sendwq);
+ init_waitqueue_head(&ep->recvwq);
+ init_waitqueue_head(&ep->conwq);
+ ep->conn_async_state = 0;
+
+ if (unlikely(non_block))
+ ep->conn_async_state = ASYNC_CONN_INPROGRESS;
+ }
+ break;
+ }
+
+ if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
+ goto connect_simple_unlock1;
+
+ ep->state = SCIFEP_CONNECTING;
+ ep->remote_dev = &scif_dev[dst->node];
+ ep->qp_info.qp->magic = SCIFEP_MAGIC;
+ if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+ spin_lock(&scif_info.nb_connect_lock);
+ list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
+ spin_unlock(&scif_info.nb_connect_lock);
+ err = -EINPROGRESS;
+ schedule_work(&scif_info.conn_work);
+ }
+connect_simple_unlock1:
+ spin_unlock(&ep->lock);
+ scif_put_peer_dev(spdev);
+ if (err) {
+ return err;
+ } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
+ flush_work(&scif_info.conn_work);
+ err = ep->conn_err;
+ spin_lock(&ep->lock);
+ ep->conn_async_state = ASYNC_CONN_IDLE;
+ spin_unlock(&ep->lock);
+ } else {
+ err = scif_conn_func(ep);
+ }
+ return err;
+}
+/*
+ * scif_connect - Blocking connect to a remote port.
+ * @epd: The end point returned from scif_open()
+ * @dst: Node and port to connect to
+ *
+ * Calls __scif_connect() with non_block set to false and returns its result.
+ */
+int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
+{
+ return __scif_connect(epd, dst, false);
+}
+EXPORT_SYMBOL_GPL(scif_connect);
+
+/**
+ * scif_accept() - Accept a connection request from the remote node
+ * @epd: The listening end point
+ * @peer: Filled in with the node and port of the connecting side
+ * @newepd: Filled in with the newly created end point on success
+ * @flags: 0 or SCIF_ACCEPT_SYNC; any other bit is rejected with -EINVAL
+ *
+ * The function accepts a connection request from the remote node. Successful
+ * completion is indicated by a new end point being created and passed back
+ * to the caller for future reference.
+ *
+ * Upon successful completion a zero will be returned and the peer information
+ * will be filled in.
+ *
+ * If the end point is not in the listening state, or @peer or @newepd is
+ * NULL, -EINVAL will be returned.
+ *
+ * If during the connection sequence resource allocation fails then -ENOMEM
+ * will be returned and the request is answered with SCIF_CNCT_REJ.
+ *
+ * If the function is called without SCIF_ACCEPT_SYNC and no connection
+ * requests are pending it will return -EAGAIN. -EAGAIN is also returned
+ * without SCIF_ACCEPT_SYNC when the new end point is found in SCIFEP_CLOSING
+ * state after the grant; with SCIF_ACCEPT_SYNC the function goes back to
+ * waiting instead.
+ *
+ * If the wait for a connection request is interrupted, the error from
+ * wait_event_interruptible() is returned. If the end point leaves the
+ * listening state while waiting, -EINTR is returned.
+ *
+ * If the wait for the grant acknowledgement times out and scifdev_alive()
+ * reports false for the new end point, -ENODEV is returned.
+ */
+int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
+ scif_epd_t *newepd, int flags)
+{
+ struct scif_endpt *lep = (struct scif_endpt *)epd;
+ struct scif_endpt *cep;
+ struct scif_conreq *conreq;
+ struct scifmsg msg;
+ int err;
+ struct device *spdev;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);
+
+ if (flags & ~SCIF_ACCEPT_SYNC)
+ return -EINVAL;
+
+ if (!peer || !newepd)
+ return -EINVAL;
+
+ might_sleep();
+ spin_lock(&lep->lock);
+ if (lep->state != SCIFEP_LISTENING) {
+ spin_unlock(&lep->lock);
+ return -EINVAL;
+ }
+
+ if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
+ /* No connection request present and we do not want to wait */
+ spin_unlock(&lep->lock);
+ return -EAGAIN;
+ }
+
+ lep->files = current->files;
+retry_connection:
+ spin_unlock(&lep->lock);
+ /* Wait for the remote node to send us a SCIF_CNCT_REQ */
+ err = wait_event_interruptible(lep->conwq,
+ (lep->conreqcnt ||
+ (lep->state != SCIFEP_LISTENING)));
+ if (err)
+ return err;
+
+ if (lep->state != SCIFEP_LISTENING)
+ return -EINTR;
+
+ spin_lock(&lep->lock);
+
+ if (!lep->conreqcnt)
+ goto retry_connection;
+
+ /* Get the first connect request off the list */
+ conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
+ list_del(&conreq->list);
+ lep->conreqcnt--;
+ spin_unlock(&lep->lock);
+
+ /* Fill in the peer information */
+ peer->node = conreq->msg.src.node;
+ peer->port = conreq->msg.src.port;
+
+ cep = kzalloc(sizeof(*cep), GFP_KERNEL);
+ if (!cep) {
+ err = -ENOMEM;
+ goto scif_accept_error_epalloc;
+ }
+ spin_lock_init(&cep->lock);
+ mutex_init(&cep->sendlock);
+ mutex_init(&cep->recvlock);
+ cep->state = SCIFEP_CONNECTING;
+ cep->remote_dev = &scif_dev[peer->node];
+ cep->remote_ep = conreq->msg.payload[0];
+
+ cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
+ if (!cep->qp_info.qp) {
+ err = -ENOMEM;
+ goto scif_accept_error_qpalloc;
+ }
+
+ cep->qp_info.qp->magic = SCIFEP_MAGIC;
+ spdev = scif_get_peer_dev(cep->remote_dev);
+ if (IS_ERR(spdev)) {
+ err = PTR_ERR(spdev);
+ goto scif_accept_error_map;
+ }
+ err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
+ conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
+ cep->remote_dev);
+ if (err) {
+ dev_dbg(&cep->remote_dev->sdev->dev,
+ "SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
+ lep, cep, err, cep->qp_info.qp_offset);
+ scif_put_peer_dev(spdev);
+ goto scif_accept_error_map;
+ }
+
+ cep->port.node = lep->port.node;
+ cep->port.port = lep->port.port;
+ cep->peer.node = peer->node;
+ cep->peer.port = peer->port;
+ init_waitqueue_head(&cep->sendwq);
+ init_waitqueue_head(&cep->recvwq);
+ init_waitqueue_head(&cep->conwq);
+
+ msg.uop = SCIF_CNCT_GNT;
+ msg.src = cep->port;
+ msg.payload[0] = cep->remote_ep;
+ msg.payload[1] = cep->qp_info.qp_offset;
+ msg.payload[2] = (u64)cep;
+
+ err = _scif_nodeqp_send(cep->remote_dev, &msg);
+ scif_put_peer_dev(spdev);
+ if (err)
+ goto scif_accept_error_map;
+retry:
+ /* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
+ err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
+ SCIF_NODE_ACCEPT_TIMEOUT);
+ if (!err && scifdev_alive(cep))
+ goto retry;
+ err = !err ? -ENODEV : 0;
+ if (err)
+ goto scif_accept_error_map;
+ kfree(conreq);
+
+ spin_lock(&cep->lock);
+
+ if (cep->state == SCIFEP_CLOSING) {
+ /*
+ * Remote failed to allocate resources and NAKed the grant.
+ * There is at this point nothing referencing the new end point.
+ */
+ spin_unlock(&cep->lock);
+ scif_teardown_ep(cep);
+ kfree(cep);
+
+ /* If called with the sync flag then go back and wait. */
+ if (flags & SCIF_ACCEPT_SYNC) {
+ spin_lock(&lep->lock);
+ goto retry_connection;
+ }
+ return -EAGAIN;
+ }
+
+ scif_get_port(cep->port.port);
+ *newepd = (scif_epd_t)cep;
+ spin_unlock(&cep->lock);
+ return 0;
+scif_accept_error_map:
+ scif_teardown_ep(cep);
+scif_accept_error_qpalloc:
+ kfree(cep);
+scif_accept_error_epalloc:
+ msg.uop = SCIF_CNCT_REJ;
+ msg.dst.node = conreq->msg.src.node;
+ msg.dst.port = conreq->msg.src.port;
+ msg.payload[0] = conreq->msg.payload[0];
+ msg.payload[1] = conreq->msg.payload[1];
+ scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
+ kfree(conreq);
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_accept);
+
+/*
+ * scif_msg_param_check:
+ * @epd: The end point returned from scif_open() (not examined here)
+ * @len: Length to send or receive
+ * @flags: blocking or non blocking
+ *
+ * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
+ * Returns -EINVAL for a negative @len or for non-zero @flags that do not
+ * contain SCIF_RECV_BLOCK, and 0 otherwise.
+ */
+static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
+{
+	if (len < 0)
+		return -EINVAL;
+
+	/* Either no flags at all, or the blocking bit must be among them */
+	if (flags != 0 && (flags & SCIF_RECV_BLOCK) == 0)
+		return -EINVAL;
+
+	return 0;
+}
+/*
+ * _scif_send - Copy data into the endpoint's outbound ring buffer.
+ * @epd: The end point returned from scif_open()
+ * @msg: Buffer holding the data to send
+ * @len: Number of bytes to send
+ * @flags: SCIF_SEND_BLOCK to wait for ring buffer space
+ *
+ * Loops while the endpoint is SCIFEP_CONNECTED and data remains: writes as
+ * much as scif_rb_space() reports room for, commits it and notifies the peer
+ * with SCIF_CLIENT_SENT. When there is no room, the loop ends unless
+ * SCIF_SEND_BLOCK is set, in which case ep->lock is dropped while waiting on
+ * ep->sendwq and retaken afterwards.
+ *
+ * Returns the number of bytes sent if that is non-zero. Otherwise returns
+ * the error that ended the loop, or -ECONNRESET if the endpoint is
+ * SCIFEP_DISCONNECTED, -ENOTCONN for any other non-connected state, or 0.
+ *
+ * NOTE(review): if the SCIF_CLIENT_SENT notification fails, the bytes just
+ * committed are not added to sent_len - confirm this is intended.
+ */
+static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct scifmsg notif_msg;
+ int curr_xfer_len = 0, sent_len = 0, write_count;
+ int ret = 0;
+ struct scif_qp *qp = ep->qp_info.qp;
+
+ if (flags & SCIF_SEND_BLOCK)
+ might_sleep();
+
+ spin_lock(&ep->lock);
+ while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
+ write_count = scif_rb_space(&qp->outbound_q);
+ if (write_count) {
+ /* Best effort to send as much data as possible */
+ curr_xfer_len = min(len - sent_len, write_count);
+ ret = scif_rb_write(&qp->outbound_q, msg,
+ curr_xfer_len);
+ if (ret < 0)
+ break;
+ /* Success. Update write pointer */
+ scif_rb_commit(&qp->outbound_q);
+ /*
+ * Send a notification to the peer about the
+ * produced data message.
+ */
+ notif_msg.src = ep->port;
+ notif_msg.uop = SCIF_CLIENT_SENT;
+ notif_msg.payload[0] = ep->remote_ep;
+ ret = _scif_nodeqp_send(ep->remote_dev, &notif_msg);
+ if (ret)
+ break;
+ sent_len += curr_xfer_len;
+ msg = msg + curr_xfer_len;
+ continue;
+ }
+ curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
+ /* Not enough RB space. return for the Non Blocking case */
+ if (!(flags & SCIF_SEND_BLOCK))
+ break;
+
+ spin_unlock(&ep->lock);
+ /* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */
+ ret =
+ wait_event_interruptible(ep->sendwq,
+ (SCIFEP_CONNECTED != ep->state) ||
+ (scif_rb_space(&qp->outbound_q) >=
+ curr_xfer_len));
+ spin_lock(&ep->lock);
+ if (ret)
+ break;
+ }
+ if (sent_len)
+ ret = sent_len;
+ else if (!ret && SCIFEP_CONNECTED != ep->state)
+ ret = SCIFEP_DISCONNECTED == ep->state ?
+ -ECONNRESET : -ENOTCONN;
+ spin_unlock(&ep->lock);
+ return ret;
+}
+/*
+ * _scif_recv - Copy data out of the endpoint's inbound ring buffer.
+ * @epd: The end point returned from scif_open()
+ * @msg: Buffer to place the data in
+ * @len: Number of bytes to receive
+ * @flags: SCIF_RECV_BLOCK to wait for data
+ *
+ * Loops while data is still wanted and the endpoint is SCIFEP_CONNECTED or
+ * SCIFEP_DISCONNECTED. The read pointer is updated and the peer is notified
+ * with SCIF_CLIENT_RCVD only while the endpoint is SCIFEP_CONNECTED. When
+ * the ring buffer is empty the loop ends if the endpoint is disconnected or
+ * SCIF_RECV_BLOCK is not set; otherwise ep->lock is dropped while waiting on
+ * ep->recvwq and retaken afterwards.
+ *
+ * Returns the number of bytes received if that is non-zero. Otherwise
+ * returns the error that ended the loop, or -ECONNRESET if the endpoint is
+ * SCIFEP_DISCONNECTED, -ENOTCONN for any other non-connected state, or 0.
+ *
+ * NOTE(review): read_size is assigned from scif_rb_get_next() but never
+ * read afterwards.
+ */
+static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
+{
+ int read_size;
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct scifmsg notif_msg;
+ int curr_recv_len = 0, remaining_len = len, read_count;
+ int ret = 0;
+ struct scif_qp *qp = ep->qp_info.qp;
+
+ if (flags & SCIF_RECV_BLOCK)
+ might_sleep();
+ spin_lock(&ep->lock);
+ while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
+ SCIFEP_DISCONNECTED == ep->state)) {
+ read_count = scif_rb_count(&qp->inbound_q, remaining_len);
+ if (read_count) {
+ /*
+ * Best effort to recv as much data as there
+ * are bytes to read in the RB particularly
+ * important for the Non Blocking case.
+ */
+ curr_recv_len = min(remaining_len, read_count);
+ read_size = scif_rb_get_next(&qp->inbound_q,
+ msg, curr_recv_len);
+ if (ep->state == SCIFEP_CONNECTED) {
+ /*
+ * Update the read pointer only if the endpoint
+ * is still connected else the read pointer
+ * might no longer exist since the peer has
+ * freed resources!
+ */
+ scif_rb_update_read_ptr(&qp->inbound_q);
+ /*
+ * Send a notification to the peer about the
+ * consumed data message only if the EP is in
+ * SCIFEP_CONNECTED state.
+ */
+ notif_msg.src = ep->port;
+ notif_msg.uop = SCIF_CLIENT_RCVD;
+ notif_msg.payload[0] = ep->remote_ep;
+ ret = _scif_nodeqp_send(ep->remote_dev,
+ &notif_msg);
+ if (ret)
+ break;
+ }
+ remaining_len -= curr_recv_len;
+ msg = msg + curr_recv_len;
+ continue;
+ }
+ /*
+ * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
+ * we will keep looping forever.
+ */
+ if (ep->state == SCIFEP_DISCONNECTED)
+ break;
+ /*
+ * Return in the Non Blocking case if there is no data
+ * to read in this iteration.
+ */
+ if (!(flags & SCIF_RECV_BLOCK))
+ break;
+ curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
+ spin_unlock(&ep->lock);
+ /*
+ * Wait for a SCIF_CLIENT_SEND message in the blocking case
+ * or until other side disconnects.
+ */
+ ret =
+ wait_event_interruptible(ep->recvwq,
+ SCIFEP_CONNECTED != ep->state ||
+ scif_rb_count(&qp->inbound_q,
+ curr_recv_len)
+ >= curr_recv_len);
+ spin_lock(&ep->lock);
+ if (ret)
+ break;
+ }
+ if (len - remaining_len)
+ ret = len - remaining_len;
+ else if (!ret && ep->state != SCIFEP_CONNECTED)
+ ret = ep->state == SCIFEP_DISCONNECTED ?
+ -ECONNRESET : -ENOTCONN;
+ spin_unlock(&ep->lock);
+ return ret;
+}
+
+/**
+ * scif_user_send() - Send data to connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: User space address of the data to send
+ * @len: Length to send
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the driver IOCTL entry point
+ * only and is a wrapper for _scif_send().
+ *
+ * The data is copied through a temporary kernel buffer in chunks of at most
+ * 1 << (MAX_ORDER + PAGE_SHIFT - 1) bytes, with ep->sendlock held across all
+ * chunks. The loop stops early when _scif_send() sends less than a full
+ * chunk.
+ *
+ * Returns 0 for a zero @len, otherwise the number of bytes sent or a
+ * negative error: -EINVAL from the parameter check, -ENOMEM, -EFAULT or the
+ * error from _scif_send(). An error on a later chunk is returned even if
+ * earlier chunks were already sent.
+ */
+int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ int err = 0;
+ int sent_len = 0;
+ char *tmp;
+ int loop_len;
+ int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
+ if (!len)
+ return 0;
+
+ err = scif_msg_param_check(epd, len, flags);
+ if (err)
+ goto send_err;
+
+ tmp = kmalloc(chunk_len, GFP_KERNEL);
+ if (!tmp) {
+ err = -ENOMEM;
+ goto send_err;
+ }
+ /*
+ * Grabbing the lock before breaking up the transfer in
+ * multiple chunks is required to ensure that messages do
+ * not get fragmented and reordered.
+ */
+ mutex_lock(&ep->sendlock);
+ while (sent_len != len) {
+ loop_len = len - sent_len;
+ loop_len = min(chunk_len, loop_len);
+ if (copy_from_user(tmp, msg, loop_len)) {
+ err = -EFAULT;
+ goto send_free_err;
+ }
+ err = _scif_send(epd, tmp, loop_len, flags);
+ if (err < 0)
+ goto send_free_err;
+ sent_len += err;
+ msg += err;
+ if (err != loop_len)
+ goto send_free_err;
+ }
+send_free_err:
+ mutex_unlock(&ep->sendlock);
+ kfree(tmp);
+send_err:
+ return err < 0 ? err : sent_len;
+}
+
+/**
+ * scif_user_recv() - Receive data from connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: User space address to place data
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the driver IOCTL entry point
+ * only and is a wrapper for _scif_recv().
+ *
+ * The data is copied through a temporary kernel buffer in chunks of at most
+ * 1 << (MAX_ORDER + PAGE_SHIFT - 1) bytes, with ep->recvlock held across all
+ * chunks. The loop stops early when _scif_recv() returns less than a full
+ * chunk.
+ *
+ * Returns 0 for a zero @len, otherwise the number of bytes received or a
+ * negative error: -EINVAL from the parameter check, -ENOMEM, -EFAULT or the
+ * error from _scif_recv(). An error on a later chunk is returned even if
+ * earlier chunks were already copied to user space.
+ */
+int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ int err = 0;
+ int recv_len = 0;
+ char *tmp;
+ int loop_len;
+ int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
+ if (!len)
+ return 0;
+
+ err = scif_msg_param_check(epd, len, flags);
+ if (err)
+ goto recv_err;
+
+ tmp = kmalloc(chunk_len, GFP_KERNEL);
+ if (!tmp) {
+ err = -ENOMEM;
+ goto recv_err;
+ }
+ /*
+ * Grabbing the lock before breaking up the transfer in
+ * multiple chunks is required to ensure that messages do
+ * not get fragmented and reordered.
+ */
+ mutex_lock(&ep->recvlock);
+ while (recv_len != len) {
+ loop_len = len - recv_len;
+ loop_len = min(chunk_len, loop_len);
+ err = _scif_recv(epd, tmp, loop_len, flags);
+ if (err < 0)
+ goto recv_free_err;
+ if (copy_to_user(msg, tmp, err)) {
+ err = -EFAULT;
+ goto recv_free_err;
+ }
+ recv_len += err;
+ msg += err;
+ if (err != loop_len)
+ goto recv_free_err;
+ }
+recv_free_err:
+ mutex_unlock(&ep->recvlock);
+ kfree(tmp);
+recv_err:
+ return err < 0 ? err : recv_len;
+}
+
+/**
+ * scif_send() - Send data to connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: Address of the data to send
+ * @len: Length to send
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the kernel mode only and is
+ * a wrapper for _scif_send().
+ *
+ * Returns 0 for a zero @len, the error from scif_msg_param_check(),
+ * -ENOTCONN when the endpoint has no remote_dev, and otherwise the result of
+ * _scif_send().
+ */
+int scif_send(scif_epd_t epd, void *msg, int len, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int rc;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
+	if (len == 0)
+		return 0;
+
+	rc = scif_msg_param_check(epd, len, flags);
+	if (rc)
+		return rc;
+	if (!ep->remote_dev)
+		return -ENOTCONN;
+
+	/*
+	 * The non blocking mode is protected using spin locks
+	 * in _scif_send(), so no mutex is taken for it.
+	 */
+	if (!(flags & SCIF_SEND_BLOCK))
+		return _scif_send(epd, msg, len, flags);
+
+	/*
+	 * Grab the mutex lock in the blocking case only
+	 * to ensure messages do not get fragmented/reordered.
+	 */
+	mutex_lock(&ep->sendlock);
+	rc = _scif_send(epd, msg, len, flags);
+	mutex_unlock(&ep->sendlock);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(scif_send);
+
+/**
+ * scif_recv() - Receive data from connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: Address to place data
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the kernel mode only and is
+ * a wrapper for _scif_recv().
+ *
+ * Returns 0 for a zero @len, the error from scif_msg_param_check(), and
+ * otherwise the result of _scif_recv().
+ */
+int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int rc;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
+	if (len == 0)
+		return 0;
+
+	rc = scif_msg_param_check(epd, len, flags);
+	if (rc)
+		return rc;
+
+	/*
+	 * The non blocking mode is protected using spin locks
+	 * in _scif_recv(), so no mutex is taken for it.
+	 */
+	if (!(flags & SCIF_RECV_BLOCK))
+		return _scif_recv(epd, msg, len, flags);
+
+	/*
+	 * Grab the mutex lock in the blocking case only
+	 * to ensure messages do not get fragmented/reordered.
+	 */
+	mutex_lock(&ep->recvlock);
+	rc = _scif_recv(epd, msg, len, flags);
+	mutex_unlock(&ep->recvlock);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(scif_recv);
+
+/*
+ * scif_get_node_ids - Report the ids of the nodes that are alive.
+ * @nodes: Array filled with the ids of alive nodes
+ * @len: Number of entries available in @nodes
+ * @self: Filled in with scif_info.nodeid
+ *
+ * At most min(@len, scif_info.total) ids are stored in @nodes. On a node
+ * that is not the management node, scif_get_node_info() is called first.
+ *
+ * Returns the number of alive nodes found, which may exceed the number of
+ * ids stored.
+ */
+int scif_get_node_ids(u16 *nodes, int len, u16 *self)
+{
+	int alive_cnt = 0;
+	int filled = 0;
+	int id;
+
+	if (!scif_is_mgmt_node())
+		scif_get_node_info();
+
+	*self = scif_info.nodeid;
+
+	mutex_lock(&scif_info.conflock);
+	len = min_t(int, len, scif_info.total);
+	for (id = 0; id <= scif_info.maxid; id++) {
+		if (!_scifdev_alive(&scif_dev[id]))
+			continue;
+
+		alive_cnt++;
+		/* Keep counting even once the caller's array is full */
+		if (filled < len)
+			nodes[filled++] = id;
+	}
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
+		scif_info.total, alive_cnt, filled);
+	mutex_unlock(&scif_info.conflock);
+
+	return alive_cnt;
+}
+EXPORT_SYMBOL_GPL(scif_get_node_ids);
diff --git a/drivers/misc/mic/scif/scif_debugfs.c b/drivers/misc/mic/scif/scif_debugfs.c
new file mode 100644
index 000000000..51f14e2a1
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_debugfs.c
@@ -0,0 +1,85 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "../common/mic_dev.h"
+#include "scif_main.h"
+
+/* Debugfs parent dir */
+static struct dentry *scif_dbg;
+
+/*
+ * seq_file show routine for the "scif_dev" debugfs entry: prints the global
+ * node counters and, when the scif_dev array exists, one line per node ID
+ * up to scif_info.maxid with its liveness.
+ */
+static int scif_dev_test(struct seq_file *s, void *unused)
+{
+	int id;
+
+	seq_printf(s, "Total Nodes %d Self Node Id %d Maxid %d\n",
+		   scif_info.total, scif_info.nodeid, scif_info.maxid);
+
+	if (!scif_dev)
+		return 0;
+
+	seq_printf(s, "%-16s\t%-16s\n", "node_id", "state");
+
+	for (id = 0; id <= scif_info.maxid; id++) {
+		struct scif_dev *dev = &scif_dev[id];
+		const char *state;
+
+		state = _scifdev_alive(dev) ? "Running" : "Offline";
+		seq_printf(s, "%-16d\t%-16s\n", dev->node, state);
+	}
+	return 0;
+}
+
+/* debugfs open: register scif_dev_test() as the single_open() show routine */
+static int scif_dev_test_open(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	return single_open(file, scif_dev_test, priv);
+}
+
+/* debugfs release: counterpart of scif_dev_test_open(), defers to single_release() */
+static int scif_dev_test_release(struct inode *inode, struct file *file)
+{
+ return single_release(inode, file);
+}
+
+/* seq_file based file operations used for the "scif_dev" debugfs entry */
+static const struct file_operations scif_dev_ops = {
+ .owner = THIS_MODULE,
+ .open = scif_dev_test_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = scif_dev_test_release
+};
+
+/*
+ * scif_init_debugfs() - Create the module's debugfs directory and entries
+ *
+ * Creates a directory named after the module with a read-only "scif_dev"
+ * file and the "en_msg_log" / "p2p_enable" u8 knobs backed by scif_info.
+ * If the directory cannot be created an error is logged and no entries
+ * are added.
+ */
+void __init scif_init_debugfs(void)
+{
+	scif_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	if (!scif_dbg) {
+		dev_err(scif_info.mdev.this_device,
+			"can't create debugfs dir scif\n");
+		return;
+	}
+
+	/* The returned dentry is not needed, so it is not stored */
+	debugfs_create_file("scif_dev", 0444, scif_dbg, NULL, &scif_dev_ops);
+	debugfs_create_u8("en_msg_log", 0666, scif_dbg, &scif_info.en_msg_log);
+	debugfs_create_u8("p2p_enable", 0666, scif_dbg, &scif_info.p2p_enable);
+}
+
+/* Remove the module's debugfs directory together with everything below it */
+void scif_exit_debugfs(void)
+{
+ debugfs_remove_recursive(scif_dbg);
+}
diff --git a/drivers/misc/mic/scif/scif_epd.c b/drivers/misc/mic/scif/scif_epd.c
new file mode 100644
index 000000000..b4bfbb08a
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_epd.c
@@ -0,0 +1,353 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+#include "scif_map.h"
+
+/*
+ * scif_cleanup_ep_qp() - Undo the mappings held by an endpoint's queue pair
+ * @ep: endpoint whose qp_info.qp is cleaned up
+ *
+ * Each mapping is released only if it is currently set and the field is
+ * cleared afterwards, so calling this again on the same QP does nothing.
+ * The QP pointer itself is dereferenced without a NULL check, so the caller
+ * must make sure ep->qp_info.qp is valid. The QP memory is not freed here.
+ */
+void scif_cleanup_ep_qp(struct scif_endpt *ep)
+{
+ struct scif_qp *qp = ep->qp_info.qp;
+
+ /* I/O mappings: outbound ring buffer and the remote QP structure */
+ if (qp->outbound_q.rb_base) {
+ scif_iounmap((void *)qp->outbound_q.rb_base,
+ qp->outbound_q.size, ep->remote_dev);
+ qp->outbound_q.rb_base = NULL;
+ }
+ if (qp->remote_qp) {
+ scif_iounmap((void *)qp->remote_qp,
+ sizeof(struct scif_qp), ep->remote_dev);
+ qp->remote_qp = NULL;
+ }
+ /* Single mappings: the local QP structure and the local buffer */
+ if (qp->local_qp) {
+ scif_unmap_single(qp->local_qp, ep->remote_dev,
+ sizeof(struct scif_qp));
+ qp->local_qp = 0x0;
+ }
+ if (qp->local_buf) {
+ scif_unmap_single(qp->local_buf, ep->remote_dev,
+ SCIF_ENDPT_QP_SIZE);
+ qp->local_buf = 0;
+ }
+}
+
+/*
+ * scif_teardown_ep() - Release the queue pair owned by an endpoint
+ * @endpt: the endpoint (struct scif_endpt *) passed as an opaque pointer
+ *
+ * Does nothing when the endpoint has no QP. Otherwise the QP mappings are
+ * dropped under ep->lock and the inbound ring buffer and the QP itself are
+ * freed. The endpoint structure is not freed here.
+ */
+void scif_teardown_ep(void *endpt)
+{
+	struct scif_endpt *ep = endpt;
+	struct scif_qp *qp = ep->qp_info.qp;
+
+	if (!qp)
+		return;
+
+	spin_lock(&ep->lock);
+	scif_cleanup_ep_qp(ep);
+	spin_unlock(&ep->lock);
+
+	kfree(qp->inbound_q.rb_base);
+	kfree(qp);
+}
+
+/*
+ * Enqueue the endpoint to the zombie list for cleanup.
+ * The endpoint should not be accessed once this API returns.
+ *
+ * @ep: endpoint to retire; its state becomes SCIFEP_ZOMBIE
+ * @eplock_held: true if the caller already holds scif_info.eplock, in which
+ * case that lock is neither taken nor released here
+ *
+ * scif_info.misc_work is scheduled after the endpoint has been queued.
+ */
+void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held)
+{
+ if (!eplock_held)
+ spin_lock(&scif_info.eplock);
+ /* Lock order: scif_info.eplock first, then the endpoint lock */
+ spin_lock(&ep->lock);
+ ep->state = SCIFEP_ZOMBIE;
+ spin_unlock(&ep->lock);
+ list_add_tail(&ep->list, &scif_info.zombie);
+ scif_info.nr_zombies++;
+ if (!eplock_held)
+ spin_unlock(&scif_info.eplock);
+ schedule_work(&scif_info.misc_work);
+}
+
+/*
+ * scif_find_listen_ep() - Look up the listening endpoint bound to @port
+ * @port: local port number to match
+ *
+ * Walks scif_info.listen under scif_info.eplock.
+ *
+ * Return: the matching endpoint with ep->lock HELD (the caller must release
+ * it), or NULL with no lock held when no endpoint listens on @port.
+ */
+static struct scif_endpt *scif_find_listen_ep(u16 port)
+{
+	struct scif_endpt *cur;
+
+	spin_lock(&scif_info.eplock);
+	list_for_each_entry(cur, &scif_info.listen, list) {
+		if (cur->port.port != port)
+			continue;
+		/* Take the endpoint lock before dropping the list lock */
+		spin_lock(&cur->lock);
+		spin_unlock(&scif_info.eplock);
+		return cur;
+	}
+	spin_unlock(&scif_info.eplock);
+	return NULL;
+}
+
+/*
+ * scif_cleanup_zombie_epd() - Free every endpoint queued on the zombie list
+ *
+ * Runs under scif_info.eplock; each endpoint is unlinked, the zombie counter
+ * is decremented and the endpoint structure is freed.
+ */
+void scif_cleanup_zombie_epd(void)
+{
+	struct scif_endpt *ep, *next;
+
+	spin_lock(&scif_info.eplock);
+	list_for_each_entry_safe(ep, next, &scif_info.zombie, list) {
+		list_del(&ep->list);
+		scif_info.nr_zombies--;
+		kfree(ep);
+	}
+	spin_unlock(&scif_info.eplock);
+}
+
+/**
+ * scif_cnctreq() - Respond to SCIF_CNCT_REQ interrupt message
+ * @scifdev: SCIF device argument of the handler (not used here; the reply
+ * is sent to scif_dev[msg->src.node])
+ * @msg: Interrupt message
+ *
+ * This message is initiated by the remote node to request a connection
+ * to the local node. This function looks for an end point in the
+ * listen state on the requested port id.
+ *
+ * If it finds a listening port it places the connect request on the
+ * listening end points queue and wakes up any pending accept calls.
+ *
+ * If it does not find a listening end point it sends a connection
+ * reject message to the remote node. A reject is also sent when the
+ * request cannot be allocated or the endpoint's backlog is full.
+ */
+void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = NULL;
+ struct scif_conreq *conreq;
+
+ conreq = kmalloc(sizeof(*conreq), GFP_KERNEL);
+ if (!conreq)
+ /* Lack of resources so reject the request. */
+ goto conreq_sendrej;
+
+ /* On success the endpoint is returned with ep->lock held */
+ ep = scif_find_listen_ep(msg->dst.port);
+ if (!ep)
+ /* Send reject due to no listening ports */
+ goto conreq_sendrej_free;
+
+ if (ep->backlog <= ep->conreqcnt) {
+ /* Send reject due to too many pending requests */
+ spin_unlock(&ep->lock);
+ goto conreq_sendrej_free;
+ }
+
+ conreq->msg = *msg;
+ list_add_tail(&conreq->list, &ep->conlist);
+ ep->conreqcnt++;
+ wake_up_interruptible(&ep->conwq);
+ spin_unlock(&ep->lock);
+ return;
+
+conreq_sendrej_free:
+ kfree(conreq);
+conreq_sendrej:
+ /* Reuse the incoming message as the reject reply */
+ msg->uop = SCIF_CNCT_REJ;
+ scif_nodeqp_send(&scif_dev[msg->src.node], msg);
+}
+
+/**
+ * scif_cnctgnt() - Respond to SCIF_CNCT_GNT interrupt message
+ * @scifdev: SCIF device argument of the handler (not used here)
+ * @msg: Interrupt message; payload[0] carries the local endpoint pointer
+ *
+ * The remote node accepted the connection. If the endpoint is still in the
+ * CONNECTING state, record the peer's node/port, the grant payload and the
+ * remote endpoint, move to the MAPPING state and wake the waiter on conwq.
+ * In any other state the message is ignored.
+ */
+void scif_cnctgnt(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	if (ep->state != SCIFEP_CONNECTING)
+		goto unlock;
+
+	ep->peer.node = msg->src.node;
+	ep->peer.port = msg->src.port;
+	ep->qp_info.gnt_pld = msg->payload[1];
+	ep->remote_ep = msg->payload[2];
+	ep->state = SCIFEP_MAPPING;
+
+	wake_up(&ep->conwq);
+unlock:
+	spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_cnctgnt_ack() - Respond to SCIF_CNCT_GNTACK interrupt message
+ * @scifdev: SCIF device argument of the handler (not used here)
+ * @msg: Interrupt message; payload[0] carries the local endpoint pointer
+ *
+ * The remote connection request has finished mapping the local memory.
+ * Place the connection in the connected state and wake up the pending
+ * accept() call.
+ */
+void scif_cnctgnt_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+ /* connlock guards the connected list; taken before the endpoint lock */
+ mutex_lock(&scif_info.connlock);
+ spin_lock(&ep->lock);
+ /* New ep is now connected with all resources set. */
+ ep->state = SCIFEP_CONNECTED;
+ list_add_tail(&ep->list, &scif_info.connected);
+ wake_up(&ep->conwq);
+ spin_unlock(&ep->lock);
+ mutex_unlock(&scif_info.connlock);
+}
+
+/**
+ * scif_cnctgnt_nack() - Respond to SCIF_CNCT_GNTNACK interrupt message
+ * @scifdev: SCIF device argument of the handler (not used here)
+ * @msg: Interrupt message; payload[0] carries the local endpoint pointer
+ *
+ * The remote connection request failed to map the local memory it was sent.
+ * Place the end point in the CLOSING state to indicate it and wake up
+ * the pending accept().
+ */
+void scif_cnctgnt_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+ spin_lock(&ep->lock);
+ ep->state = SCIFEP_CLOSING;
+ wake_up(&ep->conwq);
+ spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_cnctrej() - Respond to SCIF_CNCT_REJ interrupt message
+ * @scifdev: SCIF device argument of the handler (not used here)
+ * @msg: Interrupt message; payload[0] carries the local endpoint pointer
+ *
+ * The remote end refused the connection request. An endpoint that is still
+ * CONNECTING goes back to the BOUND state and the waiter on conwq is woken;
+ * in any other state the message is ignored.
+ */
+void scif_cnctrej(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	if (ep->state != SCIFEP_CONNECTING)
+		goto unlock;
+
+	ep->state = SCIFEP_BOUND;
+	wake_up(&ep->conwq);
+unlock:
+	spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_discnct() - Respond to SCIF_DISCNCT interrupt message
+ * @scifdev: SCIF device argument of the handler (not used here; the ack
+ * is sent to scif_dev[msg->src.node])
+ * @msg: Interrupt message; payload[1] is matched against the local endpoint
+ * pointer and payload[0] against its remote_ep
+ *
+ * The remote node has indicated close() has been called on its end
+ * point. Remove the local end point from the connected list, set its
+ * state to disconnected and ensure accesses to the remote node are
+ * shutdown.
+ *
+ * When all accesses to the remote end have completed then send a
+ * DISCNT_ACK to indicate it can remove its resources and complete
+ * the close routine.
+ */
+void scif_discnct(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = NULL;
+ struct scif_endpt *tmpep;
+ struct list_head *pos, *tmpq;
+
+ mutex_lock(&scif_info.connlock);
+ list_for_each_safe(pos, tmpq, &scif_info.connected) {
+ tmpep = list_entry(pos, struct scif_endpt, list);
+ /*
+ * The local ep may have sent a disconnect and been closed
+ * due to a message response time out. It may have been
+ * allocated again and formed a new connection so we want to
+ * check if the remote ep matches
+ */
+ if (((u64)tmpep == msg->payload[1]) &&
+ ((u64)tmpep->remote_ep == msg->payload[0])) {
+ list_del(pos);
+ ep = tmpep;
+ /* ep->lock stays held until the state has been updated below */
+ spin_lock(&ep->lock);
+ break;
+ }
+ }
+
+ /*
+ * If the terminated end is not found then this side started closing
+ * before the other side sent the disconnect. If so the ep will no
+ * longer be on the connected list. Regardless the other side
+ * needs to be acked to let it know close is complete.
+ */
+ if (!ep) {
+ mutex_unlock(&scif_info.connlock);
+ goto discnct_ack;
+ }
+
+ ep->state = SCIFEP_DISCONNECTED;
+ list_add_tail(&ep->list, &scif_info.disconnected);
+
+ /* Wake any sender/receiver sleeping on this endpoint */
+ wake_up_interruptible(&ep->sendwq);
+ wake_up_interruptible(&ep->recvwq);
+ spin_unlock(&ep->lock);
+ mutex_unlock(&scif_info.connlock);
+
+discnct_ack:
+ /* Reuse the incoming message as the acknowledgement */
+ msg->uop = SCIF_DISCNT_ACK;
+ scif_nodeqp_send(&scif_dev[msg->src.node], msg);
+}
+
+/**
+ * scif_discnt_ack() - Respond to SCIF_DISCNT_ACK interrupt message
+ * @scifdev: SCIF device argument of the handler (not used here)
+ * @msg: Interrupt message; payload[0] carries the local endpoint pointer
+ *
+ * Remote side has indicated it has no more references to local resources.
+ * The endpoint is marked disconnected and its discon completion is signalled.
+ */
+void scif_discnt_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+ spin_lock(&ep->lock);
+ ep->state = SCIFEP_DISCONNECTED;
+ spin_unlock(&ep->lock);
+ complete(&ep->discon);
+}
+
+/**
+ * scif_clientsend() - Respond to SCIF_CLIENT_SEND interrupt message
+ * @scifdev: SCIF device argument of the handler (not used here)
+ * @msg: Interrupt message; payload[0] carries the local endpoint pointer
+ *
+ * Wakes the waiters on the endpoint's receive wait queue, provided the
+ * endpoint is still connected.
+ */
+void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	bool connected;
+
+	spin_lock(&ep->lock);
+	connected = (ep->state == SCIFEP_CONNECTED);
+	if (connected)
+		wake_up_interruptible(&ep->recvwq);
+	spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_clientrcvd() - Respond to SCIF_CLIENT_RCVD interrupt message
+ * @scifdev: SCIF device argument of the handler (not used here)
+ * @msg: Interrupt message; payload[0] carries the local endpoint pointer
+ *
+ * Wakes the waiters on the endpoint's send wait queue, provided the
+ * endpoint is still connected.
+ */
+void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	bool connected;
+
+	spin_lock(&ep->lock);
+	connected = (ep->state == SCIFEP_CONNECTED);
+	if (connected)
+		wake_up_interruptible(&ep->sendwq);
+	spin_unlock(&ep->lock);
+}
diff --git a/drivers/misc/mic/scif/scif_epd.h b/drivers/misc/mic/scif/scif_epd.h
new file mode 100644
index 000000000..331322a25
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_epd.h
@@ -0,0 +1,160 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_EPD_H
+#define SCIF_EPD_H
+
+#include <linux/delay.h>
+#include <linux/scif.h>
+#include <linux/scif_ioctl.h>
+
+#define SCIF_EPLOCK_HELD true
+
+/*
+ * Endpoint states. Transitions made by the message handlers in scif_epd.c:
+ * CONNECTING -> MAPPING on a connection grant, CONNECTING -> BOUND on a
+ * reject, -> CONNECTED on a grant ack, -> CLOSING on a grant nack,
+ * -> DISCONNECTED on a peer disconnect or a disconnect ack, and -> ZOMBIE
+ * when the endpoint is queued for cleanup.
+ */
+enum scif_epd_state {
+ SCIFEP_UNBOUND,
+ SCIFEP_BOUND,
+ SCIFEP_LISTENING,
+ SCIFEP_CONNECTED,
+ SCIFEP_CONNECTING,
+ SCIFEP_MAPPING,
+ SCIFEP_CLOSING,
+ SCIFEP_CLLISTEN,
+ SCIFEP_DISCONNECTED,
+ SCIFEP_ZOMBIE
+};
+
+/*
+ * struct scif_conreq - Data structure added to the connection list.
+ *
+ * @msg: connection request message received
+ * @list: link to list of connection requests
+ *
+ * One instance is allocated per accepted SCIF_CNCT_REQ and linked on the
+ * listening endpoint's conlist.
+ */
+struct scif_conreq {
+ struct scifmsg msg;
+ struct list_head list;
+};
+
+/* Size of the RB for the Endpoint QP */
+#define SCIF_ENDPT_QP_SIZE 0x1000
+
+/*
+ * struct scif_endpt_qp_info - SCIF endpoint queue pair
+ *
+ * @qp: Qpair for this endpoint
+ * @qp_offset: DMA address of the QP
+ * @gnt_pld: Payload in a SCIF_CNCT_GNT message containing the
+ * physical address of the remote_qp.
+ */
+struct scif_endpt_qp_info {
+ struct scif_qp *qp;
+ dma_addr_t qp_offset;
+ dma_addr_t gnt_pld;
+};
+
+/*
+ * struct scif_endpt - The SCIF endpoint data structure
+ *
+ * @state: end point state
+ * @lock: lock synchronizing access to endpoint fields like state etc
+ * @port: self port information
+ * @peer: peer port information
+ * @backlog: maximum pending connection requests
+ * @qp_info: Endpoint QP information for SCIF messaging
+ * @remote_dev: scifdev used by this endpt to communicate with remote node.
+ * @remote_ep: remote endpoint
+ * @conreqcnt: Keep track of number of connection requests.
+ * @files: Open file information used to match the id passed in with
+ * the flush routine.
+ * @conlist: list of connection requests
+ * @conwq: waitqueue for connection processing
+ * @discon: completion used during disconnection
+ * @sendwq: waitqueue used during sending messages
+ * @recvwq: waitqueue used during message receipt
+ * @sendlock: Synchronize ordering of messages sent
+ * @recvlock: Synchronize ordering of messages received
+ * @list: link to list of various endpoints like connected, listening etc
+ * @li_accept: pending ACCEPTREG
+ * @acceptcnt: pending ACCEPTREG cnt
+ * @liacceptlist: link to listen accept
+ * @miacceptlist: link to uaccept
+ * @listenep: associated listen ep
+ * @conn_port: Connection port
+ * @conn_err: Errors during connection
+ * @conn_async_state: Async connection
+ * @conn_list: List of async connection requests
+ */
+struct scif_endpt {
+ enum scif_epd_state state;
+ spinlock_t lock;
+ struct scif_port_id port;
+ struct scif_port_id peer;
+ int backlog;
+ struct scif_endpt_qp_info qp_info;
+ struct scif_dev *remote_dev;
+ u64 remote_ep;
+ int conreqcnt;
+ struct files_struct *files;
+ struct list_head conlist;
+ wait_queue_head_t conwq;
+ struct completion discon;
+ wait_queue_head_t sendwq;
+ wait_queue_head_t recvwq;
+ struct mutex sendlock;
+ struct mutex recvlock;
+ struct list_head list;
+ struct list_head li_accept;
+ int acceptcnt;
+ struct list_head liacceptlist;
+ struct list_head miacceptlist;
+ struct scif_endpt *listenep;
+ struct scif_port_id conn_port;
+ int conn_err;
+ int conn_async_state;
+ struct list_head conn_list;
+};
+
+/* Endpoint-level liveness check: reports _scifdev_alive() for ep->remote_dev */
+static inline int scifdev_alive(struct scif_endpt *ep)
+{
+ return _scifdev_alive(ep->remote_dev);
+}
+
+void scif_cleanup_zombie_epd(void);
+void scif_teardown_ep(void *endpt);
+void scif_cleanup_ep_qp(struct scif_endpt *ep);
+void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held);
+void scif_get_node_info(void);
+void scif_send_acks(struct scif_dev *dev);
+void scif_conn_handler(struct work_struct *work);
+int scif_rsrv_port(u16 port);
+void scif_get_port(u16 port);
+int scif_get_new_port(void);
+void scif_put_port(u16 port);
+int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags);
+int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags);
+void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctgnt(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctgnt_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctgnt_nack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctrej(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_discnct(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_discnt_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg);
+int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block);
+int __scif_flush(scif_epd_t epd);
+#endif /* SCIF_EPD_H */
diff --git a/drivers/misc/mic/scif/scif_fd.c b/drivers/misc/mic/scif/scif_fd.c
new file mode 100644
index 000000000..eccf7e713
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_fd.c
@@ -0,0 +1,303 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+
+/*
+ * open() handler: obtain an endpoint from scif_open() and stash it in the
+ * file's private data. Returns -ENOMEM when no endpoint is returned.
+ */
+static int scif_fdopen(struct inode *inode, struct file *f)
+{
+	struct scif_endpt *ep = scif_open();
+
+	if (ep) {
+		f->private_data = ep;
+		return 0;
+	}
+	return -ENOMEM;
+}
+
+/* release() handler: close the endpoint attached to the file */
+static int scif_fdclose(struct inode *inode, struct file *f)
+{
+	struct scif_endpt *ep = f->private_data;
+	int rc = scif_close(ep);
+
+	return rc;
+}
+
+/*
+ * flush() handler. Calls __scif_flush() on the endpoint when @id matches the
+ * open file information recorded in ep->files; always returns 0.
+ */
+static int scif_fdflush(struct file *f, fl_owner_t id)
+{
+ struct scif_endpt *ep = f->private_data;
+
+ /* ep->files is compared, and __scif_flush() is called, under ep->lock */
+ spin_lock(&ep->lock);
+ /*
+ * The listening endpoint stashes the open file information before
+ * waiting for incoming connections. The release callback would never be
+ * called if the application closed the endpoint, while waiting for
+ * incoming connections from a separate thread since the file descriptor
+ * reference count is bumped up in the accept IOCTL. Call the flush
+ * routine if the id matches the endpoint open file information so that
+ * the listening endpoint can be woken up and the fd released.
+ */
+ if (ep->files == id)
+ __scif_flush(ep);
+ spin_unlock(&ep->lock);
+ return 0;
+}
+
+/*
+ * Emit a debug message for a failed operation. Non-negative values are not
+ * errors and are skipped.
+ */
+static __always_inline void scif_err_debug(int err, const char *str)
+{
+	/*
+	 * ENOTCONN is a common uninteresting error which is
+	 * flooding debug messages to the console unnecessarily.
+	 */
+	if (err >= 0 || err == -ENOTCONN)
+		return;
+
+	dev_dbg(scif_info.mdev.this_device, "%s err %d\n", str, err);
+}
+
+/*
+ * scif_fdioctl() - ioctl entry point of the SCIF character device
+ * @f: open file; f->private_data holds the endpoint
+ * @cmd: SCIF_* ioctl command
+ * @arg: command argument; a user pointer for every command except
+ *       SCIF_LISTEN, where the value itself is handed to scif_listen()
+ *
+ * Return: -EINVAL for an unknown command and -EFAULT when copying to or
+ * from user space fails; otherwise the result of the command, which is 0
+ * on success for all commands except SCIF_LISTEN (that one returns
+ * scif_listen()'s value unchanged).
+ */
+static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+	struct scif_endpt *priv = f->private_data;
+	void __user *argp = (void __user *)arg;
+	int err = 0;
+	struct scifioctl_msg request;
+	bool non_block = false;
+
+	non_block = !!(f->f_flags & O_NONBLOCK);
+
+	switch (cmd) {
+	case SCIF_BIND:
+	{
+		int pn;
+
+		if (copy_from_user(&pn, argp, sizeof(pn)))
+			return -EFAULT;
+
+		/* scif_bind() hands back the port that was actually bound */
+		pn = scif_bind(priv, pn);
+		if (pn < 0)
+			return pn;
+
+		if (copy_to_user(argp, &pn, sizeof(pn)))
+			return -EFAULT;
+
+		return 0;
+	}
+	case SCIF_LISTEN:
+		return scif_listen(priv, arg);
+	case SCIF_CONNECT:
+	{
+		struct scifioctl_connect req;
+		struct scif_endpt *ep = (struct scif_endpt *)priv;
+
+		if (copy_from_user(&req, argp, sizeof(req)))
+			return -EFAULT;
+
+		err = __scif_connect(priv, &req.peer, non_block);
+		if (err < 0)
+			return err;
+
+		/* Report the local node/port used for the connection */
+		req.self.node = ep->port.node;
+		req.self.port = ep->port.port;
+
+		if (copy_to_user(argp, &req, sizeof(req)))
+			return -EFAULT;
+
+		return 0;
+	}
+	/*
+	 * Accept is done in two halves.  The request ioctl does the basic
+	 * functionality of accepting the request and returning the information
+	 * about it including the internal ID of the end point.  The register
+	 * is done with the internal ID on a new file descriptor opened by the
+	 * requesting process.
+	 */
+	case SCIF_ACCEPTREQ:
+	{
+		struct scifioctl_accept request;
+		scif_epd_t *ep = (scif_epd_t *)&request.endpt;
+
+		if (copy_from_user(&request, argp, sizeof(request)))
+			return -EFAULT;
+
+		err = scif_accept(priv, &request.peer, ep, request.flags);
+		if (err < 0)
+			return err;
+
+		if (copy_to_user(argp, &request, sizeof(request))) {
+			/* User space never learns the new ep, so drop it */
+			scif_close(*ep);
+			return -EFAULT;
+		}
+		/*
+		 * Add to the list of user mode eps where the second half
+		 * of the accept is not yet completed.
+		 */
+		spin_lock(&scif_info.eplock);
+		list_add_tail(&((*ep)->miacceptlist), &scif_info.uaccept);
+		list_add_tail(&((*ep)->liacceptlist), &priv->li_accept);
+		(*ep)->listenep = priv;
+		priv->acceptcnt++;
+		spin_unlock(&scif_info.eplock);
+
+		return 0;
+	}
+	case SCIF_ACCEPTREG:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scif_endpt *newep;
+		struct scif_endpt *lisep;
+		struct scif_endpt *fep = NULL;
+		struct scif_endpt *tmpep;
+		struct list_head *pos, *tmpq;
+
+		/* Finally replace the pointer to the accepted endpoint */
+		if (copy_from_user(&newep, argp, sizeof(void *)))
+			return -EFAULT;
+
+		/*
+		 * Remove from the user accept queue. newep comes from user
+		 * space and is only dereferenced once it has been found on
+		 * the uaccept list.
+		 */
+		spin_lock(&scif_info.eplock);
+		list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
+			tmpep = list_entry(pos,
+					   struct scif_endpt, miacceptlist);
+			if (tmpep == newep) {
+				list_del(pos);
+				fep = tmpep;
+				break;
+			}
+		}
+
+		if (!fep) {
+			spin_unlock(&scif_info.eplock);
+			return -ENOENT;
+		}
+
+		lisep = newep->listenep;
+		list_for_each_safe(pos, tmpq, &lisep->li_accept) {
+			tmpep = list_entry(pos,
+					   struct scif_endpt, liacceptlist);
+			if (tmpep == newep) {
+				list_del(pos);
+				lisep->acceptcnt--;
+				break;
+			}
+		}
+
+		spin_unlock(&scif_info.eplock);
+
+		/* Free the resources automatically created from the open. */
+		scif_teardown_ep(priv);
+		scif_add_epd_to_zombie_list(priv, !SCIF_EPLOCK_HELD);
+		f->private_data = newep;
+		return 0;
+	}
+	case SCIF_SEND:
+	{
+		struct scif_endpt *priv = f->private_data;
+
+		if (copy_from_user(&request, argp,
+				   sizeof(struct scifioctl_msg))) {
+			err = -EFAULT;
+			goto send_err;
+		}
+		err = scif_user_send(priv, (void __user *)request.msg,
+				     request.len, request.flags);
+		if (err < 0)
+			goto send_err;
+		/* A non-negative result is reported through out_len */
+		if (copy_to_user(&
+				 ((struct scifioctl_msg __user *)argp)->out_len,
+				 &err, sizeof(err))) {
+			err = -EFAULT;
+			goto send_err;
+		}
+		err = 0;
+send_err:
+		scif_err_debug(err, "scif_send");
+		return err;
+	}
+	case SCIF_RECV:
+	{
+		struct scif_endpt *priv = f->private_data;
+
+		if (copy_from_user(&request, argp,
+				   sizeof(struct scifioctl_msg))) {
+			err = -EFAULT;
+			goto recv_err;
+		}
+
+		err = scif_user_recv(priv, (void __user *)request.msg,
+				     request.len, request.flags);
+		if (err < 0)
+			goto recv_err;
+
+		/* A non-negative result is reported through out_len */
+		if (copy_to_user(&
+				 ((struct scifioctl_msg __user *)argp)->out_len,
+				 &err, sizeof(err))) {
+			err = -EFAULT;
+			goto recv_err;
+		}
+		err = 0;
+recv_err:
+		scif_err_debug(err, "scif_recv");
+		return err;
+	}
+	case SCIF_GET_NODEIDS:
+	{
+		struct scifioctl_node_ids node_ids;
+		int entries;
+		u16 *nodes;
+		void __user *unodes, *uself;
+		u16 self;
+
+		if (copy_from_user(&node_ids, argp, sizeof(node_ids))) {
+			err = -EFAULT;
+			goto getnodes_err2;
+		}
+
+		entries = min_t(int, scif_info.maxid, node_ids.len);
+		/*
+		 * Zero the buffer: scif_get_node_ids() stores only the nodes
+		 * that are alive, yet all @entries slots are copied to user
+		 * space below. Uninitialized slots would leak kernel heap
+		 * contents.
+		 */
+		nodes = kcalloc(entries, sizeof(u16), GFP_KERNEL);
+		if (entries && !nodes) {
+			err = -ENOMEM;
+			goto getnodes_err2;
+		}
+		node_ids.len = scif_get_node_ids(nodes, entries, &self);
+
+		unodes = (void __user *)node_ids.nodes;
+		if (copy_to_user(unodes, nodes, sizeof(u16) * entries)) {
+			err = -EFAULT;
+			goto getnodes_err1;
+		}
+
+		uself = (void __user *)node_ids.self;
+		if (copy_to_user(uself, &self, sizeof(u16))) {
+			err = -EFAULT;
+			goto getnodes_err1;
+		}
+
+		if (copy_to_user(argp, &node_ids, sizeof(node_ids))) {
+			err = -EFAULT;
+			goto getnodes_err1;
+		}
+		/* Success falls through with err == 0 */
+getnodes_err1:
+		kfree(nodes);
+getnodes_err2:
+		return err;
+	}
+	}
+	return -EINVAL;
+}
+
+/* File operations of the SCIF misc device (referenced from scif_info.mdev) */
+const struct file_operations scif_fops = {
+ .open = scif_fdopen,
+ .release = scif_fdclose,
+ .unlocked_ioctl = scif_fdioctl,
+ .flush = scif_fdflush,
+ .owner = THIS_MODULE,
+};
diff --git a/drivers/misc/mic/scif/scif_main.c b/drivers/misc/mic/scif/scif_main.c
new file mode 100644
index 000000000..6ce851f5c
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_main.c
@@ -0,0 +1,388 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/module.h>
+#include <linux/idr.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+#include "../bus/scif_bus.h"
+#include "scif_peer_bus.h"
+#include "scif_main.h"
+#include "scif_map.h"
+
+/*
+ * Global driver state. Only the misc device description is set statically;
+ * the locks, lists and work items are initialized in _scif_init().
+ */
+struct scif_info scif_info = {
+ .mdev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "scif",
+ .fops = &scif_fops,
+ }
+};
+
+struct scif_dev *scif_dev;
+static atomic_t g_loopb_cnt;
+
+/*
+ * Runs in the context of intr_wq.
+ * Dispatches to the loopback message handler for the local node's device
+ * and to the node QP interrupt handler for every other device.
+ */
+static void scif_intr_bh_handler(struct work_struct *work)
+{
+	struct scif_dev *scifdev;
+
+	scifdev = container_of(work, struct scif_dev, intr_bh);
+	if (!scifdev_self(scifdev)) {
+		scif_nodeqp_intrhandler(scifdev, scifdev->qpairs);
+		return;
+	}
+	scif_loopb_msg_handler(scifdev, scifdev->qpairs);
+}
+
+/*
+ * scif_setup_intr_wq() - Create the per-device interrupt workqueue
+ * @scifdev: device that gets the workqueue
+ *
+ * A device that already has a workqueue is left untouched.
+ *
+ * Return: 0 on success (or if already set up), -ENOMEM if the workqueue
+ * cannot be allocated.
+ */
+int scif_setup_intr_wq(struct scif_dev *scifdev)
+{
+	if (scifdev->intr_wq)
+		return 0;
+
+	snprintf(scifdev->intr_wqname, sizeof(scifdev->intr_wqname),
+		 "SCIF INTR %d", scifdev->node);
+	scifdev->intr_wq = alloc_ordered_workqueue(scifdev->intr_wqname, 0);
+	if (!scifdev->intr_wq)
+		return -ENOMEM;
+
+	INIT_WORK(&scifdev->intr_bh, scif_intr_bh_handler);
+	return 0;
+}
+
+/*
+ * Destroy the per-device interrupt workqueue, if there is one, and clear the
+ * pointer so that a later call is a no-op.
+ */
+void scif_destroy_intr_wq(struct scif_dev *scifdev)
+{
+	if (!scifdev->intr_wq)
+		return;
+
+	destroy_workqueue(scifdev->intr_wq);
+	scifdev->intr_wq = NULL;
+}
+
+/*
+ * Interrupt handler registered in scif_probe(). Acknowledges the doorbell
+ * through the hardware ops and defers the real work to the device's
+ * interrupt workqueue. @data is the struct scif_dev passed at registration.
+ */
+irqreturn_t scif_intr_handler(int irq, void *data)
+{
+ struct scif_dev *scifdev = data;
+ struct scif_hw_dev *sdev = scifdev->sdev;
+
+ sdev->hw_ops->ack_interrupt(sdev, scifdev->db);
+ queue_work(scifdev->intr_wq, &scifdev->intr_bh);
+ return IRQ_HANDLED;
+}
+
+/*
+ * Peer bus probe: account for the new node (total count and highest known
+ * node ID, both under conflock) and publish the peer device pointer in the
+ * node's scif_dev entry. Always returns 0.
+ */
+static int scif_peer_probe(struct scif_peer_dev *spdev)
+{
+ struct scif_dev *scifdev = &scif_dev[spdev->dnode];
+
+ mutex_lock(&scif_info.conflock);
+ scif_info.total++;
+ scif_info.maxid = max_t(u32, spdev->dnode, scif_info.maxid);
+ mutex_unlock(&scif_info.conflock);
+ rcu_assign_pointer(scifdev->spdev, spdev);
+
+ /* In the future SCIF kernel client devices will be added here */
+ return 0;
+}
+
+/*
+ * Peer bus remove: unpublish the peer device pointer, wait for an RCU grace
+ * period and drop the node from the total count. scif_info.maxid is not
+ * lowered here.
+ */
+static void scif_peer_remove(struct scif_peer_dev *spdev)
+{
+ struct scif_dev *scifdev = &scif_dev[spdev->dnode];
+
+ /* In the future SCIF kernel client devices will be removed here */
+ /*
+ * NOTE(review): rcu_dereference() is used without a visible
+ * rcu_read_lock(); confirm the calling context.
+ */
+ spdev = rcu_dereference(scifdev->spdev);
+ if (spdev)
+ RCU_INIT_POINTER(scifdev->spdev, NULL);
+ synchronize_rcu();
+
+ mutex_lock(&scif_info.conflock);
+ scif_info.total--;
+ mutex_unlock(&scif_info.conflock);
+}
+
+/*
+ * Delayed work that waits for the peer to publish its QP DMA address.
+ *
+ * The peer's address and doorbell are read from the boot parameters: from
+ * local memory (sdev->dp) on the management node, through MMIO accessors
+ * (sdev->rdp) otherwise. A non-zero address is passed to scif_qp_response();
+ * while it is still zero the work re-arms itself to run again in 1000 ms.
+ */
+static void scif_qp_setup_handler(struct work_struct *work)
+{
+ struct scif_dev *scifdev = container_of(work, struct scif_dev,
+ qp_dwork.work);
+ struct scif_hw_dev *sdev = scifdev->sdev;
+ dma_addr_t da = 0;
+ int err;
+
+ if (scif_is_mgmt_node()) {
+ struct mic_bootparam *bp = sdev->dp;
+
+ da = bp->scif_card_dma_addr;
+ scifdev->rdb = bp->h2c_scif_db;
+ } else {
+ struct mic_bootparam __iomem *bp = sdev->rdp;
+
+ da = readq(&bp->scif_host_dma_addr);
+ scifdev->rdb = ioread8(&bp->c2h_scif_db);
+ }
+ if (da) {
+ err = scif_qp_response(da, scifdev);
+ if (err)
+ dev_err(&scifdev->sdev->dev,
+ "scif_qp_response err %d\n", err);
+ } else {
+ /* Peer has not published its address yet: poll again later */
+ schedule_delayed_work(&scifdev->qp_dwork,
+ msecs_to_jiffies(1000));
+ }
+}
+
+/*
+ * scif_setup_scifdev() - Allocate and initialize the global scif_dev array
+ * @sdev: hardware device whose boot parameters give the total node count
+ *
+ * The node count is read through MMIO (sdev->rdp) when sdev->snode is
+ * non-zero and from local memory (sdev->dp) otherwise. One zeroed scif_dev
+ * entry is created per node, indexed by node ID.
+ *
+ * Return: 0 on success, -ENOMEM if the array cannot be allocated.
+ */
+static int scif_setup_scifdev(struct scif_hw_dev *sdev)
+{
+ int i;
+ u8 num_nodes;
+
+ if (sdev->snode) {
+ struct mic_bootparam __iomem *bp = sdev->rdp;
+
+ num_nodes = ioread8(&bp->tot_nodes);
+ } else {
+ struct mic_bootparam *bp = sdev->dp;
+
+ num_nodes = bp->tot_nodes;
+ }
+ scif_dev = kcalloc(num_nodes, sizeof(*scif_dev), GFP_KERNEL);
+ if (!scif_dev)
+ return -ENOMEM;
+ for (i = 0; i < num_nodes; i++) {
+ struct scif_dev *scifdev = &scif_dev[i];
+
+ scifdev->node = i;
+ scifdev->exit = OP_IDLE;
+ init_waitqueue_head(&scifdev->disconn_wq);
+ mutex_init(&scifdev->lock);
+ INIT_WORK(&scifdev->init_msg_work, scif_qp_response_ack);
+ INIT_DELAYED_WORK(&scifdev->p2p_dwork,
+ scif_poll_qp_state);
+ INIT_DELAYED_WORK(&scifdev->qp_dwork,
+ scif_qp_setup_handler);
+ INIT_LIST_HEAD(&scifdev->p2p);
+ RCU_INIT_POINTER(scifdev->spdev, NULL);
+ }
+ return 0;
+}
+
+/*
+ * Free the global scif_dev array.
+ *
+ * The pointer is reset to NULL afterwards: this function is reachable both
+ * from the scif_probe() error path and from _scif_exit(), so a stale pointer
+ * would be freed twice, and scif_dev_test() relies on a NULL check to tell
+ * whether the array exists.
+ */
+static void scif_destroy_scifdev(void)
+{
+	kfree(scif_dev);
+	scif_dev = NULL;
+}
+
+/*
+ * scif_probe() - Bring up SCIF on a newly discovered SCIF hardware device
+ * @sdev: the hardware device
+ *
+ * The first device probed (g_loopb_cnt going from 0 to 1) also allocates
+ * the shared scif_dev array and sets up the loopback QP. Every device then
+ * gets an interrupt workqueue, a QP and a doorbell interrupt, publishes its
+ * doorbell and QP DMA address in the boot parameters and schedules the
+ * delayed work that waits for the peer's QP.
+ *
+ * On failure the shared state (loopback QP and scif_dev array) is released
+ * only when this was the last user, so a device failing to probe cannot free
+ * an array that other devices still use, and g_loopb_cnt is always dropped
+ * again.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+static int scif_probe(struct scif_hw_dev *sdev)
+{
+	struct scif_dev *scifdev;
+	int rc;
+
+	dev_set_drvdata(&sdev->dev, sdev);
+	if (1 == atomic_add_return(1, &g_loopb_cnt)) {
+		struct scif_dev *loopb_dev;
+
+		rc = scif_setup_scifdev(sdev);
+		if (rc)
+			goto exit;
+		scifdev = &scif_dev[sdev->dnode];
+		scifdev->sdev = sdev;
+		loopb_dev = &scif_dev[sdev->snode];
+		loopb_dev->sdev = sdev;
+		rc = scif_setup_loopback_qp(loopb_dev);
+		if (rc)
+			goto free_sdev;
+	} else {
+		scifdev = &scif_dev[sdev->dnode];
+		scifdev->sdev = sdev;
+	}
+	rc = scif_setup_intr_wq(scifdev);
+	if (rc)
+		goto destroy_loopb;
+	rc = scif_setup_qp(scifdev);
+	if (rc)
+		goto destroy_intr;
+	scifdev->db = sdev->hw_ops->next_db(sdev);
+	scifdev->cookie = sdev->hw_ops->request_irq(sdev, scif_intr_handler,
+						    "SCIF_INTR", scifdev,
+						    scifdev->db);
+	if (IS_ERR(scifdev->cookie)) {
+		rc = PTR_ERR(scifdev->cookie);
+		goto free_qp;
+	}
+	/* Publish our doorbell and QP address where the peer looks for them */
+	if (scif_is_mgmt_node()) {
+		struct mic_bootparam *bp = sdev->dp;
+
+		bp->c2h_scif_db = scifdev->db;
+		bp->scif_host_dma_addr = scifdev->qp_dma_addr;
+	} else {
+		struct mic_bootparam __iomem *bp = sdev->rdp;
+
+		iowrite8(scifdev->db, &bp->h2c_scif_db);
+		writeq(scifdev->qp_dma_addr, &bp->scif_card_dma_addr);
+	}
+	schedule_delayed_work(&scifdev->qp_dwork,
+			      msecs_to_jiffies(1000));
+	return rc;
+free_qp:
+	scif_free_qp(scifdev);
+destroy_intr:
+	scif_destroy_intr_wq(scifdev);
+destroy_loopb:
+	/*
+	 * The loopback QP and the scif_dev array are shared by all probed
+	 * devices; tear them down only when the last user goes away.
+	 */
+	if (!atomic_dec_and_test(&g_loopb_cnt))
+		return rc;
+	scif_destroy_loopback_qp(&scif_dev[sdev->snode]);
+	scif_destroy_scifdev();
+	return rc;
+free_sdev:
+	scif_destroy_scifdev();
+exit:
+	/* First-device setup failed: undo the count taken on entry */
+	atomic_dec(&g_loopb_cnt);
+	return rc;
+}
+
+/*
+ * scif_stop() - Remove every node except the local one
+ * @scifdev: not used by this function
+ *
+ * Node IDs are visited from scif_info.maxid down to 0.
+ */
+void scif_stop(struct scif_dev *scifdev)
+{
+	int node;
+
+	for (node = scif_info.maxid; node >= 0; node--) {
+		if (!scifdev_self(&scif_dev[node]))
+			scif_handle_remove_node(node);
+	}
+}
+
+/*
+ * scif_remove() - Tear down SCIF for a hardware device that is going away
+ * @sdev: the hardware device
+ *
+ * Mirrors scif_probe(): the published doorbell and QP address are withdrawn
+ * first, then the node is disconnected (management node) or all nodes are
+ * stopped (card), and finally the per-device resources are released. The
+ * scif_dev array itself is not freed here.
+ */
+static void scif_remove(struct scif_hw_dev *sdev)
+{
+ struct scif_dev *scifdev = &scif_dev[sdev->dnode];
+
+ /* Withdraw the values published in the boot parameters by scif_probe() */
+ if (scif_is_mgmt_node()) {
+ struct mic_bootparam *bp = sdev->dp;
+
+ bp->c2h_scif_db = -1;
+ bp->scif_host_dma_addr = 0x0;
+ } else {
+ struct mic_bootparam __iomem *bp = sdev->rdp;
+
+ iowrite8(-1, &bp->h2c_scif_db);
+ writeq(0x0, &bp->scif_card_dma_addr);
+ }
+ if (scif_is_mgmt_node()) {
+ scif_disconnect_node(scifdev->node, true);
+ } else {
+ scif_info.card_initiated_exit = true;
+ scif_stop(scifdev);
+ }
+ /* The loopback QP is shared; only the last device destroys it */
+ if (atomic_dec_and_test(&g_loopb_cnt))
+ scif_destroy_loopback_qp(&scif_dev[sdev->snode]);
+ if (scifdev->cookie) {
+ sdev->hw_ops->free_irq(sdev, scifdev->cookie, scifdev);
+ scifdev->cookie = NULL;
+ }
+ scif_destroy_intr_wq(scifdev);
+ /*
+ * NOTE(review): cancel_delayed_work() does not wait for a handler that
+ * is already running, and scif_qp_setup_handler() can re-arm itself;
+ * confirm whether cancel_delayed_work_sync() is needed here.
+ */
+ cancel_delayed_work(&scifdev->qp_dwork);
+ scif_free_qp(scifdev);
+ scifdev->rdb = -1;
+ scifdev->sdev = NULL;
+}
+
+/* Driver registered on the SCIF peer bus (see scif_init()) */
+static struct scif_peer_driver scif_peer_driver = {
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .probe = scif_peer_probe,
+ .remove = scif_peer_remove,
+};
+
+/* Hardware device IDs matched by scif_driver; terminated by a zero entry */
+static struct scif_hw_dev_id id_table[] = {
+ { MIC_SCIF_DEV, SCIF_DEV_ANY_ID },
+ { 0 },
+};
+
+/* Driver for SCIF hardware devices: scif_probe()/scif_remove() per device */
+static struct scif_driver scif_driver = {
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .probe = scif_probe,
+ .remove = scif_remove,
+};
+
+/*
+ * Initialize the global scif_info state (locks, endpoint lists, wait queue,
+ * work items, default settings) and the port IDR. Always returns 0.
+ */
+static int _scif_init(void)
+{
+ spin_lock_init(&scif_info.eplock);
+ spin_lock_init(&scif_info.nb_connect_lock);
+ spin_lock_init(&scif_info.port_lock);
+ mutex_init(&scif_info.conflock);
+ mutex_init(&scif_info.connlock);
+ INIT_LIST_HEAD(&scif_info.uaccept);
+ INIT_LIST_HEAD(&scif_info.listen);
+ INIT_LIST_HEAD(&scif_info.zombie);
+ INIT_LIST_HEAD(&scif_info.connected);
+ INIT_LIST_HEAD(&scif_info.disconnected);
+ INIT_LIST_HEAD(&scif_info.nb_connect_list);
+ init_waitqueue_head(&scif_info.exitwq);
+ /* Defaults: message logging off, p2p on (both exposed through debugfs) */
+ scif_info.en_msg_log = 0;
+ scif_info.p2p_enable = 1;
+ INIT_WORK(&scif_info.misc_work, scif_misc_handler);
+ INIT_WORK(&scif_info.conn_work, scif_conn_handler);
+ idr_init(&scif_ports);
+ return 0;
+}
+
+/* Counterpart of _scif_init(): destroy the port IDR and free the scif_dev array */
+static void _scif_exit(void)
+{
+ idr_destroy(&scif_ports);
+ scif_destroy_scifdev();
+}
+
+/*
+ * Module init: set up global state, then register the peer bus, the peer
+ * driver, the SCIF hardware driver and the misc device, in that order.
+ * A failure unwinds the steps already completed in reverse order and
+ * returns the error. debugfs setup comes last and cannot fail the init.
+ */
+static int __init scif_init(void)
+{
+ struct miscdevice *mdev = &scif_info.mdev;
+ int rc;
+
+ _scif_init();
+ rc = scif_peer_bus_init();
+ if (rc)
+ goto exit;
+ rc = scif_peer_register_driver(&scif_peer_driver);
+ if (rc)
+ goto peer_bus_exit;
+ rc = scif_register_driver(&scif_driver);
+ if (rc)
+ goto unreg_scif_peer;
+ rc = misc_register(mdev);
+ if (rc)
+ goto unreg_scif;
+ scif_init_debugfs();
+ return 0;
+unreg_scif:
+ scif_unregister_driver(&scif_driver);
+unreg_scif_peer:
+ scif_peer_unregister_driver(&scif_peer_driver);
+peer_bus_exit:
+ scif_peer_bus_exit();
+exit:
+ _scif_exit();
+ return rc;
+}
+
+/* Module exit: undo scif_init() in reverse order of registration */
+static void __exit scif_exit(void)
+{
+ scif_exit_debugfs();
+ misc_deregister(&scif_info.mdev);
+ scif_unregister_driver(&scif_driver);
+ scif_peer_unregister_driver(&scif_peer_driver);
+ scif_peer_bus_exit();
+ _scif_exit();
+}
+
+module_init(scif_init);
+module_exit(scif_exit);
+
+MODULE_DEVICE_TABLE(scif, id_table);
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) SCIF driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/scif/scif_main.h b/drivers/misc/mic/scif/scif_main.h
new file mode 100644
index 000000000..580bc63e1
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_main.h
@@ -0,0 +1,254 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_MAIN_H
+#define SCIF_MAIN_H
+
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/miscdevice.h>
+#include <linux/dmaengine.h>
+#include <linux/file.h>
+#include <linux/scif.h>
+
+#include "../common/mic_dev.h"
+
+#define SCIF_MGMT_NODE 0
+#define SCIF_DEFAULT_WATCHDOG_TO 30
+#define SCIF_NODE_ACCEPT_TIMEOUT (3 * HZ)
+#define SCIF_NODE_ALIVE_TIMEOUT (SCIF_DEFAULT_WATCHDOG_TO * HZ)
+
+/*
+ * Generic state used for certain node QP message exchanges
+ * like Unregister, Alloc etc.
+ *
+ * Values start at 1, so a zero-initialised state field matches none of them.
+ */
+enum scif_msg_state {
+ OP_IDLE = 1, /* no exchange outstanding */
+ OP_IN_PROGRESS, /* request sent, response not yet seen */
+ OP_COMPLETED, /* response received */
+ OP_FAILED /* exchange failed */
+};
+
+/*
+ * struct scif_info - Global SCIF information
+ *
+ * @nodeid: Node ID this node is to others
+ * @maxid: Max known node ID
+ * @total: Total number of SCIF nodes
+ * @nr_zombies: number of zombie endpoints
+ * @eplock: Spinlock to synchronize listening, zombie endpoint lists
+ * @connlock: Mutex to synchronize connected and disconnected lists
+ * @nb_connect_lock: Spinlock to synchronize non blocking connect operations
+ * @port_lock: Spinlock to synchronize access to SCIF ports
+ * @uaccept: List of user acceptreq waiting for acceptreg
+ * @listen: List of listening end points
+ * @zombie: List of zombie end points with pending RMA's
+ * @connected: List of end points in connected state
+ * @disconnected: List of end points in disconnected state
+ * @nb_connect_list: List for non blocking connections
+ * @misc_work: miscellaneous SCIF tasks
+ * @conflock: Mutex to synchronize SCIF node configuration changes
+ * @en_msg_log: Enable debug message logging
+ * @p2p_enable: Enable P2P SCIF network
+ * @mdev: The MISC device
+ * @conn_work: Work for workqueue handling all connections
+ * @exitwq: Wait queue for waiting for an EXIT node QP message response
+ * @loopb_dev: Dummy SCIF device used for loopback
+ * @loopb_wq: Workqueue used for handling loopback messages
+ * @loopb_wqname: Name of loopback workqueue (16 byte buffer)
+ * @loopb_work: Used for submitting work to loopb_wq
+ * @loopb_recv_q: List of messages received on the loopb_wq
+ * @card_initiated_exit: set when the card has initiated the exit
+ */
+struct scif_info {
+ u8 nodeid;
+ u8 maxid;
+ u8 total;
+ u32 nr_zombies;
+ spinlock_t eplock;
+ struct mutex connlock;
+ spinlock_t nb_connect_lock;
+ spinlock_t port_lock;
+ struct list_head uaccept;
+ struct list_head listen;
+ struct list_head zombie;
+ struct list_head connected;
+ struct list_head disconnected;
+ struct list_head nb_connect_list;
+ struct work_struct misc_work;
+ struct mutex conflock;
+ u8 en_msg_log;
+ u8 p2p_enable;
+ struct miscdevice mdev;
+ struct work_struct conn_work;
+ wait_queue_head_t exitwq;
+ struct scif_dev *loopb_dev;
+ struct workqueue_struct *loopb_wq;
+ char loopb_wqname[16];
+ struct work_struct loopb_work;
+ struct list_head loopb_recv_q;
+ bool card_initiated_exit;
+};
+
+/*
+ * struct scif_p2p_info - SCIF mapping information used for P2P
+ *
+ * The two-element arrays are indexed by SCIF_PPI_MMIO and SCIF_PPI_APER.
+ *
+ * @ppi_peer_id: SCIF peer node id
+ * @ppi_sg: Scatter list for bar information (One for mmio and one for aper)
+ * @sg_nentries: Number of entries in the scatterlist
+ * @ppi_da: DMA address for MMIO and APER bars
+ * @ppi_len: Length of MMIO and APER bars, stored as a page count
+ * @ppi_list: Link in list of mapping information
+ */
+struct scif_p2p_info {
+ u8 ppi_peer_id;
+ struct scatterlist *ppi_sg[2];
+ u64 sg_nentries[2];
+ dma_addr_t ppi_da[2];
+ u64 ppi_len[2];
+#define SCIF_PPI_MMIO 0
+#define SCIF_PPI_APER 1
+ struct list_head ppi_list;
+};
+
+/*
+ * struct scif_dev - SCIF remote device specific fields
+ *
+ * @node: Node id
+ * @p2p: List of P2P mapping information
+ * @qpairs: The node queue pair for exchanging control messages
+ * @intr_wq: Workqueue for handling Node QP messages
+ * @intr_wqname: Name of node QP workqueue for handling interrupts
+ * @intr_bh: Used for submitting work to intr_wq
+ * @lock: Lock used for synchronizing access to the scif device
+ * @sdev: SCIF hardware device on the SCIF hardware bus
+ * @db: doorbell the peer will trigger to generate an interrupt on self
+ * @rdb: Doorbell to trigger on the peer to generate an interrupt on the peer
+ * @cookie: Cookie received while registering the interrupt handler
+ * @init_msg_work: work scheduled for SCIF_INIT message processing
+ * @p2p_dwork: Delayed work to enable polling for P2P state
+ * @qp_dwork: Delayed work for enabling polling for remote QP information
+ * @p2p_retry: Number of times to retry polling of P2P state
+ * @base_addr: P2P aperture bar base address
+ * @mmio: The peer MMIO information used for P2P
+ * @spdev: SCIF peer device on the SCIF peer bus (RCU protected pointer)
+ * @node_remove_ack_pending: True if a node_remove_ack is pending
+ * @exit_ack_pending: true if an exit_ack is pending
+ * @disconn_wq: Used while waiting for a node remove response
+ * @disconn_rescnt: Keeps track of number of node remove requests sent
+ * @exit: Status of exit message
+ * @qp_dma_addr: Queue pair DMA address passed to the peer
+ */
+struct scif_dev {
+ u8 node;
+ struct list_head p2p;
+ struct scif_qp *qpairs;
+ struct workqueue_struct *intr_wq;
+ char intr_wqname[16];
+ struct work_struct intr_bh;
+ struct mutex lock;
+ struct scif_hw_dev *sdev;
+ int db;
+ int rdb;
+ struct mic_irq *cookie;
+ struct work_struct init_msg_work;
+ struct delayed_work p2p_dwork;
+ struct delayed_work qp_dwork;
+ int p2p_retry;
+ dma_addr_t base_addr;
+ struct mic_mw mmio;
+ struct scif_peer_dev __rcu *spdev;
+ bool node_remove_ack_pending;
+ bool exit_ack_pending;
+ wait_queue_head_t disconn_wq;
+ atomic_t disconn_rescnt;
+ enum scif_msg_state exit;
+ dma_addr_t qp_dma_addr;
+};
+
+/* Global SCIF state shared by the driver's source files */
+extern struct scif_info scif_info;
+/* IDR holding the SCIF ports */
+extern struct idr scif_ports;
+/* Array of SCIF devices, indexed by node id */
+extern struct scif_dev *scif_dev;
+/* File operations exported for the SCIF device */
+extern const struct file_operations scif_fops;
+
+/* Size in bytes of the RB for the Node QP */
+#define SCIF_NODE_QP_SIZE 0x10000
+
+#include "scif_nodeqp.h"
+
+/*
+ * scifdev_self:
+ * @dev: SCIF device to test
+ *
+ * Returns non-zero when @dev carries this node's own id, i.e. when @dev is
+ * the self aka Loopback SCIF device, and zero otherwise.
+ */
+static inline int scifdev_self(struct scif_dev *dev)
+{
+	return scif_info.nodeid == dev->node;
+}
+
+/* Returns true when this node's id is SCIF_MGMT_NODE */
+static inline bool scif_is_mgmt_node(void)
+{
+	return scif_info.nodeid == SCIF_MGMT_NODE;
+}
+
+/*
+ * scifdev_is_p2p:
+ * @dev: The remote SCIF Device
+ *
+ * Returns true if the SCIF Device is a MIC Peer to Peer SCIF device: this
+ * node is not the mgmt node, and @dev is neither the mgmt node's device nor
+ * the loopback device.
+ */
+static inline bool scifdev_is_p2p(struct scif_dev *dev)
+{
+	if (scif_is_mgmt_node())
+		return false;
+	if (scifdev_self(dev))
+		return false;
+	return dev != &scif_dev[SCIF_MGMT_NODE];
+}
+
+/*
+ * _scifdev_alive:
+ * @scifdev: The remote SCIF Device
+ *
+ * Samples the device's peer device pointer under the RCU read lock.
+ * Returns 1 if a peer device is currently registered for @scifdev and 0
+ * otherwise.
+ */
+static inline int _scifdev_alive(struct scif_dev *scifdev)
+{
+	bool registered;
+
+	rcu_read_lock();
+	registered = rcu_dereference(scifdev->spdev) != NULL;
+	rcu_read_unlock();
+	return registered;
+}
+
+#include "scif_epd.h"
+
+/* debugfs setup and teardown */
+void __init scif_init_debugfs(void);
+void scif_exit_debugfs(void);
+/* Per-device interrupt workqueue, cleanup and node removal entry points */
+int scif_setup_intr_wq(struct scif_dev *scifdev);
+void scif_destroy_intr_wq(struct scif_dev *scifdev);
+void scif_cleanup_scifdev(struct scif_dev *dev);
+void scif_handle_remove_node(int node);
+void scif_disconnect_node(u32 node_id, bool mgmt_initiated);
+void scif_free_qp(struct scif_dev *dev);
+/* Work handler installed on scif_info.misc_work */
+void scif_misc_handler(struct work_struct *work);
+void scif_stop(struct scif_dev *scifdev);
+irqreturn_t scif_intr_handler(int irq, void *data);
+#endif /* SCIF_MAIN_H */
+#endif /* SCIF_MAIN_H */
diff --git a/drivers/misc/mic/scif/scif_map.h b/drivers/misc/mic/scif/scif_map.h
new file mode 100644
index 000000000..20e50b4e1
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_map.h
@@ -0,0 +1,113 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_MAP_H
+#define SCIF_MAP_H
+
+#include "../bus/scif_bus.h"
+
+/*
+ * scif_alloc_coherent - allocate a buffer and report its address to the peer
+ * @dma_handle: out parameter for the address handed to the remote side
+ * @scifdev: SCIF device the buffer is allocated for
+ * @size: allocation size in bytes
+ * @gfp: allocation flags
+ *
+ * For the loopback device the buffer comes from kmalloc() and the handle is
+ * its physical address. For any other device dma_alloc_coherent() is used
+ * and, for P2P devices, the handle is offset by the device's base_addr.
+ * Returns the kernel virtual address or NULL; @dma_handle is only adjusted
+ * here when the allocation succeeded.
+ */
+static __always_inline void *
+scif_alloc_coherent(dma_addr_t *dma_handle,
+		    struct scif_dev *scifdev, size_t size,
+		    gfp_t gfp)
+{
+	void *buf;
+
+	if (scifdev_self(scifdev)) {
+		buf = kmalloc(size, gfp);
+		if (buf)
+			*dma_handle = virt_to_phys(buf);
+		return buf;
+	}
+
+	buf = dma_alloc_coherent(&scifdev->sdev->dev,
+				 size, dma_handle, gfp);
+	if (!buf)
+		return NULL;
+	if (scifdev_is_p2p(scifdev))
+		*dma_handle += scifdev->base_addr;
+	return buf;
+}
+
+/*
+ * scif_free_coherent - release a buffer obtained from scif_alloc_coherent()
+ * @va: kernel virtual address of the buffer
+ * @local: handle previously returned through dma_handle
+ * @scifdev: SCIF device the buffer was allocated for
+ * @size: allocation size in bytes
+ *
+ * Loopback buffers are kfree()d. For other devices the P2P base_addr offset
+ * is removed again (only when @local is above it) before the buffer is
+ * passed to dma_free_coherent().
+ */
+static __always_inline void
+scif_free_coherent(void *va, dma_addr_t local,
+		   struct scif_dev *scifdev, size_t size)
+{
+	if (scifdev_self(scifdev)) {
+		kfree(va);
+		return;
+	}
+
+	if (scifdev_is_p2p(scifdev) && local > scifdev->base_addr)
+		local -= scifdev->base_addr;
+	dma_free_coherent(&scifdev->sdev->dev, size, va, local);
+}
+
+/*
+ * scif_map_single - make a local buffer addressable by a SCIF device
+ * @dma_handle: out parameter for the resulting address
+ * @local: kernel virtual address of the buffer
+ * @scifdev: SCIF device that will access the buffer
+ * @size: buffer size in bytes
+ *
+ * The loopback device simply gets the physical address. Other devices get a
+ * bidirectional streaming DMA mapping, offset by base_addr for P2P devices.
+ * Returns 0 on success; on a mapping error returns -ENOMEM and stores 0 in
+ * @dma_handle.
+ */
+static __always_inline int
+scif_map_single(dma_addr_t *dma_handle,
+		void *local, struct scif_dev *scifdev, size_t size)
+{
+	if (scifdev_self(scifdev)) {
+		*dma_handle = virt_to_phys((local));
+		return 0;
+	}
+
+	*dma_handle = dma_map_single(&scifdev->sdev->dev,
+				     local, size, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(&scifdev->sdev->dev, *dma_handle)) {
+		*dma_handle = 0;
+		return -ENOMEM;
+	}
+	if (scifdev_is_p2p(scifdev))
+		*dma_handle += scifdev->base_addr;
+	return 0;
+}
+
+/*
+ * scif_unmap_single - undo scif_map_single()
+ * @local: handle previously returned through dma_handle
+ * @scifdev: SCIF device the buffer was mapped for
+ * @size: buffer size in bytes
+ *
+ * Nothing to do for the loopback device. Otherwise the P2P base_addr offset
+ * is stripped (only when @local is above it) and the streaming mapping is
+ * released.
+ */
+static __always_inline void
+scif_unmap_single(dma_addr_t local, struct scif_dev *scifdev,
+		  size_t size)
+{
+	if (scifdev_self(scifdev))
+		return;
+
+	if (scifdev_is_p2p(scifdev) && local > scifdev->base_addr)
+		local -= scifdev->base_addr;
+	dma_unmap_single(&scifdev->sdev->dev, local,
+			 size, DMA_BIDIRECTIONAL);
+}
+
+/*
+ * scif_ioremap - obtain a kernel virtual address for memory on a SCIF device
+ * @phys: address to map
+ * @size: length of the region in bytes
+ * @scifdev: SCIF device owning the memory
+ *
+ * Loopback addresses are translated with phys_to_virt(); everything else is
+ * mapped through the hardware device's ioremap operation.
+ */
+static __always_inline void *
+scif_ioremap(dma_addr_t phys, size_t size, struct scif_dev *scifdev)
+{
+	struct scif_hw_dev *sdev = scifdev->sdev;
+
+	if (scifdev_self(scifdev))
+		return phys_to_virt(phys);
+	return (void __force *)sdev->hw_ops->ioremap(sdev, phys, size);
+}
+
+/*
+ * scif_iounmap - undo scif_ioremap()
+ * @virt: address returned by scif_ioremap()
+ * @len: length of the region (not used by this implementation)
+ * @scifdev: SCIF device owning the memory
+ *
+ * Loopback mappings need no teardown; other mappings are released through
+ * the hardware device's iounmap operation.
+ */
+static __always_inline void
+scif_iounmap(void *virt, size_t len, struct scif_dev *scifdev)
+{
+	struct scif_hw_dev *sdev;
+
+	if (scifdev_self(scifdev))
+		return;
+
+	sdev = scifdev->sdev;
+	sdev->hw_ops->iounmap(sdev, (void __force __iomem *)virt);
+}
+#endif /* SCIF_MAP_H */
diff --git a/drivers/misc/mic/scif/scif_nm.c b/drivers/misc/mic/scif/scif_nm.c
new file mode 100644
index 000000000..9b4c5382d
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_nm.c
@@ -0,0 +1,237 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_peer_bus.h"
+
+#include "scif_main.h"
+#include "scif_map.h"
+
+/*
+ * scif_invalidate_ep() - invalidate every endpoint that talks to @node
+ * @node: id of the node whose endpoints are being invalidated
+ *
+ * Flushes pending connection work and then, with scif_info.connlock held,
+ * walks both endpoint lists. Endpoints that are already disconnected only
+ * get their QP cleaned up. Connected endpoints are marked
+ * SCIFEP_DISCONNECTED, moved to the disconnected list, have their QP
+ * cleaned up and their send/recv waitqueues woken.
+ */
+static void scif_invalidate_ep(int node)
+{
+	struct scif_endpt *ep, *next;
+
+	flush_work(&scif_info.conn_work);
+	mutex_lock(&scif_info.connlock);
+
+	list_for_each_entry_safe(ep, next, &scif_info.disconnected, list) {
+		if (ep->remote_dev->node != node)
+			continue;
+		spin_lock(&ep->lock);
+		scif_cleanup_ep_qp(ep);
+		spin_unlock(&ep->lock);
+	}
+
+	list_for_each_entry_safe(ep, next, &scif_info.connected, list) {
+		if (ep->remote_dev->node != node)
+			continue;
+		list_del(&ep->list);
+		spin_lock(&ep->lock);
+		ep->state = SCIFEP_DISCONNECTED;
+		list_add_tail(&ep->list, &scif_info.disconnected);
+		scif_cleanup_ep_qp(ep);
+		wake_up_interruptible(&ep->sendwq);
+		wake_up_interruptible(&ep->recvwq);
+		spin_unlock(&ep->lock);
+	}
+
+	mutex_unlock(&scif_info.connlock);
+}
+
+/*
+ * scif_free_qp - release the node queue pair of a SCIF device
+ * @scifdev: SCIF device whose qpairs should be freed
+ *
+ * The inbound ring buffer is a kernel allocation that was made visible to
+ * the device with scif_map_single(), so it is torn down with the matching
+ * scif_unmap_single() followed by kfree() rather than with the coherent
+ * DMA free helper. The mapping of the QP structure itself is released the
+ * same way before the structure is freed. Does nothing if no QP is set up.
+ */
+void scif_free_qp(struct scif_dev *scifdev)
+{
+	struct scif_qp *qp = scifdev->qpairs;
+
+	if (!qp)
+		return;
+	/* Ring buffer: mapped with scif_map_single(), allocated with kzalloc() */
+	scif_unmap_single(qp->local_buf, scifdev, qp->inbound_q.size);
+	kfree(qp->inbound_q.rb_base);
+	/* QP structure: same pairing */
+	scif_unmap_single(qp->local_qp, scifdev, sizeof(struct scif_qp));
+	kfree(scifdev->qpairs);
+	scifdev->qpairs = NULL;
+}
+
+/*
+ * scif_cleanup_qp - drop the remote half of a device's node queue pair
+ * @dev: SCIF device being cleaned up
+ *
+ * Unmaps the remote QP and the outbound ring buffer, clears the pointers to
+ * them so a later cleanup cannot unmap them twice, and resets the local
+ * write pointer and the inbound queue offsets. For P2P devices the local
+ * QP is freed as well. Does nothing if the device has no QP.
+ */
+static void scif_cleanup_qp(struct scif_dev *dev)
+{
+	struct scif_qp *qp = dev->qpairs;
+
+	if (!qp)
+		return;
+	scif_iounmap((void *)qp->remote_qp, sizeof(struct scif_qp), dev);
+	scif_iounmap((void *)qp->outbound_q.rb_base,
+		     sizeof(struct scif_qp), dev);
+	qp->remote_qp = NULL;
+	qp->outbound_q.rb_base = NULL;
+	qp->local_write = 0;
+	qp->inbound_q.current_write_offset = 0;
+	qp->inbound_q.current_read_offset = 0;
+	if (scifdev_is_p2p(dev))
+		scif_free_qp(dev);
+}
+
+/*
+ * scif_send_acks - send any acknowledgements still owed for @dev
+ * @dev: SCIF device being removed or exiting
+ *
+ * A pending node remove ack is sent to the mgmt node with @dev's node id as
+ * the first payload word; a pending exit ack is sent to @dev itself. Each
+ * pending flag is cleared after its message has been handed to the node QP.
+ */
+void scif_send_acks(struct scif_dev *dev)
+{
+	struct scifmsg ack;
+
+	if (dev->node_remove_ack_pending) {
+		ack.uop = SCIF_NODE_REMOVE_ACK;
+		ack.src.node = scif_info.nodeid;
+		ack.dst.node = SCIF_MGMT_NODE;
+		ack.payload[0] = dev->node;
+		scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], &ack);
+		dev->node_remove_ack_pending = false;
+	}
+
+	if (dev->exit_ack_pending) {
+		ack.uop = SCIF_EXIT_ACK;
+		ack.src.node = scif_info.nodeid;
+		ack.dst.node = dev->node;
+		scif_nodeqp_send(dev, &ack);
+		dev->exit_ack_pending = false;
+	}
+}
+
+/*
+ * scif_cleanup_scifdev
+ *
+ * @dev: Remote SCIF device.
+ *
+ * Uninitialize SCIF data structures for the remote SCIF device. Returns
+ * immediately when the device has no hardware device attached. For P2P
+ * devices the interrupt and its workqueue are released first. P2P mappings
+ * are then destroyed, endpoints invalidated and owed acks sent before the
+ * QP is cleaned up.
+ */
+void scif_cleanup_scifdev(struct scif_dev *dev)
+{
+	struct scif_hw_dev *sdev = dev->sdev;
+
+	if (!sdev)
+		return;
+
+	if (scifdev_is_p2p(dev)) {
+		if (dev->cookie) {
+			sdev->hw_ops->free_irq(sdev, dev->cookie, dev);
+			dev->cookie = NULL;
+		}
+		scif_destroy_intr_wq(dev);
+	}
+
+	scif_destroy_p2p(dev);
+	scif_invalidate_ep(dev->node);
+	scif_send_acks(dev);
+
+	if (dev->node == SCIF_MGMT_NODE && scif_info.card_initiated_exit) {
+		/*
+		 * Send an SCIF_EXIT message which is the last message from MIC
+		 * to the Host and wait for a SCIF_EXIT_ACK
+		 */
+		scif_send_exit(dev);
+		scif_info.card_initiated_exit = false;
+	}
+
+	scif_cleanup_qp(dev);
+}
+
+/*
+ * scif_handle_remove_node:
+ *
+ * @node: Node to remove
+ *
+ * If a peer device is registered for @node it is unregistered; otherwise
+ * any acknowledgements still pending for the node are sent directly.
+ */
+void scif_handle_remove_node(int node)
+{
+	struct scif_dev *scifdev = &scif_dev[node];
+	struct scif_peer_dev *spdev;
+
+	rcu_read_lock();
+	spdev = rcu_dereference(scifdev->spdev);
+	rcu_read_unlock();
+
+	if (!spdev) {
+		scif_send_acks(scifdev);
+		return;
+	}
+	scif_peer_unregister_device(spdev);
+}
+
+/*
+ * scif_send_rmnode_msg - tell @node that @remove_node is leaving
+ * @node: destination node of the SCIF_NODE_REMOVE message
+ * @remove_node: id of the node being removed, carried in payload[0]
+ *
+ * Returns the result of scif_nodeqp_send().
+ */
+static int scif_send_rmnode_msg(int node, int remove_node)
+{
+	struct scifmsg rm_msg;
+
+	rm_msg.uop = SCIF_NODE_REMOVE;
+	rm_msg.src.node = scif_info.nodeid;
+	rm_msg.dst.node = node;
+	rm_msg.payload[0] = remove_node;
+
+	return scif_nodeqp_send(&scif_dev[node], &rm_msg);
+}
+
+/**
+ * scif_disconnect_node:
+ *
+ * @node_id: id of the node to disconnect.
+ * @mgmt_initiated: Disconnection initiated from the mgmt node
+ *
+ * Disconnect a node from the scif network. Node 0 is ignored. A
+ * SCIF_NODE_REMOVE message is sent to every other node from 1 to maxid and
+ * the function waits, up to SCIF_NODE_ALIVE_TIMEOUT, until the number of
+ * responses counted in disconn_rescnt equals the number of messages that
+ * were sent successfully. If the disconnect was mgmt initiated and the node
+ * is still alive it is then sent SCIF_EXIT. Finally a SCIF_NODE_REMOVE is
+ * sent to the mgmt node and, if that send succeeded, one more response is
+ * awaited with the same timeout.
+ */
+void scif_disconnect_node(u32 node_id, bool mgmt_initiated)
+{
+ int ret;
+ int msg_cnt = 0;
+ u32 i = 0;
+ struct scif_dev *scifdev = &scif_dev[node_id];
+
+ if (!node_id)
+ return;
+
+ /* Response counter is compared against msg_cnt below */
+ atomic_set(&scifdev->disconn_rescnt, 0);
+
+ /* Destroy p2p network */
+ for (i = 1; i <= scif_info.maxid; i++) {
+ if (i == node_id)
+ continue;
+ ret = scif_send_rmnode_msg(i, node_id);
+ /* Only messages that were actually sent are waited for */
+ if (!ret)
+ msg_cnt++;
+ }
+ /* Wait for the remote nodes to respond with SCIF_NODE_REMOVE_ACK */
+ ret = wait_event_timeout(scifdev->disconn_wq,
+ (atomic_read(&scifdev->disconn_rescnt)
+ == msg_cnt), SCIF_NODE_ALIVE_TIMEOUT);
+ /* Tell the card to clean up */
+ if (mgmt_initiated && _scifdev_alive(scifdev))
+ /*
+ * Send an SCIF_EXIT message which is the last message from Host
+ * to the MIC and wait for a SCIF_EXIT_ACK
+ */
+ scif_send_exit(scifdev);
+ /* Restart the count for the single mgmt node response */
+ atomic_set(&scifdev->disconn_rescnt, 0);
+ /* Tell the mgmt node to clean up */
+ ret = scif_send_rmnode_msg(SCIF_MGMT_NODE, node_id);
+ if (!ret)
+ /* Wait for mgmt node to respond with SCIF_NODE_REMOVE_ACK */
+ wait_event_timeout(scifdev->disconn_wq,
+ (atomic_read(&scifdev->disconn_rescnt) == 1),
+ SCIF_NODE_ALIVE_TIMEOUT);
+}
+
+/*
+ * scif_get_node_info - request node information from the mgmt node
+ *
+ * Sends SCIF_GET_NODE_INFO to the mgmt node with the address of an on-stack
+ * completion in payload[3] and, if the send succeeded, blocks until that
+ * completion is signalled.
+ */
+void scif_get_node_info(void)
+{
+	DECLARE_COMPLETION_ONSTACK(node_info);
+	struct scifmsg req;
+
+	req.uop = SCIF_GET_NODE_INFO;
+	req.src.node = scif_info.nodeid;
+	req.dst.node = SCIF_MGMT_NODE;
+	req.payload[3] = (u64)&node_info;
+
+	/* Wait for a response with SCIF_GET_NODE_INFO only if the send worked */
+	if (!scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], &req))
+		wait_for_completion(&node_info);
+}
diff --git a/drivers/misc/mic/scif/scif_nodeqp.c b/drivers/misc/mic/scif/scif_nodeqp.c
new file mode 100644
index 000000000..6dfdae345
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_nodeqp.c
@@ -0,0 +1,1307 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "../bus/scif_bus.h"
+#include "scif_peer_bus.h"
+#include "scif_main.h"
+#include "scif_nodeqp.h"
+#include "scif_map.h"
+
+/*
+ ************************************************************************
+ * SCIF node Queue Pair (QP) setup flow:
+ *
+ * 1) SCIF driver gets probed with a scif_hw_dev via the scif_hw_bus
+ * 2) scif_setup_qp(..) allocates the local qp and calls
+ * scif_setup_qp_connect(..) which allocates and maps the local
+ * buffer for the inbound QP
+ * 3) The local node updates the device page with the DMA address of the QP
+ * 4) A delayed work is scheduled (qp_dwork) which periodically reads if
+ * the peer node has updated its QP DMA address
+ * 5) Once a valid non zero address is found in the QP DMA address field
+ * in the device page, the local node maps the remote node's QP,
+ * updates its outbound QP and sends a SCIF_INIT message to the peer
+ * 6) The SCIF_INIT message is received by the peer node QP interrupt bottom
+ * half handler by calling scif_init(..)
+ * 7) scif_init(..) registers a new SCIF peer node by calling
+ * scif_peer_register_device(..) which signifies the addition of a new
+ * SCIF node
+ * 8) On the mgmt node, P2P network setup/teardown is initiated if all the
+ * remote nodes are online via scif_p2p_setup(..)
+ * 9) For P2P setup, the host maps the remote nodes' aperture and memory
+ * bars and sends a SCIF_NODE_ADD message to both nodes
+ * 10) As part of scif_nodeadd, both nodes set up their local inbound
+ * QPs and send a SCIF_NODE_ADD_ACK to the mgmt node
+ * 11) As part of scif_node_add_ack(..) the mgmt node forwards the
+ * SCIF_NODE_ADD_ACK to the remote nodes
+ * 12) As part of scif_node_add_ack(..) the remote nodes update their
+ * outbound QPs, make sure they can access memory on the remote node
+ * and then add a new SCIF peer node by calling
+ * scif_peer_register_device(..) which signifies the addition of a new
+ * SCIF node.
+ * 13) The SCIF network is now established across all nodes.
+ *
+ ************************************************************************
+ * SCIF node QP teardown flow (initiated by non mgmt node):
+ *
+ * 1) SCIF driver gets a remove callback with a scif_hw_dev via the scif_hw_bus
+ * 2) The device page QP DMA address field is updated with 0x0
+ * 3) A non mgmt node now cleans up all local data structures and sends a
+ * SCIF_EXIT message to the peer and waits for a SCIF_EXIT_ACK
+ * 4) As part of scif_exit(..) handling scif_disconnect_node(..) is called
+ * 5) scif_disconnect_node(..) sends a SCIF_NODE_REMOVE message to all the
+ * peers and waits for a SCIF_NODE_REMOVE_ACK
+ * 6) As part of scif_node_remove(..) a remote node unregisters the peer
+ * node from the SCIF network and sends a SCIF_NODE_REMOVE_ACK
+ * 7) When the mgmt node has received all the SCIF_NODE_REMOVE_ACKs
+ * it sends itself a node remove message whose handling cleans up local
+ * data structures and unregisters the peer node from the SCIF network
+ * 8) The mgmt node sends a SCIF_EXIT_ACK
+ * 9) Upon receipt of the SCIF_EXIT_ACK the node initiating the teardown
+ * completes the SCIF remove routine
+ * 10) The SCIF network is now torn down for the node initiating the
+ * teardown sequence
+ *
+ ************************************************************************
+ * SCIF node QP teardown flow (initiated by mgmt node):
+ *
+ * 1) SCIF driver gets a remove callback with a scif_hw_dev via the scif_hw_bus
+ * 2) The device page QP DMA address field is updated with 0x0
+ * 3) The mgmt node calls scif_disconnect_node(..)
+ * 4) scif_disconnect_node(..) sends a SCIF_NODE_REMOVE message to all the peers
+ * and waits for a SCIF_NODE_REMOVE_ACK
+ * 5) As part of scif_node_remove(..) a remote node unregisters the peer
+ * node from the SCIF network and sends a SCIF_NODE_REMOVE_ACK
+ * 6) When the mgmt node has received all the SCIF_NODE_REMOVE_ACKs
+ * it unregisters the peer node from the SCIF network
+ * 7) The mgmt node sends a SCIF_EXIT message and waits for a SCIF_EXIT_ACK.
+ * 8) A non mgmt node upon receipt of a SCIF_EXIT message calls scif_stop(..)
+ * which would clean up local data structures for all SCIF nodes and
+ * then send a SCIF_EXIT_ACK back to the mgmt node
+ * 9) Upon receipt of the SCIF_EXIT_ACK the mgmt node sends itself a node
+ * remove message whose handling cleans up local data structures and
+ * destroys any P2P mappings.
+ * 10) The SCIF hardware device for which a remove callback was received is now
+ * disconnected from the SCIF network.
+ */
+/*
+ * Initializes "local" data structures for the QP. Allocates the QP
+ * ring buffer (rb) and initializes the "in bound" queue.
+ *
+ * @qp: queue pair to initialize
+ * @qp_offset: out parameter receiving the mapped address of @qp
+ * @local_size: size in bytes of the inbound ring buffer to allocate
+ * @scifdev: SCIF device the buffers are mapped for
+ *
+ * Returns 0 on success, -ENOMEM if the ring buffer cannot be allocated, or
+ * the error from scif_map_single(). On failure everything set up here is
+ * unmapped and freed again.
+ */
+int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset,
+ int local_size, struct scif_dev *scifdev)
+{
+ void *local_q = NULL;
+ int err = 0;
+ u32 tmp_rd = 0;
+
+ spin_lock_init(&qp->send_lock);
+ spin_lock_init(&qp->recv_lock);
+
+ local_q = kzalloc(local_size, GFP_KERNEL);
+ if (!local_q) {
+ err = -ENOMEM;
+ return err;
+ }
+ err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size);
+ if (err)
+ goto kfree;
+ /*
+ * To setup the inbound_q, the buffer lives locally, the read pointer
+ * is remote and the write pointer is local.
+ */
+ scif_rb_init(&qp->inbound_q,
+ &tmp_rd,
+ &qp->local_write,
+ local_q, get_count_order(local_size));
+ /*
+ * The read pointer is NULL initially and it is unsafe to use the ring
+ * buffer until this changes! (tmp_rd above is only a stack placeholder.)
+ */
+ qp->inbound_q.read_ptr = NULL;
+ err = scif_map_single(qp_offset, qp,
+ scifdev, sizeof(struct scif_qp));
+ if (err)
+ goto unmap;
+ qp->local_qp = *qp_offset;
+ return err;
+unmap:
+ scif_unmap_single(qp->local_buf, scifdev, local_size);
+ qp->local_buf = 0;
+kfree:
+ kfree(local_q);
+ return err;
+}
+
+/*
+ * When the other side has already done its allocation, this is called.
+ *
+ * @qp: local queue pair to initialize
+ * @qp_offset: out parameter receiving the mapped address of @qp
+ * @phys: address of the remote side's queue pair
+ * @local_size: size in bytes of the inbound ring buffer to allocate
+ * @scifdev: SCIF device the queue pair belongs to
+ *
+ * Maps the remote QP and its ring buffer to build the outbound queue, then
+ * allocates and maps a local ring buffer for the inbound queue and finally
+ * maps @qp itself. Returns 0 on success, -EIO if a remote mapping fails or
+ * the remote magic does not match, -ENOMEM if the local buffer cannot be
+ * allocated, or the error from scif_map_single(). All partial setup is
+ * undone on failure.
+ */
+int scif_setup_qp_accept(struct scif_qp *qp, dma_addr_t *qp_offset,
+ dma_addr_t phys, int local_size,
+ struct scif_dev *scifdev)
+{
+ void *local_q;
+ void *remote_q;
+ struct scif_qp *remote_qp;
+ int remote_size;
+ int err = 0;
+
+ spin_lock_init(&qp->send_lock);
+ spin_lock_init(&qp->recv_lock);
+ /* Start by figuring out where we need to point */
+ remote_qp = scif_ioremap(phys, sizeof(struct scif_qp), scifdev);
+ if (!remote_qp)
+ return -EIO;
+ qp->remote_qp = remote_qp;
+ /* Reject a remote structure that does not carry the expected magic */
+ if (qp->remote_qp->magic != SCIFEP_MAGIC) {
+ err = -EIO;
+ goto iounmap;
+ }
+ qp->remote_buf = remote_qp->local_buf;
+ remote_size = qp->remote_qp->inbound_q.size;
+ remote_q = scif_ioremap(qp->remote_buf, remote_size, scifdev);
+ if (!remote_q) {
+ err = -EIO;
+ goto iounmap;
+ }
+ qp->remote_qp->local_write = 0;
+ /*
+ * To setup the outbound_q, the buffer lives in remote memory,
+ * the read pointer is local, the write pointer is remote
+ */
+ scif_rb_init(&qp->outbound_q,
+ &qp->local_read,
+ &qp->remote_qp->local_write,
+ remote_q,
+ get_count_order(remote_size));
+ local_q = kzalloc(local_size, GFP_KERNEL);
+ if (!local_q) {
+ err = -ENOMEM;
+ goto iounmap_1;
+ }
+ err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size);
+ if (err)
+ goto kfree;
+ qp->remote_qp->local_read = 0;
+ /*
+ * To setup the inbound_q, the buffer lives locally, the read pointer
+ * is remote and the write pointer is local
+ */
+ scif_rb_init(&qp->inbound_q,
+ &qp->remote_qp->local_read,
+ &qp->local_write,
+ local_q, get_count_order(local_size));
+ err = scif_map_single(qp_offset, qp, scifdev,
+ sizeof(struct scif_qp));
+ if (err)
+ goto unmap;
+ qp->local_qp = *qp_offset;
+ return err;
+ /* Error unwinding, in reverse order of setup */
+unmap:
+ scif_unmap_single(qp->local_buf, scifdev, local_size);
+ qp->local_buf = 0;
+kfree:
+ kfree(local_q);
+iounmap_1:
+ scif_iounmap(remote_q, remote_size, scifdev);
+ qp->outbound_q.rb_base = NULL;
+iounmap:
+ scif_iounmap(qp->remote_qp, sizeof(struct scif_qp), scifdev);
+ qp->remote_qp = NULL;
+ return err;
+}
+
+/*
+ * scif_setup_qp_connect_response - finish QP setup once the peer's QP is known
+ * @scifdev: SCIF device the queue pair belongs to
+ * @qp: local queue pair, already initialized by scif_setup_qp_connect()
+ * @payload: address of the peer's queue pair
+ *
+ * Maps the peer's QP and ring buffer, initializes the outbound queue on top
+ * of them and re-initializes the inbound queue with the now known remote
+ * read pointer.
+ *
+ * Returns 0 on success, -ENOMEM if the peer QP cannot be mapped, -ENODEV on
+ * a magic mismatch and -EIO if the peer ring buffer cannot be mapped. On
+ * every failure the peer QP mapping is released and qp->remote_qp is reset
+ * to NULL so that a retry does not leak the previous mapping.
+ */
+int scif_setup_qp_connect_response(struct scif_dev *scifdev,
+				   struct scif_qp *qp, u64 payload)
+{
+	int err;
+	void *r_buf;
+	int remote_size;
+	phys_addr_t tmp_phys;
+
+	qp->remote_qp = scif_ioremap(payload, sizeof(struct scif_qp), scifdev);
+	if (!qp->remote_qp)
+		return -ENOMEM;
+
+	if (qp->remote_qp->magic != SCIFEP_MAGIC) {
+		dev_err(&scifdev->sdev->dev,
+			"SCIFEP_MAGIC mismatch between self %d remote %d\n",
+			scif_dev[scif_info.nodeid].node, scifdev->node);
+		err = -ENODEV;
+		goto unmap_remote_qp;
+	}
+
+	tmp_phys = qp->remote_qp->local_buf;
+	remote_size = qp->remote_qp->inbound_q.size;
+	r_buf = scif_ioremap(tmp_phys, remote_size, scifdev);
+	if (!r_buf) {
+		err = -EIO;
+		goto unmap_remote_qp;
+	}
+
+	qp->local_read = 0;
+	scif_rb_init(&qp->outbound_q,
+		     &qp->local_read,
+		     &qp->remote_qp->local_write,
+		     r_buf,
+		     get_count_order(remote_size));
+	/*
+	 * resetup the inbound_q now that we know where the
+	 * inbound_read really is.
+	 */
+	scif_rb_init(&qp->inbound_q,
+		     &qp->remote_qp->local_read,
+		     &qp->local_write,
+		     qp->inbound_q.rb_base,
+		     get_count_order(qp->inbound_q.size));
+	return 0;
+
+unmap_remote_qp:
+	scif_iounmap(qp->remote_qp, sizeof(struct scif_qp), scifdev);
+	qp->remote_qp = NULL;
+	return err;
+}
+
+/*
+ * scif_send_msg_intr - ring the peer's doorbell
+ * @scifdev: SCIF device to interrupt
+ *
+ * P2P devices are interrupted through the hardware device's send_p2p_intr
+ * operation using the device's MMIO information; all other devices use
+ * send_intr. The remote doorbell number is passed in both cases.
+ */
+static __always_inline void
+scif_send_msg_intr(struct scif_dev *scifdev)
+{
+	struct scif_hw_dev *sdev = scifdev->sdev;
+
+	if (!scifdev_is_p2p(scifdev)) {
+		sdev->hw_ops->send_intr(sdev, scifdev->rdb);
+		return;
+	}
+	sdev->hw_ops->send_p2p_intr(sdev, scifdev->rdb, &scifdev->mmio);
+}
+
+/*
+ * scif_qp_response - complete QP setup and announce ourselves to the peer
+ * @phys: address of the peer's queue pair
+ * @scifdev: SCIF device the queue pair belongs to
+ *
+ * Finishes the local QP setup via scif_setup_qp_connect_response() and, if
+ * that succeeded, sends a SCIF_INIT message to the peer. The source node is
+ * filled in like in the other node QP senders so that no uninitialized
+ * header field is transmitted.
+ *
+ * Returns 0 on success or the error from the setup or send step.
+ */
+int scif_qp_response(phys_addr_t phys, struct scif_dev *scifdev)
+{
+	struct scifmsg msg;
+	int err;
+
+	err = scif_setup_qp_connect_response(scifdev, scifdev->qpairs, phys);
+	if (err)
+		return err;
+	/*
+	 * Now that everything is setup and mapped, we're ready
+	 * to tell the peer about our queue's location
+	 */
+	msg.uop = SCIF_INIT;
+	msg.src.node = scif_info.nodeid;
+	msg.dst.node = scifdev->node;
+	return scif_nodeqp_send(scifdev, &msg);
+}
+
+/*
+ * scif_send_exit - send SCIF_EXIT to a device and wait for its ack
+ * @scifdev: SCIF device to notify
+ *
+ * Marks the exit exchange as in progress, sends SCIF_EXIT and, if the send
+ * succeeded, waits up to SCIF_NODE_ALIVE_TIMEOUT for the exit state to
+ * become OP_COMPLETED. The state is reset to OP_IDLE before returning,
+ * whatever the outcome.
+ */
+void scif_send_exit(struct scif_dev *scifdev)
+{
+	struct scifmsg exit_msg;
+
+	scifdev->exit = OP_IN_PROGRESS;
+	exit_msg.uop = SCIF_EXIT;
+	exit_msg.src.node = scif_info.nodeid;
+	exit_msg.dst.node = scifdev->node;
+
+	if (!scif_nodeqp_send(scifdev, &exit_msg))
+		/* Wait for a SCIF_EXIT_ACK message */
+		wait_event_timeout(scif_info.exitwq,
+				   scifdev->exit == OP_COMPLETED,
+				   SCIF_NODE_ALIVE_TIMEOUT);
+
+	scifdev->exit = OP_IDLE;
+}
+
+/*
+ * scif_setup_qp - allocate and set up the node queue pair for a device
+ * @scifdev: SCIF device to set up
+ *
+ * Allocates a zeroed QP, stamps it with SCIFEP_MAGIC, attaches it to the
+ * device and sets up its inbound side with a ring buffer of
+ * SCIF_NODE_QP_SIZE bytes. Returns 0 on success or a negative error code;
+ * on failure the QP is freed and detached from the device again.
+ */
+int scif_setup_qp(struct scif_dev *scifdev)
+{
+	struct scif_qp *qp;
+	int err;
+
+	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+	if (!qp)
+		return -ENOMEM;
+
+	qp->magic = SCIFEP_MAGIC;
+	scifdev->qpairs = qp;
+	err = scif_setup_qp_connect(qp, &scifdev->qp_dma_addr,
+				    SCIF_NODE_QP_SIZE, scifdev);
+	/*
+	 * On success we're as setup as we can be. The inbound_q is setup,
+	 * w/o a usable outbound q. When we get a message, the read_ptr will
+	 * be updated, and we will pull the message.
+	 */
+	if (!err)
+		return 0;
+
+	kfree(scifdev->qpairs);
+	scifdev->qpairs = NULL;
+	return err;
+}
+
+/* Free a scatterlist array allocated by scif_p2p_setsg() */
+static void scif_p2p_freesg(struct scatterlist *sg)
+{
+ kfree(sg);
+}
+
+/*
+ * scif_p2p_setsg - build a scatterlist over a physically contiguous range
+ * @pa: physical start address of the range
+ * @page_size: length in bytes covered by each scatterlist entry
+ * @page_cnt: number of entries
+ *
+ * Entry i describes @page_size bytes starting at @pa + i * @page_size.
+ * Returns the allocated table, or NULL if the allocation failed. The table
+ * is released with scif_p2p_freesg().
+ */
+static struct scatterlist *
+scif_p2p_setsg(phys_addr_t pa, int page_size, int page_cnt)
+{
+	struct scatterlist *table;
+	int idx;
+
+	table = kcalloc(page_cnt, sizeof(struct scatterlist), GFP_KERNEL);
+	if (!table)
+		return NULL;
+
+	sg_init_table(table, page_cnt);
+	for (idx = 0; idx < page_cnt; idx++, pa += page_size) {
+		struct page *pg = pfn_to_page(pa >> PAGE_SHIFT);
+
+		sg_set_page(&table[idx], pg, page_size, 0);
+	}
+	return table;
+}
+
+/*
+ * Init p2p mappings required to access peerdev from scifdev
+ *
+ * @scifdev: device that will access the peer; the scatterlists are DMA
+ *           mapped for this device
+ * @peerdev: device whose MMIO and aperture bars are being described
+ *
+ * Builds one scatterlist over the peer's MMIO bar and one over its aperture
+ * bar, DMA maps both for @scifdev and records the resulting DMA addresses
+ * and the bar lengths (as page counts). Returns the new mapping
+ * information, or NULL if an allocation or mapping step failed, in which
+ * case everything set up so far has been released again.
+ */
+static struct scif_p2p_info *
+scif_init_p2p_info(struct scif_dev *scifdev, struct scif_dev *peerdev)
+{
+	struct scif_p2p_info *p2p;
+	int num_mmio_pages, num_aper_pages, sg_page_shift, err, num_aper_chunks;
+	struct scif_hw_dev *psdev = peerdev->sdev;
+	struct scif_hw_dev *sdev = scifdev->sdev;
+
+	num_mmio_pages = psdev->mmio->len >> PAGE_SHIFT;
+	num_aper_pages = psdev->aper->len >> PAGE_SHIFT;
+
+	p2p = kzalloc(sizeof(*p2p), GFP_KERNEL);
+	if (!p2p)
+		return NULL;
+	p2p->ppi_sg[SCIF_PPI_MMIO] = scif_p2p_setsg(psdev->mmio->pa,
+						    PAGE_SIZE, num_mmio_pages);
+	if (!p2p->ppi_sg[SCIF_PPI_MMIO])
+		goto free_p2p;
+	p2p->sg_nentries[SCIF_PPI_MMIO] = num_mmio_pages;
+	sg_page_shift = get_order(min(psdev->aper->len, (u64)(1 << 30)));
+	num_aper_chunks = num_aper_pages >> (sg_page_shift - PAGE_SHIFT);
+	p2p->ppi_sg[SCIF_PPI_APER] = scif_p2p_setsg(psdev->aper->pa,
+						    1 << sg_page_shift,
+						    num_aper_chunks);
+	/* Do not hand a NULL scatterlist to dma_map_sg() below */
+	if (!p2p->ppi_sg[SCIF_PPI_APER])
+		goto scif_p2p_free;
+	p2p->sg_nentries[SCIF_PPI_APER] = num_aper_chunks;
+	err = dma_map_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+			 num_mmio_pages, DMA_BIDIRECTIONAL);
+	if (err != num_mmio_pages)
+		goto scif_p2p_free;
+	err = dma_map_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+			 num_aper_chunks, DMA_BIDIRECTIONAL);
+	if (err != num_aper_chunks)
+		goto dma_unmap;
+	p2p->ppi_da[SCIF_PPI_MMIO] = sg_dma_address(p2p->ppi_sg[SCIF_PPI_MMIO]);
+	p2p->ppi_da[SCIF_PPI_APER] = sg_dma_address(p2p->ppi_sg[SCIF_PPI_APER]);
+	/* Lengths are kept as page counts; users shift by PAGE_SHIFT */
+	p2p->ppi_len[SCIF_PPI_MMIO] = num_mmio_pages;
+	p2p->ppi_len[SCIF_PPI_APER] = num_aper_pages;
+	p2p->ppi_peer_id = peerdev->node;
+	return p2p;
+dma_unmap:
+	dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+		     p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL);
+scif_p2p_free:
+	/* The APER entry may still be NULL here; freeing NULL is a no-op */
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+free_p2p:
+	kfree(p2p);
+	return NULL;
+}
+
+/*
+ * scif_deinit_p2p_info - undo scif_init_p2p_info()
+ * @scifdev: device the scatterlists were DMA mapped for
+ * @p2p: mapping information to release
+ *
+ * Unmaps both scatterlists, frees them and frees @p2p itself.
+ */
+static void scif_deinit_p2p_info(struct scif_dev *scifdev,
+				 struct scif_p2p_info *p2p)
+{
+	struct scif_hw_dev *sdev = scifdev->sdev;
+
+	dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+		     p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL);
+	dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+		     p2p->sg_nentries[SCIF_PPI_APER], DMA_BIDIRECTIONAL);
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+	kfree(p2p);
+}
+
+/**
+ * scif_node_connect: Respond to SCIF_NODE_CONNECT interrupt message
+ * @scifdev: Device of the node that requested the connection
+ * @dst: Destination node
+ *
+ * Connect the src and dst node by setting up the p2p connection
+ * between them. Management node here acts like a proxy.
+ *
+ * The request is ignored if @dst is out of range, if the destination is
+ * not alive, or if a mapping for the requesting node already exists on the
+ * destination. If the second of the two mappings cannot be created, the
+ * first one is released again instead of being leaked.
+ */
+static void scif_node_connect(struct scif_dev *scifdev, int dst)
+{
+	struct scif_dev *dev_j = scifdev;
+	struct scif_dev *dev_i = NULL;
+	struct scif_p2p_info *p2p_ij = NULL;	/* bus addr for j from i */
+	struct scif_p2p_info *p2p_ji = NULL;	/* bus addr for i from j */
+	struct scif_p2p_info *p2p;
+	struct scifmsg msg;
+	int err;
+	u64 tmppayload;
+
+	if (dst < 1 || dst > scif_info.maxid)
+		return;
+
+	dev_i = &scif_dev[dst];
+
+	if (!_scifdev_alive(dev_i))
+		return;
+	/*
+	 * If the p2p connection is already setup or in the process of setting
+	 * up then just ignore this request. The requested node will get
+	 * informed by SCIF_NODE_ADD_ACK or SCIF_NODE_ADD_NACK
+	 */
+	list_for_each_entry(p2p, &dev_i->p2p, ppi_list) {
+		if (p2p->ppi_peer_id == dev_j->node)
+			return;
+	}
+	p2p_ij = scif_init_p2p_info(dev_i, dev_j);
+	if (!p2p_ij)
+		return;
+	p2p_ji = scif_init_p2p_info(dev_j, dev_i);
+	if (!p2p_ji) {
+		/* Nothing references p2p_ij yet, so release it here */
+		scif_deinit_p2p_info(dev_i, p2p_ij);
+		return;
+	}
+	list_add_tail(&p2p_ij->ppi_list, &dev_i->p2p);
+	list_add_tail(&p2p_ji->ppi_list, &dev_j->p2p);
+
+	/*
+	 * Send a SCIF_NODE_ADD to dev_i, pass it its bus address
+	 * as seen from dev_j
+	 */
+	msg.uop = SCIF_NODE_ADD;
+	msg.src.node = dev_j->node;
+	msg.dst.node = dev_i->node;
+
+	msg.payload[0] = p2p_ji->ppi_da[SCIF_PPI_APER];
+	msg.payload[1] = p2p_ij->ppi_da[SCIF_PPI_MMIO];
+	msg.payload[2] = p2p_ij->ppi_da[SCIF_PPI_APER];
+	msg.payload[3] = p2p_ij->ppi_len[SCIF_PPI_APER] << PAGE_SHIFT;
+
+	err = scif_nodeqp_send(dev_i, &msg);
+	if (err) {
+		dev_err(&scifdev->sdev->dev,
+			"%s %d error %d\n", __func__, __LINE__, err);
+		return;
+	}
+
+	/* Same as above but to dev_j */
+	msg.uop = SCIF_NODE_ADD;
+	msg.src.node = dev_i->node;
+	msg.dst.node = dev_j->node;
+
+	/* Swap the two aperture addresses for the opposite direction */
+	tmppayload = msg.payload[0];
+	msg.payload[0] = msg.payload[2];
+	msg.payload[2] = tmppayload;
+	msg.payload[1] = p2p_ji->ppi_da[SCIF_PPI_MMIO];
+	msg.payload[3] = p2p_ji->ppi_len[SCIF_PPI_APER] << PAGE_SHIFT;
+
+	scif_nodeqp_send(dev_j, &msg);
+}
+
+/*
+ * scif_p2p_setup: Connect every ordered pair of distinct nodes.
+ *
+ * Does nothing unless p2p is enabled and every node in 1..maxid is alive.
+ * The visible caller, scif_qp_response_ack(), holds scif_info.conflock.
+ */
+static void scif_p2p_setup(void)
+{
+	int src, dst;
+
+	if (!scif_info.p2p_enable)
+		return;
+
+	/* Bail out as soon as one node is found that is not alive */
+	for (src = 1; src <= scif_info.maxid; src++) {
+		if (!_scifdev_alive(&scif_dev[src]))
+			return;
+	}
+
+	for (src = 1; src <= scif_info.maxid; src++) {
+		for (dst = 1; dst <= scif_info.maxid; dst++) {
+			if (src != dst)
+				scif_node_connect(&scif_dev[src], dst);
+		}
+	}
+}
+
+/*
+ * scif_qp_response_ack: Work handler behind scif_dev.init_msg_work.
+ * @work: the init_msg_work member of the scif_dev being brought up
+ *
+ * Registers the peer device for the node and, on the mgmt node, runs the
+ * p2p setup with scif_info.conflock held.
+ */
+void scif_qp_response_ack(struct work_struct *work)
+{
+	struct scif_peer_dev *spdev;
+	struct scif_dev *scifdev;
+
+	scifdev = container_of(work, struct scif_dev, init_msg_work);
+
+	/* Drop the INIT message if it has already been received */
+	if (_scifdev_alive(scifdev))
+		return;
+
+	spdev = scif_peer_register_device(scifdev);
+	if (IS_ERR(spdev) || !scif_is_mgmt_node())
+		return;
+
+	mutex_lock(&scif_info.conflock);
+	scif_p2p_setup();
+	mutex_unlock(&scif_info.conflock);
+}
+
+/*
+ * Printable names of the node QP messages. scif_display_message() indexes
+ * this table with msg->uop, so the entries must stay in opcode order.
+ */
+static char *message_types[] = {"BAD",
+ "INIT",
+ "EXIT",
+ "SCIF_EXIT_ACK",
+ "SCIF_NODE_ADD",
+ "SCIF_NODE_ADD_ACK",
+ "SCIF_NODE_ADD_NACK",
+ "REMOVE_NODE",
+ "REMOVE_NODE_ACK",
+ "CNCT_REQ",
+ "CNCT_GNT",
+ "CNCT_GNTACK",
+ "CNCT_GNTNACK",
+ "CNCT_REJ",
+ "DISCNCT",
+ "DISCNT_ACK",
+ "CLIENT_SENT",
+ "CLIENT_RCVD",
+ "SCIF_GET_NODE_INFO"};
+
+/*
+ * scif_display_message: Log a node QP message when message logging is on.
+ * @scifdev: device whose sdev is used for the log output
+ * @msg: message to print
+ * @label: prefix for the log line ("Sent" / "Rcvd" at the visible call sites)
+ */
+static void
+scif_display_message(struct scif_dev *scifdev, struct scifmsg *msg,
+		     const char *label)
+{
+	struct device *dev;
+
+	if (!scif_info.en_msg_log)
+		return;
+
+	dev = &scifdev->sdev->dev;
+	if (msg->uop <= SCIF_MAX_MSG) {
+		dev_info(dev,
+			 "%s: msg type %s, src %d:%d, dest %d:%d payload 0x%llx:0x%llx:0x%llx:0x%llx\n",
+			 label, message_types[msg->uop],
+			 msg->src.node, msg->src.port,
+			 msg->dst.node, msg->dst.port,
+			 msg->payload[0], msg->payload[1],
+			 msg->payload[2], msg->payload[3]);
+		return;
+	}
+	/* Opcode has no entry in message_types[] */
+	dev_err(dev, "%s: unknown msg type %d\n", label, msg->uop);
+}
+
+/*
+ * _scif_nodeqp_send: Write a message to the outbound node QP ring buffer.
+ * @scifdev: destination SCIF device
+ * @msg: message to send
+ *
+ * Retries the ring buffer write in 1 ms steps while holding send_lock and
+ * gives up with -ENODEV after roughly SCIF_NODEQP_SEND_TO_MSEC attempts.
+ * On success the peer is notified: by queueing intr_bh for the loopback
+ * device, by scif_send_msg_intr() otherwise.
+ *
+ * Returns 0 on success, -EINVAL when the device has no queue pair, or
+ * -ENODEV on timeout.
+ */
+int _scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+#define SCIF_NODEQP_SEND_TO_MSEC (3 * 1000)
+	struct scif_qp *qp = scifdev->qpairs;
+	int err, retries = 0;
+
+	scif_display_message(scifdev, msg, "Sent");
+	if (!qp) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	spin_lock(&qp->send_lock);
+	for (;;) {
+		err = scif_rb_write(&qp->outbound_q, msg,
+				    sizeof(struct scifmsg));
+		if (!err)
+			break;
+		mdelay(1);
+		if (retries++ > (SCIF_NODEQP_SEND_TO_MSEC)) {
+			err = -ENODEV;
+			break;
+		}
+	}
+	if (!err)
+		scif_rb_commit(&qp->outbound_q);
+	spin_unlock(&qp->send_lock);
+
+	if (err)
+		goto error;
+
+	if (scifdev_self(scifdev)) {
+		/*
+		 * For loopback we need to emulate an interrupt by
+		 * queuing work for the queue handling real node
+		 * Qp interrupts.
+		 */
+		queue_work(scifdev->intr_wq, &scifdev->intr_bh);
+	} else {
+		scif_send_msg_intr(scifdev);
+	}
+	return 0;
+error:
+	dev_dbg(&scifdev->sdev->dev,
+		"%s %d error %d uop %d\n",
+		__func__, __LINE__, err, msg->uop);
+	return err;
+}
+
+/**
+ * scif_nodeqp_send - Send a message on the node queue pair
+ * @scifdev: Scif Device.
+ * @msg: The message to be sent.
+ *
+ * Messages with an opcode up to SCIF_EXIT_ACK are sent unconditionally.
+ * Any other message is refused with -ENODEV once the exit flow has begun,
+ * and is sent with a reference held on the peer device otherwise.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct device *spdev;
+	int err;
+
+	if (msg->uop <= SCIF_EXIT_ACK)
+		return _scif_nodeqp_send(scifdev, msg);
+
+	/* Dont send messages once the exit flow has begun */
+	if (scifdev->exit != OP_IDLE)
+		return -ENODEV;
+
+	spdev = scif_get_peer_dev(scifdev);
+	if (IS_ERR(spdev))
+		return PTR_ERR(spdev);
+
+	err = _scif_nodeqp_send(scifdev, msg);
+	scif_put_peer_dev(spdev);
+	return err;
+}
+
+/*
+ * scif_misc_handler:
+ * @work: work item; not used by the handler
+ *
+ * Work queue handler for servicing miscellaneous SCIF tasks.
+ * Examples include:
+ * 1) Cleanup of zombie endpoints.
+ */
+void scif_misc_handler(struct work_struct *work)
+{
+ scif_cleanup_zombie_epd();
+}
+
+/**
+ * scif_init() - Respond to SCIF_INIT interrupt message
+ * @scifdev: Remote SCIF device node
+ * @msg: Interrupt message (not examined by this handler)
+ *
+ * Flushes the pending qp_dwork and then defers the rest of the INIT
+ * handling to init_msg_work.
+ */
+static __always_inline void
+scif_init(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ /*
+ * Allow the thread waiting for device page updates for the peer QP DMA
+ * address to complete initializing the inbound_q.
+ */
+ flush_delayed_work(&scifdev->qp_dwork);
+ /*
+ * Delegate the peer device registration to a workqueue, otherwise if
+ * SCIF client probe (called during peer device registration) calls
+ * scif_connect(..), it will block the message processing thread causing
+ * a deadlock.
+ */
+ schedule_work(&scifdev->init_msg_work);
+}
+
+/**
+ * scif_exit() - Respond to SCIF_EXIT interrupt message
+ * @scifdev: Remote SCIF device node
+ * @unused: Interrupt message (not examined by this handler)
+ *
+ * This function stops the SCIF interface for the node which sent
+ * the SCIF_EXIT message and starts waiting for that node to
+ * resetup the queue pair again.
+ */
+static __always_inline void
+scif_exit(struct scif_dev *scifdev, struct scifmsg *unused)
+{
+ /* Record that an EXIT_ACK is still owed to this node */
+ scifdev->exit_ack_pending = true;
+ if (scif_is_mgmt_node())
+ scif_disconnect_node(scifdev->node, false);
+ else
+ scif_stop(scifdev);
+ /* Re-arm qp_dwork to run one second from now */
+ schedule_delayed_work(&scifdev->qp_dwork,
+ msecs_to_jiffies(1000));
+}
+
+/**
+ * scif_exit_ack() - Respond to SCIF_EXIT_ACK interrupt message
+ * @scifdev: Remote SCIF device node
+ * @unused: Interrupt message (not examined by this handler)
+ *
+ * Marks the exit of @scifdev as completed and wakes up the waiters on
+ * scif_info.exitwq.
+ */
+static __always_inline void
+scif_exit_ack(struct scif_dev *scifdev, struct scifmsg *unused)
+{
+ scifdev->exit = OP_COMPLETED;
+ wake_up(&scif_info.exitwq);
+}
+
+/**
+ * scif_node_add() - Respond to SCIF_NODE_ADD interrupt message
+ * @scifdev: Remote SCIF device node
+ * @msg: Interrupt message
+ *
+ * When the mgmt node driver has finished initializing a MIC node queue pair it
+ * marks the node as online. It then looks for all currently online MIC cards
+ * and send a SCIF_NODE_ADD message to identify the ID of the new card for
+ * peer to peer initialization
+ *
+ * The local node allocates its incoming queue and sends its address in the
+ * SCIF_NODE_ADD_ACK message back to the mgmt node, the mgmt node "reflects"
+ * this message to the new node
+ *
+ * @msg is reused for the reply: on success it is turned into a
+ * SCIF_NODE_ADD_ACK carrying the QP offset (payload[0]) and the doorbell
+ * (payload[2]); on any failure it is turned into a SCIF_NODE_ADD_NACK. In
+ * both cases the reply is sent through the mgmt node.
+ */
+static __always_inline void
+scif_node_add(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_dev *newdev;
+ dma_addr_t qp_offset;
+ int qp_connect;
+ struct scif_hw_dev *sdev;
+
+ dev_dbg(&scifdev->sdev->dev,
+ "Scifdev %d:%d received NODE_ADD msg for node %d\n",
+ scifdev->node, msg->dst.node, msg->src.node);
+ dev_dbg(&scifdev->sdev->dev,
+ "Remote address for this node's aperture %llx\n",
+ msg->payload[0]);
+ /* NOTE(review): msg->src.node is used as an array index unchecked */
+ newdev = &scif_dev[msg->src.node];
+ newdev->node = msg->src.node;
+ /* The new peer shares the hardware device of the mgmt node entry */
+ newdev->sdev = scif_dev[SCIF_MGMT_NODE].sdev;
+ sdev = newdev->sdev;
+
+ if (scif_setup_intr_wq(newdev)) {
+ dev_err(&scifdev->sdev->dev,
+ "failed to setup interrupts for %d\n", msg->src.node);
+ goto interrupt_setup_error;
+ }
+ /* payload[1] is the address of the peer's MMIO space */
+ newdev->mmio.va = ioremap_nocache(msg->payload[1], sdev->mmio->len);
+ if (!newdev->mmio.va) {
+ dev_err(&scifdev->sdev->dev,
+ "failed to map mmio for %d\n", msg->src.node);
+ goto mmio_map_error;
+ }
+ newdev->qpairs = kzalloc(sizeof(*newdev->qpairs), GFP_KERNEL);
+ if (!newdev->qpairs)
+ goto qp_alloc_error;
+ /*
+ * Set the base address of the remote node's memory since it gets
+ * added to qp_offset
+ */
+ newdev->base_addr = msg->payload[0];
+
+ qp_connect = scif_setup_qp_connect(newdev->qpairs, &qp_offset,
+ SCIF_NODE_QP_SIZE, newdev);
+ if (qp_connect) {
+ dev_err(&scifdev->sdev->dev,
+ "failed to setup qp_connect %d\n", qp_connect);
+ goto qp_connect_error;
+ }
+
+ /* Grab a doorbell and hook the SCIF interrupt handler onto it */
+ newdev->db = sdev->hw_ops->next_db(sdev);
+ newdev->cookie = sdev->hw_ops->request_irq(sdev, scif_intr_handler,
+ "SCIF_INTR", newdev,
+ newdev->db);
+ if (IS_ERR(newdev->cookie))
+ goto qp_connect_error;
+ newdev->qpairs->magic = SCIFEP_MAGIC;
+ newdev->qpairs->qp_state = SCIF_QP_OFFLINE;
+
+ /* Turn the request around into the ACK reply */
+ msg->uop = SCIF_NODE_ADD_ACK;
+ msg->dst.node = msg->src.node;
+ msg->src.node = scif_info.nodeid;
+ msg->payload[0] = qp_offset;
+ msg->payload[2] = newdev->db;
+ scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], msg);
+ return;
+ /* Error unwinding: each label undoes the steps completed before it */
+qp_connect_error:
+ kfree(newdev->qpairs);
+ newdev->qpairs = NULL;
+qp_alloc_error:
+ iounmap(newdev->mmio.va);
+ newdev->mmio.va = NULL;
+mmio_map_error:
+interrupt_setup_error:
+ dev_err(&scifdev->sdev->dev,
+ "node add failed for node %d\n", msg->src.node);
+ /* Turn the request around into the NACK reply */
+ msg->uop = SCIF_NODE_ADD_NACK;
+ msg->dst.node = msg->src.node;
+ msg->src.node = scif_info.nodeid;
+ scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], msg);
+}
+
+/*
+ * scif_poll_qp_state: Delayed work polling a p2p queue pair until it is up.
+ * @work: the p2p_dwork member of the peer scif_dev
+ *
+ * Registers the peer device once both the local and the remote QP report
+ * SCIF_QP_ONLINE. Otherwise the work re-arms itself every
+ * SCIF_NODE_QP_TIMEOUT ms; when the retry counter reaches
+ * SCIF_NODE_QP_RETRY the remote QP is marked offline and the peer is
+ * cleaned up.
+ */
+void scif_poll_qp_state(struct work_struct *work)
+{
+#define SCIF_NODE_QP_RETRY 100
+#define SCIF_NODE_QP_TIMEOUT 100
+	struct scif_dev *peerdev = container_of(work, struct scif_dev,
+						p2p_dwork.work);
+	struct scif_qp *qp = &peerdev->qpairs[0];
+
+	if (qp->qp_state == SCIF_QP_ONLINE &&
+	    qp->remote_qp->qp_state == SCIF_QP_ONLINE) {
+		scif_peer_register_device(peerdev);
+		return;
+	}
+
+	if (peerdev->p2p_retry++ != SCIF_NODE_QP_RETRY) {
+		/* Not up yet: look again after the poll interval */
+		schedule_delayed_work(&peerdev->p2p_dwork,
+				      msecs_to_jiffies(SCIF_NODE_QP_TIMEOUT));
+		return;
+	}
+
+	/* Out of retries: give up on this peer */
+	dev_err(&peerdev->sdev->dev,
+		"Warning: QP check timeout with state %d\n",
+		qp->qp_state);
+	dev_err(&peerdev->sdev->dev,
+		"%s %d remote node %d offline, state = 0x%x\n",
+		__func__, __LINE__, peerdev->node, qp->qp_state);
+	qp->remote_qp->qp_state = SCIF_QP_OFFLINE;
+	scif_cleanup_scifdev(peerdev);
+}
+
+/**
+ * scif_node_add_ack() - Respond to SCIF_NODE_ADD_ACK interrupt message
+ * @scifdev: Remote SCIF device node
+ * @msg: Interrupt message
+ *
+ * On the mgmt node the message is stamped with the current maxid in
+ * payload[1] and forwarded to the destination node. On any other node the
+ * local end of the QP to the sending node is completed with the offset in
+ * payload[0], the remote doorbell is taken from payload[2] and p2p_dwork
+ * is scheduled.
+ */
+static __always_inline void
+scif_node_add_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_dev *peerdev;
+	struct scif_qp *qp;
+
+	dev_dbg(&scifdev->sdev->dev,
+		"Scifdev %d received SCIF_NODE_ADD_ACK msg src %d dst %d\n",
+		scifdev->node, msg->src.node, msg->dst.node);
+	dev_dbg(&scifdev->sdev->dev,
+		"payload %llx %llx %llx %llx\n", msg->payload[0],
+		msg->payload[1], msg->payload[2], msg->payload[3]);
+
+	if (scif_is_mgmt_node()) {
+		/*
+		 * the lock serializes with scif_qp_response_ack. The mgmt node
+		 * is forwarding the NODE_ADD_ACK message from src to dst we
+		 * need to make sure that the dst has already received a
+		 * NODE_ADD for src and setup its end of the qp to dst
+		 */
+		mutex_lock(&scif_info.conflock);
+		msg->payload[1] = scif_info.maxid;
+		scif_nodeqp_send(&scif_dev[msg->dst.node], msg);
+		mutex_unlock(&scif_info.conflock);
+		return;
+	}
+
+	peerdev = &scif_dev[msg->src.node];
+	peerdev->sdev = scif_dev[SCIF_MGMT_NODE].sdev;
+	peerdev->node = msg->src.node;
+	qp = &peerdev->qpairs[0];
+
+	if (scif_setup_qp_connect_response(peerdev, qp, msg->payload[0])) {
+		scif_cleanup_scifdev(peerdev);
+		return;
+	}
+
+	peerdev->rdb = msg->payload[2];
+	qp->remote_qp->qp_state = SCIF_QP_ONLINE;
+	schedule_delayed_work(&peerdev->p2p_dwork, 0);
+}
+
+/**
+ * scif_node_add_nack: Respond to SCIF_NODE_ADD_NACK interrupt message
+ * @scifdev: Remote SCIF device node
+ * @msg: Interrupt message
+ *
+ * The mgmt node forwards the NACK to the destination node named in the
+ * message; every other node ignores it.
+ */
+static __always_inline void
+scif_node_add_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	if (!scif_is_mgmt_node())
+		return;
+
+	dev_dbg(&scifdev->sdev->dev,
+		"SCIF_NODE_ADD_NACK received from %d\n", scifdev->node);
+	scif_nodeqp_send(&scif_dev[msg->dst.node], msg);
+}
+
+/*
+ * scif_node_remove: Handle SCIF_NODE_REMOVE message
+ * @scifdev: Remote SCIF device node (not used here)
+ * @msg: Interrupt message; payload[0] holds the id of the node to remove
+ *
+ * Flags the node as having a remove ack pending and starts its removal.
+ */
+static __always_inline void
+scif_node_remove(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	int node = msg->payload[0];
+
+	scif_dev[node].node_remove_ack_pending = true;
+	scif_handle_remove_node(node);
+}
+
+/*
+ * scif_node_remove_ack: Handle SCIF_NODE_REMOVE_ACK message
+ * @scifdev: Remote SCIF device node (not used here)
+ * @msg: Interrupt message; payload[0] holds the id of the removed node
+ *
+ * The peer has acked a SCIF_NODE_REMOVE message: count the response and
+ * wake up whoever waits on the disconnect wait queue of that node.
+ */
+static __always_inline void
+scif_node_remove_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_dev *removed = &scif_dev[msg->payload[0]];
+
+	atomic_inc(&removed->disconn_rescnt);
+	wake_up(&removed->disconn_wq);
+}
+
+/**
+ * scif_get_node_info_resp: Respond to SCIF_GET_NODE_INFO interrupt message
+ * @scifdev: Remote SCIF device node
+ * @msg: Interrupt message
+ *
+ * On the mgmt node the message is turned around and sent back with maxid
+ * in payload[1] and total in payload[2]. On any other node those two values
+ * are stored in scif_info and the completion whose pointer travels in
+ * payload[3] is completed.
+ */
+static __always_inline void
+scif_get_node_info_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct completion *node_info;
+
+	if (!scif_is_mgmt_node()) {
+		node_info = (struct completion *)msg->payload[3];
+
+		mutex_lock(&scif_info.conflock);
+		scif_info.maxid = msg->payload[1];
+		scif_info.total = msg->payload[2];
+		complete_all(node_info);
+		mutex_unlock(&scif_info.conflock);
+		return;
+	}
+
+	swap(msg->dst.node, msg->src.node);
+	mutex_lock(&scif_info.conflock);
+	msg->payload[1] = scif_info.maxid;
+	msg->payload[2] = scif_info.total;
+	mutex_unlock(&scif_info.conflock);
+	scif_nodeqp_send(scifdev, msg);
+}
+
+/*
+ * scif_msg_unknown: Handler for opcode 0, the error slot of scif_intr_func[].
+ * @scifdev: Remote SCIF device node the message came from
+ * @msg: Interrupt message
+ *
+ * Only logs the opcode and the node id.
+ */
+static void
+scif_msg_unknown(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	/* Bogus Node Qp Message? */
+	dev_err(&scifdev->sdev->dev,
+		"Unknown message 0x%x scifdev->node 0x%x\n",
+		msg->uop, scifdev->node);
+}
+
+/*
+ * Dispatch table for node QP messages, indexed by msg->uop. It is used by
+ * scif_nodeqp_msg_handler() and holds one handler per opcode from 0 to
+ * SCIF_MAX_MSG.
+ */
+static void (*scif_intr_func[SCIF_MAX_MSG + 1])
+ (struct scif_dev *, struct scifmsg *msg) = {
+ scif_msg_unknown, /* Error */
+ scif_init, /* SCIF_INIT */
+ scif_exit, /* SCIF_EXIT */
+ scif_exit_ack, /* SCIF_EXIT_ACK */
+ scif_node_add, /* SCIF_NODE_ADD */
+ scif_node_add_ack, /* SCIF_NODE_ADD_ACK */
+ scif_node_add_nack, /* SCIF_NODE_ADD_NACK */
+ scif_node_remove, /* SCIF_NODE_REMOVE */
+ scif_node_remove_ack, /* SCIF_NODE_REMOVE_ACK */
+ scif_cnctreq, /* SCIF_CNCT_REQ */
+ scif_cnctgnt, /* SCIF_CNCT_GNT */
+ scif_cnctgnt_ack, /* SCIF_CNCT_GNTACK */
+ scif_cnctgnt_nack, /* SCIF_CNCT_GNTNACK */
+ scif_cnctrej, /* SCIF_CNCT_REJ */
+ scif_discnct, /* SCIF_DISCNCT */
+ scif_discnt_ack, /* SCIF_DISCNT_ACK */
+ scif_clientsend, /* SCIF_CLIENT_SENT */
+ scif_clientrcvd, /* SCIF_CLIENT_RCVD */
+ scif_get_node_info_resp,/* SCIF_GET_NODE_INFO */
+};
+
+/* Highest opcode that scif_nodeqp_msg_handler() will dispatch */
+static int scif_max_msg_id = SCIF_MAX_MSG;
+
+/**
+ * scif_nodeqp_msg_handler() - Common handler for node messages
+ * @scifdev: Remote device to respond to
+ * @qp: Remote memory pointer (not used by the dispatcher itself)
+ * @msg: The message to be handled.
+ *
+ * Logs the message when message logging is enabled, then hands it to the
+ * scif_intr_func[] entry selected by the opcode. Opcodes beyond
+ * scif_max_msg_id are only reported.
+ */
+static void
+scif_nodeqp_msg_handler(struct scif_dev *scifdev,
+			struct scif_qp *qp, struct scifmsg *msg)
+{
+	scif_display_message(scifdev, msg, "Rcvd");
+
+	if (msg->uop <= (u32)scif_max_msg_id) {
+		scif_intr_func[msg->uop](scifdev, msg);
+		return;
+	}
+
+	/* Out of range: report it the same way as the table's error slot */
+	scif_msg_unknown(scifdev, msg);
+}
+
+/**
+ * scif_nodeqp_intrhandler() - Interrupt handler for node messages
+ * @scifdev: Remote device to respond to
+ * @qp: Remote memory pointer
+ *
+ * This routine is triggered by the interrupt mechanism. It drains the
+ * inbound node queue RB one message at a time, passing each message to the
+ * Node QP message handling routine.
+ */
+void scif_nodeqp_intrhandler(struct scif_dev *scifdev, struct scif_qp *qp)
+{
+	struct scifmsg msg;
+
+	while (scif_rb_get_next(&qp->inbound_q, &msg, sizeof(msg))) {
+		scif_nodeqp_msg_handler(scifdev, qp, &msg);
+		/*
+		 * The node queue pair is unmapped so skip the read pointer
+		 * update after receipt of a SCIF_EXIT_ACK
+		 */
+		if (msg.uop == SCIF_EXIT_ACK)
+			break;
+		scif_rb_update_read_ptr(&qp->inbound_q);
+	}
+}
+
+/**
+ * scif_loopb_wq_handler - Loopback Workqueue Handler.
+ * @unused: loop back work
+ *
+ * Runs on the loopback work queue. Under the recv lock it takes the message
+ * at the head of the loopback message list, then (with the lock dropped)
+ * passes it to the node QP message handler and frees it. This repeats until
+ * the list is found empty.
+ */
+static void scif_loopb_wq_handler(struct work_struct *unused)
+{
+	struct scif_dev *scifdev = scif_info.loopb_dev;
+	struct scif_qp *qp = scifdev->qpairs;
+	struct scif_loopb_msg *msg;
+
+	for (;;) {
+		spin_lock(&qp->recv_lock);
+		if (list_empty(&scif_info.loopb_recv_q)) {
+			spin_unlock(&qp->recv_lock);
+			break;
+		}
+		msg = list_first_entry(&scif_info.loopb_recv_q,
+				       struct scif_loopb_msg, list);
+		list_del(&msg->list);
+		spin_unlock(&qp->recv_lock);
+
+		scif_nodeqp_msg_handler(scifdev, qp, &msg->msg);
+		kfree(msg);
+	}
+}
+
+/**
+ * scif_loopb_msg_handler() - Workqueue handler for loopback messages.
+ * @scifdev: SCIF device
+ * @qp: Queue pair.
+ *
+ * This work queue routine is triggered when a loopback message is received.
+ *
+ * Receiving Node Qp messages on a loopback SCIF device goes through two
+ * workqueues, which *normal* non-loopback SCIF devices do not need. The
+ * extra workqueue avoids the following classic deadlock:
+ *
+ * Thread A tries to send a message on a loopback SCIF device and blocks since
+ * there is no space in the RB while it has the send_lock held or another
+ * lock called lock X for example.
+ *
+ * Thread B: The Loopback Node QP message receive workqueue receives the message
+ * and tries to send a message (eg an ACK) to the loopback SCIF device. It tries
+ * to grab the send lock again or lock X and deadlocks with Thread A. The RB
+ * cannot be drained any further due to this classic deadlock.
+ *
+ * The extra level of indirection works as follows:
+ * 1) The first workqueue whose handler is scif_loopb_msg_handler reads
+ * messages from the Node QP RB, adds them to a list and queues work for the
+ * second workqueue.
+ *
+ * 2) The second workqueue whose handler is scif_loopb_wq_handler dequeues
+ * messages from the list, handles them, frees up the memory and dequeues
+ * more elements from the list if possible.
+ *
+ * Returns -ENOMEM when a list element cannot be allocated, otherwise the
+ * size returned by the ring buffer read that ended the loop.
+ */
+int
+scif_loopb_msg_handler(struct scif_dev *scifdev, struct scif_qp *qp)
+{
+	struct scif_loopb_msg *entry;
+	int read_size;
+
+	for (;;) {
+		entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+		if (!entry)
+			return -ENOMEM;
+
+		read_size = scif_rb_get_next(&qp->inbound_q, &entry->msg,
+					     sizeof(struct scifmsg));
+		if (read_size != sizeof(struct scifmsg)) {
+			/* No complete message was read: drop the element */
+			kfree(entry);
+			scif_rb_update_read_ptr(&qp->inbound_q);
+			return read_size;
+		}
+
+		spin_lock(&qp->recv_lock);
+		list_add_tail(&entry->list, &scif_info.loopb_recv_q);
+		spin_unlock(&qp->recv_lock);
+		queue_work(scif_info.loopb_wq, &scif_info.loopb_work);
+		scif_rb_update_read_ptr(&qp->inbound_q);
+	}
+}
+
+/**
+ * scif_setup_loopback_qp - One time setup work for Loopback Node Qp.
+ * @scifdev: SCIF device
+ *
+ * Sets up the required loopback workqueues, queue pairs and ring buffers
+ * and registers the peer device for the loopback node.
+ *
+ * Returns 0 on success or a negative error code. On failure everything
+ * set up so far is released again and scifdev->qpairs is left NULL.
+ */
+int scif_setup_loopback_qp(struct scif_dev *scifdev)
+{
+	int err = 0;
+	void *local_q;
+	struct scif_qp *qp;
+	struct scif_peer_dev *spdev;
+
+	err = scif_setup_intr_wq(scifdev);
+	if (err)
+		goto exit;
+	INIT_LIST_HEAD(&scif_info.loopb_recv_q);
+	snprintf(scif_info.loopb_wqname, sizeof(scif_info.loopb_wqname),
+		 "SCIF LOOPB %d", scifdev->node);
+	scif_info.loopb_wq =
+		alloc_ordered_workqueue(scif_info.loopb_wqname, 0);
+	if (!scif_info.loopb_wq) {
+		err = -ENOMEM;
+		goto destroy_intr;
+	}
+	INIT_WORK(&scif_info.loopb_work, scif_loopb_wq_handler);
+	/* Allocate Self Qpair */
+	scifdev->qpairs = kzalloc(sizeof(*scifdev->qpairs), GFP_KERNEL);
+	if (!scifdev->qpairs) {
+		err = -ENOMEM;
+		goto destroy_loopb_wq;
+	}
+
+	qp = scifdev->qpairs;
+	qp->magic = SCIFEP_MAGIC;
+	spin_lock_init(&qp->send_lock);
+	spin_lock_init(&qp->recv_lock);
+
+	local_q = kzalloc(SCIF_NODE_QP_SIZE, GFP_KERNEL);
+	if (!local_q) {
+		err = -ENOMEM;
+		goto free_qpairs;
+	}
+	/*
+	 * For loopback the inbound_q and outbound_q are essentially the same
+	 * since the Node sends a message on the loopback interface to the
+	 * outbound_q which is then received on the inbound_q.
+	 */
+	scif_rb_init(&qp->outbound_q,
+		     &qp->local_read,
+		     &qp->local_write,
+		     local_q, get_count_order(SCIF_NODE_QP_SIZE));
+
+	scif_rb_init(&qp->inbound_q,
+		     &qp->local_read,
+		     &qp->local_write,
+		     local_q, get_count_order(SCIF_NODE_QP_SIZE));
+	scif_info.nodeid = scifdev->node;
+	spdev = scif_peer_register_device(scifdev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		goto free_local_q;
+	}
+	scif_info.loopb_dev = scifdev;
+	return err;
+free_local_q:
+	kfree(local_q);
+free_qpairs:
+	kfree(scifdev->qpairs);
+	/*
+	 * _scif_nodeqp_send() treats a NULL qpairs as "no queue pair", so do
+	 * not leave a pointer to the freed memory behind.
+	 */
+	scifdev->qpairs = NULL;
+destroy_loopb_wq:
+	destroy_workqueue(scif_info.loopb_wq);
+destroy_intr:
+	scif_destroy_intr_wq(scifdev);
+exit:
+	return err;
+}
+
+/**
+ * scif_destroy_loopback_qp - One time uninit work for Loopback Node Qp
+ * @scifdev: SCIF device
+ *
+ * Unregisters the peer device if there is one, destroys the workqueues and
+ * frees up the Ring Buffer and Queue Pair memory.
+ *
+ * Always returns 0.
+ */
+int scif_destroy_loopback_qp(struct scif_dev *scifdev)
+{
+	struct scif_peer_dev *spdev;
+
+	rcu_read_lock();
+	spdev = rcu_dereference(scifdev->spdev);
+	rcu_read_unlock();
+	if (spdev)
+		scif_peer_unregister_device(spdev);
+	destroy_workqueue(scif_info.loopb_wq);
+	scif_destroy_intr_wq(scifdev);
+	/* inbound_q and outbound_q share one buffer, so it is freed once */
+	kfree(scifdev->qpairs->outbound_q.rb_base);
+	kfree(scifdev->qpairs);
+	/*
+	 * _scif_nodeqp_send() treats a NULL qpairs as "no queue pair", so do
+	 * not leave a pointer to the freed memory behind.
+	 */
+	scifdev->qpairs = NULL;
+	scifdev->sdev = NULL;
+	scif_info.loopb_dev = NULL;
+	return 0;
+}
+
+/*
+ * Release one p2p entry that lives on the p2p list of @owner: unmap both
+ * scatterlists from the owner's device, free them, unlink the entry and
+ * free it.
+ */
+static void scif_p2p_unmap_and_free(struct scif_dev *owner,
+				    struct scif_p2p_info *p2p)
+{
+	dma_unmap_sg(&owner->sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+		     p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL);
+	dma_unmap_sg(&owner->sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+		     p2p->sg_nentries[SCIF_PPI_APER], DMA_BIDIRECTIONAL);
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+	list_del(&p2p->ppi_list);
+	kfree(p2p);
+}
+
+/*
+ * scif_destroy_p2p: Tear down every p2p mapping that involves @scifdev.
+ * @scifdev: node whose p2p state is destroyed
+ *
+ * Runs with scif_info.conflock held for the whole operation.
+ */
+void scif_destroy_p2p(struct scif_dev *scifdev)
+{
+	struct scif_dev *peer_dev;
+	struct scif_p2p_info *p2p;
+	struct list_head *pos, *tmp;
+	int bd;
+
+	mutex_lock(&scif_info.conflock);
+
+	/* Free P2P mappings in the given node for all its peer nodes */
+	list_for_each_safe(pos, tmp, &scifdev->p2p) {
+		p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
+		scif_p2p_unmap_and_free(scifdev, p2p);
+	}
+
+	/* Free P2P mapping created in the peer nodes for the given node */
+	for (bd = SCIF_MGMT_NODE + 1; bd <= scif_info.maxid; bd++) {
+		peer_dev = &scif_dev[bd];
+		list_for_each_safe(pos, tmp, &peer_dev->p2p) {
+			p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
+			if (p2p->ppi_peer_id != scifdev->node)
+				continue;
+			scif_p2p_unmap_and_free(peer_dev, p2p);
+		}
+	}
+
+	mutex_unlock(&scif_info.conflock);
+}
diff --git a/drivers/misc/mic/scif/scif_nodeqp.h b/drivers/misc/mic/scif/scif_nodeqp.h
new file mode 100644
index 000000000..6c0ed6783
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_nodeqp.h
@@ -0,0 +1,183 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_NODEQP
+#define SCIF_NODEQP
+
+#include "scif_rb.h"
+#include "scif_peer_bus.h"
+
+/*
+ * Node QP message opcodes. The values index message_types[] and
+ * scif_intr_func[] in scif_nodeqp.c, so they must stay dense and in order.
+ * NOTE(review): the descriptions of SCIF_CNCT_GNTACK, SCIF_CNCT_GNTNACK and
+ * SCIF_DISCNT_ACK repeat those of neighbouring opcodes and look copy-pasted;
+ * confirm against the handlers before relying on them.
+ */
+#define SCIF_INIT 1 /* First message sent to the peer node for discovery */
+#define SCIF_EXIT 2 /* Last message from the peer informing intent to exit */
+#define SCIF_EXIT_ACK 3 /* Response to SCIF_EXIT message */
+#define SCIF_NODE_ADD 4 /* Tell Online nodes a new node exists */
+#define SCIF_NODE_ADD_ACK 5 /* Confirm to mgmt node sequence is finished */
+#define SCIF_NODE_ADD_NACK 6 /* SCIF_NODE_ADD failed */
+#define SCIF_NODE_REMOVE 7 /* Request to deactivate a SCIF node */
+#define SCIF_NODE_REMOVE_ACK 8 /* Response to a SCIF_NODE_REMOVE message */
+#define SCIF_CNCT_REQ 9 /* Phys addr of Request connection to a port */
+#define SCIF_CNCT_GNT 10 /* Phys addr of new Grant connection request */
+#define SCIF_CNCT_GNTACK 11 /* Error type Reject a connection request */
+#define SCIF_CNCT_GNTNACK 12 /* Error type Reject a connection request */
+#define SCIF_CNCT_REJ 13 /* Error type Reject a connection request */
+#define SCIF_DISCNCT 14 /* Notify peer that connection is being terminated */
+#define SCIF_DISCNT_ACK 15 /* Notify peer that connection is being terminated */
+#define SCIF_CLIENT_SENT 16 /* Notify the peer that data has been written */
+#define SCIF_CLIENT_RCVD 17 /* Notify the peer that data has been read */
+#define SCIF_GET_NODE_INFO 18 /* Get current node mask from the mgmt node*/
+#define SCIF_MAX_MSG SCIF_GET_NODE_INFO
+
+/*
+ * struct scifmsg - Node QP message format
+ *
+ * @src: Source information
+ * @dst: Destination information
+ * @uop: The message opcode
+ * @payload: Unique payload format for each message
+ *
+ * The structure is copied as-is (sizeof(struct scifmsg) bytes) into and out
+ * of the node QP ring buffers.
+ */
+struct scifmsg {
+ struct scif_port_id src;
+ struct scif_port_id dst;
+ u32 uop;
+ u64 payload[4];
+} __packed;
+
+/*
+ * struct scif_qp - Node Queue Pair
+ *
+ * Interesting structure -- a little difficult because we can only
+ * write across the PCIe, so any r/w pointer we need to read is
+ * local. We only need to read the read pointer on the inbound_q
+ * and read the write pointer in the outbound_q
+ *
+ * @magic: Magic value to ensure the peer sees the QP correctly
+ * @outbound_q: The outbound ring buffer for sending messages
+ * @inbound_q: The inbound ring buffer for receiving messages
+ * @local_write: Local write index
+ * @local_read: Local read index
+ * @remote_qp: The remote queue pair
+ * @local_buf: DMA address of local ring buffer
+ * @local_qp: DMA address of the local queue pair data structure
+ * @remote_buf: DMA address of remote ring buffer
+ * @qp_state: QP state i.e. online or offline used for P2P
+ * @send_lock: synchronize access to outbound queue
+ * @recv_lock: Synchronize access to inbound queue
+ */
+struct scif_qp {
+ u64 magic;
+ /* Value stored in @magic when a queue pair is set up */
+#define SCIFEP_MAGIC 0x5c1f000000005c1fULL
+ struct scif_rb outbound_q;
+ struct scif_rb inbound_q;
+
+ /* Each index is aligned to a 64 byte boundary */
+ u32 local_write __aligned(64);
+ u32 local_read __aligned(64);
+ struct scif_qp *remote_qp;
+ dma_addr_t local_buf;
+ dma_addr_t local_qp;
+ dma_addr_t remote_buf;
+ u32 qp_state;
+ /* Values of @qp_state */
+#define SCIF_QP_OFFLINE 0xdead
+#define SCIF_QP_ONLINE 0xc0de
+ spinlock_t send_lock;
+ spinlock_t recv_lock;
+};
+
+/*
+ * struct scif_loopb_msg - An element in the loopback Node QP message list.
+ *
+ * @msg - The SCIF node QP message
+ * @list - link in the list of messages
+ *
+ * Elements are allocated by scif_loopb_msg_handler() and freed by
+ * scif_loopb_wq_handler() once the message has been handled.
+ */
+struct scif_loopb_msg {
+ struct scifmsg msg;
+ struct list_head list;
+};
+
+int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg);
+int _scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_nodeqp_intrhandler(struct scif_dev *scifdev, struct scif_qp *qp);
+int scif_loopb_msg_handler(struct scif_dev *scifdev, struct scif_qp *qp);
+int scif_setup_qp(struct scif_dev *scifdev);
+int scif_qp_response(phys_addr_t phys, struct scif_dev *dev);
+int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset,
+ int local_size, struct scif_dev *scifdev);
+int scif_setup_qp_accept(struct scif_qp *qp, dma_addr_t *qp_offset,
+ dma_addr_t phys, int local_size,
+ struct scif_dev *scifdev);
+int scif_setup_qp_connect_response(struct scif_dev *scifdev,
+ struct scif_qp *qp, u64 payload);
+int scif_setup_loopback_qp(struct scif_dev *scifdev);
+int scif_destroy_loopback_qp(struct scif_dev *scifdev);
+void scif_poll_qp_state(struct work_struct *work);
+void scif_qp_response_ack(struct work_struct *work);
+void scif_destroy_p2p(struct scif_dev *scifdev);
+void scif_send_exit(struct scif_dev *scifdev);
+/*
+ * scif_get_peer_dev: Take a reference on the peer device of a SCIF device.
+ * @scifdev: SCIF device whose spdev pointer is looked up under RCU
+ *
+ * Returns the result of get_device() on the peer device, or
+ * ERR_PTR(-ENODEV) when no peer device is registered. A successful call
+ * is balanced with scif_put_peer_dev().
+ */
+static inline struct device *scif_get_peer_dev(struct scif_dev *scifdev)
+{
+	struct device *ref = ERR_PTR(-ENODEV);
+	struct scif_peer_dev *spdev;
+
+	rcu_read_lock();
+	spdev = rcu_dereference(scifdev->spdev);
+	if (spdev)
+		ref = get_device(&spdev->dev);
+	rcu_read_unlock();
+	return ref;
+}
+
+/* Drop a device reference, e.g. one obtained from scif_get_peer_dev() */
+static inline void scif_put_peer_dev(struct device *dev)
+{
+ put_device(dev);
+}
+#endif /* SCIF_NODEQP */
diff --git a/drivers/misc/mic/scif/scif_peer_bus.c b/drivers/misc/mic/scif/scif_peer_bus.c
new file mode 100644
index 000000000..589ae9ad2
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_peer_bus.c
@@ -0,0 +1,124 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ */
+#include "scif_main.h"
+#include "../bus/scif_bus.h"
+#include "scif_peer_bus.h"
+
+/* Map an embedded struct device back to the scif_peer_dev that contains it. */
+static inline struct scif_peer_dev *
+dev_to_scif_peer(struct device *dev)
+{
+ return container_of(dev, struct scif_peer_dev, dev);
+}
+
+/* Map an embedded struct device_driver back to its scif_peer_driver. */
+static inline struct scif_peer_driver *
+drv_to_scif_peer(struct device_driver *drv)
+{
+ return container_of(drv, struct scif_peer_driver, driver);
+}
+
+/*
+ * Bus match callback: a device matches a driver when the first four
+ * characters of the device name equal the first four characters of the
+ * driver name. Returns 1 on a match and 0 otherwise.
+ *
+ * NOTE(review): the length 4 is a magic number. Peer devices are named
+ * "scif_peer-dev%u" in scif_peer_register_device(), so this presumably
+ * keys on the "scif" prefix - confirm against the registered driver names.
+ */
+static int scif_peer_dev_match(struct device *dv, struct device_driver *dr)
+{
+ return !strncmp(dev_name(dv), dr->name, 4);
+}
+
+/*
+ * Bus probe callback: forward the peer device to the probe hook of the
+ * peer driver it is bound to and return that hook's result unchanged.
+ */
+static int scif_peer_dev_probe(struct device *d)
+{
+	struct scif_peer_dev *spdev = dev_to_scif_peer(d);
+
+	return drv_to_scif_peer(spdev->dev.driver)->probe(spdev);
+}
+
+/*
+ * Bus remove callback: forward the peer device to the remove hook of the
+ * peer driver it is bound to. The hook returns nothing, so 0 is always
+ * reported to the caller.
+ */
+static int scif_peer_dev_remove(struct device *d)
+{
+	struct scif_peer_dev *spdev = dev_to_scif_peer(d);
+
+	drv_to_scif_peer(spdev->dev.driver)->remove(spdev);
+	return 0;
+}
+
+/* Bus type for SCIF peer devices; wires up the match/probe/remove callbacks. */
+static struct bus_type scif_peer_bus = {
+ .name = "scif_peer_bus",
+ .match = scif_peer_dev_match,
+ .probe = scif_peer_dev_probe,
+ .remove = scif_peer_dev_remove,
+};
+
+/*
+ * scif_peer_register_driver - Register a peer driver on the SCIF peer bus.
+ * @driver: driver to register; its embedded device_driver is attached to
+ * scif_peer_bus before registration
+ *
+ * Returns the result of driver_register().
+ */
+int scif_peer_register_driver(struct scif_peer_driver *driver)
+{
+ driver->driver.bus = &scif_peer_bus;
+ return driver_register(&driver->driver);
+}
+
+/*
+ * scif_peer_unregister_driver - Unregister a peer driver.
+ * @driver: driver whose embedded device_driver is passed to
+ * driver_unregister()
+ */
+void scif_peer_unregister_driver(struct scif_peer_driver *driver)
+{
+ driver_unregister(&driver->driver);
+}
+
+/*
+ * Device release callback installed by scif_peer_register_device(): cleans
+ * up the scif_dev entry indexed by the peer's destination node, then frees
+ * the peer device structure itself.
+ */
+static void scif_peer_release_dev(struct device *d)
+{
+	struct scif_peer_dev *spdev = dev_to_scif_peer(d);
+
+	scif_cleanup_scifdev(&scif_dev[spdev->dnode]);
+	kfree(spdev);
+}
+
+/**
+ * scif_peer_register_device - create a peer device and add it to the peer bus
+ * @scifdev: SCIF device the peer device is created for; supplies the parent
+ *	device and the destination node number
+ *
+ * Return: the new peer device on success, ERR_PTR(-ENOMEM) if it cannot be
+ * allocated, or an ERR_PTR() carrying the dev_set_name()/device_register()
+ * error.
+ */
+struct scif_peer_dev *
+scif_peer_register_device(struct scif_dev *scifdev)
+{
+	int ret;
+	struct scif_peer_dev *spdev;
+
+	spdev = kzalloc(sizeof(*spdev), GFP_KERNEL);
+	if (!spdev)
+		return ERR_PTR(-ENOMEM);
+
+	spdev->dev.parent = scifdev->sdev->dev.parent;
+	spdev->dev.release = scif_peer_release_dev;
+	spdev->dnode = scifdev->node;
+	spdev->dev.bus = &scif_peer_bus;
+
+	ret = dev_set_name(&spdev->dev, "scif_peer-dev%u", spdev->dnode);
+	if (ret) {
+		/* Not yet handed to the driver core: a plain kfree() is right. */
+		kfree(spdev);
+		return ERR_PTR(ret);
+	}
+	/*
+	 * device_register() causes the bus infrastructure to look for a
+	 * matching driver.
+	 */
+	ret = device_register(&spdev->dev);
+	if (ret) {
+		/*
+		 * Once device_register() has been called the structure is
+		 * reference counted and must never be freed directly, even
+		 * on failure. Dropping the reference releases the device
+		 * name and runs scif_peer_release_dev(), which calls
+		 * scif_cleanup_scifdev() and frees spdev.
+		 */
+		put_device(&spdev->dev);
+		return ERR_PTR(ret);
+	}
+	return spdev;
+}
+
+/*
+ * scif_peer_unregister_device - Unregister a peer device.
+ * @sdev: peer device whose embedded struct device is passed to
+ * device_unregister()
+ */
+void scif_peer_unregister_device(struct scif_peer_dev *sdev)
+{
+ device_unregister(&sdev->dev);
+}
+
+/* Register the SCIF peer bus; returns the result of bus_register(). */
+int scif_peer_bus_init(void)
+{
+ return bus_register(&scif_peer_bus);
+}
+
+/* Unregister the SCIF peer bus registered by scif_peer_bus_init(). */
+void scif_peer_bus_exit(void)
+{
+ bus_unregister(&scif_peer_bus);
+}
diff --git a/drivers/misc/mic/scif/scif_peer_bus.h b/drivers/misc/mic/scif/scif_peer_bus.h
new file mode 100644
index 000000000..33f0dbb30
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_peer_bus.h
@@ -0,0 +1,65 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ */
+#ifndef _SCIF_PEER_BUS_H_
+#define _SCIF_PEER_BUS_H_
+
+#include <linux/device.h>
+#include <linux/mic_common.h>
+
+/*
+ * Peer devices show up as PCIe devices for the mgmt node but not the cards.
+ * The mgmt node discovers all the cards on the PCIe bus and informs the other
+ * cards about their peers. Upon notification of a peer a node adds a peer
+ * device to the peer bus to maintain symmetry in the way devices are
+ * discovered across all nodes in the SCIF network.
+ */
+/**
+ * struct scif_peer_dev - representation of a peer SCIF device
+ * @dev: underlying device
+ * @dnode: The destination node which this device will communicate with.
+ */
+struct scif_peer_dev {
+ struct device dev;
+ u8 dnode;
+};
+
+/**
+ * struct scif_peer_driver - operations for a scif_peer I/O driver
+ * @driver: underlying device driver (populate name and owner).
+ * @id_table: the ids serviced by this driver.
+ * @probe: the function to call when a device is found. Returns 0 or -errno.
+ * @remove: the function to call when a device is removed.
+ */
+struct scif_peer_driver {
+ struct device_driver driver;
+ const struct scif_peer_dev_id *id_table;
+
+ int (*probe)(struct scif_peer_dev *dev);
+ void (*remove)(struct scif_peer_dev *dev);
+};
+
+/* Forward declaration; only pointers to struct scif_dev are used below. */
+struct scif_dev;
+
+/* Peer driver registration */
+int scif_peer_register_driver(struct scif_peer_driver *driver);
+void scif_peer_unregister_driver(struct scif_peer_driver *driver);
+
+/* Peer device registration */
+struct scif_peer_dev *scif_peer_register_device(struct scif_dev *sdev);
+void scif_peer_unregister_device(struct scif_peer_dev *sdev);
+
+/* Peer bus setup and teardown */
+int scif_peer_bus_init(void);
+void scif_peer_bus_exit(void);
+#endif /* _SCIF_PEER_BUS_H_ */
diff --git a/drivers/misc/mic/scif/scif_ports.c b/drivers/misc/mic/scif/scif_ports.c
new file mode 100644
index 000000000..594e18d27
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_ports.c
@@ -0,0 +1,124 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/idr.h>
+
+#include "scif_main.h"
+
+#define SCIF_PORT_COUNT 0x10000 /* Ports available */
+
+/*
+ * IDR mapping a port # to its struct scif_port. The functions in this file
+ * access it with scif_info.port_lock held.
+ */
+struct idr scif_ports;
+
+/*
+ * struct scif_port - SCIF port information
+ *
+ * @ref_cnt - Reference count since there can be multiple endpoints
+ * created via scif_accept(..) simultaneously using a port.
+ * Raised by __scif_get_port() and scif_get_port(), dropped by
+ * scif_put_port(), which frees the entry when it reaches zero.
+ */
+struct scif_port {
+ int ref_cnt;
+};
+
+/**
+ * __scif_get_port - Reserve a port # from a range for SCIF and add it
+ *	to the global list.
+ * @start : lowest port # that may be allocated (inclusive).
+ * @end : upper bound of the range (exclusive).
+ *
+ * @return : Allocated SCIF port #, or -ENOSPC if port unavailable.
+ *	On memory allocation failure, returns -ENOMEM.
+ */
+static int __scif_get_port(int start, int end)
+{
+	int id;
+	struct scif_port *port = kzalloc(sizeof(*port), GFP_ATOMIC);
+
+	if (!port)
+		return -ENOMEM;
+	spin_lock(&scif_info.port_lock);
+	id = idr_alloc(&scif_ports, port, start, end, GFP_ATOMIC);
+	if (id >= 0)
+		port->ref_cnt++;
+	spin_unlock(&scif_info.port_lock);
+	/*
+	 * On failure the IDR never took the entry, so nothing else holds a
+	 * pointer to it; free it here or it is leaked.
+	 */
+	if (id < 0)
+		kfree(port);
+	return id;
+}
+
+/**
+ * scif_rsrv_port - Reserve a specified port # for SCIF.
+ * @port : port # to be reserved.
+ *
+ * Requests the single-entry range starting at @port from __scif_get_port().
+ *
+ * @return : Allocated SCIF port #, or -ENOSPC if port unavailable.
+ * On memory allocation failure, returns -ENOMEM.
+ */
+int scif_rsrv_port(u16 port)
+{
+ return __scif_get_port(port, port + 1);
+}
+
+/**
+ * scif_get_new_port - Get and reserve any port # for SCIF in the range
+ * SCIF_PORT_RSVD + 1 to SCIF_PORT_COUNT - 1.
+ *
+ * @return : Allocated SCIF port #, or -ENOSPC if no ports available.
+ * On memory allocation failure, returns -ENOMEM.
+ */
+int scif_get_new_port(void)
+{
+ /* SCIF_PORT_COUNT is passed as the end of the range given to idr_alloc() */
+ return __scif_get_port(SCIF_PORT_RSVD + 1, SCIF_PORT_COUNT);
+}
+
+/**
+ * scif_get_port - Take an additional reference on a reserved SCIF port
+ * @id : SCIF port
+ *
+ * Port 0 and ports that are not present in the IDR are ignored.
+ *
+ * @return : None
+ */
+void scif_get_port(u16 id)
+{
+	struct scif_port *entry;
+
+	if (id == 0)
+		return;
+
+	spin_lock(&scif_info.port_lock);
+	entry = idr_find(&scif_ports, id);
+	if (entry)
+		++entry->ref_cnt;
+	spin_unlock(&scif_info.port_lock);
+}
+
+/**
+ * scif_put_port - Drop a reference on a reserved SCIF port
+ * @id : SCIF port to be released.
+ *
+ * When the last reference goes away the port is removed from the IDR and
+ * its bookkeeping entry is freed. Port 0 and unknown ports are ignored.
+ *
+ * @return : None
+ */
+void scif_put_port(u16 id)
+{
+	struct scif_port *entry;
+
+	if (id == 0)
+		return;
+
+	spin_lock(&scif_info.port_lock);
+	entry = idr_find(&scif_ports, id);
+	/* Last user gone: retire the id and free the entry. */
+	if (entry && --entry->ref_cnt == 0) {
+		idr_remove(&scif_ports, id);
+		kfree(entry);
+	}
+	spin_unlock(&scif_info.port_lock);
+}
diff --git a/drivers/misc/mic/scif/scif_rb.c b/drivers/misc/mic/scif/scif_rb.c
new file mode 100644
index 000000000..637cc4686
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rb.c
@@ -0,0 +1,249 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/circ_buf.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/errno.h>
+
+#include "scif_rb.h"
+
+/* Local names for the CIRC_CNT()/CIRC_SPACE() helpers. */
+#define scif_rb_ring_cnt(head, tail, size) CIRC_CNT(head, tail, size)
+#define scif_rb_ring_space(head, tail, size) CIRC_SPACE(head, tail, size)
+
+/**
+ * scif_rb_init - Initializes the ring buffer
+ * @rb: ring buffer
+ * @read_ptr: A pointer to the read offset
+ * @write_ptr: A pointer to the write offset
+ * @rb_base: A pointer to the base of the ring buffer
+ * @size: The size of the ring buffer as a power of two exponent, i.e.
+ *	the ring holds 1 << @size bytes
+ *
+ * The cached read and write offsets are seeded from the current values
+ * behind @read_ptr and @write_ptr.
+ */
+void scif_rb_init(struct scif_rb *rb, u32 *read_ptr, u32 *write_ptr,
+		  void *rb_base, u8 size)
+{
+	rb->rb_base = rb_base;
+	/* Unsigned shift: 1 << 31 on a signed int is undefined behaviour. */
+	rb->size = 1U << size;
+	rb->read_ptr = read_ptr;
+	rb->write_ptr = write_ptr;
+	rb->current_read_offset = *read_ptr;
+	rb->current_write_offset = *write_ptr;
+}
+
+/*
+ * Copy @size bytes from @msg into the ring buffer starting at @header.
+ * A message that reaches the end of the ring is written in two pieces:
+ * the part that fits up to the end, then the remainder from the base.
+ */
+static void memcpy_torb(struct scif_rb *rb, void *header,
+			void *msg, u32 size)
+{
+	void *rb_end = rb->rb_base + rb->size;
+	u32 first, rest;
+
+	if (header + size < rb_end) {
+		/* Fits without touching the end of the ring */
+		memcpy_toio((void __iomem __force *)header, msg, size);
+		return;
+	}
+
+	/* Wrap around: fill up to the end, then continue from the base */
+	first = (u32)(rb_end - header);
+	rest = size - first;
+	memcpy_toio((void __iomem __force *)header, msg, first);
+	memcpy_toio((void __iomem __force *)rb->rb_base, msg + first, rest);
+}
+
+/*
+ * Copy @size bytes out of the ring buffer, starting at @header, into @msg.
+ * A message that reaches the end of the ring is read in two pieces: the
+ * part up to the end, then the remainder from the base.
+ */
+static void memcpy_fromrb(struct scif_rb *rb, void *header,
+			  void *msg, u32 size)
+{
+	void *rb_end = rb->rb_base + rb->size;
+	u32 first, rest;
+
+	if (header + size < rb_end) {
+		/* Message lies entirely before the end of the ring */
+		memcpy_fromio(msg, (void __iomem __force *)header, size);
+		return;
+	}
+
+	/* Wrap around: drain up to the end, then continue from the base */
+	first = (u32)(rb_end - header);
+	rest = size - first;
+	memcpy_fromio(msg, (void __iomem __force *)header, first);
+	memcpy_fromio(msg + first, (void __iomem __force *)rb->rb_base, rest);
+}
+
+/**
+ * scif_rb_space - Query space available for writing to the RB
+ * @rb: ring buffer
+ *
+ * Refreshes the cached read offset from the shared read pointer before
+ * computing the free space.
+ *
+ * Return: size available for writing to RB in bytes.
+ */
+u32 scif_rb_space(struct scif_rb *rb)
+{
+ rb->current_read_offset = *rb->read_ptr;
+ /*
+ * Update from the HW read pointer only once the peer has exposed the
+ * new empty slot. This barrier is paired with the memory barrier in
+ * scif_rb_update_read_ptr()
+ */
+ mb();
+ return scif_rb_ring_space(rb->current_write_offset,
+ rb->current_read_offset, rb->size);
+}
+
+/**
+ * scif_rb_write - Write a message to the RB
+ * @rb: ring buffer
+ * @msg: buffer holding the message to send. Must be at least size bytes long
+ * @size: the size (in bytes) to be copied to the RB
+ *
+ * Never blocks: when the RB does not have @size bytes free the call fails
+ * immediately. Only the local write offset advances here; the peer does
+ * not see the message until scif_rb_commit().
+ * Returns: 0 on success or -ENOMEM on failure
+ */
+int scif_rb_write(struct scif_rb *rb, void *msg, u32 size)
+{
+	u32 next_offset;
+
+	if (size > scif_rb_space(rb))
+		return -ENOMEM;
+
+	memcpy_torb(rb, rb->rb_base + rb->current_write_offset, msg, size);
+
+	/*
+	 * Advance the cached offset only; the shared write pointer is
+	 * left untouched until scif_rb_commit().
+	 */
+	next_offset = (rb->current_write_offset + size) & (rb->size - 1);
+	rb->current_write_offset = next_offset;
+	return 0;
+}
+
+/**
+ * scif_rb_commit - To submit the message to let the peer fetch it
+ * @rb: ring buffer
+ *
+ * Publishes the locally cached write offset through the shared write
+ * pointer.
+ */
+void scif_rb_commit(struct scif_rb *rb)
+{
+ /*
+ * We must ensure ordering between all the data committed
+ * previously before we expose the new message to the peer by
+ * updating the write_ptr. This write barrier is paired with
+ * the read barrier in scif_rb_count(..)
+ */
+ wmb();
+ ACCESS_ONCE(*rb->write_ptr) = rb->current_write_offset;
+#ifdef CONFIG_INTEL_MIC_CARD
+ /*
+ * X100 Si bug: For the case where a Core is performing an EXT_WR
+ * followed by a Doorbell Write, the Core must perform two EXT_WR to the
+ * same address with the same data before it does the Doorbell Write.
+ * This way, if ordering is violated for the Interrupt Message, it will
+ * fall just behind the first Posted associated with the first EXT_WR.
+ */
+ ACCESS_ONCE(*rb->write_ptr) = rb->current_write_offset;
+#endif
+}
+
+/**
+ * scif_rb_get - To get next message from the ring buffer
+ * @rb: ring buffer
+ * @size: Number of bytes to be read
+ *
+ * Return: NULL if fewer than @size bytes can be read from the ring buffer,
+ * otherwise the pointer to the next byte to read
+ */
+static void *scif_rb_get(struct scif_rb *rb, u32 size)
+{
+	if (scif_rb_count(rb, size) < size)
+		return NULL;
+
+	return rb->rb_base + rb->current_read_offset;
+}
+
+/*
+ * scif_rb_get_next - Read from ring buffer.
+ * @rb: ring buffer
+ * @msg: buffer to hold the message. Must be at least size bytes long
+ * @size: Number of bytes to be read
+ *
+ * Only the cached read offset advances here; the shared read pointer is
+ * not written by this function.
+ *
+ * Return: number of bytes read if available bytes are >= size, otherwise
+ * returns zero.
+ */
+u32 scif_rb_get_next(struct scif_rb *rb, void *msg, u32 size)
+{
+	void *src = scif_rb_get(rb, size);
+
+	if (!src)
+		return 0;
+
+	/* src already points at the data, so the offset can move first. */
+	rb->current_read_offset =
+		(rb->current_read_offset + size) & (rb->size - 1);
+	memcpy_fromrb(rb, src, msg, size);
+	return size;
+}
+
+/**
+ * scif_rb_update_read_ptr - Publish the cached read offset
+ * @rb: ring buffer
+ *
+ * Writes the locally cached read offset through the shared read pointer.
+ */
+void scif_rb_update_read_ptr(struct scif_rb *rb)
+{
+ u32 new_offset;
+
+ new_offset = rb->current_read_offset;
+ /*
+ * We must ensure ordering between all the data committed or read
+ * previously before we expose the empty slot to the peer by updating
+ * the read_ptr. This barrier is paired with the memory barrier in
+ * scif_rb_space(..)
+ */
+ mb();
+ ACCESS_ONCE(*rb->read_ptr) = new_offset;
+#ifdef CONFIG_INTEL_MIC_CARD
+ /*
+ * X100 Si Bug: For the case where a Core is performing an EXT_WR
+ * followed by a Doorbell Write, the Core must perform two EXT_WR to the
+ * same address with the same data before it does the Doorbell Write.
+ * This way, if ordering is violated for the Interrupt Message, it will
+ * fall just behind the first Posted associated with the first EXT_WR.
+ */
+ ACCESS_ONCE(*rb->read_ptr) = new_offset;
+#endif
+}
+
+/**
+ * scif_rb_count - Count the bytes available for reading
+ * @rb: ring buffer
+ * @size: Number of bytes expected to be read
+ *
+ * The cached write offset is refreshed from the shared write pointer only
+ * when the cached view holds fewer than @size bytes.
+ *
+ * Return: number of bytes that can be read from the RB
+ */
+u32 scif_rb_count(struct scif_rb *rb, u32 size)
+{
+ if (scif_rb_ring_cnt(rb->current_write_offset,
+ rb->current_read_offset,
+ rb->size) < size) {
+ rb->current_write_offset = *rb->write_ptr;
+ /*
+ * Update from the HW write pointer if empty only once the peer
+ * has exposed the new message. This read barrier is paired
+ * with the write barrier in scif_rb_commit(..)
+ */
+ smp_rmb();
+ }
+ return scif_rb_ring_cnt(rb->current_write_offset,
+ rb->current_read_offset,
+ rb->size);
+}
diff --git a/drivers/misc/mic/scif/scif_rb.h b/drivers/misc/mic/scif/scif_rb.h
new file mode 100644
index 000000000..166dffe30
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rb.h
@@ -0,0 +1,100 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ */
+#ifndef SCIF_RB_H
+#define SCIF_RB_H
+/*
+ * This file describes a general purpose, byte based ring buffer. Writers to the
+ * ring buffer need to synchronize using a lock. The same is true for readers,
+ * although in practice, the ring buffer has a single reader. It is lockless
+ * between producer and consumer so it can handle being used across the PCIe
+ * bus. The ring buffer ensures that there are no reads across the PCIe bus for
+ * performance reasons. Two of these are used to form a single bidirectional
+ * queue-pair across PCIe.
+ */
+/*
+ * struct scif_rb - SCIF Ring Buffer
+ *
+ * @rb_base: The base of the memory used for storing RB messages
+ * @read_ptr: Pointer to the read offset
+ * @write_ptr: Pointer to the write offset
+ * @size: Size of the memory in rb_base, in bytes
+ * @current_read_offset: Cached read offset for performance
+ * @current_write_offset: Cached write offset for performance
+ */
+struct scif_rb {
+ void *rb_base;
+ u32 *read_ptr;
+ u32 *write_ptr;
+ u32 size;
+ u32 current_read_offset;
+ u32 current_write_offset;
+};
+
+/* methods used by both reader and writer */
+void scif_rb_init(struct scif_rb *rb, u32 *read_ptr, u32 *write_ptr,
+ void *rb_base, u8 size);
+/* writer only methods */
+/* write a new command, then scif_rb_commit() */
+int scif_rb_write(struct scif_rb *rb, void *msg, u32 size);
+/* after write(), then scif_rb_commit() */
+void scif_rb_commit(struct scif_rb *rb);
+/* query space available for writing to a RB. */
+u32 scif_rb_space(struct scif_rb *rb);
+
+/* reader only methods */
+/* read a new message of size bytes from the ring buffer */
+u32 scif_rb_get_next(struct scif_rb *rb, void *msg, u32 size);
+/* update the read pointer so that the space can be reused */
+void scif_rb_update_read_ptr(struct scif_rb *rb);
+/* count the number of bytes that can be read */
+u32 scif_rb_count(struct scif_rb *rb, u32 size);
+#endif