Linux-libre 4.4-gnupck-4.4-gnu

author: André Fabian Silva Delgado <emulatorman@parabola.nu> 2016-01-20 14:01:31 -0300
committer: André Fabian Silva Delgado <emulatorman@parabola.nu> 2016-01-20 14:01:31 -0300
commit: b4b7ff4b08e691656c9d77c758fc355833128ac0 (patch)
tree: 82fcb00e6b918026dc9f2d1f05ed8eee83874cc0 /drivers/misc/mic/scif
parent: 35acfa0fc609f2a2cd95cef4a6a9c3a5c38f1778 (diff)
21 files changed, 6956 insertions, 263 deletions
diff --git a/drivers/misc/mic/scif/Makefile b/drivers/misc/mic/scif/Makefile
index bf10bb7e2..29cfc3e51 100644
--- a/drivers/misc/mic/scif/Makefile
+++ b/drivers/misc/mic/scif/Makefile
@@ -13,3 +13,8 @@ scif-objs += scif_epd.o
 scif-objs += scif_rb.o
 scif-objs += scif_nodeqp.o
 scif-objs += scif_nm.o
+scif-objs += scif_dma.o
+scif-objs += scif_fence.o
+scif-objs += scif_mmap.o
+scif-objs += scif_rma.o
+scif-objs += scif_rma_list.o
diff --git a/drivers/misc/mic/scif/scif_api.c b/drivers/misc/mic/scif/scif_api.c
index f39d3135a..ddc9e4b08 100644
--- a/drivers/misc/mic/scif/scif_api.c
+++ b/drivers/misc/mic/scif/scif_api.c
@@ -37,9 +37,21 @@ enum conn_async_state {
 	ASYNC_CONN_FLUSH_WORK	/* async work flush in progress  */
 };
 
+/*
+ * File operations for anonymous inode file associated with a SCIF endpoint,
+ * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
+ * poll API in the kernel and these take in a struct file *. Since a struct
+ * file is not available to kernel mode SCIF, it uses an anonymous file for
+ * this purpose.
+ */
+const struct file_operations scif_anon_fops = {
+	.owner = THIS_MODULE,
+};
+
 scif_epd_t scif_open(void)
 {
 	struct scif_endpt *ep;
+	int err;
 
 	might_sleep();
 	ep = kzalloc(sizeof(*ep), GFP_KERNEL);
@@ -50,15 +62,22 @@ scif_epd_t scif_open(void)
 	if (!ep->qp_info.qp)
 		goto err_qp_alloc;
 
+	err = scif_anon_inode_getfile(ep);
+	if (err)
+		goto err_anon_inode;
+
 	spin_lock_init(&ep->lock);
 	mutex_init(&ep->sendlock);
 	mutex_init(&ep->recvlock);
 
+	scif_rma_ep_init(ep);
 	ep->state = SCIFEP_UNBOUND;
 	dev_dbg(scif_info.mdev.this_device,
 		"SCIFAPI open: ep %p success\n", ep);
 	return ep;
 
+err_anon_inode:
+	kfree(ep->qp_info.qp);
 err_qp_alloc:
 	kfree(ep);
 err_ep_alloc:
@@ -166,8 +185,11 @@ int scif_close(scif_epd_t epd)
 
 	switch (oldstate) {
 	case SCIFEP_ZOMBIE:
+		dev_err(scif_info.mdev.this_device,
+			"SCIFAPI close: zombie state unexpected\n");
 	case SCIFEP_DISCONNECTED:
 		spin_unlock(&ep->lock);
+		scif_unregister_all_windows(epd);
 		/* Remove from the disconnected list */
 		mutex_lock(&scif_info.connlock);
 		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
@@ -189,6 +211,7 @@ int scif_close(scif_epd_t epd)
 	case SCIFEP_CLOSING:
 	{
 		spin_unlock(&ep->lock);
+		scif_unregister_all_windows(epd);
 		scif_disconnect_ep(ep);
 		break;
 	}
@@ -200,7 +223,7 @@ int scif_close(scif_epd_t epd)
 		struct scif_endpt *aep;
 
 		spin_unlock(&ep->lock);
-		spin_lock(&scif_info.eplock);
+		mutex_lock(&scif_info.eplock);
 
 		/* remove from listen list */
 		list_for_each_safe(pos, tmpq, &scif_info.listen) {
@@ -222,7 +245,7 @@ int scif_close(scif_epd_t epd)
 					break;
 				}
 			}
-			spin_unlock(&scif_info.eplock);
+			mutex_unlock(&scif_info.eplock);
 			mutex_lock(&scif_info.connlock);
 			list_for_each_safe(pos, tmpq, &scif_info.connected) {
 				tmpep = list_entry(pos,
@@ -242,13 +265,13 @@ int scif_close(scif_epd_t epd)
 			}
 			mutex_unlock(&scif_info.connlock);
 			scif_teardown_ep(aep);
-			spin_lock(&scif_info.eplock);
+			mutex_lock(&scif_info.eplock);
 			scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
 			ep->acceptcnt--;
 		}
 
 		spin_lock(&ep->lock);
-		spin_unlock(&scif_info.eplock);
+		mutex_unlock(&scif_info.eplock);
 
 		/* Remove and reject any pending connection requests. */
 		while (ep->conreqcnt) {
@@ -279,6 +302,7 @@ int scif_close(scif_epd_t epd)
 	}
 	}
 	scif_put_port(ep->port.port);
+	scif_anon_inode_fput(ep);
 	scif_teardown_ep(ep);
 	scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
 	return 0;
@@ -409,9 +433,9 @@ int scif_listen(scif_epd_t epd, int backlog)
 	scif_teardown_ep(ep);
 	ep->qp_info.qp = NULL;
 
-	spin_lock(&scif_info.eplock);
+	mutex_lock(&scif_info.eplock);
 	list_add_tail(&ep->list, &scif_info.listen);
-	spin_unlock(&scif_info.eplock);
+	mutex_unlock(&scif_info.eplock);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(scif_listen);
@@ -450,6 +474,13 @@ static int scif_conn_func(struct scif_endpt *ep)
 	struct scifmsg msg;
 	struct device *spdev;
 
+	err = scif_reserve_dma_chan(ep);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		ep->state = SCIFEP_BOUND;
+		goto connect_error_simple;
+	}
 	/* Initiate the first part of the endpoint QP setup */
 	err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
 				    SCIF_ENDPT_QP_SIZE, ep->remote_dev);
@@ -558,8 +589,10 @@ void scif_conn_handler(struct work_struct *work)
 			list_del(&ep->conn_list);
 		}
 		spin_unlock(&scif_info.nb_connect_lock);
-		if (ep)
+		if (ep) {
 			ep->conn_err = scif_conn_func(ep);
+			wake_up_interruptible(&ep->conn_pend_wq);
+		}
 	} while (ep);
 }
 
@@ -660,6 +693,7 @@ int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
 	ep->remote_dev = &scif_dev[dst->node];
 	ep->qp_info.qp->magic = SCIFEP_MAGIC;
 	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+		init_waitqueue_head(&ep->conn_pend_wq);
 		spin_lock(&scif_info.nb_connect_lock);
 		list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
 		spin_unlock(&scif_info.nb_connect_lock);
@@ -782,12 +816,25 @@ retry_connection:
 	cep->remote_dev = &scif_dev[peer->node];
 	cep->remote_ep = conreq->msg.payload[0];
 
+	scif_rma_ep_init(cep);
+
+	err = scif_reserve_dma_chan(cep);
+	if (err) {
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto scif_accept_error_qpalloc;
+	}
+
 	cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
 	if (!cep->qp_info.qp) {
 		err = -ENOMEM;
 		goto scif_accept_error_qpalloc;
 	}
 
+	err = scif_anon_inode_getfile(cep);
+	if (err)
+		goto scif_accept_error_anon_inode;
+
 	cep->qp_info.qp->magic = SCIFEP_MAGIC;
 	spdev = scif_get_peer_dev(cep->remote_dev);
 	if (IS_ERR(spdev)) {
@@ -858,6 +905,8 @@ retry:
 	spin_unlock(&cep->lock);
 	return 0;
 scif_accept_error_map:
+	scif_anon_inode_fput(cep);
+scif_accept_error_anon_inode:
 	scif_teardown_ep(cep);
 scif_accept_error_qpalloc:
 	kfree(cep);
@@ -1247,6 +1296,134 @@ int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
 }
 EXPORT_SYMBOL_GPL(scif_recv);
 
+static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
+				   poll_table *p, struct scif_endpt *ep)
+{
+	/*
+	 * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
+	 * and regrab it afterwards. Because the endpoint state might have
+	 * changed while the lock was given up, the state must be checked
+	 * again after re-acquiring the lock. The code in __scif_pollfd(..)
+	 * does this.
+	 */
+	spin_unlock(&ep->lock);
+	poll_wait(f, wq, p);
+	spin_lock(&ep->lock);
+}
+
+unsigned int
+__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
+{
+	unsigned int mask = 0;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);
+
+	spin_lock(&ep->lock);
+
+	/* Endpoint is waiting for a non-blocking connect to complete */
+	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+		_scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
+		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+			if (ep->state == SCIFEP_CONNECTED ||
+			    ep->state == SCIFEP_DISCONNECTED ||
+			    ep->conn_err)
+				mask |= POLLOUT;
+			goto exit;
+		}
+	}
+
+	/* Endpoint is listening for incoming connection requests */
+	if (ep->state == SCIFEP_LISTENING) {
+		_scif_poll_wait(f, &ep->conwq, wait, ep);
+		if (ep->state == SCIFEP_LISTENING) {
+			if (ep->conreqcnt)
+				mask |= POLLIN;
+			goto exit;
+		}
+	}
+
+	/* Endpoint is connected or disconnected */
+	if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
+		if (poll_requested_events(wait) & POLLIN)
+			_scif_poll_wait(f, &ep->recvwq, wait, ep);
+		if (poll_requested_events(wait) & POLLOUT)
+			_scif_poll_wait(f, &ep->sendwq, wait, ep);
+		if (ep->state == SCIFEP_CONNECTED ||
+		    ep->state == SCIFEP_DISCONNECTED) {
+			/* Data can be read without blocking */
+			if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
+				mask |= POLLIN;
+			/* Data can be written without blocking */
+			if (scif_rb_space(&ep->qp_info.qp->outbound_q))
+				mask |= POLLOUT;
+			/* Return POLLHUP if endpoint is disconnected */
+			if (ep->state == SCIFEP_DISCONNECTED)
+				mask |= POLLHUP;
+			goto exit;
+		}
+	}
+
+	/* Return POLLERR if the endpoint is in none of the above states */
+	mask |= POLLERR;
+exit:
+	spin_unlock(&ep->lock);
+	return mask;
+}
+
+/**
+ * scif_poll() - Kernel mode SCIF poll
+ * @ufds: Array of scif_pollepd structures containing the end points
+ *	  and events to poll on
+ * @nfds: Size of the ufds array
+ * @timeout_msecs: Timeout in msecs, -ve implies infinite timeout
+ *
+ * The code flow in this function is based on do_poll(..) in select.c
+ *
+ * Returns the number of endpoints which have pending events or 0 in
+ * the event of a timeout. If a signal is used for wake up, -EINTR is
+ * returned.
+ */
+int
+scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
+{
+	struct poll_wqueues table;
+	poll_table *pt;
+	int i, mask, count = 0, timed_out = timeout_msecs == 0;
+	u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
+		: msecs_to_jiffies(timeout_msecs);
+
+	poll_initwait(&table);
+	pt = &table.pt;
+	while (1) {
+		for (i = 0; i < nfds; i++) {
+			pt->_key = ufds[i].events | POLLERR | POLLHUP;
+			mask = __scif_pollfd(ufds[i].epd->anon,
+					     pt, ufds[i].epd);
+			mask &= ufds[i].events | POLLERR | POLLHUP;
+			if (mask) {
+				count++;
+				pt->_qproc = NULL;
+			}
+			ufds[i].revents = mask;
+		}
+		pt->_qproc = NULL;
+		if (!count) {
+			count = table.error;
+			if (signal_pending(current))
+				count = -EINTR;
+		}
+		if (count || timed_out)
+			break;
+
+		if (!schedule_timeout_interruptible(timeout))
+			timed_out = 1;
+	}
+	poll_freewait(&table);
+	return count;
+}
+EXPORT_SYMBOL_GPL(scif_poll);
+
 int scif_get_node_ids(u16 *nodes, int len, u16 *self)
 {
 	int online = 0;
@@ -1274,3 +1451,46 @@ int scif_get_node_ids(u16 *nodes, int len, u16 *self)
 	return online;
 }
 EXPORT_SYMBOL_GPL(scif_get_node_ids);
+
+static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
+{
+	struct scif_client *client =
+		container_of(si, struct scif_client, si);
+	struct scif_peer_dev *spdev =
+		container_of(dev, struct scif_peer_dev, dev);
+
+	if (client->probe)
+		client->probe(spdev);
+	return 0;
+}
+
+static void scif_remove_client_dev(struct device *dev,
+				   struct subsys_interface *si)
+{
+	struct scif_client *client =
+		container_of(si, struct scif_client, si);
+	struct scif_peer_dev *spdev =
+		container_of(dev, struct scif_peer_dev, dev);
+
+	if (client->remove)
+		client->remove(spdev);
+}
+
+void scif_client_unregister(struct scif_client *client)
+{
+	subsys_interface_unregister(&client->si);
+}
+EXPORT_SYMBOL_GPL(scif_client_unregister);
+
+int scif_client_register(struct scif_client *client)
+{
+	struct subsys_interface *si = &client->si;
+
+	si->name = client->name;
+	si->subsys = &scif_peer_bus;
+	si->add_dev = scif_add_client_dev;
+	si->remove_dev = scif_remove_client_dev;
+
+	return subsys_interface_register(&client->si);
+}
+EXPORT_SYMBOL_GPL(scif_client_register);
diff --git a/drivers/misc/mic/scif/scif_debugfs.c b/drivers/misc/mic/scif/scif_debugfs.c
index 51f14e2a1..6884dad97 100644
--- a/drivers/misc/mic/scif/scif_debugfs.c
+++ b/drivers/misc/mic/scif/scif_debugfs.c
@@ -62,10 +62,87 @@ static const struct file_operations scif_dev_ops = {
 	.release = scif_dev_test_release
 };
 
-void __init scif_init_debugfs(void)
+static void scif_display_window(struct scif_window *window, struct seq_file *s)
+{
+	int j;
+	struct scatterlist *sg;
+	scif_pinned_pages_t pin = window->pinned_pages;
+
+	seq_printf(s, "window %p type %d temp %d offset 0x%llx ",
+		   window, window->type, window->temp, window->offset);
+	seq_printf(s, "nr_pages 0x%llx nr_contig_chunks 0x%x prot %d ",
+		   window->nr_pages, window->nr_contig_chunks, window->prot);
+	seq_printf(s, "ref_count %d magic 0x%llx peer_window 0x%llx ",
+		   window->ref_count, window->magic, window->peer_window);
+	seq_printf(s, "unreg_state 0x%x va_for_temp 0x%lx\n",
+		   window->unreg_state, window->va_for_temp);
+
+	for (j = 0; j < window->nr_contig_chunks; j++)
+		seq_printf(s, "page[%d] dma_addr 0x%llx num_pages 0x%llx\n", j,
+			   window->dma_addr[j], window->num_pages[j]);
+
+	if (window->type == SCIF_WINDOW_SELF && pin)
+		for (j = 0; j < window->nr_pages; j++)
+			seq_printf(s, "page[%d] = pinned_pages %p address %p\n",
+				   j, pin->pages[j],
+				   page_address(pin->pages[j]));
+
+	if (window->st)
+		for_each_sg(window->st->sgl, sg, window->st->nents, j)
+			seq_printf(s, "sg[%d] dma addr 0x%llx length 0x%x\n",
+				   j, sg_dma_address(sg), sg_dma_len(sg));
+}
+
+static void scif_display_all_windows(struct list_head *head, struct seq_file *s)
 {
-	struct dentry *d;
+	struct list_head *item;
+	struct scif_window *window;
 
+	list_for_each(item, head) {
+		window = list_entry(item, struct scif_window, list);
+		scif_display_window(window, s);
+	}
+}
+
+static int scif_rma_test(struct seq_file *s, void *unused)
+{
+	struct scif_endpt *ep;
+	struct list_head *pos;
+
+	mutex_lock(&scif_info.connlock);
+	list_for_each(pos, &scif_info.connected) {
+		ep = list_entry(pos, struct scif_endpt, list);
+		seq_printf(s, "ep %p self windows\n", ep);
+		mutex_lock(&ep->rma_info.rma_lock);
+		scif_display_all_windows(&ep->rma_info.reg_list, s);
+		seq_printf(s, "ep %p remote windows\n", ep);
+		scif_display_all_windows(&ep->rma_info.remote_reg_list, s);
+		mutex_unlock(&ep->rma_info.rma_lock);
+	}
+	mutex_unlock(&scif_info.connlock);
+	return 0;
+}
+
+static int scif_rma_test_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, scif_rma_test, inode->i_private);
+}
+
+static int scif_rma_test_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations scif_rma_ops = {
+	.owner   = THIS_MODULE,
+	.open    = scif_rma_test_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = scif_rma_test_release
+};
+
+void __init scif_init_debugfs(void)
+{
 	scif_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
 	if (!scif_dbg) {
 		dev_err(scif_info.mdev.this_device,
@@ -73,8 +150,8 @@ void __init scif_init_debugfs(void)
 		return;
 	}
 
-	d = debugfs_create_file("scif_dev", 0444, scif_dbg,
-				NULL, &scif_dev_ops);
+	debugfs_create_file("scif_dev", 0444, scif_dbg, NULL, &scif_dev_ops);
+	debugfs_create_file("scif_rma", 0444, scif_dbg, NULL, &scif_rma_ops);
 	debugfs_create_u8("en_msg_log", 0666, scif_dbg, &scif_info.en_msg_log);
 	debugfs_create_u8("p2p_enable", 0666, scif_dbg, &scif_info.p2p_enable);
 }
diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c
new file mode 100644
index 000000000..95a13c629
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_dma.c
@@ -0,0 +1,1979 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+#include "scif_map.h"
+
+/*
+ * struct scif_dma_comp_cb - SCIF DMA completion callback
+ *
+ * @dma_completion_func: DMA completion callback
+ * @cb_cookie: DMA completion callback cookie
+ * @temp_buf: Temporary buffer
+ * @temp_buf_to_free: Temporary buffer to be freed
+ * @is_cache: Is a kmem_cache allocated buffer
+ * @dst_offset: Destination registration offset
+ * @dst_window: Destination registration window
+ * @len: Length of the temp buffer
+ * @temp_phys: DMA address of the temp buffer
+ * @sdev: The SCIF device
+ * @header_padding: padding for cache line alignment
+ */
+struct scif_dma_comp_cb {
+	void (*dma_completion_func)(void *cookie);
+	void *cb_cookie;
+	u8 *temp_buf;
+	u8 *temp_buf_to_free;
+	bool is_cache;
+	s64 dst_offset;
+	struct scif_window *dst_window;
+	size_t len;
+	dma_addr_t temp_phys;
+	struct scif_dev *sdev;
+	int header_padding;
+};
+
+/**
+ * struct scif_copy_work - Work for DMA copy
+ *
+ * @src_offset: Starting source offset
+ * @dst_offset: Starting destination offset
+ * @src_window: Starting src registered window
+ * @dst_window: Starting dst registered window
+ * @loopback: true if this is a loopback DMA transfer
+ * @len: Length of the transfer
+ * @comp_cb: DMA copy completion callback
+ * @remote_dev: The remote SCIF peer device
+ * @fence_type: polling or interrupt based
+ * @ordered: is this a tail byte ordered DMA transfer
+ */
+struct scif_copy_work {
+	s64 src_offset;
+	s64 dst_offset;
+	struct scif_window *src_window;
+	struct scif_window *dst_window;
+	int loopback;
+	size_t len;
+	struct scif_dma_comp_cb   *comp_cb;
+	struct scif_dev	*remote_dev;
+	int fence_type;
+	bool ordered;
+};
+
+#ifndef list_entry_next
+#define list_entry_next(pos, member) \
+	list_entry(pos->member.next, typeof(*pos), member)
+#endif
+
+/**
+ * scif_reserve_dma_chan:
+ * @ep: Endpoint Descriptor.
+ *
+ * This routine reserves a DMA channel for a particular
+ * endpoint. All DMA transfers for an endpoint are always
+ * programmed on the same DMA channel.
+ */
+int scif_reserve_dma_chan(struct scif_endpt *ep)
+{
+	int err = 0;
+	struct scif_dev *scifdev;
+	struct scif_hw_dev *sdev;
+	struct dma_chan *chan;
+
+	/* Loopback DMAs are not supported on the management node */
+	if (!scif_info.nodeid && scifdev_self(ep->remote_dev))
+		return 0;
+	if (scif_info.nodeid)
+		scifdev = &scif_dev[0];
+	else
+		scifdev = ep->remote_dev;
+	sdev = scifdev->sdev;
+	if (!sdev->num_dma_ch)
+		return -ENODEV;
+	chan = sdev->dma_ch[scifdev->dma_ch_idx];
+	scifdev->dma_ch_idx = (scifdev->dma_ch_idx + 1) % sdev->num_dma_ch;
+	mutex_lock(&ep->rma_info.rma_lock);
+	ep->rma_info.dma_chan = chan;
+	mutex_unlock(&ep->rma_info.rma_lock);
+	return err;
+}
+
+#ifdef CONFIG_MMU_NOTIFIER
+/**
+ * scif_rma_destroy_tcw:
+ *
+ * This routine destroys temporary cached windows
+ */
+static
+void __scif_rma_destroy_tcw(struct scif_mmu_notif *mmn,
+			    struct scif_endpt *ep,
+			    u64 start, u64 len)
+{
+	struct list_head *item, *tmp;
+	struct scif_window *window;
+	u64 start_va, end_va;
+	u64 end = start + len;
+
+	if (end <= start)
+		return;
+
+	list_for_each_safe(item, tmp, &mmn->tc_reg_list) {
+		window = list_entry(item, struct scif_window, list);
+		ep = (struct scif_endpt *)window->ep;
+		if (!len)
+			break;
+		start_va = window->va_for_temp;
+		end_va = start_va + (window->nr_pages << PAGE_SHIFT);
+		if (start < start_va && end <= start_va)
+			break;
+		if (start >= end_va)
+			continue;
+		__scif_rma_destroy_tcw_helper(window);
+	}
+}
+
+static void scif_rma_destroy_tcw(struct scif_mmu_notif *mmn, u64 start, u64 len)
+{
+	struct scif_endpt *ep = mmn->ep;
+
+	spin_lock(&ep->rma_info.tc_lock);
+	__scif_rma_destroy_tcw(mmn, ep, start, len);
+	spin_unlock(&ep->rma_info.tc_lock);
+}
+
+static void scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
+{
+	struct list_head *item, *tmp;
+	struct scif_mmu_notif *mmn;
+
+	list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
+		mmn = list_entry(item, struct scif_mmu_notif, list);
+		scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
+	}
+}
+
+static void __scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
+{
+	struct list_head *item, *tmp;
+	struct scif_mmu_notif *mmn;
+
+	spin_lock(&ep->rma_info.tc_lock);
+	list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
+		mmn = list_entry(item, struct scif_mmu_notif, list);
+		__scif_rma_destroy_tcw(mmn, ep, 0, ULONG_MAX);
+	}
+	spin_unlock(&ep->rma_info.tc_lock);
+}
+
+static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
+{
+	if ((cur_bytes >> PAGE_SHIFT) > scif_info.rma_tc_limit)
+		return false;
+	if ((atomic_read(&ep->rma_info.tcw_total_pages)
+			+ (cur_bytes >> PAGE_SHIFT)) >
+			scif_info.rma_tc_limit) {
+		dev_info(scif_info.mdev.this_device,
+			 "%s %d total=%d, current=%zu reached max\n",
+			 __func__, __LINE__,
+			 atomic_read(&ep->rma_info.tcw_total_pages),
+			 (1 + (cur_bytes >> PAGE_SHIFT)));
+		scif_rma_destroy_tcw_invalid();
+		__scif_rma_destroy_tcw_ep(ep);
+	}
+	return true;
+}
+
+static void scif_mmu_notifier_release(struct mmu_notifier *mn,
+				      struct mm_struct *mm)
+{
+	struct scif_mmu_notif	*mmn;
+
+	mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
+	scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
+	schedule_work(&scif_info.misc_work);
+}
+
+static void scif_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
+					      struct mm_struct *mm,
+					      unsigned long address)
+{
+	struct scif_mmu_notif	*mmn;
+
+	mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
+	scif_rma_destroy_tcw(mmn, address, PAGE_SIZE);
+}
+
+static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+						     struct mm_struct *mm,
+						     unsigned long start,
+						     unsigned long end)
+{
+	struct scif_mmu_notif	*mmn;
+
+	mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
+	scif_rma_destroy_tcw(mmn, start, end - start);
+}
+
+static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
+						   struct mm_struct *mm,
+						   unsigned long start,
+						   unsigned long end)
+{
+	/*
+	 * Nothing to do here, everything needed was done in
+	 * invalidate_range_start.
+	 */
+}
+
+static const struct mmu_notifier_ops scif_mmu_notifier_ops = {
+	.release = scif_mmu_notifier_release,
+	.clear_flush_young = NULL,
+	.invalidate_page = scif_mmu_notifier_invalidate_page,
+	.invalidate_range_start = scif_mmu_notifier_invalidate_range_start,
+	.invalidate_range_end = scif_mmu_notifier_invalidate_range_end};
+
+static void scif_ep_unregister_mmu_notifier(struct scif_endpt *ep)
+{
+	struct scif_endpt_rma_info *rma = &ep->rma_info;
+	struct scif_mmu_notif *mmn = NULL;
+	struct list_head *item, *tmp;
+
+	mutex_lock(&ep->rma_info.mmn_lock);
+	list_for_each_safe(item, tmp, &rma->mmn_list) {
+		mmn = list_entry(item, struct scif_mmu_notif, list);
+		mmu_notifier_unregister(&mmn->ep_mmu_notifier, mmn->mm);
+		list_del(item);
+		kfree(mmn);
+	}
+	mutex_unlock(&ep->rma_info.mmn_lock);
+}
+
+static void scif_init_mmu_notifier(struct scif_mmu_notif *mmn,
+				   struct mm_struct *mm, struct scif_endpt *ep)
+{
+	mmn->ep = ep;
+	mmn->mm = mm;
+	mmn->ep_mmu_notifier.ops = &scif_mmu_notifier_ops;
+	INIT_LIST_HEAD(&mmn->list);
+	INIT_LIST_HEAD(&mmn->tc_reg_list);
+}
+
+static struct scif_mmu_notif *
+scif_find_mmu_notifier(struct mm_struct *mm, struct scif_endpt_rma_info *rma)
+{
+	struct scif_mmu_notif *mmn;
+	struct list_head *item;
+
+	list_for_each(item, &rma->mmn_list) {
+		mmn = list_entry(item, struct scif_mmu_notif, list);
+		if (mmn->mm == mm)
+			return mmn;
+	}
+	return NULL;
+}
+
+static struct scif_mmu_notif *
+scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
+{
+	struct scif_mmu_notif *mmn
+		 = kzalloc(sizeof(*mmn), GFP_KERNEL);
+
+	if (!mmn)
+		return ERR_PTR(ENOMEM);
+
+	scif_init_mmu_notifier(mmn, current->mm, ep);
+	if (mmu_notifier_register(&mmn->ep_mmu_notifier,
+				  current->mm)) {
+		kfree(mmn);
+		return ERR_PTR(EBUSY);
+	}
+	list_add(&mmn->list, &ep->rma_info.mmn_list);
+	return mmn;
+}
+
+/*
+ * Called from the misc thread to destroy temporary cached windows and
+ * unregister the MMU notifier for the SCIF endpoint.
+ */
+void scif_mmu_notif_handler(struct work_struct *work)
+{
+	struct list_head *pos, *tmpq;
+	struct scif_endpt *ep;
+restart:
+	scif_rma_destroy_tcw_invalid();
+	spin_lock(&scif_info.rmalock);
+	list_for_each_safe(pos, tmpq, &scif_info.mmu_notif_cleanup) {
+		ep = list_entry(pos, struct scif_endpt, mmu_list);
+		list_del(&ep->mmu_list);
+		spin_unlock(&scif_info.rmalock);
+		scif_rma_destroy_tcw_ep(ep);
+		scif_ep_unregister_mmu_notifier(ep);
+		goto restart;
+	}
+	spin_unlock(&scif_info.rmalock);
+}
+
+static bool scif_is_set_reg_cache(int flags)
+{
+	return !!(flags & SCIF_RMA_USECACHE);
+}
+#else
+static struct scif_mmu_notif *
+scif_find_mmu_notifier(struct mm_struct *mm,
+		       struct scif_endpt_rma_info *rma)
+{
+	return NULL;
+}
+
+static struct scif_mmu_notif *
+scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
+{
+	return NULL;
+}
+
+void scif_mmu_notif_handler(struct work_struct *work)
+{
+}
+
+static bool scif_is_set_reg_cache(int flags)
+{
+	return false;
+}
+
+static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
+{
+	return false;
+}
+#endif
+
+/**
+ * scif_register_temp:
+ * @epd: End Point Descriptor.
+ * @addr: virtual address to/from which to copy
+ * @len: length of range to copy
+ * @out_offset: computed offset returned by reference.
+ * @out_window: allocated registered window returned by reference.
+ *
+ * Create a temporary registered window. The peer will not know about this
+ * window. This API is used for scif_vreadfrom()/scif_vwriteto() API's.
+ */
+static int
+scif_register_temp(scif_epd_t epd, unsigned long addr, size_t len, int prot,
+		   off_t *out_offset, struct scif_window **out_window)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err;
+	scif_pinned_pages_t pinned_pages;
+	size_t aligned_len;
+
+	aligned_len = ALIGN(len, PAGE_SIZE);
+
+	err = __scif_pin_pages((void *)(addr & PAGE_MASK),
+			       aligned_len, &prot, 0, &pinned_pages);
+	if (err)
+		return err;
+
+	pinned_pages->prot = prot;
+
+	/* Compute the offset for this registration */
+	err = scif_get_window_offset(ep, 0, 0,
+				     aligned_len >> PAGE_SHIFT,
+				     (s64 *)out_offset);
+	if (err)
+		goto error_unpin;
+
+	/* Allocate and prepare self registration window */
+	*out_window = scif_create_window(ep, aligned_len >> PAGE_SHIFT,
+					*out_offset, true);
+	if (!*out_window) {
+		scif_free_window_offset(ep, NULL, *out_offset);
+		err = -ENOMEM;
+		goto error_unpin;
+	}
+
+	(*out_window)->pinned_pages = pinned_pages;
+	(*out_window)->nr_pages = pinned_pages->nr_pages;
+	(*out_window)->prot = pinned_pages->prot;
+
+	(*out_window)->va_for_temp = addr & PAGE_MASK;
+	err = scif_map_window(ep->remote_dev, *out_window);
+	if (err) {
+		/* Something went wrong! Rollback */
+		scif_destroy_window(ep, *out_window);
+		*out_window = NULL;
+	} else {
+		*out_offset |= (addr - (*out_window)->va_for_temp);
+	}
+	return err;
+error_unpin:
+	if (err)
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	scif_unpin_pages(pinned_pages);
+	return err;
+}
+
+#define SCIF_DMA_TO (3 * HZ)
+
+/*
+ * scif_sync_dma - Program a DMA without an interrupt descriptor
+ *
+ * @dev - The address of the pointer to the device instance used
+ * for DMA registration.
+ * @chan - DMA channel to be used.
+ * @sync_wait: Wait for DMA to complete?
+ *
+ * Return 0 on success and -errno on error.
+ */
+static int scif_sync_dma(struct scif_hw_dev *sdev, struct dma_chan *chan,
+			 bool sync_wait)
+{
+	int err = 0;
+	struct dma_async_tx_descriptor *tx = NULL;
+	enum dma_ctrl_flags flags = DMA_PREP_FENCE;
+	dma_cookie_t cookie;
+	struct dma_device *ddev;
+
+	if (!chan) {
+		err = -EIO;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	ddev = chan->device;
+
+	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+	cookie = tx->tx_submit(tx);
+
+	if (dma_submit_error(cookie)) {
+		err = -ENOMEM;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+	if (!sync_wait) {
+		dma_async_issue_pending(chan);
+	} else {
+		if (dma_sync_wait(chan, cookie) == DMA_COMPLETE) {
+			err = 0;
+		} else {
+			err = -EIO;
+			dev_err(&sdev->dev, "%s %d err %d\n",
+				__func__, __LINE__, err);
+		}
+	}
+release:
+	return err;
+}
+
+static void scif_dma_callback(void *arg)
+{
+	struct completion *done = (struct completion *)arg;
+
+	complete(done);
+}
+
+#define SCIF_DMA_SYNC_WAIT true
+#define SCIF_DMA_POLL BIT(0)
+#define SCIF_DMA_INTR BIT(1)
+
+/*
+ * scif_async_dma - Program a DMA with an interrupt descriptor
+ *
+ * @dev - The address of the pointer to the device instance used
+ * for DMA registration.
+ * @chan - DMA channel to be used.
+ * Return 0 on success and -errno on error.
+ */
+static int scif_async_dma(struct scif_hw_dev *sdev, struct dma_chan *chan)
+{
+	int err = 0;
+	struct dma_device *ddev;
+	struct dma_async_tx_descriptor *tx = NULL;
+	enum dma_ctrl_flags flags = DMA_PREP_INTERRUPT | DMA_PREP_FENCE;
+	DECLARE_COMPLETION_ONSTACK(done_wait);
+	dma_cookie_t cookie;
+	enum dma_status status;
+
+	if (!chan) {
+		err = -EIO;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	ddev = chan->device;
+
+	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+	reinit_completion(&done_wait);
+	tx->callback = scif_dma_callback;
+	tx->callback_param = &done_wait;
+	cookie = tx->tx_submit(tx);
+
+	if (dma_submit_error(cookie)) {
+		err = -ENOMEM;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+	dma_async_issue_pending(chan);
+
+	err = wait_for_completion_timeout(&done_wait, SCIF_DMA_TO);
+	if (!err) {
+		err = -EIO;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+	err = 0;
+	status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
+	if (status != DMA_COMPLETE) {
+		err = -EIO;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+release:
+	return err;
+}
+
+/*
+ * scif_drain_dma_poll - Drain all outstanding DMA operations for a particular
+ * DMA channel via polling.
+ *
+ * @sdev - The SCIF device
+ * @chan - DMA channel
+ * Return 0 on success and -errno on error.
+ */
+static int scif_drain_dma_poll(struct scif_hw_dev *sdev, struct dma_chan *chan)
+{
+	if (!chan)
+		return -EINVAL;
+	return scif_sync_dma(sdev, chan, SCIF_DMA_SYNC_WAIT);
+}
+
+/*
+ * scif_drain_dma_intr - Drain all outstanding DMA operations for a particular
+ * DMA channel via interrupt based blocking wait.
+ *
+ * @sdev - The SCIF device
+ * @chan - DMA channel
+ * Return 0 on success and -errno on error.
+ */
+int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan)
+{
+	if (!chan)
+		return -EINVAL;
+	return scif_async_dma(sdev, chan);
+}
+
+/**
+ * scif_rma_destroy_windows:
+ *
+ * This routine destroys all windows queued for cleanup
+ */
+void scif_rma_destroy_windows(void)
+{
+	struct list_head *item, *tmp;
+	struct scif_window *window;
+	struct scif_endpt *ep;
+	struct dma_chan *chan;
+
+	might_sleep();
+restart:
+	spin_lock(&scif_info.rmalock);
+	list_for_each_safe(item, tmp, &scif_info.rma) {
+		window = list_entry(item, struct scif_window,
+				    list);
+		ep = (struct scif_endpt *)window->ep;
+		chan = ep->rma_info.dma_chan;
+
+		list_del_init(&window->list);
+		spin_unlock(&scif_info.rmalock);
+		if (!chan || !scifdev_alive(ep) ||
+		    !scif_drain_dma_intr(ep->remote_dev->sdev,
+					 ep->rma_info.dma_chan))
+			/* Remove window from global list */
+			window->unreg_state = OP_COMPLETED;
+		else
+			dev_warn(&ep->remote_dev->sdev->dev,
+				 "DMA engine hung?\n");
+		if (window->unreg_state == OP_COMPLETED) {
+			if (window->type == SCIF_WINDOW_SELF)
+				scif_destroy_window(ep, window);
+			else
+				scif_destroy_remote_window(window);
+			atomic_dec(&ep->rma_info.tw_refcount);
+		}
+		goto restart;
+	}
+	spin_unlock(&scif_info.rmalock);
+}
+
+/**
+ * scif_rma_destroy_tcw:
+ *
+ * This routine destroys temporary cached registered windows
+ * which have been queued for cleanup.
+ */
+void scif_rma_destroy_tcw_invalid(void)
+{
+	struct list_head *item, *tmp;
+	struct scif_window *window;
+	struct scif_endpt *ep;
+	struct dma_chan *chan;
+
+	might_sleep();
+restart:
+	spin_lock(&scif_info.rmalock);
+	list_for_each_safe(item, tmp, &scif_info.rma_tc) {
+		window = list_entry(item, struct scif_window, list);
+		ep = (struct scif_endpt *)window->ep;
+		chan = ep->rma_info.dma_chan;
+		list_del_init(&window->list);
+		spin_unlock(&scif_info.rmalock);
+		mutex_lock(&ep->rma_info.rma_lock);
+		if (!chan || !scifdev_alive(ep) ||
+		    !scif_drain_dma_intr(ep->remote_dev->sdev,
+					 ep->rma_info.dma_chan)) {
+			atomic_sub(window->nr_pages,
+				   &ep->rma_info.tcw_total_pages);
+			scif_destroy_window(ep, window);
+			atomic_dec(&ep->rma_info.tcw_refcount);
+		} else {
+			dev_warn(&ep->remote_dev->sdev->dev,
+				 "DMA engine hung?\n");
+		}
+		mutex_unlock(&ep->rma_info.rma_lock);
+		goto restart;
+	}
+	spin_unlock(&scif_info.rmalock);
+}
+
+static inline
+void *_get_local_va(off_t off, struct scif_window *window, size_t len)
+{
+	int page_nr = (off - window->offset) >> PAGE_SHIFT;
+	off_t page_off = off & ~PAGE_MASK;
+	void *va = NULL;
+
+	if (window->type == SCIF_WINDOW_SELF) {
+		struct page **pages = window->pinned_pages->pages;
+
+		va = page_address(pages[page_nr]) + page_off;
+	}
+	return va;
+}
+
+static inline
+void *ioremap_remote(off_t off, struct scif_window *window,
+		     size_t len, struct scif_dev *dev,
+		     struct scif_window_iter *iter)
+{
+	dma_addr_t phys = scif_off_to_dma_addr(window, off, NULL, iter);
+
+	/*
+	 * If the DMA address is not card relative then we need the DMA
+	 * addresses to be an offset into the bar. The aperture base was already
+	 * added so subtract it here since scif_ioremap is going to add it again
+	 */
+	if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
+	    dev->sdev->aper && !dev->sdev->card_rel_da)
+		phys = phys - dev->sdev->aper->pa;
+	return scif_ioremap(phys, len, dev);
+}
+
+static inline void
+iounmap_remote(void *virt, size_t size, struct scif_copy_work *work)
+{
+	scif_iounmap(virt, size, work->remote_dev);
+}
+
+/*
+ * Takes care of ordering issue caused by
+ * 1. Hardware:  Only in the case of cpu copy from mgmt node to card
+ * because of WC memory.
+ * 2. Software: If memcpy reorders copy instructions for optimization.
+ * This could happen at both mgmt node and card.
+ */
+static inline void
+scif_ordered_memcpy_toio(char *dst, const char *src, size_t count)
+{
+	if (!count)
+		return;
+
+	memcpy_toio((void __iomem __force *)dst, src, --count);
+	/* Order the last byte with the previous stores */
+	wmb();
+	*(dst + count) = *(src + count);
+}
+
+static inline void scif_unaligned_cpy_toio(char *dst, const char *src,
+					   size_t count, bool ordered)
+{
+	if (ordered)
+		scif_ordered_memcpy_toio(dst, src, count);
+	else
+		memcpy_toio((void __iomem __force *)dst, src, count);
+}
+
+static inline
+void scif_ordered_memcpy_fromio(char *dst, const char *src, size_t count)
+{
+	if (!count)
+		return;
+
+	memcpy_fromio(dst, (void __iomem __force *)src, --count);
+	/* Order the last byte with the previous loads */
+	rmb();
+	*(dst + count) = *(src + count);
+}
+
+static inline void scif_unaligned_cpy_fromio(char *dst, const char *src,
+					     size_t count, bool ordered)
+{
+	if (ordered)
+		scif_ordered_memcpy_fromio(dst, src, count);
+	else
+		memcpy_fromio(dst, (void __iomem __force *)src, count);
+}
+
+#define SCIF_RMA_ERROR_CODE (~(dma_addr_t)0x0)
+
+/*
+ * scif_off_to_dma_addr:
+ * Obtain the dma_addr given the window and the offset.
+ * @window: Registered window.
+ * @off: Window offset.
+ * @nr_bytes: Return the number of contiguous bytes till next DMA addr index.
+ * @index: Return the index of the dma_addr array found.
+ * @start_off: start offset of index of the dma addr array found.
+ * The nr_bytes provides the callee an estimate of the maximum possible
+ * DMA xfer possible while the index/start_off provide faster lookups
+ * for the next iteration.
+ */
+dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
+				size_t *nr_bytes, struct scif_window_iter *iter)
+{
+	int i, page_nr;
+	s64 start, end;
+	off_t page_off;
+
+	if (window->nr_pages == window->nr_contig_chunks) {
+		page_nr = (off - window->offset) >> PAGE_SHIFT;
+		page_off = off & ~PAGE_MASK;
+
+		if (nr_bytes)
+			*nr_bytes = PAGE_SIZE - page_off;
+		return window->dma_addr[page_nr] | page_off;
+	}
+	if (iter) {
+		i = iter->index;
+		start = iter->offset;
+	} else {
+		i =  0;
+		start =  window->offset;
+	}
+	for (; i < window->nr_contig_chunks; i++) {
+		end = start + (window->num_pages[i] << PAGE_SHIFT);
+		if (off >= start && off < end) {
+			if (iter) {
+				iter->index = i;
+				iter->offset = start;
+			}
+			if (nr_bytes)
+				*nr_bytes = end - off;
+			return (window->dma_addr[i] + (off - start));
+		}
+		start += (window->num_pages[i] << PAGE_SHIFT);
+	}
+	dev_err(scif_info.mdev.this_device,
+		"%s %d BUG. Addr not found? window %p off 0x%llx\n",
+		__func__, __LINE__, window, off);
+	return SCIF_RMA_ERROR_CODE;
+}
+
+/*
+ * Copy between rma window and temporary buffer
+ */
+static void scif_rma_local_cpu_copy(s64 offset, struct scif_window *window,
+				    u8 *temp, size_t rem_len, bool to_temp)
+{
+	void *window_virt;
+	size_t loop_len;
+	int offset_in_page;
+	s64 end_offset;
+
+	offset_in_page = offset & ~PAGE_MASK;
+	loop_len = PAGE_SIZE - offset_in_page;
+
+	if (rem_len < loop_len)
+		loop_len = rem_len;
+
+	window_virt = _get_local_va(offset, window, loop_len);
+	if (!window_virt)
+		return;
+	if (to_temp)
+		memcpy(temp, window_virt, loop_len);
+	else
+		memcpy(window_virt, temp, loop_len);
+
+	offset += loop_len;
+	temp += loop_len;
+	rem_len -= loop_len;
+
+	end_offset = window->offset +
+		(window->nr_pages << PAGE_SHIFT);
+	while (rem_len) {
+		if (offset == end_offset) {
+			window = list_entry_next(window, list);
+			end_offset = window->offset +
+				(window->nr_pages << PAGE_SHIFT);
+		}
+		loop_len = min(PAGE_SIZE, rem_len);
+		window_virt = _get_local_va(offset, window, loop_len);
+		if (!window_virt)
+			return;
+		if (to_temp)
+			memcpy(temp, window_virt, loop_len);
+		else
+			memcpy(window_virt, temp, loop_len);
+		offset	+= loop_len;
+		temp	+= loop_len;
+		rem_len	-= loop_len;
+	}
+}
+
+/**
+ * scif_rma_completion_cb:
+ * @data: RMA cookie
+ *
+ * RMA interrupt completion callback.
+ */
+static void scif_rma_completion_cb(void *data)
+{
+	struct scif_dma_comp_cb *comp_cb = data;
+
+	/* Free DMA Completion CB. */
+	if (comp_cb->dst_window)
+		scif_rma_local_cpu_copy(comp_cb->dst_offset,
+					comp_cb->dst_window,
+					comp_cb->temp_buf +
+					comp_cb->header_padding,
+					comp_cb->len, false);
+	scif_unmap_single(comp_cb->temp_phys, comp_cb->sdev,
+			  SCIF_KMEM_UNALIGNED_BUF_SIZE);
+	if (comp_cb->is_cache)
+		kmem_cache_free(unaligned_cache,
+				comp_cb->temp_buf_to_free);
+	else
+		kfree(comp_cb->temp_buf_to_free);
+}
+
+/* Copies between temporary buffer and offsets provided in work */
+static int
+scif_rma_list_dma_copy_unaligned(struct scif_copy_work *work,
+				 u8 *temp, struct dma_chan *chan,
+				 bool src_local)
+{
+	struct scif_dma_comp_cb *comp_cb = work->comp_cb;
+	dma_addr_t window_dma_addr, temp_dma_addr;
+	dma_addr_t temp_phys = comp_cb->temp_phys;
+	size_t loop_len, nr_contig_bytes = 0, remaining_len = work->len;
+	int offset_in_ca, ret = 0;
+	s64 end_offset, offset;
+	struct scif_window *window;
+	void *window_virt_addr;
+	size_t tail_len;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_device *dev = chan->device;
+	dma_cookie_t cookie;
+
+	if (src_local) {
+		offset = work->dst_offset;
+		window = work->dst_window;
+	} else {
+		offset = work->src_offset;
+		window = work->src_window;
+	}
+
+	offset_in_ca = offset & (L1_CACHE_BYTES - 1);
+	if (offset_in_ca) {
+		loop_len = L1_CACHE_BYTES - offset_in_ca;
+		loop_len = min(loop_len, remaining_len);
+		window_virt_addr = ioremap_remote(offset, window,
+						  loop_len,
+						  work->remote_dev,
+						  NULL);
+		if (!window_virt_addr)
+			return -ENOMEM;
+		if (src_local)
+			scif_unaligned_cpy_toio(window_virt_addr, temp,
+						loop_len,
+						work->ordered &&
+						!(remaining_len - loop_len));
+		else
+			scif_unaligned_cpy_fromio(temp, window_virt_addr,
+						  loop_len, work->ordered &&
+						  !(remaining_len - loop_len));
+		iounmap_remote(window_virt_addr, loop_len, work);
+
+		offset += loop_len;
+		temp += loop_len;
+		temp_phys += loop_len;
+		remaining_len -= loop_len;
+	}
+
+	offset_in_ca = offset & ~PAGE_MASK;
+	end_offset = window->offset +
+		(window->nr_pages << PAGE_SHIFT);
+
+	tail_len = remaining_len & (L1_CACHE_BYTES - 1);
+	remaining_len -= tail_len;
+	while (remaining_len) {
+		if (offset == end_offset) {
+			window = list_entry_next(window, list);
+			end_offset = window->offset +
+				(window->nr_pages << PAGE_SHIFT);
+		}
+		if (scif_is_mgmt_node())
+			temp_dma_addr = temp_phys;
+		else
+			/* Fix if we ever enable IOMMU on the card */
+			temp_dma_addr = (dma_addr_t)virt_to_phys(temp);
+		window_dma_addr = scif_off_to_dma_addr(window, offset,
+						       &nr_contig_bytes,
+						       NULL);
+		loop_len = min(nr_contig_bytes, remaining_len);
+		if (src_local) {
+			if (work->ordered && !tail_len &&
+			    !(remaining_len - loop_len) &&
+			    loop_len != L1_CACHE_BYTES) {
+				/*
+				 * Break up the last chunk of the transfer into
+				 * two steps. if there is no tail to guarantee
+				 * DMA ordering. SCIF_DMA_POLLING inserts
+				 * a status update descriptor in step 1 which
+				 * acts as a double sided synchronization fence
+				 * for the DMA engine to ensure that the last
+				 * cache line in step 2 is updated last.
+				 */
+				/* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
+				tx =
+				dev->device_prep_dma_memcpy(chan,
+							    window_dma_addr,
+							    temp_dma_addr,
+							    loop_len -
+							    L1_CACHE_BYTES,
+							    DMA_PREP_FENCE);
+				if (!tx) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				cookie = tx->tx_submit(tx);
+				if (dma_submit_error(cookie)) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				dma_async_issue_pending(chan);
+				offset += (loop_len - L1_CACHE_BYTES);
+				temp_dma_addr += (loop_len - L1_CACHE_BYTES);
+				window_dma_addr += (loop_len - L1_CACHE_BYTES);
+				remaining_len -= (loop_len - L1_CACHE_BYTES);
+				loop_len = remaining_len;
+
+				/* Step 2) DMA: L1_CACHE_BYTES */
+				tx =
+				dev->device_prep_dma_memcpy(chan,
+							    window_dma_addr,
+							    temp_dma_addr,
+							    loop_len, 0);
+				if (!tx) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				cookie = tx->tx_submit(tx);
+				if (dma_submit_error(cookie)) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				dma_async_issue_pending(chan);
+			} else {
+				tx =
+				dev->device_prep_dma_memcpy(chan,
+							    window_dma_addr,
+							    temp_dma_addr,
+							    loop_len, 0);
+				if (!tx) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				cookie = tx->tx_submit(tx);
+				if (dma_submit_error(cookie)) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				dma_async_issue_pending(chan);
+			}
+		} else {
+			tx = dev->device_prep_dma_memcpy(chan, temp_dma_addr,
+					window_dma_addr, loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+		}
+		if (ret < 0)
+			goto err;
+		offset += loop_len;
+		temp += loop_len;
+		temp_phys += loop_len;
+		remaining_len -= loop_len;
+		offset_in_ca = 0;
+	}
+	if (tail_len) {
+		if (offset == end_offset) {
+			window = list_entry_next(window, list);
+			end_offset = window->offset +
+				(window->nr_pages << PAGE_SHIFT);
+		}
+		window_virt_addr = ioremap_remote(offset, window, tail_len,
+						  work->remote_dev,
+						  NULL);
+		if (!window_virt_addr)
+			return -ENOMEM;
+		/*
+		 * The CPU copy for the tail bytes must be initiated only once
+		 * previous DMA transfers for this endpoint have completed
+		 * to guarantee ordering.
+		 */
+		if (work->ordered) {
+			struct scif_dev *rdev = work->remote_dev;
+
+			ret = scif_drain_dma_intr(rdev->sdev, chan);
+			if (ret)
+				return ret;
+		}
+		if (src_local)
+			scif_unaligned_cpy_toio(window_virt_addr, temp,
+						tail_len, work->ordered);
+		else
+			scif_unaligned_cpy_fromio(temp, window_virt_addr,
+						  tail_len, work->ordered);
+		iounmap_remote(window_virt_addr, tail_len, work);
+	}
+	tx = dev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_INTERRUPT);
+	if (!tx) {
+		ret = -ENOMEM;
+		return ret;
+	}
+	tx->callback = &scif_rma_completion_cb;
+	tx->callback_param = comp_cb;
+	cookie = tx->tx_submit(tx);
+
+	if (dma_submit_error(cookie)) {
+		ret = -ENOMEM;
+		return ret;
+	}
+	dma_async_issue_pending(chan);
+	return 0;
+err:
+	dev_err(scif_info.mdev.this_device,
+		"%s %d Desc Prog Failed ret %d\n",
+		__func__, __LINE__, ret);
+	return ret;
+}
+
+/*
+ * _scif_rma_list_dma_copy_aligned:
+ *
+ * Traverse all the windows and perform DMA copy.
+ */
+static int _scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
+					   struct dma_chan *chan)
+{
+	dma_addr_t src_dma_addr, dst_dma_addr;
+	size_t loop_len, remaining_len, src_contig_bytes = 0;
+	size_t dst_contig_bytes = 0;
+	struct scif_window_iter src_win_iter;
+	struct scif_window_iter dst_win_iter;
+	s64 end_src_offset, end_dst_offset;
+	struct scif_window *src_window = work->src_window;
+	struct scif_window *dst_window = work->dst_window;
+	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+	int ret = 0;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_device *dev = chan->device;
+	dma_cookie_t cookie;
+
+	remaining_len = work->len;
+
+	scif_init_window_iter(src_window, &src_win_iter);
+	scif_init_window_iter(dst_window, &dst_win_iter);
+	end_src_offset = src_window->offset +
+		(src_window->nr_pages << PAGE_SHIFT);
+	end_dst_offset = dst_window->offset +
+		(dst_window->nr_pages << PAGE_SHIFT);
+	while (remaining_len) {
+		if (src_offset == end_src_offset) {
+			src_window = list_entry_next(src_window, list);
+			end_src_offset = src_window->offset +
+				(src_window->nr_pages << PAGE_SHIFT);
+			scif_init_window_iter(src_window, &src_win_iter);
+		}
+		if (dst_offset == end_dst_offset) {
+			dst_window = list_entry_next(dst_window, list);
+			end_dst_offset = dst_window->offset +
+				(dst_window->nr_pages << PAGE_SHIFT);
+			scif_init_window_iter(dst_window, &dst_win_iter);
+		}
+
+		/* compute dma addresses for transfer */
+		src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
+						    &src_contig_bytes,
+						    &src_win_iter);
+		dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
+						    &dst_contig_bytes,
+						    &dst_win_iter);
+		loop_len = min(src_contig_bytes, dst_contig_bytes);
+		loop_len = min(loop_len, remaining_len);
+		if (work->ordered && !(remaining_len - loop_len)) {
+			/*
+			 * Break up the last chunk of the transfer into two
+			 * steps to ensure that the last byte in step 2 is
+			 * updated last.
+			 */
+			/* Step 1) DMA: Body Length - 1 */
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+							 src_dma_addr,
+							 loop_len - 1,
+							 DMA_PREP_FENCE);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			src_offset += (loop_len - 1);
+			dst_offset += (loop_len - 1);
+			src_dma_addr += (loop_len - 1);
+			dst_dma_addr += (loop_len - 1);
+			remaining_len -= (loop_len - 1);
+			loop_len = remaining_len;
+
+			/* Step 2) DMA: 1 BYTES */
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+					src_dma_addr, loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+		} else {
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+					src_dma_addr, loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+		}
+		src_offset += loop_len;
+		dst_offset += loop_len;
+		remaining_len -= loop_len;
+	}
+	return ret;
+err:
+	dev_err(scif_info.mdev.this_device,
+		"%s %d Desc Prog Failed ret %d\n",
+		__func__, __LINE__, ret);
+	return ret;
+}
+
+/*
+ * scif_rma_list_dma_copy_aligned:
+ *
+ * Traverse all the windows and perform DMA copy.
+ */
+static int scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
+					  struct dma_chan *chan)
+{
+	dma_addr_t src_dma_addr, dst_dma_addr;
+	size_t loop_len, remaining_len, tail_len, src_contig_bytes = 0;
+	size_t dst_contig_bytes = 0;
+	int src_cache_off;
+	s64 end_src_offset, end_dst_offset;
+	struct scif_window_iter src_win_iter;
+	struct scif_window_iter dst_win_iter;
+	void *src_virt, *dst_virt;
+	struct scif_window *src_window = work->src_window;
+	struct scif_window *dst_window = work->dst_window;
+	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+	int ret = 0;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_device *dev = chan->device;
+	dma_cookie_t cookie;
+
+	remaining_len = work->len;
+	scif_init_window_iter(src_window, &src_win_iter);
+	scif_init_window_iter(dst_window, &dst_win_iter);
+
+	src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
+	if (src_cache_off != 0) {
+		/* Head */
+		loop_len = L1_CACHE_BYTES - src_cache_off;
+		loop_len = min(loop_len, remaining_len);
+		src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
+		dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
+		if (src_window->type == SCIF_WINDOW_SELF)
+			src_virt = _get_local_va(src_offset, src_window,
+						 loop_len);
+		else
+			src_virt = ioremap_remote(src_offset, src_window,
+						  loop_len,
+						  work->remote_dev, NULL);
+		if (!src_virt)
+			return -ENOMEM;
+		if (dst_window->type == SCIF_WINDOW_SELF)
+			dst_virt = _get_local_va(dst_offset, dst_window,
+						 loop_len);
+		else
+			dst_virt = ioremap_remote(dst_offset, dst_window,
+						  loop_len,
+						  work->remote_dev, NULL);
+		if (!dst_virt) {
+			if (src_window->type != SCIF_WINDOW_SELF)
+				iounmap_remote(src_virt, loop_len, work);
+			return -ENOMEM;
+		}
+		if (src_window->type == SCIF_WINDOW_SELF)
+			scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
+						remaining_len == loop_len ?
+						work->ordered : false);
+		else
+			scif_unaligned_cpy_fromio(dst_virt, src_virt, loop_len,
+						  remaining_len == loop_len ?
+						  work->ordered : false);
+		if (src_window->type != SCIF_WINDOW_SELF)
+			iounmap_remote(src_virt, loop_len, work);
+		if (dst_window->type != SCIF_WINDOW_SELF)
+			iounmap_remote(dst_virt, loop_len, work);
+		src_offset += loop_len;
+		dst_offset += loop_len;
+		remaining_len -= loop_len;
+	}
+
+	end_src_offset = src_window->offset +
+		(src_window->nr_pages << PAGE_SHIFT);
+	end_dst_offset = dst_window->offset +
+		(dst_window->nr_pages << PAGE_SHIFT);
+	tail_len = remaining_len & (L1_CACHE_BYTES - 1);
+	remaining_len -= tail_len;
+	while (remaining_len) {
+		if (src_offset == end_src_offset) {
+			src_window = list_entry_next(src_window, list);
+			end_src_offset = src_window->offset +
+				(src_window->nr_pages << PAGE_SHIFT);
+			scif_init_window_iter(src_window, &src_win_iter);
+		}
+		if (dst_offset == end_dst_offset) {
+			dst_window = list_entry_next(dst_window, list);
+			end_dst_offset = dst_window->offset +
+				(dst_window->nr_pages << PAGE_SHIFT);
+			scif_init_window_iter(dst_window, &dst_win_iter);
+		}
+
+		/* compute dma addresses for transfer */
+		src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
+						    &src_contig_bytes,
+						    &src_win_iter);
+		dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
+						    &dst_contig_bytes,
+						    &dst_win_iter);
+		loop_len = min(src_contig_bytes, dst_contig_bytes);
+		loop_len = min(loop_len, remaining_len);
+		if (work->ordered && !tail_len &&
+		    !(remaining_len - loop_len)) {
+			/*
+			 * Break up the last chunk of the transfer into two
+			 * steps. if there is no tail to gurantee DMA ordering.
+			 * Passing SCIF_DMA_POLLING inserts a status update
+			 * descriptor in step 1 which acts as a double sided
+			 * synchronization fence for the DMA engine to ensure
+			 * that the last cache line in step 2 is updated last.
+			 */
+			/* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+							 src_dma_addr,
+							 loop_len -
+							 L1_CACHE_BYTES,
+							 DMA_PREP_FENCE);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+			src_offset += (loop_len - L1_CACHE_BYTES);
+			dst_offset += (loop_len - L1_CACHE_BYTES);
+			src_dma_addr += (loop_len - L1_CACHE_BYTES);
+			dst_dma_addr += (loop_len - L1_CACHE_BYTES);
+			remaining_len -= (loop_len - L1_CACHE_BYTES);
+			loop_len = remaining_len;
+
+			/* Step 2) DMA: L1_CACHE_BYTES */
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+							 src_dma_addr,
+							 loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+		} else {
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+							 src_dma_addr,
+							 loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+		}
+		src_offset += loop_len;
+		dst_offset += loop_len;
+		remaining_len -= loop_len;
+	}
+	remaining_len = tail_len;
+	if (remaining_len) {
+		loop_len = remaining_len;
+		if (src_offset == end_src_offset)
+			src_window = list_entry_next(src_window, list);
+		if (dst_offset == end_dst_offset)
+			dst_window = list_entry_next(dst_window, list);
+
+		src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
+		dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
+		/*
+		 * The CPU copy for the tail bytes must be initiated only once
+		 * previous DMA transfers for this endpoint have completed to
+		 * guarantee ordering.
+		 */
+		if (work->ordered) {
+			struct scif_dev *rdev = work->remote_dev;
+
+			ret = scif_drain_dma_poll(rdev->sdev, chan);
+			if (ret)
+				return ret;
+		}
+		if (src_window->type == SCIF_WINDOW_SELF)
+			src_virt = _get_local_va(src_offset, src_window,
+						 loop_len);
+		else
+			src_virt = ioremap_remote(src_offset, src_window,
+						  loop_len,
+						  work->remote_dev, NULL);
+		if (!src_virt)
+			return -ENOMEM;
+
+		if (dst_window->type == SCIF_WINDOW_SELF)
+			dst_virt = _get_local_va(dst_offset, dst_window,
+						 loop_len);
+		else
+			dst_virt = ioremap_remote(dst_offset, dst_window,
+						  loop_len,
+						  work->remote_dev, NULL);
+		if (!dst_virt) {
+			if (src_window->type != SCIF_WINDOW_SELF)
+				iounmap_remote(src_virt, loop_len, work);
+			return -ENOMEM;
+		}
+
+		if (src_window->type == SCIF_WINDOW_SELF)
+			scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
+						work->ordered);
+		else
+			scif_unaligned_cpy_fromio(dst_virt, src_virt,
+						  loop_len, work->ordered);
+		if (src_window->type != SCIF_WINDOW_SELF)
+			iounmap_remote(src_virt, loop_len, work);
+
+		if (dst_window->type != SCIF_WINDOW_SELF)
+			iounmap_remote(dst_virt, loop_len, work);
+		remaining_len -= loop_len;
+	}
+	return ret;
+err:
+	dev_err(scif_info.mdev.this_device,
+		"%s %d Desc Prog Failed ret %d\n",
+		__func__, __LINE__, ret);
+	return ret;
+}
+
+/*
+ * scif_rma_list_cpu_copy:
+ *
+ * Traverse all the windows and perform CPU copy.
+ */
+static int scif_rma_list_cpu_copy(struct scif_copy_work *work)
+{
+	void *src_virt, *dst_virt;
+	size_t loop_len, remaining_len;
+	int src_page_off, dst_page_off;
+	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+	struct scif_window *src_window = work->src_window;
+	struct scif_window *dst_window = work->dst_window;
+	s64 end_src_offset, end_dst_offset;
+	int ret = 0;
+	struct scif_window_iter src_win_iter;
+	struct scif_window_iter dst_win_iter;
+
+	remaining_len = work->len;
+
+	scif_init_window_iter(src_window, &src_win_iter);
+	scif_init_window_iter(dst_window, &dst_win_iter);
+	while (remaining_len) {
+		src_page_off = src_offset & ~PAGE_MASK;
+		dst_page_off = dst_offset & ~PAGE_MASK;
+		loop_len = min(PAGE_SIZE -
+			       max(src_page_off, dst_page_off),
+			       remaining_len);
+
+		if (src_window->type == SCIF_WINDOW_SELF)
+			src_virt = _get_local_va(src_offset, src_window,
+						 loop_len);
+		else
+			src_virt = ioremap_remote(src_offset, src_window,
+						  loop_len,
+						  work->remote_dev,
+						  &src_win_iter);
+		if (!src_virt) {
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		if (dst_window->type == SCIF_WINDOW_SELF)
+			dst_virt = _get_local_va(dst_offset, dst_window,
+						 loop_len);
+		else
+			dst_virt = ioremap_remote(dst_offset, dst_window,
+						  loop_len,
+						  work->remote_dev,
+						  &dst_win_iter);
+		if (!dst_virt) {
+			if (src_window->type == SCIF_WINDOW_PEER)
+				iounmap_remote(src_virt, loop_len, work);
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		if (work->loopback) {
+			memcpy(dst_virt, src_virt, loop_len);
+		} else {
+			if (src_window->type == SCIF_WINDOW_SELF)
+				memcpy_toio((void __iomem __force *)dst_virt,
+					    src_virt, loop_len);
+			else
+				memcpy_fromio(dst_virt,
+					      (void __iomem __force *)src_virt,
+					      loop_len);
+		}
+		if (src_window->type == SCIF_WINDOW_PEER)
+			iounmap_remote(src_virt, loop_len, work);
+
+		if (dst_window->type == SCIF_WINDOW_PEER)
+			iounmap_remote(dst_virt, loop_len, work);
+
+		src_offset += loop_len;
+		dst_offset += loop_len;
+		remaining_len -= loop_len;
+		if (remaining_len) {
+			end_src_offset = src_window->offset +
+				(src_window->nr_pages << PAGE_SHIFT);
+			end_dst_offset = dst_window->offset +
+				(dst_window->nr_pages << PAGE_SHIFT);
+			if (src_offset == end_src_offset) {
+				src_window = list_entry_next(src_window, list);
+				scif_init_window_iter(src_window,
+						      &src_win_iter);
+			}
+			if (dst_offset == end_dst_offset) {
+				dst_window = list_entry_next(dst_window, list);
+				scif_init_window_iter(dst_window,
+						      &dst_win_iter);
+			}
+		}
+	}
+error:
+	return ret;
+}
+
+static int scif_rma_list_dma_copy_wrapper(struct scif_endpt *epd,
+					  struct scif_copy_work *work,
+					  struct dma_chan *chan, off_t loffset)
+{
+	int src_cache_off, dst_cache_off;
+	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+	u8 *temp = NULL;
+	bool src_local = true, dst_local = false;
+	struct scif_dma_comp_cb *comp_cb;
+	dma_addr_t src_dma_addr, dst_dma_addr;
+	int err;
+
+	if (is_dma_copy_aligned(chan->device, 1, 1, 1))
+		return _scif_rma_list_dma_copy_aligned(work, chan);
+
+	src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
+	dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1);
+
+	if (dst_cache_off == src_cache_off)
+		return scif_rma_list_dma_copy_aligned(work, chan);
+
+	if (work->loopback)
+		return scif_rma_list_cpu_copy(work);
+	src_dma_addr = __scif_off_to_dma_addr(work->src_window, src_offset);
+	dst_dma_addr = __scif_off_to_dma_addr(work->dst_window, dst_offset);
+	src_local = work->src_window->type == SCIF_WINDOW_SELF;
+	dst_local = work->dst_window->type == SCIF_WINDOW_SELF;
+
+	dst_local = dst_local;
+	/* Allocate dma_completion cb */
+	comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL);
+	if (!comp_cb)
+		goto error;
+
+	work->comp_cb = comp_cb;
+	comp_cb->cb_cookie = comp_cb;
+	comp_cb->dma_completion_func = &scif_rma_completion_cb;
+
+	if (work->len + (L1_CACHE_BYTES << 1) < SCIF_KMEM_UNALIGNED_BUF_SIZE) {
+		comp_cb->is_cache = false;
+		/* Allocate padding bytes to align to a cache line */
+		temp = kmalloc(work->len + (L1_CACHE_BYTES << 1),
+			       GFP_KERNEL);
+		if (!temp)
+			goto free_comp_cb;
+		comp_cb->temp_buf_to_free = temp;
+		/* kmalloc(..) does not guarantee cache line alignment */
+		if (!IS_ALIGNED((u64)temp, L1_CACHE_BYTES))
+			temp = PTR_ALIGN(temp, L1_CACHE_BYTES);
+	} else {
+		comp_cb->is_cache = true;
+		temp = kmem_cache_alloc(unaligned_cache, GFP_KERNEL);
+		if (!temp)
+			goto free_comp_cb;
+		comp_cb->temp_buf_to_free = temp;
+	}
+
+	if (src_local) {
+		temp += dst_cache_off;
+		scif_rma_local_cpu_copy(work->src_offset, work->src_window,
+					temp, work->len, true);
+	} else {
+		comp_cb->dst_window = work->dst_window;
+		comp_cb->dst_offset = work->dst_offset;
+		work->src_offset = work->src_offset - src_cache_off;
+		comp_cb->len = work->len;
+		work->len = ALIGN(work->len + src_cache_off, L1_CACHE_BYTES);
+		comp_cb->header_padding = src_cache_off;
+	}
+	comp_cb->temp_buf = temp;
+
+	err = scif_map_single(&comp_cb->temp_phys, temp,
+			      work->remote_dev, SCIF_KMEM_UNALIGNED_BUF_SIZE);
+	if (err)
+		goto free_temp_buf;
+	comp_cb->sdev = work->remote_dev;
+	if (scif_rma_list_dma_copy_unaligned(work, temp, chan, src_local) < 0)
+		goto free_temp_buf;
+	if (!src_local)
+		work->fence_type = SCIF_DMA_INTR;
+	return 0;
+free_temp_buf:
+	if (comp_cb->is_cache)
+		kmem_cache_free(unaligned_cache, comp_cb->temp_buf_to_free);
+	else
+		kfree(comp_cb->temp_buf_to_free);
+free_comp_cb:
+	kfree(comp_cb);
+error:
+	return -ENOMEM;
+}
+
+/**
+ * scif_rma_copy:
+ * @epd: end point descriptor.
+ * @loffset: offset in local registered address space to/from which to copy
+ * @addr: user virtual address to/from which to copy
+ * @len: length of range to copy
+ * @roffset: offset in remote registered address space to/from which to copy
+ * @flags: flags
+ * @dir: LOCAL->REMOTE or vice versa.
+ * @last_chunk: true if this is the last chunk of a larger transfer
+ *
+ * Validate parameters, check if src/dst registered ranges requested for copy
+ * are valid and initiate either CPU or DMA copy.
+ */
+static int scif_rma_copy(scif_epd_t epd, off_t loffset, unsigned long addr,
+			 size_t len, off_t roffset, int flags,
+			 enum scif_rma_dir dir, bool last_chunk)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_rma_req remote_req;
+	struct scif_rma_req req;
+	struct scif_window *local_window = NULL;
+	struct scif_window *remote_window = NULL;
+	struct scif_copy_work copy_work;
+	bool loopback;
+	int err = 0;
+	struct dma_chan *chan;
+	struct scif_mmu_notif *mmn = NULL;
+	bool cache = false;
+	struct device *spdev;
+
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	if (flags && !(flags & (SCIF_RMA_USECPU | SCIF_RMA_USECACHE |
+				SCIF_RMA_SYNC | SCIF_RMA_ORDERED)))
+		return -EINVAL;
+
+	loopback = scifdev_self(ep->remote_dev) ? true : false;
+	copy_work.fence_type = ((flags & SCIF_RMA_SYNC) && last_chunk) ?
+				SCIF_DMA_POLL : 0;
+	copy_work.ordered = !!((flags & SCIF_RMA_ORDERED) && last_chunk);
+
+	/* Use CPU for Mgmt node <-> Mgmt node copies */
+	if (loopback && scif_is_mgmt_node()) {
+		flags |= SCIF_RMA_USECPU;
+		copy_work.fence_type = 0x0;
+	}
+
+	cache = scif_is_set_reg_cache(flags);
+
+	remote_req.out_window = &remote_window;
+	remote_req.offset = roffset;
+	remote_req.nr_bytes = len;
+	/*
+	 * If transfer is from local to remote then the remote window
+	 * must be writeable and vice versa.
+	 */
+	remote_req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_WRITE : VM_READ;
+	remote_req.type = SCIF_WINDOW_PARTIAL;
+	remote_req.head = &ep->rma_info.remote_reg_list;
+
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		return err;
+	}
+
+	if (addr && cache) {
+		mutex_lock(&ep->rma_info.mmn_lock);
+		mmn = scif_find_mmu_notifier(current->mm, &ep->rma_info);
+		if (!mmn)
+			scif_add_mmu_notifier(current->mm, ep);
+		mutex_unlock(&ep->rma_info.mmn_lock);
+		if (IS_ERR(mmn)) {
+			scif_put_peer_dev(spdev);
+			return PTR_ERR(mmn);
+		}
+		cache = cache && !scif_rma_tc_can_cache(ep, len);
+	}
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (addr) {
+		req.out_window = &local_window;
+		req.nr_bytes = ALIGN(len + (addr & ~PAGE_MASK),
+				     PAGE_SIZE);
+		req.va_for_temp = addr & PAGE_MASK;
+		req.prot = (dir == SCIF_LOCAL_TO_REMOTE ?
+			    VM_READ : VM_WRITE | VM_READ);
+		/* Does a valid local window exist? */
+		if (mmn) {
+			spin_lock(&ep->rma_info.tc_lock);
+			req.head = &mmn->tc_reg_list;
+			err = scif_query_tcw(ep, &req);
+			spin_unlock(&ep->rma_info.tc_lock);
+		}
+		if (!mmn || err) {
+			err = scif_register_temp(epd, req.va_for_temp,
+						 req.nr_bytes, req.prot,
+						 &loffset, &local_window);
+			if (err) {
+				mutex_unlock(&ep->rma_info.rma_lock);
+				goto error;
+			}
+			if (!cache)
+				goto skip_cache;
+			atomic_inc(&ep->rma_info.tcw_refcount);
+			atomic_add_return(local_window->nr_pages,
+					  &ep->rma_info.tcw_total_pages);
+			if (mmn) {
+				spin_lock(&ep->rma_info.tc_lock);
+				scif_insert_tcw(local_window,
+						&mmn->tc_reg_list);
+				spin_unlock(&ep->rma_info.tc_lock);
+			}
+		}
+skip_cache:
+		loffset = local_window->offset +
+				(addr - local_window->va_for_temp);
+	} else {
+		req.out_window = &local_window;
+		req.offset = loffset;
+		/*
+		 * If transfer is from local to remote then the self window
+		 * must be readable and vice versa.
+		 */
+		req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_READ : VM_WRITE;
+		req.nr_bytes = len;
+		req.type = SCIF_WINDOW_PARTIAL;
+		req.head = &ep->rma_info.reg_list;
+		/* Does a valid local window exist? */
+		err = scif_query_window(&req);
+		if (err) {
+			mutex_unlock(&ep->rma_info.rma_lock);
+			goto error;
+		}
+	}
+
+	/* Does a valid remote window exist? */
+	err = scif_query_window(&remote_req);
+	if (err) {
+		mutex_unlock(&ep->rma_info.rma_lock);
+		goto error;
+	}
+
+	/*
+	 * Prepare copy_work for submitting work to the DMA kernel thread
+	 * or CPU copy routine.
+	 */
+	copy_work.len = len;
+	copy_work.loopback = loopback;
+	copy_work.remote_dev = ep->remote_dev;
+	if (dir == SCIF_LOCAL_TO_REMOTE) {
+		copy_work.src_offset = loffset;
+		copy_work.src_window = local_window;
+		copy_work.dst_offset = roffset;
+		copy_work.dst_window = remote_window;
+	} else {
+		copy_work.src_offset = roffset;
+		copy_work.src_window = remote_window;
+		copy_work.dst_offset = loffset;
+		copy_work.dst_window = local_window;
+	}
+
+	if (flags & SCIF_RMA_USECPU) {
+		scif_rma_list_cpu_copy(&copy_work);
+	} else {
+		chan = ep->rma_info.dma_chan;
+		err = scif_rma_list_dma_copy_wrapper(epd, &copy_work,
+						     chan, loffset);
+	}
+	if (addr && !cache)
+		atomic_inc(&ep->rma_info.tw_refcount);
+
+	mutex_unlock(&ep->rma_info.rma_lock);
+
+	if (last_chunk) {
+		struct scif_dev *rdev = ep->remote_dev;
+
+		if (copy_work.fence_type == SCIF_DMA_POLL)
+			err = scif_drain_dma_poll(rdev->sdev,
+						  ep->rma_info.dma_chan);
+		else if (copy_work.fence_type == SCIF_DMA_INTR)
+			err = scif_drain_dma_intr(rdev->sdev,
+						  ep->rma_info.dma_chan);
+	}
+
+	if (addr && !cache)
+		scif_queue_for_cleanup(local_window, &scif_info.rma);
+	scif_put_peer_dev(spdev);
+	return err;
+error:
+	if (err) {
+		if (addr && local_window && !cache)
+			scif_destroy_window(ep, local_window);
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d len 0x%lx\n",
+			__func__, __LINE__, err, len);
+	}
+	scif_put_peer_dev(spdev);
+	return err;
+}
+
+int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len,
+		  off_t roffset, int flags)
+{
+	int err;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI readfrom: ep %p loffset 0x%lx len 0x%lx offset 0x%lx flags 0x%x\n",
+		epd, loffset, len, roffset, flags);
+	if (scif_unaligned(loffset, roffset)) {
+		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+			err = scif_rma_copy(epd, loffset, 0x0,
+					    SCIF_MAX_UNALIGNED_BUF_SIZE,
+					    roffset, flags,
+					    SCIF_REMOTE_TO_LOCAL, false);
+			if (err)
+				goto readfrom_err;
+			loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+		}
+	}
+	err = scif_rma_copy(epd, loffset, 0x0, len,
+			    roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
+readfrom_err:
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_readfrom);
+
+int scif_writeto(scif_epd_t epd, off_t loffset, size_t len,
+		 off_t roffset, int flags)
+{
+	int err;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI writeto: ep %p loffset 0x%lx len 0x%lx roffset 0x%lx flags 0x%x\n",
+		epd, loffset, len, roffset, flags);
+	if (scif_unaligned(loffset, roffset)) {
+		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+			err = scif_rma_copy(epd, loffset, 0x0,
+					    SCIF_MAX_UNALIGNED_BUF_SIZE,
+					    roffset, flags,
+					    SCIF_LOCAL_TO_REMOTE, false);
+			if (err)
+				goto writeto_err;
+			loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+		}
+	}
+	err = scif_rma_copy(epd, loffset, 0x0, len,
+			    roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
+writeto_err:
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_writeto);
+
+int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len,
+		   off_t roffset, int flags)
+{
+	int err;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI vreadfrom: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
+		epd, addr, len, roffset, flags);
+	if (scif_unaligned((off_t __force)addr, roffset)) {
+		if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
+			flags &= ~SCIF_RMA_USECACHE;
+
+		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+			err = scif_rma_copy(epd, 0, (u64)addr,
+					    SCIF_MAX_UNALIGNED_BUF_SIZE,
+					    roffset, flags,
+					    SCIF_REMOTE_TO_LOCAL, false);
+			if (err)
+				goto vreadfrom_err;
+			addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+		}
+	}
+	err = scif_rma_copy(epd, 0, (u64)addr, len,
+			    roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
+vreadfrom_err:
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_vreadfrom);
+
+int scif_vwriteto(scif_epd_t epd, void *addr, size_t len,
+		  off_t roffset, int flags)
+{
+	int err;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI vwriteto: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
+		epd, addr, len, roffset, flags);
+	if (scif_unaligned((off_t __force)addr, roffset)) {
+		if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
+			flags &= ~SCIF_RMA_USECACHE;
+
+		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+			err = scif_rma_copy(epd, 0, (u64)addr,
+					    SCIF_MAX_UNALIGNED_BUF_SIZE,
+					    roffset, flags,
+					    SCIF_LOCAL_TO_REMOTE, false);
+			if (err)
+				goto vwriteto_err;
+			addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+		}
+	}
+	err = scif_rma_copy(epd, 0, (u64)addr, len,
+			    roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
+vwriteto_err:
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_vwriteto);
diff --git a/drivers/misc/mic/scif/scif_epd.c b/drivers/misc/mic/scif/scif_epd.c
index b4bfbb08a..00e5d6d66 100644
--- a/drivers/misc/mic/scif/scif_epd.c
+++ b/drivers/misc/mic/scif/scif_epd.c
@@ -65,14 +65,14 @@ void scif_teardown_ep(void *endpt)
 void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held)
 {
 	if (!eplock_held)
-		spin_lock(&scif_info.eplock);
+		mutex_lock(&scif_info.eplock);
 	spin_lock(&ep->lock);
 	ep->state = SCIFEP_ZOMBIE;
 	spin_unlock(&ep->lock);
 	list_add_tail(&ep->list, &scif_info.zombie);
 	scif_info.nr_zombies++;
 	if (!eplock_held)
-		spin_unlock(&scif_info.eplock);
+		mutex_unlock(&scif_info.eplock);
 	schedule_work(&scif_info.misc_work);
 }
 
@@ -81,16 +81,15 @@ static struct scif_endpt *scif_find_listen_ep(u16 port)
 	struct scif_endpt *ep = NULL;
 	struct list_head *pos, *tmpq;
 
-	spin_lock(&scif_info.eplock);
+	mutex_lock(&scif_info.eplock);
 	list_for_each_safe(pos, tmpq, &scif_info.listen) {
 		ep = list_entry(pos, struct scif_endpt, list);
 		if (ep->port.port == port) {
-			spin_lock(&ep->lock);
-			spin_unlock(&scif_info.eplock);
+			mutex_unlock(&scif_info.eplock);
 			return ep;
 		}
 	}
-	spin_unlock(&scif_info.eplock);
+	mutex_unlock(&scif_info.eplock);
 	return NULL;
 }
 
@@ -99,14 +98,17 @@ void scif_cleanup_zombie_epd(void)
 	struct list_head *pos, *tmpq;
 	struct scif_endpt *ep;
 
-	spin_lock(&scif_info.eplock);
+	mutex_lock(&scif_info.eplock);
 	list_for_each_safe(pos, tmpq, &scif_info.zombie) {
 		ep = list_entry(pos, struct scif_endpt, list);
-		list_del(pos);
-		scif_info.nr_zombies--;
-		kfree(ep);
+		if (scif_rma_ep_can_uninit(ep)) {
+			list_del(pos);
+			scif_info.nr_zombies--;
+			put_iova_domain(&ep->rma_info.iovad);
+			kfree(ep);
+		}
 	}
-	spin_unlock(&scif_info.eplock);
+	mutex_unlock(&scif_info.eplock);
 }
 
 /**
@@ -137,6 +139,8 @@ void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg)
 	if (!ep)
 		/*  Send reject due to no listening ports */
 		goto conreq_sendrej_free;
+	else
+		spin_lock(&ep->lock);
 
 	if (ep->backlog <= ep->conreqcnt) {
 		/*  Send reject due to too many pending requests */
diff --git a/drivers/misc/mic/scif/scif_epd.h b/drivers/misc/mic/scif/scif_epd.h
index 331322a25..1771d7a9b 100644
--- a/drivers/misc/mic/scif/scif_epd.h
+++ b/drivers/misc/mic/scif/scif_epd.h
@@ -96,7 +96,11 @@ struct scif_endpt_qp_info {
  * @conn_port: Connection port
  * @conn_err: Errors during connection
  * @conn_async_state: Async connection
+ * @conn_pend_wq: Used by poll while waiting for incoming connections
  * @conn_list: List of async connection requests
+ * @rma_info: Information for triggering SCIF RMA and DMA operations
+ * @mmu_list: link to list of MMU notifier cleanup work
+ * @anon: anonymous file for use in kernel mode scif poll
  */
 struct scif_endpt {
 	enum scif_epd_state state;
@@ -125,7 +129,11 @@ struct scif_endpt {
 	struct scif_port_id conn_port;
 	int conn_err;
 	int conn_async_state;
+	wait_queue_head_t conn_pend_wq;
 	struct list_head conn_list;
+	struct scif_endpt_rma_info rma_info;
+	struct list_head mmu_list;
+	struct file *anon;
 };
 
 static inline int scifdev_alive(struct scif_endpt *ep)
@@ -133,6 +141,43 @@ static inline int scifdev_alive(struct scif_endpt *ep)
 	return _scifdev_alive(ep->remote_dev);
 }
 
+/*
+ * scif_verify_epd:
+ * ep: SCIF endpoint
+ *
+ * Checks several generic error conditions and returns the
+ * appropriate error.
+ */
+static inline int scif_verify_epd(struct scif_endpt *ep)
+{
+	if (ep->state == SCIFEP_DISCONNECTED)
+		return -ECONNRESET;
+
+	if (ep->state != SCIFEP_CONNECTED)
+		return -ENOTCONN;
+
+	if (!scifdev_alive(ep))
+		return -ENODEV;
+
+	return 0;
+}
+
+static inline int scif_anon_inode_getfile(scif_epd_t epd)
+{
+	epd->anon = anon_inode_getfile("scif", &scif_anon_fops, NULL, 0);
+	if (IS_ERR(epd->anon))
+		return PTR_ERR(epd->anon);
+	return 0;
+}
+
+static inline void scif_anon_inode_fput(scif_epd_t epd)
+{
+	if (epd->anon) {
+		fput(epd->anon);
+		epd->anon = NULL;
+	}
+}
+
 void scif_cleanup_zombie_epd(void);
 void scif_teardown_ep(void *endpt);
 void scif_cleanup_ep_qp(struct scif_endpt *ep);
@@ -157,4 +202,9 @@ void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg);
 void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg);
 int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block);
 int __scif_flush(scif_epd_t epd);
+int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd);
+unsigned int __scif_pollfd(struct file *f, poll_table *wait,
+			   struct scif_endpt *ep);
+int __scif_pin_pages(void *addr, size_t len, int *out_prot,
+		     int map_flags, scif_pinned_pages_t *pages);
 #endif /* SCIF_EPD_H */
diff --git a/drivers/misc/mic/scif/scif_fd.c b/drivers/misc/mic/scif/scif_fd.c
index eccf7e713..f7e826142 100644
--- a/drivers/misc/mic/scif/scif_fd.c
+++ b/drivers/misc/mic/scif/scif_fd.c
@@ -34,6 +34,20 @@ static int scif_fdclose(struct inode *inode, struct file *f)
 	return scif_close(priv);
 }
 
+static int scif_fdmmap(struct file *f, struct vm_area_struct *vma)
+{
+	struct scif_endpt *priv = f->private_data;
+
+	return scif_mmap(vma, priv);
+}
+
+static unsigned int scif_fdpoll(struct file *f, poll_table *wait)
+{
+	struct scif_endpt *priv = f->private_data;
+
+	return __scif_pollfd(f, wait, priv);
+}
+
 static int scif_fdflush(struct file *f, fl_owner_t id)
 {
 	struct scif_endpt *ep = f->private_data;
@@ -140,12 +154,12 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
 		 * Add to the list of user mode eps where the second half
 		 * of the accept is not yet completed.
 		 */
-		spin_lock(&scif_info.eplock);
+		mutex_lock(&scif_info.eplock);
 		list_add_tail(&((*ep)->miacceptlist), &scif_info.uaccept);
 		list_add_tail(&((*ep)->liacceptlist), &priv->li_accept);
 		(*ep)->listenep = priv;
 		priv->acceptcnt++;
-		spin_unlock(&scif_info.eplock);
+		mutex_unlock(&scif_info.eplock);
 
 		return 0;
 	}
@@ -163,7 +177,7 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
 			return -EFAULT;
 
 		/* Remove form the user accept queue */
-		spin_lock(&scif_info.eplock);
+		mutex_lock(&scif_info.eplock);
 		list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
 			tmpep = list_entry(pos,
 					   struct scif_endpt, miacceptlist);
@@ -175,7 +189,7 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
 		}
 
 		if (!fep) {
-			spin_unlock(&scif_info.eplock);
+			mutex_unlock(&scif_info.eplock);
 			return -ENOENT;
 		}
 
@@ -190,9 +204,10 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
 			}
 		}
 
-		spin_unlock(&scif_info.eplock);
+		mutex_unlock(&scif_info.eplock);
 
 		/* Free the resources automatically created from the open. */
+		scif_anon_inode_fput(priv);
 		scif_teardown_ep(priv);
 		scif_add_epd_to_zombie_list(priv, !SCIF_EPLOCK_HELD);
 		f->private_data = newep;
@@ -290,6 +305,157 @@ getnodes_err1:
 getnodes_err2:
 		return err;
 	}
+	case SCIF_REG:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_reg reg;
+		off_t ret;
+
+		if (copy_from_user(&reg, argp, sizeof(reg))) {
+			err = -EFAULT;
+			goto reg_err;
+		}
+		if (reg.flags & SCIF_MAP_KERNEL) {
+			err = -EINVAL;
+			goto reg_err;
+		}
+		ret = scif_register(priv, (void *)reg.addr, reg.len,
+				    reg.offset, reg.prot, reg.flags);
+		if (ret < 0) {
+			err = (int)ret;
+			goto reg_err;
+		}
+
+		if (copy_to_user(&((struct scifioctl_reg __user *)argp)
+				 ->out_offset, &ret, sizeof(reg.out_offset))) {
+			err = -EFAULT;
+			goto reg_err;
+		}
+		err = 0;
+reg_err:
+		scif_err_debug(err, "scif_register");
+		return err;
+	}
+	case SCIF_UNREG:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_unreg unreg;
+
+		if (copy_from_user(&unreg, argp, sizeof(unreg))) {
+			err = -EFAULT;
+			goto unreg_err;
+		}
+		err = scif_unregister(priv, unreg.offset, unreg.len);
+unreg_err:
+		scif_err_debug(err, "scif_unregister");
+		return err;
+	}
+	case SCIF_READFROM:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto readfrom_err;
+		}
+		err = scif_readfrom(priv, copy.loffset, copy.len, copy.roffset,
+				    copy.flags);
+readfrom_err:
+		scif_err_debug(err, "scif_readfrom");
+		return err;
+	}
+	case SCIF_WRITETO:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto writeto_err;
+		}
+		err = scif_writeto(priv, copy.loffset, copy.len, copy.roffset,
+				   copy.flags);
+writeto_err:
+		scif_err_debug(err, "scif_writeto");
+		return err;
+	}
+	case SCIF_VREADFROM:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto vreadfrom_err;
+		}
+		err = scif_vreadfrom(priv, (void __force *)copy.addr, copy.len,
+				     copy.roffset, copy.flags);
+vreadfrom_err:
+		scif_err_debug(err, "scif_vreadfrom");
+		return err;
+	}
+	case SCIF_VWRITETO:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto vwriteto_err;
+		}
+		err = scif_vwriteto(priv, (void __force *)copy.addr, copy.len,
+				    copy.roffset, copy.flags);
+vwriteto_err:
+		scif_err_debug(err, "scif_vwriteto");
+		return err;
+	}
+	case SCIF_FENCE_MARK:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_fence_mark mark;
+		int tmp_mark = 0;
+
+		if (copy_from_user(&mark, argp, sizeof(mark))) {
+			err = -EFAULT;
+			goto fence_mark_err;
+		}
+		err = scif_fence_mark(priv, mark.flags, &tmp_mark);
+		if (err)
+			goto fence_mark_err;
+		if (copy_to_user((void __user *)mark.mark, &tmp_mark,
+				 sizeof(tmp_mark))) {
+			err = -EFAULT;
+			goto fence_mark_err;
+		}
+fence_mark_err:
+		scif_err_debug(err, "scif_fence_mark");
+		return err;
+	}
+	case SCIF_FENCE_WAIT:
+	{
+		struct scif_endpt *priv = f->private_data;
+
+		err = scif_fence_wait(priv, arg);
+		scif_err_debug(err, "scif_fence_wait");
+		return err;
+	}
+	case SCIF_FENCE_SIGNAL:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_fence_signal signal;
+
+		if (copy_from_user(&signal, argp, sizeof(signal))) {
+			err = -EFAULT;
+			goto fence_signal_err;
+		}
+
+		err = scif_fence_signal(priv, signal.loff, signal.lval,
+					signal.roff, signal.rval, signal.flags);
+fence_signal_err:
+		scif_err_debug(err, "scif_fence_signal");
+		return err;
+	}
 	}
 	return -EINVAL;
 }
@@ -298,6 +464,8 @@ const struct file_operations scif_fops = {
 	.open = scif_fdopen,
 	.release = scif_fdclose,
 	.unlocked_ioctl = scif_fdioctl,
+	.mmap = scif_fdmmap,
+	.poll = scif_fdpoll,
 	.flush = scif_fdflush,
 	.owner = THIS_MODULE,
 };
diff --git a/drivers/misc/mic/scif/scif_fence.c b/drivers/misc/mic/scif/scif_fence.c
new file mode 100644
index 000000000..7f2c96f57
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_fence.c
@@ -0,0 +1,771 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+
+#include "scif_main.h"
+
+/**
+ * scif_recv_mark: Handle SCIF_MARK request
+ * @msg:	Interrupt message
+ *
+ * The peer has requested a mark.
+ */
+void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	int mark, err;
+
+	err = _scif_fence_mark(ep, &mark);
+	if (err)
+		msg->uop = SCIF_MARK_NACK;
+	else
+		msg->uop = SCIF_MARK_ACK;
+	msg->payload[0] = ep->remote_ep;
+	msg->payload[2] = mark;
+	scif_nodeqp_send(ep->remote_dev, msg);
+}
+
+/**
+ * scif_recv_mark_resp: Handle SCIF_MARK_(N)ACK messages.
+ * @msg:	Interrupt message
+ *
+ * The peer has responded to a SCIF_MARK message.
+ */
+void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_fence_info *fence_req =
+		(struct scif_fence_info *)msg->payload[1];
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (msg->uop == SCIF_MARK_ACK) {
+		fence_req->state = OP_COMPLETED;
+		fence_req->dma_mark = (int)msg->payload[2];
+	} else {
+		fence_req->state = OP_FAILED;
+	}
+	mutex_unlock(&ep->rma_info.rma_lock);
+	complete(&fence_req->comp);
+}
+
+/**
+ * scif_recv_wait: Handle SCIF_WAIT request
+ * @msg:	Interrupt message
+ *
+ * The peer has requested waiting on a fence.
+ */
+void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_remote_fence_info *fence;
+
+	/*
+	 * Allocate structure for remote fence information and
+	 * send a NACK if the allocation failed. The peer will
+	 * return ENOMEM upon receiving a NACK.
+	 */
+	fence = kmalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence) {
+		msg->payload[0] = ep->remote_ep;
+		msg->uop = SCIF_WAIT_NACK;
+		scif_nodeqp_send(ep->remote_dev, msg);
+		return;
+	}
+
+	/* Prepare the fence request */
+	memcpy(&fence->msg, msg, sizeof(struct scifmsg));
+	INIT_LIST_HEAD(&fence->list);
+
+	/* Insert to the global remote fence request list */
+	mutex_lock(&scif_info.fencelock);
+	atomic_inc(&ep->rma_info.fence_refcount);
+	list_add_tail(&fence->list, &scif_info.fence);
+	mutex_unlock(&scif_info.fencelock);
+
+	schedule_work(&scif_info.misc_work);
+}
+
+/**
+ * scif_recv_wait_resp: Handle SCIF_WAIT_(N)ACK messages.
+ * @msg:	Interrupt message
+ *
+ * The peer has responded to a SCIF_WAIT message.
+ */
+void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_fence_info *fence_req =
+		(struct scif_fence_info *)msg->payload[1];
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (msg->uop == SCIF_WAIT_ACK)
+		fence_req->state = OP_COMPLETED;
+	else
+		fence_req->state = OP_FAILED;
+	mutex_unlock(&ep->rma_info.rma_lock);
+	complete(&fence_req->comp);
+}
+
+/**
+ * scif_recv_sig_local: Handle SCIF_SIG_LOCAL request
+ * @msg:	Interrupt message
+ *
+ * The peer has requested a signal on a local offset.
+ */
+void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	int err;
+
+	err = scif_prog_signal(ep, msg->payload[1], msg->payload[2],
+			       SCIF_WINDOW_SELF);
+	if (err)
+		msg->uop = SCIF_SIG_NACK;
+	else
+		msg->uop = SCIF_SIG_ACK;
+	msg->payload[0] = ep->remote_ep;
+	scif_nodeqp_send(ep->remote_dev, msg);
+}
+
+/**
+ * scif_recv_sig_remote: Handle SCIF_SIGNAL_REMOTE request
+ * @msg:	Interrupt message
+ *
+ * The peer has requested a signal on a remote offset.
+ */
+void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	int err;
+
+	err = scif_prog_signal(ep, msg->payload[1], msg->payload[2],
+			       SCIF_WINDOW_PEER);
+	if (err)
+		msg->uop = SCIF_SIG_NACK;
+	else
+		msg->uop = SCIF_SIG_ACK;
+	msg->payload[0] = ep->remote_ep;
+	scif_nodeqp_send(ep->remote_dev, msg);
+}
+
+/**
+ * scif_recv_sig_resp: Handle SCIF_SIG_(N)ACK messages.
+ * @msg:	Interrupt message
+ *
+ * The peer has responded to a signal request.
+ */
+void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_fence_info *fence_req =
+		(struct scif_fence_info *)msg->payload[3];
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (msg->uop == SCIF_SIG_ACK)
+		fence_req->state = OP_COMPLETED;
+	else
+		fence_req->state = OP_FAILED;
+	mutex_unlock(&ep->rma_info.rma_lock);
+	complete(&fence_req->comp);
+}
+
+static inline void *scif_get_local_va(off_t off, struct scif_window *window)
+{
+	struct page **pages = window->pinned_pages->pages;
+	int page_nr = (off - window->offset) >> PAGE_SHIFT;
+	off_t page_off = off & ~PAGE_MASK;
+
+	return page_address(pages[page_nr]) + page_off;
+}
+
+static void scif_prog_signal_cb(void *arg)
+{
+	struct scif_status *status = arg;
+
+	dma_pool_free(status->ep->remote_dev->signal_pool, status,
+		      status->src_dma_addr);
+}
+
+static int _scif_prog_signal(scif_epd_t epd, dma_addr_t dst, u64 val)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct dma_chan *chan = ep->rma_info.dma_chan;
+	struct dma_device *ddev = chan->device;
+	bool x100 = !is_dma_copy_aligned(chan->device, 1, 1, 1);
+	struct dma_async_tx_descriptor *tx;
+	struct scif_status *status = NULL;
+	dma_addr_t src;
+	dma_cookie_t cookie;
+	int err;
+
+	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto alloc_fail;
+	}
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		err = (int)cookie;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto alloc_fail;
+	}
+	dma_async_issue_pending(chan);
+	if (x100) {
+		/*
+		 * For X100 use the status descriptor to write the value to
+		 * the destination.
+		 */
+		tx = ddev->device_prep_dma_imm_data(chan, dst, val, 0);
+	} else {
+		status = dma_pool_alloc(ep->remote_dev->signal_pool, GFP_KERNEL,
+					&src);
+		if (!status) {
+			err = -ENOMEM;
+			dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+				__func__, __LINE__, err);
+			goto alloc_fail;
+		}
+		status->val = val;
+		status->src_dma_addr = src;
+		status->ep = ep;
+		src += offsetof(struct scif_status, val);
+		tx = ddev->device_prep_dma_memcpy(chan, dst, src, sizeof(val),
+						  DMA_PREP_INTERRUPT);
+	}
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto dma_fail;
+	}
+	if (!x100) {
+		tx->callback = scif_prog_signal_cb;
+		tx->callback_param = status;
+	}
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		err = -EIO;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto dma_fail;
+	}
+	dma_async_issue_pending(chan);
+	return 0;
+dma_fail:
+	if (!x100)
+		dma_pool_free(ep->remote_dev->signal_pool, status,
+			      status->src_dma_addr);
+alloc_fail:
+	return err;
+}
+
+/*
+ * scif_prog_signal:
+ * @epd - Endpoint Descriptor
+ * @offset - registered address to write @val to
+ * @val - Value to be written at @offset
+ * @type - Type of the window.
+ *
+ * Arrange to write a value to the registered offset after ensuring that the
+ * offset provided is indeed valid.
+ */
+int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
+		     enum scif_window_type type)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_window *window = NULL;
+	struct scif_rma_req req;
+	dma_addr_t dst_dma_addr;
+	int err;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	req.out_window = &window;
+	req.offset = offset;
+	req.nr_bytes = sizeof(u64);
+	req.prot = SCIF_PROT_WRITE;
+	req.type = SCIF_WINDOW_SINGLE;
+	if (type == SCIF_WINDOW_SELF)
+		req.head = &ep->rma_info.reg_list;
+	else
+		req.head = &ep->rma_info.remote_reg_list;
+	/* Does a valid window exist? */
+	err = scif_query_window(&req);
+	if (err) {
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto unlock_ret;
+	}
+
+	if (scif_is_mgmt_node() && scifdev_self(ep->remote_dev)) {
+		u64 *dst_virt;
+
+		if (type == SCIF_WINDOW_SELF)
+			dst_virt = scif_get_local_va(offset, window);
+		else
+			dst_virt =
+			scif_get_local_va(offset, (struct scif_window *)
+					  window->peer_window);
+		*dst_virt = val;
+	} else {
+		dst_dma_addr = __scif_off_to_dma_addr(window, offset);
+		err = _scif_prog_signal(epd, dst_dma_addr, val);
+	}
+unlock_ret:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	return err;
+}
+
+static int _scif_fence_wait(scif_epd_t epd, int mark)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	dma_cookie_t cookie = mark & ~SCIF_REMOTE_FENCE;
+	int err;
+
+	/* Wait for DMA callback in scif_fence_mark_cb(..) */
+	err = wait_event_interruptible_timeout(ep->rma_info.markwq,
+					       dma_async_is_tx_complete(
+					       ep->rma_info.dma_chan,
+					       cookie, NULL, NULL) ==
+					       DMA_COMPLETE,
+					       SCIF_NODE_ALIVE_TIMEOUT);
+	if (!err)
+		err = -ETIMEDOUT;
+	else if (err > 0)
+		err = 0;
+	return err;
+}
+
+/**
+ * scif_rma_handle_remote_fences:
+ *
+ * This routine services remote fence requests.
+ */
+void scif_rma_handle_remote_fences(void)
+{
+	struct list_head *item, *tmp;
+	struct scif_remote_fence_info *fence;
+	struct scif_endpt *ep;
+	int mark, err;
+
+	might_sleep();
+	mutex_lock(&scif_info.fencelock);
+	list_for_each_safe(item, tmp, &scif_info.fence) {
+		fence = list_entry(item, struct scif_remote_fence_info,
+				   list);
+		/* Remove fence from global list */
+		list_del(&fence->list);
+
+		/* Initiate the fence operation */
+		ep = (struct scif_endpt *)fence->msg.payload[0];
+		mark = fence->msg.payload[2];
+		err = _scif_fence_wait(ep, mark);
+		if (err)
+			fence->msg.uop = SCIF_WAIT_NACK;
+		else
+			fence->msg.uop = SCIF_WAIT_ACK;
+		fence->msg.payload[0] = ep->remote_ep;
+		scif_nodeqp_send(ep->remote_dev, &fence->msg);
+		kfree(fence);
+		if (!atomic_sub_return(1, &ep->rma_info.fence_refcount))
+			schedule_work(&scif_info.misc_work);
+	}
+	mutex_unlock(&scif_info.fencelock);
+}
+
+static int _scif_send_fence(scif_epd_t epd, int uop, int mark, int *out_mark)
+{
+	int err;
+	struct scifmsg msg;
+	struct scif_fence_info *fence_req;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+	fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
+	if (!fence_req) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	fence_req->state = OP_IN_PROGRESS;
+	init_completion(&fence_req->comp);
+
+	msg.src = ep->port;
+	msg.uop = uop;
+	msg.payload[0] = ep->remote_ep;
+	msg.payload[1] = (u64)fence_req;
+	if (uop == SCIF_WAIT)
+		msg.payload[2] = mark;
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_CONNECTED)
+		err = scif_nodeqp_send(ep->remote_dev, &msg);
+	else
+		err = -ENOTCONN;
+	spin_unlock(&ep->lock);
+	if (err)
+		goto error_free;
+retry:
+	/* Wait for a SCIF_WAIT_(N)ACK message */
+	err = wait_for_completion_timeout(&fence_req->comp,
+					  SCIF_NODE_ALIVE_TIMEOUT);
+	if (!err && scifdev_alive(ep))
+		goto retry;
+	if (!err)
+		err = -ENODEV;
+	if (err > 0)
+		err = 0;
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (err < 0) {
+		if (fence_req->state == OP_IN_PROGRESS)
+			fence_req->state = OP_FAILED;
+	}
+	if (fence_req->state == OP_FAILED && !err)
+		err = -ENOMEM;
+	if (uop == SCIF_MARK && fence_req->state == OP_COMPLETED)
+		*out_mark = SCIF_REMOTE_FENCE | fence_req->dma_mark;
+	mutex_unlock(&ep->rma_info.rma_lock);
+error_free:
+	kfree(fence_req);
+error:
+	return err;
+}
+
+/**
+ * scif_send_fence_mark:
+ * @epd: end point descriptor.
+ * @out_mark: Output DMA mark reported by peer.
+ *
+ * Send a remote fence mark request.
+ */
+static int scif_send_fence_mark(scif_epd_t epd, int *out_mark)
+{
+	return _scif_send_fence(epd, SCIF_MARK, 0, out_mark);
+}
+
+/**
+ * scif_send_fence_wait:
+ * @epd: end point descriptor.
+ * @mark: DMA mark to wait for.
+ *
+ * Send a remote fence wait request.
+ */
+static int scif_send_fence_wait(scif_epd_t epd, int mark)
+{
+	return _scif_send_fence(epd, SCIF_WAIT, mark, NULL);
+}
+
+static int _scif_send_fence_signal_wait(struct scif_endpt *ep,
+					struct scif_fence_info *fence_req)
+{
+	int err;
+
+retry:
+	/* Wait for a SCIF_SIG_(N)ACK message */
+	err = wait_for_completion_timeout(&fence_req->comp,
+					  SCIF_NODE_ALIVE_TIMEOUT);
+	if (!err && scifdev_alive(ep))
+		goto retry;
+	if (!err)
+		err = -ENODEV;
+	if (err > 0)
+		err = 0;
+	if (err < 0) {
+		mutex_lock(&ep->rma_info.rma_lock);
+		if (fence_req->state == OP_IN_PROGRESS)
+			fence_req->state = OP_FAILED;
+		mutex_unlock(&ep->rma_info.rma_lock);
+	}
+	if (fence_req->state == OP_FAILED && !err)
+		err = -ENXIO;
+	return err;
+}
+
+/**
+ * scif_send_fence_signal:
+ * @epd - endpoint descriptor
+ * @loff - local offset
+ * @lval - local value to write to loffset
+ * @roff - remote offset
+ * @rval - remote value to write to roffset
+ * @flags - flags
+ *
+ * Sends a remote fence signal request
+ */
+static int scif_send_fence_signal(scif_epd_t epd, off_t roff, u64 rval,
+				  off_t loff, u64 lval, int flags)
+{
+	int err = 0;
+	struct scifmsg msg;
+	struct scif_fence_info *fence_req;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+	fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
+	if (!fence_req) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	fence_req->state = OP_IN_PROGRESS;
+	init_completion(&fence_req->comp);
+	msg.src = ep->port;
+	if (flags & SCIF_SIGNAL_LOCAL) {
+		msg.uop = SCIF_SIG_LOCAL;
+		msg.payload[0] = ep->remote_ep;
+		msg.payload[1] = roff;
+		msg.payload[2] = rval;
+		msg.payload[3] = (u64)fence_req;
+		spin_lock(&ep->lock);
+		if (ep->state == SCIFEP_CONNECTED)
+			err = scif_nodeqp_send(ep->remote_dev, &msg);
+		else
+			err = -ENOTCONN;
+		spin_unlock(&ep->lock);
+		if (err)
+			goto error_free;
+		err = _scif_send_fence_signal_wait(ep, fence_req);
+		if (err)
+			goto error_free;
+	}
+	fence_req->state = OP_IN_PROGRESS;
+
+	if (flags & SCIF_SIGNAL_REMOTE) {
+		msg.uop = SCIF_SIG_REMOTE;
+		msg.payload[0] = ep->remote_ep;
+		msg.payload[1] = loff;
+		msg.payload[2] = lval;
+		msg.payload[3] = (u64)fence_req;
+		spin_lock(&ep->lock);
+		if (ep->state == SCIFEP_CONNECTED)
+			err = scif_nodeqp_send(ep->remote_dev, &msg);
+		else
+			err = -ENOTCONN;
+		spin_unlock(&ep->lock);
+		if (err)
+			goto error_free;
+		err = _scif_send_fence_signal_wait(ep, fence_req);
+	}
+error_free:
+	kfree(fence_req);
+error:
+	return err;
+}
+
+static void scif_fence_mark_cb(void *arg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)arg;
+
+	wake_up_interruptible(&ep->rma_info.markwq);
+	atomic_dec(&ep->rma_info.fence_refcount);
+}
+
+/*
+ * _scif_fence_mark:
+ *
+ * @epd - endpoint descriptor
+ * Set up a mark for this endpoint and return the value of the mark.
+ */
+int _scif_fence_mark(scif_epd_t epd, int *mark)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct dma_chan *chan = ep->rma_info.dma_chan;
+	struct dma_device *ddev = chan->device;
+	struct dma_async_tx_descriptor *tx;
+	dma_cookie_t cookie;
+	int err;
+
+	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		err = (int)cookie;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	dma_async_issue_pending(chan);
+	tx = ddev->device_prep_dma_interrupt(chan, DMA_PREP_INTERRUPT);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	tx->callback = scif_fence_mark_cb;
+	tx->callback_param = ep;
+	*mark = cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		err = (int)cookie;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	atomic_inc(&ep->rma_info.fence_refcount);
+	dma_async_issue_pending(chan);
+	return 0;
+}
+
+#define SCIF_LOOPB_MAGIC_MARK 0xdead
+
+int scif_fence_mark(scif_epd_t epd, int flags, int *mark)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x\n",
+		ep, flags, *mark);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	/* Invalid flags? */
+	if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER))
+		return -EINVAL;
+
+	/* At least one of init self or peer RMA should be set */
+	if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
+		return -EINVAL;
+
+	/* Exactly one of init self or peer RMA should be set but not both */
+	if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
+		return -EINVAL;
+
+	/*
+	 * Management node loopback does not need to use DMA.
+	 * Return a valid mark to be symmetric.
+	 */
+	if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
+		*mark = SCIF_LOOPB_MAGIC_MARK;
+		return 0;
+	}
+
+	if (flags & SCIF_FENCE_INIT_SELF)
+		err = _scif_fence_mark(epd, mark);
+	else
+		err = scif_send_fence_mark(ep, mark);
+
+	if (err)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x err %d\n",
+		ep, flags, *mark, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_fence_mark);
+
+int scif_fence_wait(scif_epd_t epd, int mark)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI fence_wait: ep %p mark 0x%x\n",
+		ep, mark);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+	/*
+	 * Management node loopback does not need to use DMA.
+	 * The only valid mark provided is 0 so simply
+	 * return success if the mark is valid.
+	 */
+	if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
+		if (mark == SCIF_LOOPB_MAGIC_MARK)
+			return 0;
+		else
+			return -EINVAL;
+	}
+	if (mark & SCIF_REMOTE_FENCE)
+		err = scif_send_fence_wait(epd, mark);
+	else
+		err = _scif_fence_wait(epd, mark);
+	if (err < 0)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_fence_wait);
+
+int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval,
+		      off_t roff, u64 rval, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI fence_signal: ep %p loff 0x%lx lval 0x%llx roff 0x%lx rval 0x%llx flags 0x%x\n",
+		ep, loff, lval, roff, rval, flags);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	/* Invalid flags? */
+	if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER |
+			SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE))
+		return -EINVAL;
+
+	/* At least one of init self or peer RMA should be set */
+	if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
+		return -EINVAL;
+
+	/* Exactly one of init self or peer RMA should be set but not both */
+	if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
+		return -EINVAL;
+
+	/* At least one of SCIF_SIGNAL_LOCAL or SCIF_SIGNAL_REMOTE required */
+	if (!(flags & (SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE)))
+		return -EINVAL;
+
+	/* Only Dword offsets allowed */
+	if ((flags & SCIF_SIGNAL_LOCAL) && (loff & (sizeof(u32) - 1)))
+		return -EINVAL;
+
+	/* Only Dword aligned offsets allowed */
+	if ((flags & SCIF_SIGNAL_REMOTE) && (roff & (sizeof(u32) - 1)))
+		return -EINVAL;
+
+	if (flags & SCIF_FENCE_INIT_PEER) {
+		err = scif_send_fence_signal(epd, roff, rval, loff,
+					     lval, flags);
+	} else {
+		/* Local Signal in Local RAS */
+		if (flags & SCIF_SIGNAL_LOCAL) {
+			err = scif_prog_signal(epd, loff, lval,
+					       SCIF_WINDOW_SELF);
+			if (err)
+				goto error_ret;
+		}
+
+		/* Signal in Remote RAS */
+		if (flags & SCIF_SIGNAL_REMOTE)
+			err = scif_prog_signal(epd, roff,
+					       rval, SCIF_WINDOW_PEER);
+	}
+error_ret:
+	if (err)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_fence_signal);
diff --git a/drivers/misc/mic/scif/scif_main.c b/drivers/misc/mic/scif/scif_main.c
index 6ce851f5c..36d847af1 100644
--- a/drivers/misc/mic/scif/scif_main.c
+++ b/drivers/misc/mic/scif/scif_main.c
@@ -34,6 +34,7 @@ struct scif_info scif_info = {
 };
 
 struct scif_dev *scif_dev;
+struct kmem_cache *unaligned_cache;
 static atomic_t g_loopb_cnt;
 
 /* Runs in the context of intr_wq */
@@ -80,35 +81,6 @@ irqreturn_t scif_intr_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static int scif_peer_probe(struct scif_peer_dev *spdev)
-{
-	struct scif_dev *scifdev = &scif_dev[spdev->dnode];
-
-	mutex_lock(&scif_info.conflock);
-	scif_info.total++;
-	scif_info.maxid = max_t(u32, spdev->dnode, scif_info.maxid);
-	mutex_unlock(&scif_info.conflock);
-	rcu_assign_pointer(scifdev->spdev, spdev);
-
-	/* In the future SCIF kernel client devices will be added here */
-	return 0;
-}
-
-static void scif_peer_remove(struct scif_peer_dev *spdev)
-{
-	struct scif_dev *scifdev = &scif_dev[spdev->dnode];
-
-	/* In the future SCIF kernel client devices will be removed here */
-	spdev = rcu_dereference(scifdev->spdev);
-	if (spdev)
-		RCU_INIT_POINTER(scifdev->spdev, NULL);
-	synchronize_rcu();
-
-	mutex_lock(&scif_info.conflock);
-	scif_info.total--;
-	mutex_unlock(&scif_info.conflock);
-}
-
 static void scif_qp_setup_handler(struct work_struct *work)
 {
 	struct scif_dev *scifdev = container_of(work, struct scif_dev,
@@ -139,20 +111,13 @@ static void scif_qp_setup_handler(struct work_struct *work)
 	}
 }
 
-static int scif_setup_scifdev(struct scif_hw_dev *sdev)
+static int scif_setup_scifdev(void)
 {
+	/* We support a maximum of 129 SCIF nodes including the mgmt node */
+#define MAX_SCIF_NODES 129
 	int i;
-	u8 num_nodes;
-
-	if (sdev->snode) {
-		struct mic_bootparam __iomem *bp = sdev->rdp;
-
-		num_nodes = ioread8(&bp->tot_nodes);
-	} else {
-		struct mic_bootparam *bp = sdev->dp;
+	u8 num_nodes = MAX_SCIF_NODES;
 
-		num_nodes = bp->tot_nodes;
-	}
 	scif_dev = kcalloc(num_nodes, sizeof(*scif_dev), GFP_KERNEL);
 	if (!scif_dev)
 		return -ENOMEM;
@@ -163,7 +128,7 @@ static int scif_setup_scifdev(struct scif_hw_dev *sdev)
 		scifdev->exit = OP_IDLE;
 		init_waitqueue_head(&scifdev->disconn_wq);
 		mutex_init(&scifdev->lock);
-		INIT_WORK(&scifdev->init_msg_work, scif_qp_response_ack);
+		INIT_WORK(&scifdev->peer_add_work, scif_add_peer_device);
 		INIT_DELAYED_WORK(&scifdev->p2p_dwork,
 				  scif_poll_qp_state);
 		INIT_DELAYED_WORK(&scifdev->qp_dwork,
@@ -181,27 +146,21 @@ static void scif_destroy_scifdev(void)
 
 static int scif_probe(struct scif_hw_dev *sdev)
 {
-	struct scif_dev *scifdev;
+	struct scif_dev *scifdev = &scif_dev[sdev->dnode];
 	int rc;
 
 	dev_set_drvdata(&sdev->dev, sdev);
+	scifdev->sdev = sdev;
+
 	if (1 == atomic_add_return(1, &g_loopb_cnt)) {
-		struct scif_dev *loopb_dev;
+		struct scif_dev *loopb_dev = &scif_dev[sdev->snode];
 
-		rc = scif_setup_scifdev(sdev);
-		if (rc)
-			goto exit;
-		scifdev = &scif_dev[sdev->dnode];
-		scifdev->sdev = sdev;
-		loopb_dev = &scif_dev[sdev->snode];
 		loopb_dev->sdev = sdev;
 		rc = scif_setup_loopback_qp(loopb_dev);
 		if (rc)
-			goto free_sdev;
-	} else {
-		scifdev = &scif_dev[sdev->dnode];
-		scifdev->sdev = sdev;
+			goto exit;
 	}
+
 	rc = scif_setup_intr_wq(scifdev);
 	if (rc)
 		goto destroy_loopb;
@@ -237,8 +196,6 @@ destroy_intr:
 destroy_loopb:
 	if (atomic_dec_and_test(&g_loopb_cnt))
 		scif_destroy_loopback_qp(&scif_dev[sdev->snode]);
-free_sdev:
-	scif_destroy_scifdev();
 exit:
 	return rc;
 }
@@ -290,13 +247,6 @@ static void scif_remove(struct scif_hw_dev *sdev)
 	scifdev->sdev = NULL;
 }
 
-static struct scif_peer_driver scif_peer_driver = {
-	.driver.name =	KBUILD_MODNAME,
-	.driver.owner =	THIS_MODULE,
-	.probe = scif_peer_probe,
-	.remove = scif_peer_remove,
-};
-
 static struct scif_hw_dev_id id_table[] = {
 	{ MIC_SCIF_DEV, SCIF_DEV_ANY_ID },
 	{ 0 },
@@ -312,29 +262,54 @@ static struct scif_driver scif_driver = {
 
 static int _scif_init(void)
 {
-	spin_lock_init(&scif_info.eplock);
+	int rc;
+
+	mutex_init(&scif_info.eplock);
+	spin_lock_init(&scif_info.rmalock);
 	spin_lock_init(&scif_info.nb_connect_lock);
 	spin_lock_init(&scif_info.port_lock);
 	mutex_init(&scif_info.conflock);
 	mutex_init(&scif_info.connlock);
+	mutex_init(&scif_info.fencelock);
 	INIT_LIST_HEAD(&scif_info.uaccept);
 	INIT_LIST_HEAD(&scif_info.listen);
 	INIT_LIST_HEAD(&scif_info.zombie);
 	INIT_LIST_HEAD(&scif_info.connected);
 	INIT_LIST_HEAD(&scif_info.disconnected);
+	INIT_LIST_HEAD(&scif_info.rma);
+	INIT_LIST_HEAD(&scif_info.rma_tc);
+	INIT_LIST_HEAD(&scif_info.mmu_notif_cleanup);
+	INIT_LIST_HEAD(&scif_info.fence);
 	INIT_LIST_HEAD(&scif_info.nb_connect_list);
 	init_waitqueue_head(&scif_info.exitwq);
+	scif_info.rma_tc_limit = SCIF_RMA_TEMP_CACHE_LIMIT;
 	scif_info.en_msg_log = 0;
 	scif_info.p2p_enable = 1;
+	rc = scif_setup_scifdev();
+	if (rc)
+		goto error;
+	unaligned_cache = kmem_cache_create("Unaligned_DMA",
+					    SCIF_KMEM_UNALIGNED_BUF_SIZE,
+					    0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!unaligned_cache) {
+		rc = -ENOMEM;
+		goto free_sdev;
+	}
 	INIT_WORK(&scif_info.misc_work, scif_misc_handler);
+	INIT_WORK(&scif_info.mmu_notif_work, scif_mmu_notif_handler);
 	INIT_WORK(&scif_info.conn_work, scif_conn_handler);
 	idr_init(&scif_ports);
 	return 0;
+free_sdev:
+	scif_destroy_scifdev();
+error:
+	return rc;
 }
 
 static void _scif_exit(void)
 {
 	idr_destroy(&scif_ports);
+	kmem_cache_destroy(unaligned_cache);
 	scif_destroy_scifdev();
 }
 
@@ -344,15 +319,13 @@ static int __init scif_init(void)
 	int rc;
 
 	_scif_init();
+	iova_cache_get();
 	rc = scif_peer_bus_init();
 	if (rc)
 		goto exit;
-	rc = scif_peer_register_driver(&scif_peer_driver);
-	if (rc)
-		goto peer_bus_exit;
 	rc = scif_register_driver(&scif_driver);
 	if (rc)
-		goto unreg_scif_peer;
+		goto peer_bus_exit;
 	rc = misc_register(mdev);
 	if (rc)
 		goto unreg_scif;
@@ -360,8 +333,6 @@ static int __init scif_init(void)
 	return 0;
 unreg_scif:
 	scif_unregister_driver(&scif_driver);
-unreg_scif_peer:
-	scif_peer_unregister_driver(&scif_peer_driver);
 peer_bus_exit:
 	scif_peer_bus_exit();
 exit:
@@ -374,8 +345,8 @@ static void __exit scif_exit(void)
 	scif_exit_debugfs();
 	misc_deregister(&scif_info.mdev);
 	scif_unregister_driver(&scif_driver);
-	scif_peer_unregister_driver(&scif_peer_driver);
 	scif_peer_bus_exit();
+	iova_cache_put();
 	_scif_exit();
 }
 
diff --git a/drivers/misc/mic/scif/scif_main.h b/drivers/misc/mic/scif/scif_main.h
index 580bc63e1..a08f0b600 100644
--- a/drivers/misc/mic/scif/scif_main.h
+++ b/drivers/misc/mic/scif/scif_main.h
@@ -22,15 +22,18 @@
 #include <linux/pci.h>
 #include <linux/miscdevice.h>
 #include <linux/dmaengine.h>
+#include <linux/iova.h>
+#include <linux/anon_inodes.h>
 #include <linux/file.h>
+#include <linux/vmalloc.h>
 #include <linux/scif.h>
-
 #include "../common/mic_dev.h"
 
 #define SCIF_MGMT_NODE 0
 #define SCIF_DEFAULT_WATCHDOG_TO 30
 #define SCIF_NODE_ACCEPT_TIMEOUT (3 * HZ)
 #define SCIF_NODE_ALIVE_TIMEOUT (SCIF_DEFAULT_WATCHDOG_TO * HZ)
+#define SCIF_RMA_TEMP_CACHE_LIMIT 0x20000
 
 /*
  * Generic state used for certain node QP message exchanges
@@ -73,13 +76,21 @@ enum scif_msg_state {
  * @loopb_work: Used for submitting work to loopb_wq
  * @loopb_recv_q: List of messages received on the loopb_wq
  * @card_initiated_exit: set when the card has initiated the exit
+ * @rmalock: Synchronize access to RMA operations
+ * @fencelock: Synchronize access to list of remote fences requested.
+ * @rma: List of temporary registered windows to be destroyed.
+ * @rma_tc: List of temporary registered & cached Windows to be destroyed
+ * @fence: List of remote fence requests
+ * @mmu_notif_work: Work for registration caching MMU notifier workqueue
+ * @mmu_notif_cleanup: List of temporary cached windows for reg cache
+ * @rma_tc_limit: RMA temporary cache limit
  */
 struct scif_info {
 	u8 nodeid;
 	u8 maxid;
 	u8 total;
 	u32 nr_zombies;
-	spinlock_t eplock;
+	struct mutex eplock;
 	struct mutex connlock;
 	spinlock_t nb_connect_lock;
 	spinlock_t port_lock;
@@ -102,6 +113,14 @@ struct scif_info {
 	struct work_struct loopb_work;
 	struct list_head loopb_recv_q;
 	bool card_initiated_exit;
+	spinlock_t rmalock;
+	struct mutex fencelock;
+	struct list_head rma;
+	struct list_head rma_tc;
+	struct list_head fence;
+	struct work_struct mmu_notif_work;
+	struct list_head mmu_notif_cleanup;
+	unsigned long rma_tc_limit;
 };
 
 /*
@@ -139,7 +158,7 @@ struct scif_p2p_info {
  * @db: doorbell the peer will trigger to generate an interrupt on self
  * @rdb: Doorbell to trigger on the peer to generate an interrupt on the peer
  * @cookie: Cookie received while registering the interrupt handler
- * init_msg_work: work scheduled for SCIF_INIT message processing
+ * @peer_add_work: Work for handling device_add for peer devices
  * @p2p_dwork: Delayed work to enable polling for P2P state
  * @qp_dwork: Delayed work for enabling polling for remote QP information
  * @p2p_retry: Number of times to retry polling of P2P state
@@ -152,6 +171,8 @@ struct scif_p2p_info {
  * @disconn_rescnt: Keeps track of number of node remove requests sent
  * @exit: Status of exit message
  * @qp_dma_addr: Queue pair DMA address passed to the peer
+ * @dma_ch_idx: Round robin index for DMA channels
+ * @signal_pool: DMA pool used for scheduling scif_fence_signal DMA's
 */
 struct scif_dev {
 	u8 node;
@@ -165,7 +186,7 @@ struct scif_dev {
 	int db;
 	int rdb;
 	struct mic_irq *cookie;
-	struct work_struct init_msg_work;
+	struct work_struct peer_add_work;
 	struct delayed_work p2p_dwork;
 	struct delayed_work qp_dwork;
 	int p2p_retry;
@@ -178,17 +199,25 @@ struct scif_dev {
 	atomic_t disconn_rescnt;
 	enum scif_msg_state exit;
 	dma_addr_t qp_dma_addr;
+	int dma_ch_idx;
+	struct dma_pool *signal_pool;
 };
 
+extern bool scif_reg_cache_enable;
+extern bool scif_ulimit_check;
 extern struct scif_info scif_info;
 extern struct idr scif_ports;
+extern struct bus_type scif_peer_bus;
 extern struct scif_dev *scif_dev;
 extern const struct file_operations scif_fops;
+extern const struct file_operations scif_anon_fops;
 
 /* Size of the RB for the Node QP */
 #define SCIF_NODE_QP_SIZE 0x10000
 
 #include "scif_nodeqp.h"
+#include "scif_rma.h"
+#include "scif_rma_list.h"
 
 /*
  * scifdev_self:
diff --git a/drivers/misc/mic/scif/scif_map.h b/drivers/misc/mic/scif/scif_map.h
index 20e50b4e1..3e86360ba 100644
--- a/drivers/misc/mic/scif/scif_map.h
+++ b/drivers/misc/mic/scif/scif_map.h
@@ -80,7 +80,7 @@ scif_unmap_single(dma_addr_t local, struct scif_dev *scifdev,
 		  size_t size)
 {
 	if (!scifdev_self(scifdev)) {
-		if (scifdev_is_p2p(scifdev) && local > scifdev->base_addr)
+		if (scifdev_is_p2p(scifdev))
 			local = local - scifdev->base_addr;
 		dma_unmap_single(&scifdev->sdev->dev, local,
 				 size, DMA_BIDIRECTIONAL);
@@ -110,4 +110,27 @@ scif_iounmap(void *virt, size_t len, struct scif_dev *scifdev)
 		sdev->hw_ops->iounmap(sdev, (void __force __iomem *)virt);
 	}
 }
+
+static __always_inline int
+scif_map_page(dma_addr_t *dma_handle, struct page *page,
+	      struct scif_dev *scifdev)
+{
+	int err = 0;
+
+	if (scifdev_self(scifdev)) {
+		*dma_handle = page_to_phys(page);
+	} else {
+		struct scif_hw_dev *sdev = scifdev->sdev;
+		*dma_handle = dma_map_page(&sdev->dev,
+					   page, 0x0, PAGE_SIZE,
+					   DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(&sdev->dev, *dma_handle))
+			err = -ENOMEM;
+		else if (scifdev_is_p2p(scifdev))
+			*dma_handle = *dma_handle + scifdev->base_addr;
+	}
+	if (err)
+		*dma_handle = 0;
+	return err;
+}
 #endif  /* SCIF_MAP_H */
diff --git a/drivers/misc/mic/scif/scif_mmap.c b/drivers/misc/mic/scif/scif_mmap.c
new file mode 100644
index 000000000..49cb8f7b4
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_mmap.c
@@ -0,0 +1,699 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+
+/*
+ * struct scif_vma_info - Information about a remote memory mapping
+ *			  created via scif_mmap(..)
+ * @vma: VM area struct
+ * @list: link to list of active vmas
+ */
+struct scif_vma_info {
+	struct vm_area_struct *vma;
+	struct list_head list;
+};
+
+void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	struct scif_window *recv_window =
+		(struct scif_window *)msg->payload[0];
+	struct scif_endpt *ep;
+
+	ep = (struct scif_endpt *)recv_window->ep;
+	req.out_window = &window;
+	req.offset = recv_window->offset;
+	req.prot = recv_window->prot;
+	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
+	req.type = SCIF_WINDOW_FULL;
+	req.head = &ep->rma_info.reg_list;
+	msg->payload[0] = ep->remote_ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	if (scif_query_window(&req)) {
+		dev_err(&scifdev->sdev->dev,
+			"%s %d -ENXIO\n", __func__, __LINE__);
+		msg->uop = SCIF_UNREGISTER_ACK;
+		goto error;
+	}
+
+	scif_put_window(window, window->nr_pages);
+
+	if (!window->ref_count) {
+		atomic_inc(&ep->rma_info.tw_refcount);
+		ep->rma_info.async_list_del = 1;
+		list_del_init(&window->list);
+		scif_free_window_offset(ep, window, window->offset);
+	}
+error:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (window && !window->ref_count)
+		scif_queue_for_cleanup(window, &scif_info.rma);
+}
+
+/*
+ * Remove valid remote memory mappings created via scif_mmap(..) from the
+ * process address space since the remote node is lost
+ */
+static void __scif_zap_mmaps(struct scif_endpt *ep)
+{
+	struct list_head *item;
+	struct scif_vma_info *info;
+	struct vm_area_struct *vma;
+	unsigned long size;
+
+	spin_lock(&ep->lock);
+	list_for_each(item, &ep->rma_info.vma_list) {
+		info = list_entry(item, struct scif_vma_info, list);
+		vma = info->vma;
+		size = vma->vm_end - vma->vm_start;
+		zap_vma_ptes(vma, vma->vm_start, size);
+		dev_dbg(scif_info.mdev.this_device,
+			"%s ep %p zap vma %p size 0x%lx\n",
+			__func__, ep, info->vma, size);
+	}
+	spin_unlock(&ep->lock);
+}
+
+/*
+ * Traverse the list of endpoints for a particular remote node and
+ * zap valid remote memory mappings since the remote node is lost
+ */
+static void _scif_zap_mmaps(int node, struct list_head *head)
+{
+	struct scif_endpt *ep;
+	struct list_head *item;
+
+	mutex_lock(&scif_info.connlock);
+	list_for_each(item, head) {
+		ep = list_entry(item, struct scif_endpt, list);
+		if (ep->remote_dev->node == node)
+			__scif_zap_mmaps(ep);
+	}
+	mutex_unlock(&scif_info.connlock);
+}
+
+/*
+ * Wrapper for removing remote memory mappings for a particular node. This API
+ * is called by peer nodes as part of handling a lost node.
+ */
+void scif_zap_mmaps(int node)
+{
+	_scif_zap_mmaps(node, &scif_info.connected);
+	_scif_zap_mmaps(node, &scif_info.disconnected);
+}
+
+/*
+ * This API is only called while handling a lost node:
+ * a) Remote node is dead.
+ * b) Remote memory mappings have been zapped
+ * So we can traverse the remote_reg_list without any locks. Since
+ * the window has not yet been unregistered we can drop the ref count
+ * and queue it to the cleanup thread.
+ */
+static void __scif_cleanup_rma_for_zombies(struct scif_endpt *ep)
+{
+	struct list_head *pos, *tmp;
+	struct scif_window *window;
+
+	list_for_each_safe(pos, tmp, &ep->rma_info.remote_reg_list) {
+		window = list_entry(pos, struct scif_window, list);
+		if (window->ref_count)
+			scif_put_window(window, window->nr_pages);
+		else
+			dev_err(scif_info.mdev.this_device,
+				"%s %d unexpected\n",
+				__func__, __LINE__);
+		if (!window->ref_count) {
+			atomic_inc(&ep->rma_info.tw_refcount);
+			list_del_init(&window->list);
+			scif_queue_for_cleanup(window, &scif_info.rma);
+		}
+	}
+}
+
+/* Cleanup remote registration lists for zombie endpoints */
+void scif_cleanup_rma_for_zombies(int node)
+{
+	struct scif_endpt *ep;
+	struct list_head *item;
+
+	mutex_lock(&scif_info.eplock);
+	list_for_each(item, &scif_info.zombie) {
+		ep = list_entry(item, struct scif_endpt, list);
+		if (ep->remote_dev && ep->remote_dev->node == node)
+			__scif_cleanup_rma_for_zombies(ep);
+	}
+	mutex_unlock(&scif_info.eplock);
+	flush_work(&scif_info.misc_work);
+}
+
+/* Insert the VMA into the per endpoint VMA list */
+static int scif_insert_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
+{
+	struct scif_vma_info *info;
+	int err = 0;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		err = -ENOMEM;
+		goto done;
+	}
+	info->vma = vma;
+	spin_lock(&ep->lock);
+	list_add_tail(&info->list, &ep->rma_info.vma_list);
+	spin_unlock(&ep->lock);
+done:
+	return err;
+}
+
+/* Delete the VMA from the per endpoint VMA list */
+static void scif_delete_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
+{
+	struct list_head *item;
+	struct scif_vma_info *info;
+
+	spin_lock(&ep->lock);
+	list_for_each(item, &ep->rma_info.vma_list) {
+		info = list_entry(item, struct scif_vma_info, list);
+		if (info->vma == vma) {
+			list_del(&info->list);
+			kfree(info);
+			break;
+		}
+	}
+	spin_unlock(&ep->lock);
+}
+
+static phys_addr_t scif_get_phys(phys_addr_t phys, struct scif_endpt *ep)
+{
+	struct scif_dev *scifdev = (struct scif_dev *)ep->remote_dev;
+	struct scif_hw_dev *sdev = scifdev->sdev;
+	phys_addr_t out_phys, apt_base = 0;
+
+	/*
+	 * If the DMA address is card relative then we need to add the
+	 * aperture base for mmap to work correctly
+	 */
+	if (!scifdev_self(scifdev) && sdev->aper && sdev->card_rel_da)
+		apt_base = sdev->aper->pa;
+	out_phys = apt_base + phys;
+	return out_phys;
+}
+
+int scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
+		   struct scif_range **pages)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	int nr_pages, err, i;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI get_pinned_pages: ep %p offset 0x%lx len 0x%lx\n",
+		ep, offset, len);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	if (!len || (offset < 0) ||
+	    (offset + len < offset) ||
+	    (ALIGN(offset, PAGE_SIZE) != offset) ||
+	    (ALIGN(len, PAGE_SIZE) != len))
+		return -EINVAL;
+
+	nr_pages = len >> PAGE_SHIFT;
+
+	req.out_window = &window;
+	req.offset = offset;
+	req.prot = 0;
+	req.nr_bytes = len;
+	req.type = SCIF_WINDOW_SINGLE;
+	req.head = &ep->rma_info.remote_reg_list;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	err = scif_query_window(&req);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error;
+	}
+
+	/* Allocate scif_range */
+	*pages = kzalloc(sizeof(**pages), GFP_KERNEL);
+	if (!*pages) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	/* Allocate phys addr array */
+	(*pages)->phys_addr = scif_zalloc(nr_pages * sizeof(dma_addr_t));
+	if (!((*pages)->phys_addr)) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) {
+		/* Allocate virtual address array */
+		((*pages)->va = scif_zalloc(nr_pages * sizeof(void *)));
+		if (!(*pages)->va) {
+			err = -ENOMEM;
+			goto error;
+		}
+	}
+	/* Populate the values */
+	(*pages)->cookie = window;
+	(*pages)->nr_pages = nr_pages;
+	(*pages)->prot_flags = window->prot;
+
+	for (i = 0; i < nr_pages; i++) {
+		(*pages)->phys_addr[i] =
+			__scif_off_to_dma_addr(window, offset +
+					       (i * PAGE_SIZE));
+		(*pages)->phys_addr[i] = scif_get_phys((*pages)->phys_addr[i],
+							ep);
+		if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev))
+			(*pages)->va[i] =
+				ep->remote_dev->sdev->aper->va +
+				(*pages)->phys_addr[i] -
+				ep->remote_dev->sdev->aper->pa;
+	}
+
+	scif_get_window(window, nr_pages);
+error:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (err) {
+		if (*pages) {
+			scif_free((*pages)->phys_addr,
+				  nr_pages * sizeof(dma_addr_t));
+			scif_free((*pages)->va,
+				  nr_pages * sizeof(void *));
+			kfree(*pages);
+			*pages = NULL;
+		}
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	}
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_get_pages);
+
+int scif_put_pages(struct scif_range *pages)
+{
+	struct scif_endpt *ep;
+	struct scif_window *window;
+	struct scifmsg msg;
+
+	if (!pages || !pages->cookie)
+		return -EINVAL;
+
+	window = pages->cookie;
+
+	if (!window || window->magic != SCIFEP_MAGIC)
+		return -EINVAL;
+
+	ep = (struct scif_endpt *)window->ep;
+	/*
+	 * If the state is SCIFEP_CONNECTED or SCIFEP_DISCONNECTED then the
+	 * callee should be allowed to release references to the pages,
+	 * else the endpoint was not connected in the first place,
+	 * hence the ENOTCONN.
+	 */
+	if (ep->state != SCIFEP_CONNECTED && ep->state != SCIFEP_DISCONNECTED)
+		return -ENOTCONN;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+
+	scif_put_window(window, pages->nr_pages);
+
+	/* Initiate window destruction if ref count is zero */
+	if (!window->ref_count) {
+		list_del(&window->list);
+		mutex_unlock(&ep->rma_info.rma_lock);
+		scif_drain_dma_intr(ep->remote_dev->sdev,
+				    ep->rma_info.dma_chan);
+		/* Inform the peer about this window being destroyed. */
+		msg.uop = SCIF_MUNMAP;
+		msg.src = ep->port;
+		msg.payload[0] = window->peer_window;
+		/* No error handling for notification messages */
+		scif_nodeqp_send(ep->remote_dev, &msg);
+		/* Destroy this window from the peer's registered AS */
+		scif_destroy_remote_window(window);
+	} else {
+		mutex_unlock(&ep->rma_info.rma_lock);
+	}
+
+	scif_free(pages->phys_addr, pages->nr_pages * sizeof(dma_addr_t));
+	scif_free(pages->va, pages->nr_pages * sizeof(void *));
+	kfree(pages);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(scif_put_pages);
+
+/*
+ * scif_rma_list_mmap:
+ *
+ * Traverse the remote registration list starting from start_window:
+ * 1) Create VtoP mappings via remap_pfn_range(..)
+ * 2) Once step 1) and 2) complete successfully then traverse the range of
+ *    windows again and bump the reference count.
+ * RMA lock must be held.
+ */
+static int scif_rma_list_mmap(struct scif_window *start_window, s64 offset,
+			      int nr_pages, struct vm_area_struct *vma)
+{
+	s64 end_offset, loop_offset = offset;
+	struct scif_window *window = start_window;
+	int loop_nr_pages, nr_pages_left = nr_pages;
+	struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
+	struct list_head *head = &ep->rma_info.remote_reg_list;
+	int i, err = 0;
+	dma_addr_t phys_addr;
+	struct scif_window_iter src_win_iter;
+	size_t contig_bytes = 0;
+
+	might_sleep();
+	list_for_each_entry_from(window, head, list) {
+		end_offset = window->offset +
+			(window->nr_pages << PAGE_SHIFT);
+		loop_nr_pages = min_t(int,
+				      (end_offset - loop_offset) >> PAGE_SHIFT,
+				      nr_pages_left);
+		scif_init_window_iter(window, &src_win_iter);
+		for (i = 0; i < loop_nr_pages; i++) {
+			phys_addr = scif_off_to_dma_addr(window, loop_offset,
+							 &contig_bytes,
+							 &src_win_iter);
+			phys_addr = scif_get_phys(phys_addr, ep);
+			err = remap_pfn_range(vma,
+					      vma->vm_start +
+					      loop_offset - offset,
+					      phys_addr >> PAGE_SHIFT,
+					      PAGE_SIZE,
+					      vma->vm_page_prot);
+			if (err)
+				goto error;
+			loop_offset += PAGE_SIZE;
+		}
+		nr_pages_left -= loop_nr_pages;
+		if (!nr_pages_left)
+			break;
+	}
+	/*
+	 * No more failures expected. Bump up the ref count for all
+	 * the windows. Another traversal from start_window required
+	 * for handling errors encountered across windows during
+	 * remap_pfn_range(..).
+	 */
+	loop_offset = offset;
+	nr_pages_left = nr_pages;
+	window = start_window;
+	head = &ep->rma_info.remote_reg_list;
+	list_for_each_entry_from(window, head, list) {
+		end_offset = window->offset +
+			(window->nr_pages << PAGE_SHIFT);
+		loop_nr_pages = min_t(int,
+				      (end_offset - loop_offset) >> PAGE_SHIFT,
+				      nr_pages_left);
+		scif_get_window(window, loop_nr_pages);
+		nr_pages_left -= loop_nr_pages;
+		loop_offset += (loop_nr_pages << PAGE_SHIFT);
+		if (!nr_pages_left)
+			break;
+	}
+error:
+	if (err)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	return err;
+}
+
+/*
+ * scif_rma_list_munmap:
+ *
+ * Traverse the remote registration list starting from window:
+ * 1) Decrement ref count.
+ * 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer.
+ * RMA lock must be held.
+ */
+static void scif_rma_list_munmap(struct scif_window *start_window,
+				 s64 offset, int nr_pages)
+{
+	struct scifmsg msg;
+	s64 loop_offset = offset, end_offset;
+	int loop_nr_pages, nr_pages_left = nr_pages;
+	struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
+	struct list_head *head = &ep->rma_info.remote_reg_list;
+	struct scif_window *window = start_window, *_window;
+
+	msg.uop = SCIF_MUNMAP;
+	msg.src = ep->port;
+	loop_offset = offset;
+	nr_pages_left = nr_pages;
+	list_for_each_entry_safe_from(window, _window, head, list) {
+		end_offset = window->offset +
+			(window->nr_pages << PAGE_SHIFT);
+		loop_nr_pages = min_t(int,
+				      (end_offset - loop_offset) >> PAGE_SHIFT,
+				      nr_pages_left);
+		scif_put_window(window, loop_nr_pages);
+		if (!window->ref_count) {
+			struct scif_dev *rdev = ep->remote_dev;
+
+			scif_drain_dma_intr(rdev->sdev,
+					    ep->rma_info.dma_chan);
+			/* Inform the peer about this munmap */
+			msg.payload[0] = window->peer_window;
+			/* No error handling for Notification messages. */
+			scif_nodeqp_send(ep->remote_dev, &msg);
+			list_del(&window->list);
+			/* Destroy this window from the peer's registered AS */
+			scif_destroy_remote_window(window);
+		}
+		nr_pages_left -= loop_nr_pages;
+		loop_offset += (loop_nr_pages << PAGE_SHIFT);
+		if (!nr_pages_left)
+			break;
+	}
+}
+
+/*
+ * The private data field of each VMA used to mmap a remote window
+ * points to an instance of struct vma_pvt
+ */
+struct vma_pvt {
+	struct scif_endpt *ep;	/* End point for remote window */
+	s64 offset;		/* offset within remote window */
+	bool valid_offset;	/* offset is valid only if the original
+				 * mmap request was for a single page
+				 * else the offset within the vma is
+				 * the correct offset
+				 */
+	struct kref ref;
+};
+
+static void vma_pvt_release(struct kref *ref)
+{
+	struct vma_pvt *vmapvt = container_of(ref, struct vma_pvt, ref);
+
+	kfree(vmapvt);
+}
+
+/**
+ * scif_vma_open - VMA open driver callback
+ * @vma: VMM memory area.
+ * The open method is called by the kernel to allow the subsystem implementing
+ * the VMA to initialize the area. This method is invoked any time a new
+ * reference to the VMA is made (when a process forks, for example).
+ * The one exception happens when the VMA is first created by mmap;
+ * in this case, the driver's mmap method is called instead.
+ * This function is also invoked when an existing VMA is split by the kernel
+ * due to a call to munmap on a subset of the VMA resulting in two VMAs.
+ * The kernel invokes this function only on one of the two VMAs.
+ */
+static void scif_vma_open(struct vm_area_struct *vma)
+{
+	struct vma_pvt *vmapvt = vma->vm_private_data;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI vma open: vma_start 0x%lx vma_end 0x%lx\n",
+		vma->vm_start, vma->vm_end);
+	scif_insert_vma(vmapvt->ep, vma);
+	kref_get(&vmapvt->ref);
+}
+
+/**
+ * scif_munmap - VMA close driver callback.
+ * @vma: VMM memory area.
+ * When an area is destroyed, the kernel calls its close operation.
+ * Note that there's no usage count associated with VMA's; the area
+ * is opened and closed exactly once by each process that uses it.
+ */
+static void scif_munmap(struct vm_area_struct *vma)
+{
+	struct scif_endpt *ep;
+	struct vma_pvt *vmapvt = vma->vm_private_data;
+	int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	s64 offset;
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	int err;
+
+	might_sleep();
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n",
+		vma->vm_start, vma->vm_end);
+	ep = vmapvt->ep;
+	offset = vmapvt->valid_offset ? vmapvt->offset :
+		(vma->vm_pgoff) << PAGE_SHIFT;
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI munmap: ep %p nr_pages 0x%x offset 0x%llx\n",
+		ep, nr_pages, offset);
+	req.out_window = &window;
+	req.offset = offset;
+	req.nr_bytes = vma->vm_end - vma->vm_start;
+	req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
+	req.type = SCIF_WINDOW_PARTIAL;
+	req.head = &ep->rma_info.remote_reg_list;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+
+	err = scif_query_window(&req);
+	if (err)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	else
+		scif_rma_list_munmap(window, offset, nr_pages);
+
+	mutex_unlock(&ep->rma_info.rma_lock);
+	/*
+	 * The kernel probably zeroes these out but we still want
+	 * to clean up our own mess just in case.
+	 */
+	vma->vm_ops = NULL;
+	vma->vm_private_data = NULL;
+	kref_put(&vmapvt->ref, vma_pvt_release);
+	scif_delete_vma(ep, vma);
+}
+
+static const struct vm_operations_struct scif_vm_ops = {
+	.open = scif_vma_open,
+	.close = scif_munmap,
+};
+
+/**
+ * scif_mmap - Map pages in virtual address space to a remote window.
+ * @vma: VMM memory area.
+ * @epd: endpoint descriptor
+ *
+ * Return: Upon successful completion, scif_mmap() returns zero
+ * else an apt error is returned as documented in scif.h
+ */
+int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd)
+{
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	s64 start_offset = vma->vm_pgoff << PAGE_SHIFT;
+	int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	int err;
+	struct vma_pvt *vmapvt;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI mmap: ep %p start_offset 0x%llx nr_pages 0x%x\n",
+		ep, start_offset, nr_pages);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	might_sleep();
+
+	err = scif_insert_vma(ep, vma);
+	if (err)
+		return err;
+
+	vmapvt = kzalloc(sizeof(*vmapvt), GFP_KERNEL);
+	if (!vmapvt) {
+		scif_delete_vma(ep, vma);
+		return -ENOMEM;
+	}
+
+	vmapvt->ep = ep;
+	kref_init(&vmapvt->ref);
+
+	req.out_window = &window;
+	req.offset = start_offset;
+	req.nr_bytes = vma->vm_end - vma->vm_start;
+	req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
+	req.type = SCIF_WINDOW_PARTIAL;
+	req.head = &ep->rma_info.remote_reg_list;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	err = scif_query_window(&req);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unlock;
+	}
+
+	/* Default prot for loopback */
+	if (!scifdev_self(ep->remote_dev))
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+
+	/*
+	 * VM_DONTCOPY - Do not copy this vma on fork
+	 * VM_DONTEXPAND - Cannot expand with mremap()
+	 * VM_RESERVED - Count as reserved_vm like IO
+	 * VM_PFNMAP - Page-ranges managed without "struct page"
+	 * VM_IO - Memory mapped I/O or similar
+	 *
+	 * We do not want to copy this VMA automatically on a fork(),
+	 * expand this VMA due to mremap() or swap out these pages since
+	 * the VMA is actually backed by physical pages in the remote
+	 * node's physical memory and not via a struct page.
+	 */
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
+
+	if (!scifdev_self(ep->remote_dev))
+		vma->vm_flags |= VM_IO | VM_PFNMAP;
+
+	/* Map this range of windows */
+	err = scif_rma_list_mmap(window, start_offset, nr_pages, vma);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unlock;
+	}
+	/* Set up the driver call back */
+	vma->vm_ops = &scif_vm_ops;
+	vma->vm_private_data = vmapvt;
+error_unlock:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (err) {
+		kfree(vmapvt);
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		scif_delete_vma(ep, vma);
+	}
+	return err;
+}
diff --git a/drivers/misc/mic/scif/scif_nm.c b/drivers/misc/mic/scif/scif_nm.c
index 9b4c5382d..79f26a02a 100644
--- a/drivers/misc/mic/scif/scif_nm.c
+++ b/drivers/misc/mic/scif/scif_nm.c
@@ -34,6 +34,7 @@ static void scif_invalidate_ep(int node)
 	list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
 		ep = list_entry(pos, struct scif_endpt, list);
 		if (ep->remote_dev->node == node) {
+			scif_unmap_all_windows(ep);
 			spin_lock(&ep->lock);
 			scif_cleanup_ep_qp(ep);
 			spin_unlock(&ep->lock);
@@ -50,6 +51,7 @@ static void scif_invalidate_ep(int node)
 			wake_up_interruptible(&ep->sendwq);
 			wake_up_interruptible(&ep->recvwq);
 			spin_unlock(&ep->lock);
+			scif_unmap_all_windows(ep);
 		}
 	}
 	mutex_unlock(&scif_info.connlock);
@@ -61,8 +63,8 @@ void scif_free_qp(struct scif_dev *scifdev)
 
 	if (!qp)
 		return;
-	scif_free_coherent((void *)qp->inbound_q.rb_base,
-			   qp->local_buf, scifdev, qp->inbound_q.size);
+	scif_unmap_single(qp->local_buf, scifdev, qp->inbound_q.size);
+	kfree(qp->inbound_q.rb_base);
 	scif_unmap_single(qp->local_qp, scifdev, sizeof(struct scif_qp));
 	kfree(scifdev->qpairs);
 	scifdev->qpairs = NULL;
@@ -125,8 +127,12 @@ void scif_cleanup_scifdev(struct scif_dev *dev)
 		}
 		scif_destroy_intr_wq(dev);
 	}
+	flush_work(&scif_info.misc_work);
 	scif_destroy_p2p(dev);
 	scif_invalidate_ep(dev->node);
+	scif_zap_mmaps(dev->node);
+	scif_cleanup_rma_for_zombies(dev->node);
+	flush_work(&scif_info.misc_work);
 	scif_send_acks(dev);
 	if (!dev->node && scif_info.card_initiated_exit) {
 		/*
@@ -147,14 +153,8 @@ void scif_cleanup_scifdev(struct scif_dev *dev)
 void scif_handle_remove_node(int node)
 {
 	struct scif_dev *scifdev = &scif_dev[node];
-	struct scif_peer_dev *spdev;
-
-	rcu_read_lock();
-	spdev = rcu_dereference(scifdev->spdev);
-	rcu_read_unlock();
-	if (spdev)
-		scif_peer_unregister_device(spdev);
-	else
+
+	if (scif_peer_unregister_device(scifdev))
 		scif_send_acks(scifdev);
 }
 
diff --git a/drivers/misc/mic/scif/scif_nodeqp.c b/drivers/misc/mic/scif/scif_nodeqp.c
index 6dfdae345..c66ca1a58 100644
--- a/drivers/misc/mic/scif/scif_nodeqp.c
+++ b/drivers/misc/mic/scif/scif_nodeqp.c
@@ -105,18 +105,22 @@
 int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset,
 			  int local_size, struct scif_dev *scifdev)
 {
-	void *local_q = NULL;
+	void *local_q = qp->inbound_q.rb_base;
 	int err = 0;
 	u32 tmp_rd = 0;
 
 	spin_lock_init(&qp->send_lock);
 	spin_lock_init(&qp->recv_lock);
 
-	local_q = kzalloc(local_size, GFP_KERNEL);
+	/* Allocate rb only if not already allocated */
 	if (!local_q) {
-		err = -ENOMEM;
-		return err;
+		local_q = kzalloc(local_size, GFP_KERNEL);
+		if (!local_q) {
+			err = -ENOMEM;
+			return err;
+		}
 	}
+
 	err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size);
 	if (err)
 		goto kfree;
@@ -260,6 +264,11 @@ int scif_setup_qp_connect_response(struct scif_dev *scifdev,
 		     r_buf,
 		     get_count_order(remote_size));
 	/*
+	 * Because the node QP may already be processing an INIT message, set
+	 * the read pointer so the cached read offset isn't lost
+	 */
+	qp->remote_qp->local_read = qp->inbound_q.current_read_offset;
+	/*
 	 * resetup the inbound_q now that we know where the
 	 * inbound_read really is.
 	 */
@@ -426,6 +435,21 @@ free_p2p:
 	return NULL;
 }
 
+/* Uninitialize and release resources from a p2p mapping */
+static void scif_deinit_p2p_info(struct scif_dev *scifdev,
+				 struct scif_p2p_info *p2p)
+{
+	struct scif_hw_dev *sdev = scifdev->sdev;
+
+	dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+		     p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL);
+	dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+		     p2p->sg_nentries[SCIF_PPI_APER], DMA_BIDIRECTIONAL);
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+	kfree(p2p);
+}
+
 /**
  * scif_node_connect: Respond to SCIF_NODE_CONNECT interrupt message
  * @dst: Destination node
@@ -468,8 +492,10 @@ static void scif_node_connect(struct scif_dev *scifdev, int dst)
 	if (!p2p_ij)
 		return;
 	p2p_ji = scif_init_p2p_info(dev_j, dev_i);
-	if (!p2p_ji)
+	if (!p2p_ji) {
+		scif_deinit_p2p_info(dev_i, p2p_ij);
 		return;
+	}
 	list_add_tail(&p2p_ij->ppi_list, &dev_i->p2p);
 	list_add_tail(&p2p_ji->ppi_list, &dev_j->p2p);
 
@@ -529,27 +555,6 @@ static void scif_p2p_setup(void)
 	}
 }
 
-void scif_qp_response_ack(struct work_struct *work)
-{
-	struct scif_dev *scifdev = container_of(work, struct scif_dev,
-						init_msg_work);
-	struct scif_peer_dev *spdev;
-
-	/* Drop the INIT message if it has already been received */
-	if (_scifdev_alive(scifdev))
-		return;
-
-	spdev = scif_peer_register_device(scifdev);
-	if (IS_ERR(spdev))
-		return;
-
-	if (scif_is_mgmt_node()) {
-		mutex_lock(&scif_info.conflock);
-		scif_p2p_setup();
-		mutex_unlock(&scif_info.conflock);
-	}
-}
-
 static char *message_types[] = {"BAD",
 				"INIT",
 				"EXIT",
@@ -568,7 +573,29 @@ static char *message_types[] = {"BAD",
 				"DISCNT_ACK",
 				"CLIENT_SENT",
 				"CLIENT_RCVD",
-				"SCIF_GET_NODE_INFO"};
+				"SCIF_GET_NODE_INFO",
+				"REGISTER",
+				"REGISTER_ACK",
+				"REGISTER_NACK",
+				"UNREGISTER",
+				"UNREGISTER_ACK",
+				"UNREGISTER_NACK",
+				"ALLOC_REQ",
+				"ALLOC_GNT",
+				"ALLOC_REJ",
+				"FREE_PHYS",
+				"FREE_VIRT",
+				"MUNMAP",
+				"MARK",
+				"MARK_ACK",
+				"MARK_NACK",
+				"WAIT",
+				"WAIT_ACK",
+				"WAIT_NACK",
+				"SIGNAL_LOCAL",
+				"SIGNAL_REMOTE",
+				"SIG_ACK",
+				"SIG_NACK"};
 
 static void
 scif_display_message(struct scif_dev *scifdev, struct scifmsg *msg,
@@ -662,10 +689,16 @@ int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg)
  *
  * Work queue handler for servicing miscellaneous SCIF tasks.
  * Examples include:
- * 1) Cleanup of zombie endpoints.
+ * 1) Remote fence requests.
+ * 2) Destruction of temporary registered windows
+ *    created during scif_vreadfrom()/scif_vwriteto().
+ * 3) Cleanup of zombie endpoints.
  */
 void scif_misc_handler(struct work_struct *work)
 {
+	scif_rma_handle_remote_fences();
+	scif_rma_destroy_windows();
+	scif_rma_destroy_tcw_invalid();
 	scif_cleanup_zombie_epd();
 }
 
@@ -682,13 +715,14 @@ scif_init(struct scif_dev *scifdev, struct scifmsg *msg)
 	 * address to complete initializing the inbound_q.
 	 */
 	flush_delayed_work(&scifdev->qp_dwork);
-	/*
-	 * Delegate the peer device registration to a workqueue, otherwise if
-	 * SCIF client probe (called during peer device registration) calls
-	 * scif_connect(..), it will block the message processing thread causing
-	 * a deadlock.
-	 */
-	schedule_work(&scifdev->init_msg_work);
+
+	scif_peer_register_device(scifdev);
+
+	if (scif_is_mgmt_node()) {
+		mutex_lock(&scif_info.conflock);
+		scif_p2p_setup();
+		mutex_unlock(&scif_info.conflock);
+	}
 }
 
 /**
@@ -838,13 +872,13 @@ void scif_poll_qp_state(struct work_struct *work)
 				      msecs_to_jiffies(SCIF_NODE_QP_TIMEOUT));
 		return;
 	}
-	scif_peer_register_device(peerdev);
 	return;
 timeout:
 	dev_err(&peerdev->sdev->dev,
 		"%s %d remote node %d offline,  state = 0x%x\n",
 		__func__, __LINE__, peerdev->node, qp->qp_state);
 	qp->remote_qp->qp_state = SCIF_QP_OFFLINE;
+	scif_peer_unregister_device(peerdev);
 	scif_cleanup_scifdev(peerdev);
 }
 
@@ -894,6 +928,9 @@ scif_node_add_ack(struct scif_dev *scifdev, struct scifmsg *msg)
 		goto local_error;
 	peerdev->rdb = msg->payload[2];
 	qp->remote_qp->qp_state = SCIF_QP_ONLINE;
+
+	scif_peer_register_device(peerdev);
+
 	schedule_delayed_work(&peerdev->p2p_dwork, 0);
 	return;
 local_error:
@@ -1007,6 +1044,27 @@ static void (*scif_intr_func[SCIF_MAX_MSG + 1])
 	scif_clientsend,	/* SCIF_CLIENT_SENT */
 	scif_clientrcvd,	/* SCIF_CLIENT_RCVD */
 	scif_get_node_info_resp,/* SCIF_GET_NODE_INFO */
+	scif_recv_reg,		/* SCIF_REGISTER */
+	scif_recv_reg_ack,	/* SCIF_REGISTER_ACK */
+	scif_recv_reg_nack,	/* SCIF_REGISTER_NACK */
+	scif_recv_unreg,	/* SCIF_UNREGISTER */
+	scif_recv_unreg_ack,	/* SCIF_UNREGISTER_ACK */
+	scif_recv_unreg_nack,	/* SCIF_UNREGISTER_NACK */
+	scif_alloc_req,		/* SCIF_ALLOC_REQ */
+	scif_alloc_gnt_rej,	/* SCIF_ALLOC_GNT */
+	scif_alloc_gnt_rej,	/* SCIF_ALLOC_REJ */
+	scif_free_virt,		/* SCIF_FREE_VIRT */
+	scif_recv_munmap,	/* SCIF_MUNMAP */
+	scif_recv_mark,		/* SCIF_MARK */
+	scif_recv_mark_resp,	/* SCIF_MARK_ACK */
+	scif_recv_mark_resp,	/* SCIF_MARK_NACK */
+	scif_recv_wait,		/* SCIF_WAIT */
+	scif_recv_wait_resp,	/* SCIF_WAIT_ACK */
+	scif_recv_wait_resp,	/* SCIF_WAIT_NACK */
+	scif_recv_sig_local,	/* SCIF_SIG_LOCAL */
+	scif_recv_sig_remote,	/* SCIF_SIG_REMOTE */
+	scif_recv_sig_resp,	/* SCIF_SIG_ACK */
+	scif_recv_sig_resp,	/* SCIF_SIG_NACK */
 };
 
 /**
@@ -1169,7 +1227,6 @@ int scif_setup_loopback_qp(struct scif_dev *scifdev)
 	int err = 0;
 	void *local_q;
 	struct scif_qp *qp;
-	struct scif_peer_dev *spdev;
 
 	err = scif_setup_intr_wq(scifdev);
 	if (err)
@@ -1216,15 +1273,11 @@ int scif_setup_loopback_qp(struct scif_dev *scifdev)
 		     &qp->local_write,
 		     local_q, get_count_order(SCIF_NODE_QP_SIZE));
 	scif_info.nodeid = scifdev->node;
-	spdev = scif_peer_register_device(scifdev);
-	if (IS_ERR(spdev)) {
-		err = PTR_ERR(spdev);
-		goto free_local_q;
-	}
+
+	scif_peer_register_device(scifdev);
+
 	scif_info.loopb_dev = scifdev;
 	return err;
-free_local_q:
-	kfree(local_q);
 free_qpairs:
 	kfree(scifdev->qpairs);
 destroy_loopb_wq:
@@ -1243,13 +1296,7 @@ exit:
  */
 int scif_destroy_loopback_qp(struct scif_dev *scifdev)
 {
-	struct scif_peer_dev *spdev;
-
-	rcu_read_lock();
-	spdev = rcu_dereference(scifdev->spdev);
-	rcu_read_unlock();
-	if (spdev)
-		scif_peer_unregister_device(spdev);
+	scif_peer_unregister_device(scifdev);
 	destroy_workqueue(scif_info.loopb_wq);
 	scif_destroy_intr_wq(scifdev);
 	kfree(scifdev->qpairs->outbound_q.rb_base);
diff --git a/drivers/misc/mic/scif/scif_nodeqp.h b/drivers/misc/mic/scif/scif_nodeqp.h
index 6c0ed6783..958962731 100644
--- a/drivers/misc/mic/scif/scif_nodeqp.h
+++ b/drivers/misc/mic/scif/scif_nodeqp.h
@@ -74,7 +74,28 @@
 #define SCIF_CLIENT_SENT 16 /* Notify the peer that data has been written */
 #define SCIF_CLIENT_RCVD 17 /* Notify the peer that data has been read */
 #define SCIF_GET_NODE_INFO 18 /* Get current node mask from the mgmt node*/
-#define SCIF_MAX_MSG SCIF_GET_NODE_INFO
+#define SCIF_REGISTER 19 /* Tell peer about a new registered window */
+#define SCIF_REGISTER_ACK 20 /* Notify peer about unregistration success */
+#define SCIF_REGISTER_NACK 21 /* Notify peer about registration success */
+#define SCIF_UNREGISTER 22 /* Tell peer about unregistering a window */
+#define SCIF_UNREGISTER_ACK 23 /* Notify peer about registration failure */
+#define SCIF_UNREGISTER_NACK 24 /* Notify peer about unregistration failure */
+#define SCIF_ALLOC_REQ 25 /* Request a mapped buffer */
+#define SCIF_ALLOC_GNT 26 /* Notify peer about allocation success */
+#define SCIF_ALLOC_REJ 27 /* Notify peer about allocation failure */
+#define SCIF_FREE_VIRT 28 /* Free previously allocated virtual memory */
+#define SCIF_MUNMAP 29 /* Acknowledgment for a SCIF_MMAP request */
+#define SCIF_MARK 30 /* SCIF Remote Fence Mark Request */
+#define SCIF_MARK_ACK 31 /* SCIF Remote Fence Mark Success */
+#define SCIF_MARK_NACK 32 /* SCIF Remote Fence Mark Failure */
+#define SCIF_WAIT 33 /* SCIF Remote Fence Wait Request */
+#define SCIF_WAIT_ACK 34 /* SCIF Remote Fence Wait Success */
+#define SCIF_WAIT_NACK 35 /* SCIF Remote Fence Wait Failure */
+#define SCIF_SIG_LOCAL 36 /* SCIF Remote Fence Local Signal Request */
+#define SCIF_SIG_REMOTE 37 /* SCIF Remote Fence Remote Signal Request */
+#define SCIF_SIG_ACK 38 /* SCIF Remote Fence Remote Signal Success */
+#define SCIF_SIG_NACK 39 /* SCIF Remote Fence Remote Signal Failure */
+#define SCIF_MAX_MSG SCIF_SIG_NACK
 
 /*
  * struct scifmsg - Node QP message format
@@ -92,6 +113,24 @@ struct scifmsg {
 } __packed;
 
 /*
+ * struct scif_allocmsg - Used with SCIF_ALLOC_REQ to request
+ * the remote note to allocate memory
+ *
+ * phys_addr: Physical address of the buffer
+ * vaddr: Virtual address of the buffer
+ * size: Size of the buffer
+ * state: Current state
+ * allocwq: wait queue for status
+ */
+struct scif_allocmsg {
+	dma_addr_t phys_addr;
+	unsigned long vaddr;
+	size_t size;
+	enum scif_msg_state state;
+	wait_queue_head_t allocwq;
+};
+
+/*
  * struct scif_qp - Node Queue Pair
  *
  * Interesting structure -- a little difficult because we can only
@@ -158,7 +197,6 @@ int scif_setup_qp_connect_response(struct scif_dev *scifdev,
 int scif_setup_loopback_qp(struct scif_dev *scifdev);
 int scif_destroy_loopback_qp(struct scif_dev *scifdev);
 void scif_poll_qp_state(struct work_struct *work);
-void scif_qp_response_ack(struct work_struct *work);
 void scif_destroy_p2p(struct scif_dev *scifdev);
 void scif_send_exit(struct scif_dev *scifdev);
 static inline struct device *scif_get_peer_dev(struct scif_dev *scifdev)
diff --git a/drivers/misc/mic/scif/scif_peer_bus.c b/drivers/misc/mic/scif/scif_peer_bus.c
index 589ae9ad2..6ffa3bdbd 100644
--- a/drivers/misc/mic/scif/scif_peer_bus.c
+++ b/drivers/misc/mic/scif/scif_peer_bus.c
@@ -24,93 +24,152 @@ dev_to_scif_peer(struct device *dev)
 	return container_of(dev, struct scif_peer_dev, dev);
 }
 
-static inline struct scif_peer_driver *
-drv_to_scif_peer(struct device_driver *drv)
-{
-	return container_of(drv, struct scif_peer_driver, driver);
-}
+struct bus_type scif_peer_bus = {
+	.name  = "scif_peer_bus",
+};
 
-static int scif_peer_dev_match(struct device *dv, struct device_driver *dr)
+static void scif_peer_release_dev(struct device *d)
 {
-	return !strncmp(dev_name(dv), dr->name, 4);
+	struct scif_peer_dev *sdev = dev_to_scif_peer(d);
+	struct scif_dev *scifdev = &scif_dev[sdev->dnode];
+
+	scif_cleanup_scifdev(scifdev);
+	kfree(sdev);
 }
 
-static int scif_peer_dev_probe(struct device *d)
+static int scif_peer_initialize_device(struct scif_dev *scifdev)
 {
-	struct scif_peer_dev *dev = dev_to_scif_peer(d);
-	struct scif_peer_driver *drv = drv_to_scif_peer(dev->dev.driver);
+	struct scif_peer_dev *spdev;
+	int ret;
 
-	return drv->probe(dev);
-}
+	spdev = kzalloc(sizeof(*spdev), GFP_KERNEL);
+	if (!spdev) {
+		ret = -ENOMEM;
+		goto err;
+	}
 
-static int scif_peer_dev_remove(struct device *d)
-{
-	struct scif_peer_dev *dev = dev_to_scif_peer(d);
-	struct scif_peer_driver *drv = drv_to_scif_peer(dev->dev.driver);
+	spdev->dev.parent = scifdev->sdev->dev.parent;
+	spdev->dev.release = scif_peer_release_dev;
+	spdev->dnode = scifdev->node;
+	spdev->dev.bus = &scif_peer_bus;
+	dev_set_name(&spdev->dev, "scif_peer-dev%u", spdev->dnode);
+
+	device_initialize(&spdev->dev);
+	get_device(&spdev->dev);
+	rcu_assign_pointer(scifdev->spdev, spdev);
 
-	drv->remove(dev);
+	mutex_lock(&scif_info.conflock);
+	scif_info.total++;
+	scif_info.maxid = max_t(u32, spdev->dnode, scif_info.maxid);
+	mutex_unlock(&scif_info.conflock);
 	return 0;
+err:
+	dev_err(&scifdev->sdev->dev,
+		"dnode %d: initialize_device rc %d\n", scifdev->node, ret);
+	return ret;
 }
 
-static struct bus_type scif_peer_bus = {
-	.name  = "scif_peer_bus",
-	.match = scif_peer_dev_match,
-	.probe = scif_peer_dev_probe,
-	.remove = scif_peer_dev_remove,
-};
-
-int scif_peer_register_driver(struct scif_peer_driver *driver)
+static int scif_peer_add_device(struct scif_dev *scifdev)
 {
-	driver->driver.bus = &scif_peer_bus;
-	return driver_register(&driver->driver);
+	struct scif_peer_dev *spdev = rcu_dereference(scifdev->spdev);
+	char pool_name[16];
+	int ret;
+
+	ret = device_add(&spdev->dev);
+	put_device(&spdev->dev);
+	if (ret) {
+		dev_err(&scifdev->sdev->dev,
+			"dnode %d: peer device_add failed\n", scifdev->node);
+		goto put_spdev;
+	}
+
+	scnprintf(pool_name, sizeof(pool_name), "scif-%d", spdev->dnode);
+	scifdev->signal_pool = dmam_pool_create(pool_name, &scifdev->sdev->dev,
+						sizeof(struct scif_status), 1,
+						0);
+	if (!scifdev->signal_pool) {
+		dev_err(&scifdev->sdev->dev,
+			"dnode %d: dmam_pool_create failed\n", scifdev->node);
+		ret = -ENOMEM;
+		goto del_spdev;
+	}
+	dev_dbg(&spdev->dev, "Added peer dnode %d\n", spdev->dnode);
+	return 0;
+del_spdev:
+	device_del(&spdev->dev);
+put_spdev:
+	RCU_INIT_POINTER(scifdev->spdev, NULL);
+	synchronize_rcu();
+	put_device(&spdev->dev);
+
+	mutex_lock(&scif_info.conflock);
+	scif_info.total--;
+	mutex_unlock(&scif_info.conflock);
+	return ret;
 }
 
-void scif_peer_unregister_driver(struct scif_peer_driver *driver)
+void scif_add_peer_device(struct work_struct *work)
 {
-	driver_unregister(&driver->driver);
+	struct scif_dev *scifdev = container_of(work, struct scif_dev,
+						peer_add_work);
+
+	scif_peer_add_device(scifdev);
 }
 
-static void scif_peer_release_dev(struct device *d)
+/*
+ * Peer device registration is split into a device_initialize and a device_add.
+ * The reason for doing this is as follows: First, peer device registration
+ * itself cannot be done in the message processing thread and must be delegated
+ * to another workqueue, otherwise if SCIF client probe, called during peer
+ * device registration, calls scif_connect(..), it will block the message
+ * processing thread causing a deadlock. Next, device_initialize is done in the
+ * "top-half" message processing thread and device_add in the "bottom-half"
+ * workqueue. If this is not done, SCIF_CNCT_REQ message processing executing
+ * concurrently with SCIF_INIT message processing is unable to get a reference
+ * on the peer device, thereby failing the connect request.
+ */
+void scif_peer_register_device(struct scif_dev *scifdev)
 {
-	struct scif_peer_dev *sdev = dev_to_scif_peer(d);
-	struct scif_dev *scifdev = &scif_dev[sdev->dnode];
+	int ret;
 
-	scif_cleanup_scifdev(scifdev);
-	kfree(sdev);
+	mutex_lock(&scifdev->lock);
+	ret = scif_peer_initialize_device(scifdev);
+	if (ret)
+		goto exit;
+	schedule_work(&scifdev->peer_add_work);
+exit:
+	mutex_unlock(&scifdev->lock);
 }
 
-struct scif_peer_dev *
-scif_peer_register_device(struct scif_dev *scifdev)
+int scif_peer_unregister_device(struct scif_dev *scifdev)
 {
-	int ret;
 	struct scif_peer_dev *spdev;
 
-	spdev = kzalloc(sizeof(*spdev), GFP_KERNEL);
-	if (!spdev)
-		return ERR_PTR(-ENOMEM);
-
-	spdev->dev.parent = scifdev->sdev->dev.parent;
-	spdev->dev.release = scif_peer_release_dev;
-	spdev->dnode = scifdev->node;
-	spdev->dev.bus = &scif_peer_bus;
+	mutex_lock(&scifdev->lock);
+	/* Flush work to ensure device register is complete */
+	flush_work(&scifdev->peer_add_work);
 
-	dev_set_name(&spdev->dev, "scif_peer-dev%u", spdev->dnode);
 	/*
-	 * device_register() causes the bus infrastructure to look for a
-	 * matching driver.
+	 * Continue holding scifdev->lock since theoretically unregister_device
+	 * can be called simultaneously from multiple threads
 	 */
-	ret = device_register(&spdev->dev);
-	if (ret)
-		goto free_spdev;
-	return spdev;
-free_spdev:
-	kfree(spdev);
-	return ERR_PTR(ret);
-}
-
-void scif_peer_unregister_device(struct scif_peer_dev *sdev)
-{
-	device_unregister(&sdev->dev);
+	spdev = rcu_dereference(scifdev->spdev);
+	if (!spdev) {
+		mutex_unlock(&scifdev->lock);
+		return -ENODEV;
+	}
+
+	RCU_INIT_POINTER(scifdev->spdev, NULL);
+	synchronize_rcu();
+	mutex_unlock(&scifdev->lock);
+
+	dev_dbg(&spdev->dev, "Removing peer dnode %d\n", spdev->dnode);
+	device_unregister(&spdev->dev);
+
+	mutex_lock(&scif_info.conflock);
+	scif_info.total--;
+	mutex_unlock(&scif_info.conflock);
+	return 0;
 }
 
 int scif_peer_bus_init(void)
diff --git a/drivers/misc/mic/scif/scif_peer_bus.h b/drivers/misc/mic/scif/scif_peer_bus.h
index 33f0dbb30..a3b8dd2ed 100644
--- a/drivers/misc/mic/scif/scif_peer_bus.h
+++ b/drivers/misc/mic/scif/scif_peer_bus.h
@@ -19,47 +19,13 @@
 
 #include <linux/device.h>
 #include <linux/mic_common.h>
-
-/*
- * Peer devices show up as PCIe devices for the mgmt node but not the cards.
- * The mgmt node discovers all the cards on the PCIe bus and informs the other
- * cards about their peers. Upon notification of a peer a node adds a peer
- * device to the peer bus to maintain symmetry in the way devices are
- * discovered across all nodes in the SCIF network.
- */
-/**
- * scif_peer_dev - representation of a peer SCIF device
- * @dev: underlying device
- * @dnode - The destination node which this device will communicate with.
- */
-struct scif_peer_dev {
-	struct device dev;
-	u8 dnode;
-};
-
-/**
- * scif_peer_driver - operations for a scif_peer I/O driver
- * @driver: underlying device driver (populate name and owner).
- * @id_table: the ids serviced by this driver.
- * @probe: the function to call when a device is found.  Returns 0 or -errno.
- * @remove: the function to call when a device is removed.
- */
-struct scif_peer_driver {
-	struct device_driver driver;
-	const struct scif_peer_dev_id *id_table;
-
-	int (*probe)(struct scif_peer_dev *dev);
-	void (*remove)(struct scif_peer_dev *dev);
-};
+#include <linux/scif.h>
 
 struct scif_dev;
 
-int scif_peer_register_driver(struct scif_peer_driver *driver);
-void scif_peer_unregister_driver(struct scif_peer_driver *driver);
-
-struct scif_peer_dev *scif_peer_register_device(struct scif_dev *sdev);
-void scif_peer_unregister_device(struct scif_peer_dev *sdev);
-
+void scif_add_peer_device(struct work_struct *work);
+void scif_peer_register_device(struct scif_dev *sdev);
+int scif_peer_unregister_device(struct scif_dev *scifdev);
 int scif_peer_bus_init(void);
 void scif_peer_bus_exit(void);
 #endif /* _SCIF_PEER_BUS_H */
diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
new file mode 100644
index 000000000..8310b4dbf
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -0,0 +1,1775 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/dma_remapping.h>
+#include <linux/pagemap.h>
+#include "scif_main.h"
+#include "scif_map.h"
+
+/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */
+#define SCIF_MAP_ULIMIT 0x40
+
+bool scif_ulimit_check = 1;
+
+/**
+ * scif_rma_ep_init:
+ * @ep: end point
+ *
+ * Initialize RMA per EP data structures.
+ */
+void scif_rma_ep_init(struct scif_endpt *ep)
+{
+	struct scif_endpt_rma_info *rma = &ep->rma_info;
+
+	mutex_init(&rma->rma_lock);
+	init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN,
+			 SCIF_DMA_64BIT_PFN);
+	spin_lock_init(&rma->tc_lock);
+	mutex_init(&rma->mmn_lock);
+	INIT_LIST_HEAD(&rma->reg_list);
+	INIT_LIST_HEAD(&rma->remote_reg_list);
+	atomic_set(&rma->tw_refcount, 0);
+	atomic_set(&rma->tcw_refcount, 0);
+	atomic_set(&rma->tcw_total_pages, 0);
+	atomic_set(&rma->fence_refcount, 0);
+
+	rma->async_list_del = 0;
+	rma->dma_chan = NULL;
+	INIT_LIST_HEAD(&rma->mmn_list);
+	INIT_LIST_HEAD(&rma->vma_list);
+	init_waitqueue_head(&rma->markwq);
+}
+
+/**
+ * scif_rma_ep_can_uninit:
+ * @ep: end point
+ *
+ * Returns 1 if an endpoint can be uninitialized and 0 otherwise.
+ */
+int scif_rma_ep_can_uninit(struct scif_endpt *ep)
+{
+	int ret = 0;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Destroy RMA Info only if both lists are empty */
+	if (list_empty(&ep->rma_info.reg_list) &&
+	    list_empty(&ep->rma_info.remote_reg_list) &&
+	    list_empty(&ep->rma_info.mmn_list) &&
+	    !atomic_read(&ep->rma_info.tw_refcount) &&
+	    !atomic_read(&ep->rma_info.tcw_refcount) &&
+	    !atomic_read(&ep->rma_info.fence_refcount))
+		ret = 1;
+	mutex_unlock(&ep->rma_info.rma_lock);
+	return ret;
+}
+
+/**
+ * scif_create_pinned_pages:
+ * @nr_pages: number of pages in window
+ * @prot: read/write protection
+ *
+ * Allocate and prepare a set of pinned pages.
+ */
+static struct scif_pinned_pages *
+scif_create_pinned_pages(int nr_pages, int prot)
+{
+	struct scif_pinned_pages *pin;
+
+	might_sleep();
+	pin = scif_zalloc(sizeof(*pin));
+	if (!pin)
+		goto error;
+
+	pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages));
+	if (!pin->pages)
+		goto error_free_pinned_pages;
+
+	pin->prot = prot;
+	pin->magic = SCIFEP_MAGIC;
+	return pin;
+
+error_free_pinned_pages:
+	scif_free(pin, sizeof(*pin));
+error:
+	return NULL;
+}
+
+/**
+ * scif_destroy_pinned_pages:
+ * @pin: A set of pinned pages.
+ *
+ * Deallocate resources for pinned pages.
+ */
+static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin)
+{
+	int j;
+	int writeable = pin->prot & SCIF_PROT_WRITE;
+	int kernel = SCIF_MAP_KERNEL & pin->map_flags;
+
+	for (j = 0; j < pin->nr_pages; j++) {
+		if (pin->pages[j] && !kernel) {
+			if (writeable)
+				SetPageDirty(pin->pages[j]);
+			put_page(pin->pages[j]);
+		}
+	}
+
+	scif_free(pin->pages,
+		  pin->nr_pages * sizeof(*pin->pages));
+	scif_free(pin, sizeof(*pin));
+	return 0;
+}
+
+/*
+ * scif_create_window:
+ * @ep: end point
+ * @nr_pages: number of pages
+ * @offset: registration offset
+ * @temp: true if a temporary window is being created
+ *
+ * Allocate and prepare a self registration window.
+ */
+struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
+				       s64 offset, bool temp)
+{
+	struct scif_window *window;
+
+	might_sleep();
+	window = scif_zalloc(sizeof(*window));
+	if (!window)
+		goto error;
+
+	window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
+	if (!window->dma_addr)
+		goto error_free_window;
+
+	window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages));
+	if (!window->num_pages)
+		goto error_free_window;
+
+	window->offset = offset;
+	window->ep = (u64)ep;
+	window->magic = SCIFEP_MAGIC;
+	window->reg_state = OP_IDLE;
+	init_waitqueue_head(&window->regwq);
+	window->unreg_state = OP_IDLE;
+	init_waitqueue_head(&window->unregwq);
+	INIT_LIST_HEAD(&window->list);
+	window->type = SCIF_WINDOW_SELF;
+	window->temp = temp;
+	return window;
+
+error_free_window:
+	scif_free(window->dma_addr,
+		  nr_pages * sizeof(*window->dma_addr));
+	scif_free(window, sizeof(*window));
+error:
+	return NULL;
+}
+
+/**
+ * scif_destroy_incomplete_window:
+ * @ep: end point
+ * @window: registration window
+ *
+ * Deallocate resources for self window.
+ */
+static void scif_destroy_incomplete_window(struct scif_endpt *ep,
+					   struct scif_window *window)
+{
+	int err;
+	int nr_pages = window->nr_pages;
+	struct scif_allocmsg *alloc = &window->alloc_handle;
+	struct scifmsg msg;
+
+retry:
+	/* Wait for a SCIF_ALLOC_GNT/REJ message */
+	err = wait_event_timeout(alloc->allocwq,
+				 alloc->state != OP_IN_PROGRESS,
+				 SCIF_NODE_ALIVE_TIMEOUT);
+	if (!err && scifdev_alive(ep))
+		goto retry;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (alloc->state == OP_COMPLETED) {
+		msg.uop = SCIF_FREE_VIRT;
+		msg.src = ep->port;
+		msg.payload[0] = ep->remote_ep;
+		msg.payload[1] = window->alloc_handle.vaddr;
+		msg.payload[2] = (u64)window;
+		msg.payload[3] = SCIF_REGISTER;
+		_scif_nodeqp_send(ep->remote_dev, &msg);
+	}
+	mutex_unlock(&ep->rma_info.rma_lock);
+
+	scif_free_window_offset(ep, window, window->offset);
+	scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
+	scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
+	scif_free(window, sizeof(*window));
+}
+
+/**
+ * scif_unmap_window:
+ * @remote_dev: SCIF remote device
+ * @window: registration window
+ *
+ * Delete any DMA mappings created for a registered self window
+ */
+void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window)
+{
+	int j;
+
+	if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) {
+		if (window->st) {
+			dma_unmap_sg(&remote_dev->sdev->dev,
+				     window->st->sgl, window->st->nents,
+				     DMA_BIDIRECTIONAL);
+			sg_free_table(window->st);
+			kfree(window->st);
+			window->st = NULL;
+		}
+	} else {
+		for (j = 0; j < window->nr_contig_chunks; j++) {
+			if (window->dma_addr[j]) {
+				scif_unmap_single(window->dma_addr[j],
+						  remote_dev,
+						  window->num_pages[j] <<
+						  PAGE_SHIFT);
+				window->dma_addr[j] = 0x0;
+			}
+		}
+	}
+}
+
+static inline struct mm_struct *__scif_acquire_mm(void)
+{
+	if (scif_ulimit_check)
+		return get_task_mm(current);
+	return NULL;
+}
+
+static inline void __scif_release_mm(struct mm_struct *mm)
+{
+	if (mm)
+		mmput(mm);
+}
+
+static inline int
+__scif_dec_pinned_vm_lock(struct mm_struct *mm,
+			  int nr_pages, bool try_lock)
+{
+	if (!mm || !nr_pages || !scif_ulimit_check)
+		return 0;
+	if (try_lock) {
+		if (!down_write_trylock(&mm->mmap_sem)) {
+			dev_err(scif_info.mdev.this_device,
+				"%s %d err\n", __func__, __LINE__);
+			return -1;
+		}
+	} else {
+		down_write(&mm->mmap_sem);
+	}
+	mm->pinned_vm -= nr_pages;
+	up_write(&mm->mmap_sem);
+	return 0;
+}
+
+static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
+					     int nr_pages)
+{
+	unsigned long locked, lock_limit;
+
+	if (!mm || !nr_pages || !scif_ulimit_check)
+		return 0;
+
+	locked = nr_pages;
+	locked += mm->pinned_vm;
+	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+		dev_err(scif_info.mdev.this_device,
+			"locked(%lu) > lock_limit(%lu)\n",
+			locked, lock_limit);
+		return -ENOMEM;
+	}
+	mm->pinned_vm = locked;
+	return 0;
+}
+
+/**
+ * scif_destroy_window:
+ * @ep: end point
+ * @window: registration window
+ *
+ * Deallocate resources for self window.
+ */
+int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window)
+{
+	int j;
+	struct scif_pinned_pages *pinned_pages = window->pinned_pages;
+	int nr_pages = window->nr_pages;
+
+	might_sleep();
+	if (!window->temp && window->mm) {
+		__scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0);
+		__scif_release_mm(window->mm);
+		window->mm = NULL;
+	}
+
+	scif_free_window_offset(ep, window, window->offset);
+	scif_unmap_window(ep->remote_dev, window);
+	/*
+	 * Decrement references for this set of pinned pages from
+	 * this window.
+	 */
+	j = atomic_sub_return(1, &pinned_pages->ref_count);
+	if (j < 0)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d incorrect ref count %d\n",
+			__func__, __LINE__, j);
+	/*
+	 * If the ref count for pinned_pages is zero then someone
+	 * has already called scif_unpin_pages() for it and we should
+	 * destroy the page cache.
+	 */
+	if (!j)
+		scif_destroy_pinned_pages(window->pinned_pages);
+	scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
+	scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
+	window->magic = 0;
+	scif_free(window, sizeof(*window));
+	return 0;
+}
+
+/**
+ * scif_create_remote_lookup:
+ * @remote_dev: SCIF remote device
+ * @window: remote window
+ *
+ * Allocate and prepare lookup entries for the remote
+ * end to copy over the physical addresses.
+ * Returns 0 on success and appropriate errno on failure.
+ */
+static int scif_create_remote_lookup(struct scif_dev *remote_dev,
+				     struct scif_window *window)
+{
+	int i, j, err = 0;
+	int nr_pages = window->nr_pages;
+	bool vmalloc_dma_phys, vmalloc_num_pages;
+
+	might_sleep();
+	/* Map window */
+	err = scif_map_single(&window->mapped_offset,
+			      window, remote_dev, sizeof(*window));
+	if (err)
+		goto error_window;
+
+	/* Compute the number of lookup entries. 21 == 2MB Shift */
+	window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE,
+					((2) * 1024 * 1024)) >> 21;
+
+	window->dma_addr_lookup.lookup =
+		scif_alloc_coherent(&window->dma_addr_lookup.offset,
+				    remote_dev, window->nr_lookup *
+				    sizeof(*window->dma_addr_lookup.lookup),
+				    GFP_KERNEL | __GFP_ZERO);
+	if (!window->dma_addr_lookup.lookup) {
+		err = -ENOMEM;
+		goto error_window;
+	}
+
+	window->num_pages_lookup.lookup =
+		scif_alloc_coherent(&window->num_pages_lookup.offset,
+				    remote_dev, window->nr_lookup *
+				    sizeof(*window->num_pages_lookup.lookup),
+				    GFP_KERNEL | __GFP_ZERO);
+	if (!window->num_pages_lookup.lookup) {
+		err = -ENOMEM;
+		goto error_window;
+	}
+
+	vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]);
+	vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]);
+
+	/* Now map each of the pages containing physical addresses */
+	for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) {
+		err = scif_map_page(&window->dma_addr_lookup.lookup[j],
+				    vmalloc_dma_phys ?
+				    vmalloc_to_page(&window->dma_addr[i]) :
+				    virt_to_page(&window->dma_addr[i]),
+				    remote_dev);
+		if (err)
+			goto error_window;
+		err = scif_map_page(&window->num_pages_lookup.lookup[j],
+				    vmalloc_dma_phys ?
+				    vmalloc_to_page(&window->num_pages[i]) :
+				    virt_to_page(&window->num_pages[i]),
+				    remote_dev);
+		if (err)
+			goto error_window;
+	}
+	return 0;
+error_window:
+	return err;
+}
+
+/**
+ * scif_destroy_remote_lookup:
+ * @remote_dev: SCIF remote device
+ * @window: remote window
+ *
+ * Destroy lookup entries used for the remote
+ * end to copy over the physical addresses.
+ */
+static void scif_destroy_remote_lookup(struct scif_dev *remote_dev,
+				       struct scif_window *window)
+{
+	int i, j;
+
+	if (window->nr_lookup) {
+		struct scif_rma_lookup *lup = &window->dma_addr_lookup;
+		struct scif_rma_lookup *npup = &window->num_pages_lookup;
+
+		for (i = 0, j = 0; i < window->nr_pages;
+			i += SCIF_NR_ADDR_IN_PAGE, j++) {
+			if (lup->lookup && lup->lookup[j])
+				scif_unmap_single(lup->lookup[j],
+						  remote_dev,
+						  PAGE_SIZE);
+			if (npup->lookup && npup->lookup[j])
+				scif_unmap_single(npup->lookup[j],
+						  remote_dev,
+						  PAGE_SIZE);
+		}
+		if (lup->lookup)
+			scif_free_coherent(lup->lookup, lup->offset,
+					   remote_dev, window->nr_lookup *
+					   sizeof(*lup->lookup));
+		if (npup->lookup)
+			scif_free_coherent(npup->lookup, npup->offset,
+					   remote_dev, window->nr_lookup *
+					   sizeof(*npup->lookup));
+		if (window->mapped_offset)
+			scif_unmap_single(window->mapped_offset,
+					  remote_dev, sizeof(*window));
+		window->nr_lookup = 0;
+	}
+}
+
+/**
+ * scif_create_remote_window:
+ * @ep: end point
+ * @nr_pages: number of pages in window
+ *
+ * Allocate and prepare a remote registration window.
+ */
+static struct scif_window *
+scif_create_remote_window(struct scif_dev *scifdev, int nr_pages)
+{
+	struct scif_window *window;
+
+	might_sleep();
+	window = scif_zalloc(sizeof(*window));
+	if (!window)
+		goto error_ret;
+
+	window->magic = SCIFEP_MAGIC;
+	window->nr_pages = nr_pages;
+
+	window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
+	if (!window->dma_addr)
+		goto error_window;
+
+	window->num_pages = scif_zalloc(nr_pages *
+					sizeof(*window->num_pages));
+	if (!window->num_pages)
+		goto error_window;
+
+	if (scif_create_remote_lookup(scifdev, window))
+		goto error_window;
+
+	window->type = SCIF_WINDOW_PEER;
+	window->unreg_state = OP_IDLE;
+	INIT_LIST_HEAD(&window->list);
+	return window;
+error_window:
+	scif_destroy_remote_window(window);
+error_ret:
+	return NULL;
+}
+
+/**
+ * scif_destroy_remote_window:
+ * @ep: end point
+ * @window: remote registration window
+ *
+ * Deallocate resources for remote window.
+ */
+void
+scif_destroy_remote_window(struct scif_window *window)
+{
+	scif_free(window->dma_addr, window->nr_pages *
+		  sizeof(*window->dma_addr));
+	scif_free(window->num_pages, window->nr_pages *
+		  sizeof(*window->num_pages));
+	window->magic = 0;
+	scif_free(window, sizeof(*window));
+}
+
+/**
+ * scif_iommu_map: create DMA mappings if the IOMMU is enabled
+ * @remote_dev: SCIF remote device
+ * @window: remote registration window
+ *
+ * Map the physical pages using dma_map_sg(..) and then detect the number
+ * of contiguous DMA mappings allocated
+ */
+static int scif_iommu_map(struct scif_dev *remote_dev,
+			  struct scif_window *window)
+{
+	struct scatterlist *sg;
+	int i, err;
+	scif_pinned_pages_t pin = window->pinned_pages;
+
+	window->st = kzalloc(sizeof(*window->st), GFP_KERNEL);
+	if (!window->st)
+		return -ENOMEM;
+
+	err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL);
+	if (err)
+		return err;
+
+	for_each_sg(window->st->sgl, sg, window->st->nents, i)
+		sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0);
+
+	err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl,
+			 window->st->nents, DMA_BIDIRECTIONAL);
+	if (!err)
+		return -ENOMEM;
+	/* Detect contiguous ranges of DMA mappings */
+	sg = window->st->sgl;
+	for (i = 0; sg; i++) {
+		dma_addr_t last_da;
+
+		window->dma_addr[i] = sg_dma_address(sg);
+		window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT;
+		last_da = sg_dma_address(sg) + sg_dma_len(sg);
+		while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) {
+			window->num_pages[i] +=
+				(sg_dma_len(sg) >> PAGE_SHIFT);
+			last_da = window->dma_addr[i] +
+				sg_dma_len(sg);
+		}
+		window->nr_contig_chunks++;
+	}
+	return 0;
+}
+
+/**
+ * scif_map_window:
+ * @remote_dev: SCIF remote device
+ * @window: self registration window
+ *
+ * Map pages of a window into the aperture/PCI.
+ * Also determine addresses required for DMA.
+ */
+int
+scif_map_window(struct scif_dev *remote_dev, struct scif_window *window)
+{
+	int i, j, k, err = 0, nr_contig_pages;
+	scif_pinned_pages_t pin;
+	phys_addr_t phys_prev, phys_curr;
+
+	might_sleep();
+
+	pin = window->pinned_pages;
+
+	if (intel_iommu_enabled && !scifdev_self(remote_dev))
+		return scif_iommu_map(remote_dev, window);
+
+	for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) {
+		phys_prev = page_to_phys(pin->pages[i]);
+		nr_contig_pages = 1;
+
+		/* Detect physically contiguous chunks */
+		for (k = i + 1; k < window->nr_pages; k++) {
+			phys_curr = page_to_phys(pin->pages[k]);
+			if (phys_curr != (phys_prev + PAGE_SIZE))
+				break;
+			phys_prev = phys_curr;
+			nr_contig_pages++;
+		}
+		window->num_pages[j] = nr_contig_pages;
+		window->nr_contig_chunks++;
+		if (scif_is_mgmt_node()) {
+			/*
+			 * Management node has to deal with SMPT on X100 and
+			 * hence the DMA mapping is required
+			 */
+			err = scif_map_single(&window->dma_addr[j],
+					      phys_to_virt(page_to_phys(
+							   pin->pages[i])),
+					      remote_dev,
+					      nr_contig_pages << PAGE_SHIFT);
+			if (err)
+				return err;
+		} else {
+			window->dma_addr[j] = page_to_phys(pin->pages[i]);
+		}
+	}
+	return err;
+}
+
+/**
+ * scif_send_scif_unregister:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a SCIF_UNREGISTER message.
+ */
+static int scif_send_scif_unregister(struct scif_endpt *ep,
+				     struct scif_window *window)
+{
+	struct scifmsg msg;
+
+	msg.uop = SCIF_UNREGISTER;
+	msg.src = ep->port;
+	msg.payload[0] = window->alloc_handle.vaddr;
+	msg.payload[1] = (u64)window;
+	return scif_nodeqp_send(ep->remote_dev, &msg);
+}
+
+/**
+ * scif_unregister_window:
+ * @window: self registration window
+ *
+ * Send an unregistration request and wait for a response.
+ */
+int scif_unregister_window(struct scif_window *window)
+{
+	int err = 0;
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+	bool send_msg = false;
+
+	might_sleep();
+	switch (window->unreg_state) {
+	case OP_IDLE:
+	{
+		window->unreg_state = OP_IN_PROGRESS;
+		send_msg = true;
+		/* fall through */
+	}
+	case OP_IN_PROGRESS:
+	{
+		scif_get_window(window, 1);
+		mutex_unlock(&ep->rma_info.rma_lock);
+		if (send_msg) {
+			err = scif_send_scif_unregister(ep, window);
+			if (err) {
+				window->unreg_state = OP_COMPLETED;
+				goto done;
+			}
+		} else {
+			/* Return ENXIO since unregistration is in progress */
+			mutex_lock(&ep->rma_info.rma_lock);
+			return -ENXIO;
+		}
+retry:
+		/* Wait for a SCIF_UNREGISTER_(N)ACK message */
+		err = wait_event_timeout(window->unregwq,
+					 window->unreg_state != OP_IN_PROGRESS,
+					 SCIF_NODE_ALIVE_TIMEOUT);
+		if (!err && scifdev_alive(ep))
+			goto retry;
+		if (!err) {
+			err = -ENODEV;
+			window->unreg_state = OP_COMPLETED;
+			dev_err(scif_info.mdev.this_device,
+				"%s %d err %d\n", __func__, __LINE__, err);
+		}
+		if (err > 0)
+			err = 0;
+done:
+		mutex_lock(&ep->rma_info.rma_lock);
+		scif_put_window(window, 1);
+		break;
+	}
+	case OP_FAILED:
+	{
+		if (!scifdev_alive(ep)) {
+			err = -ENODEV;
+			window->unreg_state = OP_COMPLETED;
+		}
+		break;
+	}
+	case OP_COMPLETED:
+		break;
+	default:
+		err = -ENODEV;
+	}
+
+	if (window->unreg_state == OP_COMPLETED && window->ref_count)
+		scif_put_window(window, window->nr_pages);
+
+	if (!window->ref_count) {
+		atomic_inc(&ep->rma_info.tw_refcount);
+		list_del_init(&window->list);
+		scif_free_window_offset(ep, window, window->offset);
+		mutex_unlock(&ep->rma_info.rma_lock);
+		if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) &&
+		    scifdev_alive(ep)) {
+			scif_drain_dma_intr(ep->remote_dev->sdev,
+					    ep->rma_info.dma_chan);
+		} else {
+			if (!__scif_dec_pinned_vm_lock(window->mm,
+						       window->nr_pages, 1)) {
+				__scif_release_mm(window->mm);
+				window->mm = NULL;
+			}
+		}
+		scif_queue_for_cleanup(window, &scif_info.rma);
+		mutex_lock(&ep->rma_info.rma_lock);
+	}
+	return err;
+}
+
+/**
+ * scif_send_alloc_request:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a remote window allocation request
+ */
+static int scif_send_alloc_request(struct scif_endpt *ep,
+				   struct scif_window *window)
+{
+	struct scifmsg msg;
+	struct scif_allocmsg *alloc = &window->alloc_handle;
+
+	/* Set up the Alloc Handle */
+	alloc->state = OP_IN_PROGRESS;
+	init_waitqueue_head(&alloc->allocwq);
+
+	/* Send out an allocation request */
+	msg.uop = SCIF_ALLOC_REQ;
+	msg.payload[1] = window->nr_pages;
+	msg.payload[2] = (u64)&window->alloc_handle;
+	return _scif_nodeqp_send(ep->remote_dev, &msg);
+}
+
+/**
+ * scif_prep_remote_window:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a remote window allocation request, wait for an allocation response,
+ * and prepares the remote window by copying over the page lists
+ */
+static int scif_prep_remote_window(struct scif_endpt *ep,
+				   struct scif_window *window)
+{
+	struct scifmsg msg;
+	struct scif_window *remote_window;
+	struct scif_allocmsg *alloc = &window->alloc_handle;
+	dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1;
+	int i = 0, j = 0;
+	int nr_contig_chunks, loop_nr_contig_chunks;
+	int remaining_nr_contig_chunks, nr_lookup;
+	int err, map_err;
+
+	map_err = scif_map_window(ep->remote_dev, window);
+	if (map_err)
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d map_err %d\n", __func__, __LINE__, map_err);
+	remaining_nr_contig_chunks = window->nr_contig_chunks;
+	nr_contig_chunks = window->nr_contig_chunks;
+retry:
+	/* Wait for a SCIF_ALLOC_GNT/REJ message */
+	err = wait_event_timeout(alloc->allocwq,
+				 alloc->state != OP_IN_PROGRESS,
+				 SCIF_NODE_ALIVE_TIMEOUT);
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Synchronize with the thread waking up allocwq */
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (!err && scifdev_alive(ep))
+		goto retry;
+
+	if (!err)
+		err = -ENODEV;
+
+	if (err > 0)
+		err = 0;
+	else
+		return err;
+
+	/* Bail out. The remote end rejected this request */
+	if (alloc->state == OP_FAILED)
+		return -ENOMEM;
+
+	if (map_err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, map_err);
+		msg.uop = SCIF_FREE_VIRT;
+		msg.src = ep->port;
+		msg.payload[0] = ep->remote_ep;
+		msg.payload[1] = window->alloc_handle.vaddr;
+		msg.payload[2] = (u64)window;
+		msg.payload[3] = SCIF_REGISTER;
+		spin_lock(&ep->lock);
+		if (ep->state == SCIFEP_CONNECTED)
+			err = _scif_nodeqp_send(ep->remote_dev, &msg);
+		else
+			err = -ENOTCONN;
+		spin_unlock(&ep->lock);
+		return err;
+	}
+
+	remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window),
+				     ep->remote_dev);
+
+	/* Compute the number of lookup entries. 21 == 2MB Shift */
+	nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE)
+			  >> ilog2(SCIF_NR_ADDR_IN_PAGE);
+
+	dma_phys_lookup =
+		scif_ioremap(remote_window->dma_addr_lookup.offset,
+			     nr_lookup *
+			     sizeof(*remote_window->dma_addr_lookup.lookup),
+			     ep->remote_dev);
+	num_pages_lookup =
+		scif_ioremap(remote_window->num_pages_lookup.offset,
+			     nr_lookup *
+			     sizeof(*remote_window->num_pages_lookup.lookup),
+			     ep->remote_dev);
+
+	while (remaining_nr_contig_chunks) {
+		loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks,
+					      (int)SCIF_NR_ADDR_IN_PAGE);
+		/* #1/2 - Copy  physical addresses over to the remote side */
+
+		/* #2/2 - Copy DMA addresses (addresses that are fed into the
+		 * DMA engine) We transfer bus addresses which are then
+		 * converted into a MIC physical address on the remote
+		 * side if it is a MIC, if the remote node is a mgmt node we
+		 * transfer the MIC physical address
+		 */
+		tmp = scif_ioremap(dma_phys_lookup[j],
+				   loop_nr_contig_chunks *
+				   sizeof(*window->dma_addr),
+				   ep->remote_dev);
+		tmp1 = scif_ioremap(num_pages_lookup[j],
+				    loop_nr_contig_chunks *
+				    sizeof(*window->num_pages),
+				    ep->remote_dev);
+		if (scif_is_mgmt_node()) {
+			memcpy_toio((void __force __iomem *)tmp,
+				    &window->dma_addr[i], loop_nr_contig_chunks
+				    * sizeof(*window->dma_addr));
+			memcpy_toio((void __force __iomem *)tmp1,
+				    &window->num_pages[i], loop_nr_contig_chunks
+				    * sizeof(*window->num_pages));
+		} else {
+			if (scifdev_is_p2p(ep->remote_dev)) {
+				/*
+				 * add remote node's base address for this node
+				 * to convert it into a MIC address
+				 */
+				int m;
+				dma_addr_t dma_addr;
+
+				for (m = 0; m < loop_nr_contig_chunks; m++) {
+					dma_addr = window->dma_addr[i + m] +
+						ep->remote_dev->base_addr;
+					writeq(dma_addr,
+					       (void __force __iomem *)&tmp[m]);
+				}
+				memcpy_toio((void __force __iomem *)tmp1,
+					    &window->num_pages[i],
+					    loop_nr_contig_chunks
+					    * sizeof(*window->num_pages));
+			} else {
+				/* Mgmt node or loopback - transfer DMA
+				 * addresses as is, this is the same as a
+				 * MIC physical address (we use the dma_addr
+				 * and not the phys_addr array since the
+				 * phys_addr is only setup if there is a mmap()
+				 * request from the mgmt node)
+				 */
+				memcpy_toio((void __force __iomem *)tmp,
+					    &window->dma_addr[i],
+					    loop_nr_contig_chunks *
+					    sizeof(*window->dma_addr));
+				memcpy_toio((void __force __iomem *)tmp1,
+					    &window->num_pages[i],
+					    loop_nr_contig_chunks *
+					    sizeof(*window->num_pages));
+			}
+		}
+		remaining_nr_contig_chunks -= loop_nr_contig_chunks;
+		i += loop_nr_contig_chunks;
+		j++;
+		scif_iounmap(tmp, loop_nr_contig_chunks *
+			     sizeof(*window->dma_addr), ep->remote_dev);
+		scif_iounmap(tmp1, loop_nr_contig_chunks *
+			     sizeof(*window->num_pages), ep->remote_dev);
+	}
+
+	/* Prepare the remote window for the peer */
+	remote_window->peer_window = (u64)window;
+	remote_window->offset = window->offset;
+	remote_window->prot = window->prot;
+	remote_window->nr_contig_chunks = nr_contig_chunks;
+	remote_window->ep = ep->remote_ep;
+	scif_iounmap(num_pages_lookup,
+		     nr_lookup *
+		     sizeof(*remote_window->num_pages_lookup.lookup),
+		     ep->remote_dev);
+	scif_iounmap(dma_phys_lookup,
+		     nr_lookup *
+		     sizeof(*remote_window->dma_addr_lookup.lookup),
+		     ep->remote_dev);
+	scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev);
+	window->peer_window = alloc->vaddr;
+	return err;
+}
+
+/**
+ * scif_send_scif_register:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a SCIF_REGISTER message if EP is connected and wait for a
+ * SCIF_REGISTER_(N)ACK message else send a SCIF_FREE_VIRT
+ * message so that the peer can free its remote window allocated earlier.
+ */
+static int scif_send_scif_register(struct scif_endpt *ep,
+				   struct scif_window *window)
+{
+	int err = 0;
+	struct scifmsg msg;
+
+	msg.src = ep->port;
+	msg.payload[0] = ep->remote_ep;
+	msg.payload[1] = window->alloc_handle.vaddr;
+	msg.payload[2] = (u64)window;
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_CONNECTED) {
+		msg.uop = SCIF_REGISTER;
+		window->reg_state = OP_IN_PROGRESS;
+		err = _scif_nodeqp_send(ep->remote_dev, &msg);
+		spin_unlock(&ep->lock);
+		if (!err) {
+retry:
+			/* Wait for a SCIF_REGISTER_(N)ACK message */
+			err = wait_event_timeout(window->regwq,
+						 window->reg_state !=
+						 OP_IN_PROGRESS,
+						 SCIF_NODE_ALIVE_TIMEOUT);
+			if (!err && scifdev_alive(ep))
+				goto retry;
+			err = !err ? -ENODEV : 0;
+			if (window->reg_state == OP_FAILED)
+				err = -ENOTCONN;
+		}
+	} else {
+		msg.uop = SCIF_FREE_VIRT;
+		msg.payload[3] = SCIF_REGISTER;
+		err = _scif_nodeqp_send(ep->remote_dev, &msg);
+		spin_unlock(&ep->lock);
+		if (!err)
+			err = -ENOTCONN;
+	}
+	return err;
+}
+
+/**
+ * scif_get_window_offset:
+ * @ep: end point descriptor
+ * @flags: flags
+ * @offset: offset hint
+ * @num_pages: number of pages
+ * @out_offset: computed offset returned by reference.
+ *
+ * Compute/Claim a new offset for this EP.
+ */
+int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset,
+			   int num_pages, s64 *out_offset)
+{
+	s64 page_index;
+	struct iova *iova_ptr;
+	int err = 0;
+
+	if (flags & SCIF_MAP_FIXED) {
+		page_index = SCIF_IOVA_PFN(offset);
+		iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index,
+					page_index + num_pages - 1);
+		if (!iova_ptr)
+			err = -EADDRINUSE;
+	} else {
+		iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages,
+				      SCIF_DMA_63BIT_PFN - 1, 0);
+		if (!iova_ptr)
+			err = -ENOMEM;
+	}
+	if (!err)
+		*out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT;
+	return err;
+}
+
+/**
+ * scif_free_window_offset:
+ * @ep: end point descriptor
+ * @window: registration window
+ * @offset: Offset to be freed
+ *
+ * Free offset for this EP. The callee is supposed to grab
+ * the RMA mutex before calling this API.
+ */
+void scif_free_window_offset(struct scif_endpt *ep,
+			     struct scif_window *window, s64 offset)
+{
+	if ((window && !window->offset_freed) || !window) {
+		free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT);
+		if (window)
+			window->offset_freed = true;
+	}
+}
+
+/**
+ * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message
+ * @msg:        Interrupt message
+ *
+ * Remote side is requesting a memory allocation.
+ */
+void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	int err;
+	struct scif_window *window = NULL;
+	int nr_pages = msg->payload[1];
+
+	window = scif_create_remote_window(scifdev, nr_pages);
+	if (!window) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	/* The peer's allocation request is granted */
+	msg->uop = SCIF_ALLOC_GNT;
+	msg->payload[0] = (u64)window;
+	msg->payload[1] = window->mapped_offset;
+	err = scif_nodeqp_send(scifdev, msg);
+	if (err)
+		scif_destroy_remote_window(window);
+	return;
+error:
+	/* The peer's allocation request is rejected */
+	dev_err(&scifdev->sdev->dev,
+		"%s %d error %d alloc_ptr %p nr_pages 0x%x\n",
+		__func__, __LINE__, err, window, nr_pages);
+	msg->uop = SCIF_ALLOC_REJ;
+	scif_nodeqp_send(scifdev, msg);
+}
+
+/**
+ * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message
+ * @msg:        Interrupt message
+ *
+ * Remote side responded to a memory allocation.
+ */
+void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2];
+	struct scif_window *window = container_of(handle, struct scif_window,
+						  alloc_handle);
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	handle->vaddr = msg->payload[0];
+	handle->phys_addr = msg->payload[1];
+	if (msg->uop == SCIF_ALLOC_GNT)
+		handle->state = OP_COMPLETED;
+	else
+		handle->state = OP_FAILED;
+	wake_up(&handle->allocwq);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message
+ * @msg:        Interrupt message
+ *
+ * Free up memory kmalloc'd earlier.
+ */
+void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *window = (struct scif_window *)msg->payload[1];
+
+	scif_destroy_remote_window(window);
+}
+
+static void
+scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window)
+{
+	int j;
+	struct scif_hw_dev *sdev = dev->sdev;
+	phys_addr_t apt_base = 0;
+
+	/*
+	 * Add the aperture base if the DMA address is not card relative
+	 * since the DMA addresses need to be an offset into the bar
+	 */
+	if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
+	    sdev->aper && !sdev->card_rel_da)
+		apt_base = sdev->aper->pa;
+	else
+		return;
+
+	for (j = 0; j < window->nr_contig_chunks; j++) {
+		if (window->num_pages[j])
+			window->dma_addr[j] += apt_base;
+		else
+			break;
+	}
+}
+
+/**
+ * scif_recv_reg: Respond to SCIF_REGISTER interrupt message
+ * @msg:        Interrupt message
+ *
+ * Update remote window list with a new registered window.
+ */
+void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_window *window =
+		(struct scif_window *)msg->payload[1];
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_CONNECTED) {
+		msg->uop = SCIF_REGISTER_ACK;
+		scif_nodeqp_send(ep->remote_dev, msg);
+		scif_fixup_aper_base(ep->remote_dev, window);
+		/* No further failures expected. Insert new window */
+		scif_insert_window(window, &ep->rma_info.remote_reg_list);
+	} else {
+		msg->uop = SCIF_REGISTER_NACK;
+		scif_nodeqp_send(ep->remote_dev, msg);
+	}
+	spin_unlock(&ep->lock);
+	mutex_unlock(&ep->rma_info.rma_lock);
+	/* free up any lookup resources now that page lists are transferred */
+	scif_destroy_remote_lookup(ep->remote_dev, window);
+	/*
+	 * We could not insert the window but we need to
+	 * destroy the window.
+	 */
+	if (msg->uop == SCIF_REGISTER_NACK)
+		scif_destroy_remote_window(window);
+}
+
+/**
+ * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message
+ * @msg:        Interrupt message
+ *
+ * Remove window from remote registration list;
+ */
+void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	struct scif_window *recv_window =
+		(struct scif_window *)msg->payload[0];
+	struct scif_endpt *ep;
+	int del_window = 0;
+
+	ep = (struct scif_endpt *)recv_window->ep;
+	req.out_window = &window;
+	req.offset = recv_window->offset;
+	req.prot = 0;
+	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
+	req.type = SCIF_WINDOW_FULL;
+	req.head = &ep->rma_info.remote_reg_list;
+	msg->payload[0] = ep->remote_ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	if (scif_query_window(&req)) {
+		dev_err(&scifdev->sdev->dev,
+			"%s %d -ENXIO\n", __func__, __LINE__);
+		msg->uop = SCIF_UNREGISTER_ACK;
+		goto error;
+	}
+	if (window) {
+		if (window->ref_count)
+			scif_put_window(window, window->nr_pages);
+		else
+			dev_err(&scifdev->sdev->dev,
+				"%s %d ref count should be +ve\n",
+				__func__, __LINE__);
+		window->unreg_state = OP_COMPLETED;
+		if (!window->ref_count) {
+			msg->uop = SCIF_UNREGISTER_ACK;
+			atomic_inc(&ep->rma_info.tw_refcount);
+			ep->rma_info.async_list_del = 1;
+			list_del_init(&window->list);
+			del_window = 1;
+		} else {
+			/* NACK! There are valid references to this window */
+			msg->uop = SCIF_UNREGISTER_NACK;
+		}
+	} else {
+		/* The window did not make its way to the list at all. ACK */
+		msg->uop = SCIF_UNREGISTER_ACK;
+		scif_destroy_remote_window(recv_window);
+	}
+error:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (del_window)
+		scif_drain_dma_intr(ep->remote_dev->sdev,
+				    ep->rma_info.dma_chan);
+	scif_nodeqp_send(ep->remote_dev, msg);
+	if (del_window)
+		scif_queue_for_cleanup(window, &scif_info.rma);
+}
+
+/**
+ * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * Wake up the window waiting to complete registration.
+ */
+void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *window =
+		(struct scif_window *)msg->payload[2];
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	window->reg_state = OP_COMPLETED;
+	wake_up(&window->regwq);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * Wake up the window waiting to inform it that registration
+ * cannot be completed.
+ */
+void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *window =
+		(struct scif_window *)msg->payload[2];
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	window->reg_state = OP_FAILED;
+	wake_up(&window->regwq);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * Wake up the window waiting to complete unregistration.
+ */
+void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *window =
+		(struct scif_window *)msg->payload[1];
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	window->unreg_state = OP_COMPLETED;
+	wake_up(&window->unregwq);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * Wake up the window waiting to inform it that unregistration
+ * cannot be completed immediately.
+ */
+void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *window =
+		(struct scif_window *)msg->payload[1];
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	window->unreg_state = OP_FAILED;
+	wake_up(&window->unregwq);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+int __scif_pin_pages(void *addr, size_t len, int *out_prot,
+		     int map_flags, scif_pinned_pages_t *pages)
+{
+	struct scif_pinned_pages *pinned_pages;
+	int nr_pages, err = 0, i;
+	bool vmalloc_addr = false;
+	bool try_upgrade = false;
+	int prot = *out_prot;
+	int ulimit = 0;
+	struct mm_struct *mm = NULL;
+
+	/* Unsupported flags */
+	if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT))
+		return -EINVAL;
+	ulimit = !!(map_flags & SCIF_MAP_ULIMIT);
+
+	/* Unsupported protection requested */
+	if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
+		return -EINVAL;
+
+	/* addr/len must be page aligned. len should be non zero */
+	if (!len ||
+	    (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
+	    (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
+		return -EINVAL;
+
+	might_sleep();
+
+	nr_pages = len >> PAGE_SHIFT;
+
+	/* Allocate a set of pinned pages */
+	pinned_pages = scif_create_pinned_pages(nr_pages, prot);
+	if (!pinned_pages)
+		return -ENOMEM;
+
+	if (map_flags & SCIF_MAP_KERNEL) {
+		if (is_vmalloc_addr(addr))
+			vmalloc_addr = true;
+
+		for (i = 0; i < nr_pages; i++) {
+			if (vmalloc_addr)
+				pinned_pages->pages[i] =
+					vmalloc_to_page(addr + (i * PAGE_SIZE));
+			else
+				pinned_pages->pages[i] =
+					virt_to_page(addr + (i * PAGE_SIZE));
+		}
+		pinned_pages->nr_pages = nr_pages;
+		pinned_pages->map_flags = SCIF_MAP_KERNEL;
+	} else {
+		/*
+		 * SCIF supports registration caching. If a registration has
+		 * been requested with read only permissions, then we try
+		 * to pin the pages with RW permissions so that a subsequent
+		 * transfer with RW permission can hit the cache instead of
+		 * invalidating it. If the upgrade fails with RW then we
+		 * revert back to R permission and retry
+		 */
+		if (prot == SCIF_PROT_READ)
+			try_upgrade = true;
+		prot |= SCIF_PROT_WRITE;
+retry:
+		mm = current->mm;
+		down_write(&mm->mmap_sem);
+		if (ulimit) {
+			err = __scif_check_inc_pinned_vm(mm, nr_pages);
+			if (err) {
+				up_write(&mm->mmap_sem);
+				pinned_pages->nr_pages = 0;
+				goto error_unmap;
+			}
+		}
+
+		pinned_pages->nr_pages = get_user_pages(
+				current,
+				mm,
+				(u64)addr,
+				nr_pages,
+				!!(prot & SCIF_PROT_WRITE),
+				0,
+				pinned_pages->pages,
+				NULL);
+		up_write(&mm->mmap_sem);
+		if (nr_pages != pinned_pages->nr_pages) {
+			if (try_upgrade) {
+				if (ulimit)
+					__scif_dec_pinned_vm_lock(mm,
+								  nr_pages, 0);
+				/* Roll back any pinned pages */
+				for (i = 0; i < pinned_pages->nr_pages; i++) {
+					if (pinned_pages->pages[i])
+						put_page(
+						pinned_pages->pages[i]);
+				}
+				prot &= ~SCIF_PROT_WRITE;
+				try_upgrade = false;
+				goto retry;
+			}
+		}
+		pinned_pages->map_flags = 0;
+	}
+
+	if (pinned_pages->nr_pages < nr_pages) {
+		err = -EFAULT;
+		pinned_pages->nr_pages = nr_pages;
+		goto dec_pinned;
+	}
+
+	*out_prot = prot;
+	atomic_set(&pinned_pages->ref_count, 1);
+	*pages = pinned_pages;
+	return err;
+dec_pinned:
+	if (ulimit)
+		__scif_dec_pinned_vm_lock(mm, nr_pages, 0);
+	/* Something went wrong! Rollback */
+error_unmap:
+	pinned_pages->nr_pages = nr_pages;
+	scif_destroy_pinned_pages(pinned_pages);
+	*pages = NULL;
+	dev_dbg(scif_info.mdev.this_device,
+		"%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len);
+	return err;
+}
+
+int scif_pin_pages(void *addr, size_t len, int prot,
+		   int map_flags, scif_pinned_pages_t *pages)
+{
+	return __scif_pin_pages(addr, len, &prot, map_flags, pages);
+}
+EXPORT_SYMBOL_GPL(scif_pin_pages);
+
+int scif_unpin_pages(scif_pinned_pages_t pinned_pages)
+{
+	int err = 0, ret;
+
+	if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic)
+		return -EINVAL;
+
+	ret = atomic_sub_return(1, &pinned_pages->ref_count);
+	if (ret < 0) {
+		dev_err(scif_info.mdev.this_device,
+			"%s %d scif_unpin_pages called without pinning? rc %d\n",
+			__func__, __LINE__, ret);
+		return -EINVAL;
+	}
+	/*
+	 * Destroy the window if the ref count for this set of pinned
+	 * pages has dropped to zero. If it is positive then there is
+	 * a valid registered window which is backed by these pages and
+	 * it will be destroyed once all such windows are unregistered.
+	 */
+	if (!ret)
+		err = scif_destroy_pinned_pages(pinned_pages);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_unpin_pages);
+
+static inline void
+scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep)
+{
+	mutex_lock(&ep->rma_info.rma_lock);
+	scif_insert_window(window, &ep->rma_info.reg_list);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+off_t scif_register_pinned_pages(scif_epd_t epd,
+				 scif_pinned_pages_t pinned_pages,
+				 off_t offset, int map_flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	s64 computed_offset;
+	struct scif_window *window;
+	int err;
+	size_t len;
+	struct device *spdev;
+
+	/* Unsupported flags */
+	if (map_flags & ~SCIF_MAP_FIXED)
+		return -EINVAL;
+
+	len = pinned_pages->nr_pages << PAGE_SHIFT;
+
+	/*
+	 * Offset is not page aligned/negative or offset+len
+	 * wraps around with SCIF_MAP_FIXED.
+	 */
+	if ((map_flags & SCIF_MAP_FIXED) &&
+	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
+	    (offset < 0) ||
+	    (offset + (off_t)len < offset)))
+		return -EINVAL;
+
+	might_sleep();
+
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+	/*
+	 * It is an error to pass pinned_pages to scif_register_pinned_pages()
+	 * after calling scif_unpin_pages().
+	 */
+	if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0))
+		return -EINVAL;
+
+	/* Compute the offset for this registration */
+	err = scif_get_window_offset(ep, map_flags, offset,
+				     len, &computed_offset);
+	if (err) {
+		atomic_sub(1, &pinned_pages->ref_count);
+		return err;
+	}
+
+	/* Allocate and prepare self registration window */
+	window = scif_create_window(ep, pinned_pages->nr_pages,
+				    computed_offset, false);
+	if (!window) {
+		atomic_sub(1, &pinned_pages->ref_count);
+		scif_free_window_offset(ep, NULL, computed_offset);
+		return -ENOMEM;
+	}
+
+	window->pinned_pages = pinned_pages;
+	window->nr_pages = pinned_pages->nr_pages;
+	window->prot = pinned_pages->prot;
+
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		scif_destroy_window(ep, window);
+		return err;
+	}
+	err = scif_send_alloc_request(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	/* Prepare the remote registration window */
+	err = scif_prep_remote_window(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	/* Tell the peer about the new window */
+	err = scif_send_scif_register(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	scif_put_peer_dev(spdev);
+	/* No further failures expected. Insert new window */
+	scif_insert_local_window(window, ep);
+	return computed_offset;
+error_unmap:
+	scif_destroy_window(ep, window);
+	scif_put_peer_dev(spdev);
+	dev_err(&ep->remote_dev->sdev->dev,
+		"%s %d err %d\n", __func__, __LINE__, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_register_pinned_pages);
+
+off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
+		    int prot, int map_flags)
+{
+	scif_pinned_pages_t pinned_pages;
+	off_t err;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	s64 computed_offset;
+	struct scif_window *window;
+	struct mm_struct *mm = NULL;
+	struct device *spdev;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n",
+		epd, addr, len, offset, prot, map_flags);
+	/* Unsupported flags */
+	if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL))
+		return -EINVAL;
+
+	/*
+	 * Offset is not page aligned/negative or offset+len
+	 * wraps around with SCIF_MAP_FIXED.
+	 */
+	if ((map_flags & SCIF_MAP_FIXED) &&
+	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
+	    (offset < 0) ||
+	    (offset + (off_t)len < offset)))
+		return -EINVAL;
+
+	/* Unsupported protection requested */
+	if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
+		return -EINVAL;
+
+	/* addr/len must be page aligned. len should be non zero */
+	if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
+	    (ALIGN(len, PAGE_SIZE) != len))
+		return -EINVAL;
+
+	might_sleep();
+
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	/* Compute the offset for this registration */
+	err = scif_get_window_offset(ep, map_flags, offset,
+				     len >> PAGE_SHIFT, &computed_offset);
+	if (err)
+		return err;
+
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		scif_free_window_offset(ep, NULL, computed_offset);
+		return err;
+	}
+	/* Allocate and prepare self registration window */
+	window = scif_create_window(ep, len >> PAGE_SHIFT,
+				    computed_offset, false);
+	if (!window) {
+		scif_free_window_offset(ep, NULL, computed_offset);
+		scif_put_peer_dev(spdev);
+		return -ENOMEM;
+	}
+
+	window->nr_pages = len >> PAGE_SHIFT;
+
+	err = scif_send_alloc_request(ep, window);
+	if (err) {
+		scif_destroy_incomplete_window(ep, window);
+		scif_put_peer_dev(spdev);
+		return err;
+	}
+
+	if (!(map_flags & SCIF_MAP_KERNEL)) {
+		mm = __scif_acquire_mm();
+		map_flags |= SCIF_MAP_ULIMIT;
+	}
+	/* Pin down the pages */
+	err = __scif_pin_pages(addr, len, &prot,
+			       map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT),
+			       &pinned_pages);
+	if (err) {
+		scif_destroy_incomplete_window(ep, window);
+		__scif_release_mm(mm);
+		goto error;
+	}
+
+	window->pinned_pages = pinned_pages;
+	window->prot = pinned_pages->prot;
+	window->mm = mm;
+
+	/* Prepare the remote registration window */
+	err = scif_prep_remote_window(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %ld\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	/* Tell the peer about the new window */
+	err = scif_send_scif_register(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %ld\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	scif_put_peer_dev(spdev);
+	/* No further failures expected. Insert new window */
+	scif_insert_local_window(window, ep);
+	dev_dbg(&ep->remote_dev->sdev->dev,
+		"SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n",
+		epd, addr, len, computed_offset);
+	return computed_offset;
+error_unmap:
+	scif_destroy_window(ep, window);
+error:
+	scif_put_peer_dev(spdev);
+	dev_err(&ep->remote_dev->sdev->dev,
+		"%s %d err %ld\n", __func__, __LINE__, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_register);
+
+int
+scif_unregister(scif_epd_t epd, off_t offset, size_t len)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_window *window = NULL;
+	struct scif_rma_req req;
+	int nr_pages, err;
+	struct device *spdev;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n",
+		ep, offset, len);
+	/* len must be page aligned. len should be non zero */
+	if (!len ||
+	    (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
+		return -EINVAL;
+
+	/* Offset is not page aligned or offset+len wraps around */
+	if ((ALIGN(offset, PAGE_SIZE) != offset) ||
+	    (offset + (off_t)len < offset))
+		return -EINVAL;
+
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	might_sleep();
+	nr_pages = len >> PAGE_SHIFT;
+
+	req.out_window = &window;
+	req.offset = offset;
+	req.prot = 0;
+	req.nr_bytes = len;
+	req.type = SCIF_WINDOW_FULL;
+	req.head = &ep->rma_info.reg_list;
+
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		return err;
+	}
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	err = scif_query_window(&req);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error;
+	}
+	/* Unregister all the windows in this range */
+	err = scif_rma_list_unregister(window, offset, nr_pages);
+	if (err)
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+error:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	scif_put_peer_dev(spdev);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_unregister);
diff --git a/drivers/misc/mic/scif/scif_rma.h b/drivers/misc/mic/scif/scif_rma.h
new file mode 100644
index 000000000..fa6722279
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma.h
@@ -0,0 +1,464 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_RMA_H
+#define SCIF_RMA_H
+
+#include <linux/dma_remapping.h>
+#include <linux/mmu_notifier.h>
+
+#include "../bus/scif_bus.h"
+
+/* If this bit is set then the mark is a remote fence mark */
+#define SCIF_REMOTE_FENCE_BIT          31
+/* Magic value used to indicate a remote fence request */
+#define SCIF_REMOTE_FENCE BIT_ULL(SCIF_REMOTE_FENCE_BIT)
+
+#define SCIF_MAX_UNALIGNED_BUF_SIZE (1024 * 1024ULL)
+#define SCIF_KMEM_UNALIGNED_BUF_SIZE (SCIF_MAX_UNALIGNED_BUF_SIZE + \
+				      (L1_CACHE_BYTES << 1))
+
+#define SCIF_IOVA_START_PFN		(1)
+#define SCIF_IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
+#define SCIF_DMA_64BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(64))
+#define SCIF_DMA_63BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(63))
+
+/*
+ * struct scif_endpt_rma_info - Per Endpoint Remote Memory Access Information
+ *
+ * @reg_list: List of registration windows for self
+ * @remote_reg_list: List of registration windows for peer
+ * @iovad: Offset generator
+ * @rma_lock: Synchronizes access to self/remote list and also protects the
+ *	      window from being destroyed while RMAs are in progress.
+ * @tc_lock: Synchronizes access to temporary cached windows list
+ *	     for SCIF Registration Caching.
+ * @mmn_lock: Synchronizes access to the list of MMU notifiers registered
+ * @tw_refcount: Keeps track of number of outstanding temporary registered
+ *		 windows created by scif_vreadfrom/scif_vwriteto which have
+ *		 not been destroyed.
+ * @tcw_refcount: Same as tw_refcount but for temporary cached windows
+ * @tcw_total_pages: Same as tcw_refcount but in terms of pages pinned
+ * @mmn_list: MMU notifier so that we can destroy the windows when required
+ * @fence_refcount: Keeps track of number of outstanding remote fence
+ *		    requests which have been received by the peer.
+ * @dma_chan: DMA channel used for all DMA transfers for this endpoint.
+ * @async_list_del: Detect asynchronous list entry deletion
+ * @vma_list: List of vmas with remote memory mappings
+ * @markwq: Wait queue used for scif_fence_mark/scif_fence_wait
+*/
+struct scif_endpt_rma_info {
+	struct list_head reg_list;
+	struct list_head remote_reg_list;
+	struct iova_domain iovad;
+	struct mutex rma_lock;
+	spinlock_t tc_lock;
+	struct mutex mmn_lock;
+	atomic_t tw_refcount;
+	atomic_t tcw_refcount;
+	atomic_t tcw_total_pages;
+	struct list_head mmn_list;
+	atomic_t fence_refcount;
+	struct dma_chan	*dma_chan;
+	int async_list_del;
+	struct list_head vma_list;
+	wait_queue_head_t markwq;
+};
+
+/*
+ * struct scif_fence_info - used for tracking fence requests
+ *
+ * @state: State of this transfer
+ * @wq: Fences wait on this queue
+ * @dma_mark: Used for storing the DMA mark
+ */
+struct scif_fence_info {
+	enum scif_msg_state state;
+	struct completion comp;
+	int dma_mark;
+};
+
+/*
+ * struct scif_remote_fence_info - used for tracking remote fence requests
+ *
+ * @msg: List of SCIF node QP fence messages
+ * @list: Link to list of remote fence requests
+ */
+struct scif_remote_fence_info {
+	struct scifmsg msg;
+	struct list_head list;
+};
+
+/*
+ * Specifies whether an RMA operation can span across partial windows, a single
+ * window or multiple contiguous windows. Mmaps can span across partial windows.
+ * Unregistration can span across complete windows. scif_get_pages() can span a
+ * single window. A window can also be of type self or peer.
+ */
+enum scif_window_type {
+	SCIF_WINDOW_PARTIAL,
+	SCIF_WINDOW_SINGLE,
+	SCIF_WINDOW_FULL,
+	SCIF_WINDOW_SELF,
+	SCIF_WINDOW_PEER
+};
+
+/* The number of physical addresses that can be stored in a PAGE. */
+#define SCIF_NR_ADDR_IN_PAGE   (0x1000 >> 3)
+
+/*
+ * struct scif_rma_lookup - RMA lookup data structure for page list transfers
+ *
+ * Store an array of lookup offsets. Each offset in this array maps
+ * one 4K page containing 512 physical addresses i.e. 2MB. 512 such
+ * offsets in a 4K page will correspond to 1GB of registered address space.
+
+ * @lookup: Array of offsets
+ * @offset: DMA offset of lookup array
+ */
+struct scif_rma_lookup {
+	dma_addr_t *lookup;
+	dma_addr_t offset;
+};
+
+/*
+ * struct scif_pinned_pages - A set of pinned pages obtained with
+ * scif_pin_pages() which could be part of multiple registered
+ * windows across different end points.
+ *
+ * @nr_pages: Number of pages which is defined as a s64 instead of an int
+ * to avoid sign extension with buffers >= 2GB
+ * @prot: read/write protections
+ * @map_flags: Flags specified during the pin operation
+ * @ref_count: Reference count bumped in terms of number of pages
+ * @magic: A magic value
+ * @pages: Array of pointers to struct pages populated with get_user_pages(..)
+ */
+struct scif_pinned_pages {
+	s64 nr_pages;
+	int prot;
+	int map_flags;
+	atomic_t ref_count;
+	u64 magic;
+	struct page **pages;
+};
+
+/*
+ * struct scif_status - Stores DMA status update information
+ *
+ * @src_dma_addr: Source buffer DMA address
+ * @val: src location for value to be written to the destination
+ * @ep: SCIF endpoint
+ */
+struct scif_status {
+	dma_addr_t src_dma_addr;
+	u64 val;
+	struct scif_endpt *ep;
+};
+
+/*
+ * struct scif_window - Registration Window for Self and Remote
+ *
+ * @nr_pages: Number of pages which is defined as a s64 instead of an int
+ * to avoid sign extension with buffers >= 2GB
+ * @nr_contig_chunks: Number of contiguous physical chunks
+ * @prot: read/write protections
+ * @ref_count: reference count in terms of number of pages
+ * @magic: Cookie to detect corruption
+ * @offset: registered offset
+ * @va_for_temp: va address that this window represents
+ * @dma_mark: Used to determine if all DMAs against the window are done
+ * @ep: Pointer to EP. Useful for passing EP around with messages to
+	avoid expensive list traversals.
+ * @list: link to list of windows for the endpoint
+ * @type: self or peer window
+ * @peer_window: Pointer to peer window. Useful for sending messages to peer
+ *		 without requiring an extra list traversal
+ * @unreg_state: unregistration state
+ * @offset_freed: True if the offset has been freed
+ * @temp: True for temporary windows created via scif_vreadfrom/scif_vwriteto
+ * @mm: memory descriptor for the task_struct which initiated the RMA
+ * @st: scatter gather table for DMA mappings with IOMMU enabled
+ * @pinned_pages: The set of pinned_pages backing this window
+ * @alloc_handle: Handle for sending ALLOC_REQ
+ * @regwq: Wait Queue for an registration (N)ACK
+ * @reg_state: Registration state
+ * @unregwq: Wait Queue for an unregistration (N)ACK
+ * @dma_addr_lookup: Lookup for physical addresses used for DMA
+ * @nr_lookup: Number of entries in lookup
+ * @mapped_offset: Offset used to map the window by the peer
+ * @dma_addr: Array of physical addresses used for Mgmt node & MIC initiated DMA
+ * @num_pages: Array specifying number of pages for each physical address
+ */
+struct scif_window {
+	s64 nr_pages;
+	int nr_contig_chunks;
+	int prot;
+	int ref_count;
+	u64 magic;
+	s64 offset;
+	unsigned long va_for_temp;
+	int dma_mark;
+	u64 ep;
+	struct list_head list;
+	enum scif_window_type type;
+	u64 peer_window;
+	enum scif_msg_state unreg_state;
+	bool offset_freed;
+	bool temp;
+	struct mm_struct *mm;
+	struct sg_table *st;
+	union {
+		struct {
+			struct scif_pinned_pages *pinned_pages;
+			struct scif_allocmsg alloc_handle;
+			wait_queue_head_t regwq;
+			enum scif_msg_state reg_state;
+			wait_queue_head_t unregwq;
+		};
+		struct {
+			struct scif_rma_lookup dma_addr_lookup;
+			struct scif_rma_lookup num_pages_lookup;
+			int nr_lookup;
+			dma_addr_t mapped_offset;
+		};
+	};
+	dma_addr_t *dma_addr;
+	u64 *num_pages;
+} __packed;
+
+/*
+ * scif_mmu_notif - SCIF mmu notifier information
+ *
+ * @mmu_notifier ep_mmu_notifier: MMU notifier operations
+ * @tc_reg_list: List of temp registration windows for self
+ * @mm: memory descriptor for the task_struct which initiated the RMA
+ * @ep: SCIF endpoint
+ * @list: link to list of MMU notifier information
+ */
+struct scif_mmu_notif {
+#ifdef CONFIG_MMU_NOTIFIER
+	struct mmu_notifier ep_mmu_notifier;
+#endif
+	struct list_head tc_reg_list;
+	struct mm_struct *mm;
+	struct scif_endpt *ep;
+	struct list_head list;
+};
+
+enum scif_rma_dir {
+	SCIF_LOCAL_TO_REMOTE,
+	SCIF_REMOTE_TO_LOCAL
+};
+
+extern struct kmem_cache *unaligned_cache;
+/* Initialize RMA for this EP */
+void scif_rma_ep_init(struct scif_endpt *ep);
+/* Check if epd can be uninitialized */
+int scif_rma_ep_can_uninit(struct scif_endpt *ep);
+/* Obtain a new offset. Callee must grab RMA lock */
+int scif_get_window_offset(struct scif_endpt *ep, int flags,
+			   s64 offset, int nr_pages, s64 *out_offset);
+/* Free offset. Callee must grab RMA lock */
+void scif_free_window_offset(struct scif_endpt *ep,
+			     struct scif_window *window, s64 offset);
+/* Create self registration window */
+struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
+				       s64 offset, bool temp);
+/* Destroy self registration window.*/
+int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window);
+void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window);
+/* Map pages of self window to Aperture/PCI */
+int scif_map_window(struct scif_dev *remote_dev,
+		    struct scif_window *window);
+/* Unregister a self window */
+int scif_unregister_window(struct scif_window *window);
+/* Destroy remote registration window */
+void
+scif_destroy_remote_window(struct scif_window *window);
+/* remove valid remote memory mappings from process address space */
+void scif_zap_mmaps(int node);
+/* Query if any applications have remote memory mappings */
+bool scif_rma_do_apps_have_mmaps(int node);
+/* Cleanup remote registration lists for zombie endpoints */
+void scif_cleanup_rma_for_zombies(int node);
+/* Reserve a DMA channel for a particular endpoint */
+int scif_reserve_dma_chan(struct scif_endpt *ep);
+/* Setup a DMA mark for an endpoint */
+int _scif_fence_mark(scif_epd_t epd, int *mark);
+int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
+		     enum scif_window_type type);
+void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_mmu_notif_handler(struct work_struct *work);
+void scif_rma_handle_remote_fences(void);
+void scif_rma_destroy_windows(void);
+void scif_rma_destroy_tcw_invalid(void);
+int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan);
+
+struct scif_window_iter {
+	s64 offset;
+	int index;
+};
+
+static inline void
+scif_init_window_iter(struct scif_window *window, struct scif_window_iter *iter)
+{
+	iter->offset = window->offset;
+	iter->index = 0;
+}
+
+dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
+				size_t *nr_bytes,
+				struct scif_window_iter *iter);
+static inline
+dma_addr_t __scif_off_to_dma_addr(struct scif_window *window, s64 off)
+{
+	return scif_off_to_dma_addr(window, off, NULL, NULL);
+}
+
+static inline bool scif_unaligned(off_t src_offset, off_t dst_offset)
+{
+	src_offset = src_offset & (L1_CACHE_BYTES - 1);
+	dst_offset = dst_offset & (L1_CACHE_BYTES - 1);
+	return !(src_offset == dst_offset);
+}
+
+/*
+ * scif_zalloc:
+ * @size: Size of the allocation request.
+ *
+ * Helper API which attempts to allocate zeroed pages via
+ * __get_free_pages(..) first and then falls back on
+ * vzalloc(..) if that fails.
+ */
+static inline void *scif_zalloc(size_t size)
+{
+	void *ret = NULL;
+	size_t align = ALIGN(size, PAGE_SIZE);
+
+	if (align && get_order(align) < MAX_ORDER)
+		ret = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					       get_order(align));
+	return ret ? ret : vzalloc(align);
+}
+
+/*
+ * scif_free:
+ * @addr: Address to be freed.
+ * @size: Size of the allocation.
+ * Helper API which frees memory allocated via scif_zalloc().
+ */
+static inline void scif_free(void *addr, size_t size)
+{
+	size_t align = ALIGN(size, PAGE_SIZE);
+
+	if (is_vmalloc_addr(addr))
+		vfree(addr);
+	else
+		free_pages((unsigned long)addr, get_order(align));
+}
+
+static inline void scif_get_window(struct scif_window *window, int nr_pages)
+{
+	window->ref_count += nr_pages;
+}
+
+static inline void scif_put_window(struct scif_window *window, int nr_pages)
+{
+	window->ref_count -= nr_pages;
+}
+
+static inline void scif_set_window_ref(struct scif_window *window, int nr_pages)
+{
+	window->ref_count = nr_pages;
+}
+
+static inline void
+scif_queue_for_cleanup(struct scif_window *window, struct list_head *list)
+{
+	spin_lock(&scif_info.rmalock);
+	list_add_tail(&window->list, list);
+	spin_unlock(&scif_info.rmalock);
+	schedule_work(&scif_info.misc_work);
+}
+
+static inline void __scif_rma_destroy_tcw_helper(struct scif_window *window)
+{
+	list_del_init(&window->list);
+	scif_queue_for_cleanup(window, &scif_info.rma_tc);
+}
+
+static inline bool scif_is_iommu_enabled(void)
+{
+#ifdef CONFIG_INTEL_IOMMU
+	return intel_iommu_enabled;
+#else
+	return false;
+#endif
+}
+#endif /* SCIF_RMA_H */
diff --git a/drivers/misc/mic/scif/scif_rma_list.c b/drivers/misc/mic/scif/scif_rma_list.c
new file mode 100644
index 000000000..e1ef8daed
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma_list.c
@@ -0,0 +1,291 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+#include <linux/mmu_notifier.h>
+#include <linux/highmem.h>
+
+/*
+ * scif_insert_tcw:
+ *
+ * Insert a temp window to the temp registration list sorted by va_for_temp.
+ * RMA lock must be held.
+ */
+void scif_insert_tcw(struct scif_window *window, struct list_head *head)
+{
+	struct scif_window *curr = NULL;
+	struct scif_window *prev = list_entry(head, struct scif_window, list);
+	struct list_head *item;
+
+	INIT_LIST_HEAD(&window->list);
+	/* Compare with tail and if the entry is new tail add it to the end */
+	if (!list_empty(head)) {
+		curr = list_entry(head->prev, struct scif_window, list);
+		if (curr->va_for_temp < window->va_for_temp) {
+			list_add_tail(&window->list, head);
+			return;
+		}
+	}
+	list_for_each(item, head) {
+		curr = list_entry(item, struct scif_window, list);
+		if (curr->va_for_temp > window->va_for_temp)
+			break;
+		prev = curr;
+	}
+	list_add(&window->list, &prev->list);
+}
+
+/*
+ * scif_insert_window:
+ *
+ * Insert a window to the self registration list sorted by offset.
+ * RMA lock must be held.
+ */
+void scif_insert_window(struct scif_window *window, struct list_head *head)
+{
+	struct scif_window *curr = NULL, *prev = NULL;
+	struct list_head *item;
+
+	INIT_LIST_HEAD(&window->list);
+	list_for_each(item, head) {
+		curr = list_entry(item, struct scif_window, list);
+		if (curr->offset > window->offset)
+			break;
+		prev = curr;
+	}
+	if (!prev)
+		list_add(&window->list, head);
+	else
+		list_add(&window->list, &prev->list);
+	scif_set_window_ref(window, window->nr_pages);
+}
+
+/*
+ * scif_query_tcw:
+ *
+ * Query the temp cached registration list of ep for an overlapping window
+ * in case of permission mismatch, destroy the previous window. if permissions
+ * match and overlap is partial, destroy the window but return the new range
+ * RMA lock must be held.
+ */
+int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *req)
+{
+	struct list_head *item, *temp, *head = req->head;
+	struct scif_window *window;
+	u64 start_va_window, start_va_req = req->va_for_temp;
+	u64 end_va_window, end_va_req = start_va_req + req->nr_bytes;
+
+	if (!req->nr_bytes)
+		return -EINVAL;
+	/*
+	 * Avoid traversing the entire list to find out that there
+	 * is no entry that matches
+	 */
+	if (!list_empty(head)) {
+		window = list_last_entry(head, struct scif_window, list);
+		end_va_window = window->va_for_temp +
+			(window->nr_pages << PAGE_SHIFT);
+		if (start_va_req > end_va_window)
+			return -ENXIO;
+	}
+	list_for_each_safe(item, temp, head) {
+		window = list_entry(item, struct scif_window, list);
+		start_va_window = window->va_for_temp;
+		end_va_window = window->va_for_temp +
+			(window->nr_pages << PAGE_SHIFT);
+		if (start_va_req < start_va_window &&
+		    end_va_req < start_va_window)
+			break;
+		if (start_va_req >= end_va_window)
+			continue;
+		if ((window->prot & req->prot) == req->prot) {
+			if (start_va_req >= start_va_window &&
+			    end_va_req <= end_va_window) {
+				*req->out_window = window;
+				return 0;
+			}
+			/* expand window */
+			if (start_va_req < start_va_window) {
+				req->nr_bytes +=
+					start_va_window - start_va_req;
+				req->va_for_temp = start_va_window;
+			}
+			if (end_va_req >= end_va_window)
+				req->nr_bytes += end_va_window - end_va_req;
+		}
+		/* Destroy the old window to create a new one */
+		__scif_rma_destroy_tcw_helper(window);
+		break;
+	}
+	return -ENXIO;
+}
+
+/*
+ * scif_query_window:
+ *
+ * Query the registration list and check if a valid contiguous
+ * range of windows exist.
+ * RMA lock must be held.
+ */
+int scif_query_window(struct scif_rma_req *req)
+{
+	struct list_head *item;
+	struct scif_window *window;
+	s64 end_offset, offset = req->offset;
+	u64 tmp_min, nr_bytes_left = req->nr_bytes;
+
+	if (!req->nr_bytes)
+		return -EINVAL;
+
+	list_for_each(item, req->head) {
+		window = list_entry(item, struct scif_window, list);
+		end_offset = window->offset +
+			(window->nr_pages << PAGE_SHIFT);
+		if (offset < window->offset)
+			/* Offset not found! */
+			return -ENXIO;
+		if (offset >= end_offset)
+			continue;
+		/* Check read/write protections. */
+		if ((window->prot & req->prot) != req->prot)
+			return -EPERM;
+		if (nr_bytes_left == req->nr_bytes)
+			/* Store the first window */
+			*req->out_window = window;
+		tmp_min = min((u64)end_offset - offset, nr_bytes_left);
+		nr_bytes_left -= tmp_min;
+		offset += tmp_min;
+		/*
+		 * Range requested encompasses
+		 * multiple windows contiguously.
+		 */
+		if (!nr_bytes_left) {
+			/* Done for partial window */
+			if (req->type == SCIF_WINDOW_PARTIAL ||
+			    req->type == SCIF_WINDOW_SINGLE)
+				return 0;
+			/* Extra logic for full windows */
+			if (offset == end_offset)
+				/* Spanning multiple whole windows */
+				return 0;
+				/* Not spanning multiple whole windows */
+			return -ENXIO;
+		}
+		if (req->type == SCIF_WINDOW_SINGLE)
+			break;
+	}
+	dev_err(scif_info.mdev.this_device,
+		"%s %d ENXIO\n", __func__, __LINE__);
+	return -ENXIO;
+}
+
+/*
+ * scif_rma_list_unregister:
+ *
+ * Traverse the self registration list starting from window:
+ * 1) Call scif_unregister_window(..)
+ * RMA lock must be held.
+ */
+int scif_rma_list_unregister(struct scif_window *window,
+			     s64 offset, int nr_pages)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+	struct list_head *head = &ep->rma_info.reg_list;
+	s64 end_offset;
+	int err = 0;
+	int loop_nr_pages;
+	struct scif_window *_window;
+
+	list_for_each_entry_safe_from(window, _window, head, list) {
+		end_offset = window->offset + (window->nr_pages << PAGE_SHIFT);
+		loop_nr_pages = min((int)((end_offset - offset) >> PAGE_SHIFT),
+				    nr_pages);
+		err = scif_unregister_window(window);
+		if (err)
+			return err;
+		nr_pages -= loop_nr_pages;
+		offset += (loop_nr_pages << PAGE_SHIFT);
+		if (!nr_pages)
+			break;
+	}
+	return 0;
+}
+
+/*
+ * scif_unmap_all_window:
+ *
+ * Traverse all the windows in the self registration list and:
+ * 1) Delete any DMA mappings created
+ */
+void scif_unmap_all_windows(scif_epd_t epd)
+{
+	struct list_head *item, *tmp;
+	struct scif_window *window;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct list_head *head = &ep->rma_info.reg_list;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	list_for_each_safe(item, tmp, head) {
+		window = list_entry(item, struct scif_window, list);
+		scif_unmap_window(ep->remote_dev, window);
+	}
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/*
+ * scif_unregister_all_window:
+ *
+ * Traverse all the windows in the self registration list and:
+ * 1) Call scif_unregister_window(..)
+ * RMA lock must be held.
+ */
+int scif_unregister_all_windows(scif_epd_t epd)
+{
+	struct list_head *item, *tmp;
+	struct scif_window *window;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct list_head *head = &ep->rma_info.reg_list;
+	int err = 0;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+retry:
+	item = NULL;
+	tmp = NULL;
+	list_for_each_safe(item, tmp, head) {
+		window = list_entry(item, struct scif_window, list);
+		ep->rma_info.async_list_del = 0;
+		err = scif_unregister_window(window);
+		if (err)
+			dev_err(scif_info.mdev.this_device,
+				"%s %d err %d\n",
+				__func__, __LINE__, err);
+		/*
+		 * Need to restart list traversal if there has been
+		 * an asynchronous list entry deletion.
+		 */
+		if (ACCESS_ONCE(ep->rma_info.async_list_del))
+			goto retry;
+	}
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (!list_empty(&ep->rma_info.mmn_list)) {
+		spin_lock(&scif_info.rmalock);
+		list_add_tail(&ep->mmu_list, &scif_info.mmu_notif_cleanup);
+		spin_unlock(&scif_info.rmalock);
+		schedule_work(&scif_info.mmu_notif_work);
+	}
+	return err;
+}
diff --git a/drivers/misc/mic/scif/scif_rma_list.h b/drivers/misc/mic/scif/scif_rma_list.h
new file mode 100644
index 000000000..7d58d1d55
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma_list.h
@@ -0,0 +1,57 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_RMA_LIST_H
+#define SCIF_RMA_LIST_H
+
+/*
+ * struct scif_rma_req - Self Registration list RMA Request query
+ *
+ * @out_window - Returns the window if found
+ * @offset: Starting offset
+ * @nr_bytes: number of bytes
+ * @prot: protection requested i.e. read or write or both
+ * @type: Specify single, partial or multiple windows
+ * @head: Head of list on which to search
+ * @va_for_temp: VA for searching temporary cached windows
+ */
+struct scif_rma_req {
+	struct scif_window **out_window;
+	union {
+		s64 offset;
+		unsigned long va_for_temp;
+	};
+	size_t nr_bytes;
+	int prot;
+	enum scif_window_type type;
+	struct list_head *head;
+};
+
+/* Insert */
+void scif_insert_window(struct scif_window *window, struct list_head *head);
+void scif_insert_tcw(struct scif_window *window,
+		     struct list_head *head);
+/* Query */
+int scif_query_window(struct scif_rma_req *request);
+int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *request);
+/* Called from close to unregister all self windows */
+int scif_unregister_all_windows(scif_epd_t epd);
+void scif_unmap_all_windows(scif_epd_t epd);
+/* Traverse list and unregister */
+int scif_rma_list_unregister(struct scif_window *window, s64 offset,
+			     int nr_pages);
+#endif /* SCIF_RMA_LIST_H */
author	André Fabian Silva Delgado <emulatorman@parabola.nu>	2016-01-20 14:01:31 -0300
committer	André Fabian Silva Delgado <emulatorman@parabola.nu>	2016-01-20 14:01:31 -0300
commit	b4b7ff4b08e691656c9d77c758fc355833128ac0 (patch)
tree	82fcb00e6b918026dc9f2d1f05ed8eee83874cc0 /drivers/misc/mic/scif
parent	35acfa0fc609f2a2cd95cef4a6a9c3a5c38f1778 (diff)