13 files changed, 5779 insertions, 0 deletions
diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig
new file mode 100644
index 000000000..7639ffc36
--- /dev/null
+++ b/drivers/crypto/ccp/Kconfig
@@ -0,0 +1,24 @@
+config CRYPTO_DEV_CCP_DD
+	tristate "Cryptographic Coprocessor device driver"
+	depends on CRYPTO_DEV_CCP
+	default m
+	select HW_RANDOM
+	help
+	  Provides the interface to use the AMD Cryptographic Coprocessor
+	  which can be used to accelerate or offload encryption operations
+	  such as SHA, AES and more. If you choose 'M' here, this module
+	  will be called ccp.
+
+config CRYPTO_DEV_CCP_CRYPTO
+	tristate "Encryption and hashing acceleration support"
+	depends on CRYPTO_DEV_CCP_DD
+	default m
+	select CRYPTO_ALGAPI
+	select CRYPTO_HASH
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_AUTHENC
+	help
+	  Support for using the cryptographic API with the AMD Cryptographic
+	  Coprocessor. This module supports acceleration and offload of SHA
+	  and AES algorithms.  If you choose 'M' here, this module will be
+	  called ccp_crypto.
diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile
new file mode 100644
index 000000000..55a1f3951
--- /dev/null
+++ b/drivers/crypto/ccp/Makefile
@@ -0,0 +1,10 @@
+obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o
+ccp-objs := ccp-dev.o ccp-ops.o ccp-platform.o
+ccp-$(CONFIG_PCI) += ccp-pci.o
+
+obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o
+ccp-crypto-objs := ccp-crypto-main.o \
+		   ccp-crypto-aes.o \
+		   ccp-crypto-aes-cmac.o \
+		   ccp-crypto-aes-xts.o \
+		   ccp-crypto-sha.o
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
new file mode 100644
index 000000000..ea7e84469
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
@@ -0,0 +1,367 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) AES CMAC crypto API support
+ *
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/scatterlist.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+#include <crypto/scatterwalk.h>
+
+#include "ccp-crypto.h"
+
+static int ccp_aes_cmac_complete(struct crypto_async_request *async_req,
+				 int ret)
+{
+	struct ahash_request *req = ahash_request_cast(async_req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
+	unsigned int digest_size = crypto_ahash_digestsize(tfm);
+
+	if (ret)
+		goto e_free;
+
+	if (rctx->hash_rem) {
+		/* Save remaining data to buffer */
+		unsigned int offset = rctx->nbytes - rctx->hash_rem;
+
+		scatterwalk_map_and_copy(rctx->buf, rctx->src,
+					 offset, rctx->hash_rem, 0);
+		rctx->buf_count = rctx->hash_rem;
+	} else {
+		rctx->buf_count = 0;
+	}
+
+	/* Update result area if supplied */
+	if (req->result)
+		memcpy(req->result, rctx->iv, digest_size);
+
+e_free:
+	sg_free_table(&rctx->data_sg);
+
+	return ret;
+}
+
+static int ccp_do_cmac_update(struct ahash_request *req, unsigned int nbytes,
+			      unsigned int final)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct ccp_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
+	struct scatterlist *sg, *cmac_key_sg = NULL;
+	unsigned int block_size =
+		crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
+	unsigned int need_pad, sg_count;
+	gfp_t gfp;
+	u64 len;
+	int ret;
+
+	if (!ctx->u.aes.key_len)
+		return -EINVAL;
+
+	if (nbytes)
+		rctx->null_msg = 0;
+
+	len = (u64)rctx->buf_count + (u64)nbytes;
+
+	if (!final && (len <= block_size)) {
+		scatterwalk_map_and_copy(rctx->buf + rctx->buf_count, req->src,
+					 0, nbytes, 0);
+		rctx->buf_count += nbytes;
+
+		return 0;
+	}
+
+	rctx->src = req->src;
+	rctx->nbytes = nbytes;
+
+	rctx->final = final;
+	rctx->hash_rem = final ? 0 : len & (block_size - 1);
+	rctx->hash_cnt = len - rctx->hash_rem;
+	if (!final && !rctx->hash_rem) {
+		/* CCP can't do zero length final, so keep some data around */
+		rctx->hash_cnt -= block_size;
+		rctx->hash_rem = block_size;
+	}
+
+	if (final && (rctx->null_msg || (len & (block_size - 1))))
+		need_pad = 1;
+	else
+		need_pad = 0;
+
+	sg_init_one(&rctx->iv_sg, rctx->iv, sizeof(rctx->iv));
+
+	/* Build the data scatterlist table - allocate enough entries for all
+	 * possible data pieces (buffer, input data, padding)
+	 */
+	sg_count = (nbytes) ? sg_nents(req->src) + 2 : 2;
+	gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+		GFP_KERNEL : GFP_ATOMIC;
+	ret = sg_alloc_table(&rctx->data_sg, sg_count, gfp);
+	if (ret)
+		return ret;
+
+	sg = NULL;
+	if (rctx->buf_count) {
+		sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count);
+		sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->buf_sg);
+	}
+
+	if (nbytes)
+		sg = ccp_crypto_sg_table_add(&rctx->data_sg, req->src);
+
+	if (need_pad) {
+		int pad_length = block_size - (len & (block_size - 1));
+
+		rctx->hash_cnt += pad_length;
+
+		memset(rctx->pad, 0, sizeof(rctx->pad));
+		rctx->pad[0] = 0x80;
+		sg_init_one(&rctx->pad_sg, rctx->pad, pad_length);
+		sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->pad_sg);
+	}
+	if (sg) {
+		sg_mark_end(sg);
+		sg = rctx->data_sg.sgl;
+	}
+
+	/* Initialize the K1/K2 scatterlist */
+	if (final)
+		cmac_key_sg = (need_pad) ? &ctx->u.aes.k2_sg
+					 : &ctx->u.aes.k1_sg;
+
+	memset(&rctx->cmd, 0, sizeof(rctx->cmd));
+	INIT_LIST_HEAD(&rctx->cmd.entry);
+	rctx->cmd.engine = CCP_ENGINE_AES;
+	rctx->cmd.u.aes.type = ctx->u.aes.type;
+	rctx->cmd.u.aes.mode = ctx->u.aes.mode;
+	rctx->cmd.u.aes.action = CCP_AES_ACTION_ENCRYPT;
+	rctx->cmd.u.aes.key = &ctx->u.aes.key_sg;
+	rctx->cmd.u.aes.key_len = ctx->u.aes.key_len;
+	rctx->cmd.u.aes.iv = &rctx->iv_sg;
+	rctx->cmd.u.aes.iv_len = AES_BLOCK_SIZE;
+	rctx->cmd.u.aes.src = sg;
+	rctx->cmd.u.aes.src_len = rctx->hash_cnt;
+	rctx->cmd.u.aes.dst = NULL;
+	rctx->cmd.u.aes.cmac_key = cmac_key_sg;
+	rctx->cmd.u.aes.cmac_key_len = ctx->u.aes.kn_len;
+	rctx->cmd.u.aes.cmac_final = final;
+
+	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
+
+	return ret;
+}
+
+static int ccp_aes_cmac_init(struct ahash_request *req)
+{
+	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
+
+	memset(rctx, 0, sizeof(*rctx));
+
+	rctx->null_msg = 1;
+
+	return 0;
+}
+
+static int ccp_aes_cmac_update(struct ahash_request *req)
+{
+	return ccp_do_cmac_update(req, req->nbytes, 0);
+}
+
+static int ccp_aes_cmac_final(struct ahash_request *req)
+{
+	return ccp_do_cmac_update(req, 0, 1);
+}
+
+static int ccp_aes_cmac_finup(struct ahash_request *req)
+{
+	return ccp_do_cmac_update(req, req->nbytes, 1);
+}
+
+static int ccp_aes_cmac_digest(struct ahash_request *req)
+{
+	int ret;
+
+	ret = ccp_aes_cmac_init(req);
+	if (ret)
+		return ret;
+
+	return ccp_aes_cmac_finup(req);
+}
+
+static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key,
+			       unsigned int key_len)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+	struct ccp_crypto_ahash_alg *alg =
+		ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm));
+	u64 k0_hi, k0_lo, k1_hi, k1_lo, k2_hi, k2_lo;
+	u64 rb_hi = 0x00, rb_lo = 0x87;
+	__be64 *gk;
+	int ret;
+
+	switch (key_len) {
+	case AES_KEYSIZE_128:
+		ctx->u.aes.type = CCP_AES_TYPE_128;
+		break;
+	case AES_KEYSIZE_192:
+		ctx->u.aes.type = CCP_AES_TYPE_192;
+		break;
+	case AES_KEYSIZE_256:
+		ctx->u.aes.type = CCP_AES_TYPE_256;
+		break;
+	default:
+		crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	ctx->u.aes.mode = alg->mode;
+
+	/* Set to zero until complete */
+	ctx->u.aes.key_len = 0;
+
+	/* Set the key for the AES cipher used to generate the keys */
+	ret = crypto_cipher_setkey(ctx->u.aes.tfm_cipher, key, key_len);
+	if (ret)
+		return ret;
+
+	/* Encrypt a block of zeroes - use key area in context */
+	memset(ctx->u.aes.key, 0, sizeof(ctx->u.aes.key));
+	crypto_cipher_encrypt_one(ctx->u.aes.tfm_cipher, ctx->u.aes.key,
+				  ctx->u.aes.key);
+
+	/* Generate K1 and K2 */
+	k0_hi = be64_to_cpu(*((__be64 *)ctx->u.aes.key));
+	k0_lo = be64_to_cpu(*((__be64 *)ctx->u.aes.key + 1));
+
+	k1_hi = (k0_hi << 1) | (k0_lo >> 63);
+	k1_lo = k0_lo << 1;
+	if (ctx->u.aes.key[0] & 0x80) {
+		k1_hi ^= rb_hi;
+		k1_lo ^= rb_lo;
+	}
+	gk = (__be64 *)ctx->u.aes.k1;
+	*gk = cpu_to_be64(k1_hi);
+	gk++;
+	*gk = cpu_to_be64(k1_lo);
+
+	k2_hi = (k1_hi << 1) | (k1_lo >> 63);
+	k2_lo = k1_lo << 1;
+	if (ctx->u.aes.k1[0] & 0x80) {
+		k2_hi ^= rb_hi;
+		k2_lo ^= rb_lo;
+	}
+	gk = (__be64 *)ctx->u.aes.k2;
+	*gk = cpu_to_be64(k2_hi);
+	gk++;
+	*gk = cpu_to_be64(k2_lo);
+
+	ctx->u.aes.kn_len = sizeof(ctx->u.aes.k1);
+	sg_init_one(&ctx->u.aes.k1_sg, ctx->u.aes.k1, sizeof(ctx->u.aes.k1));
+	sg_init_one(&ctx->u.aes.k2_sg, ctx->u.aes.k2, sizeof(ctx->u.aes.k2));
+
+	/* Save the supplied key */
+	memset(ctx->u.aes.key, 0, sizeof(ctx->u.aes.key));
+	memcpy(ctx->u.aes.key, key, key_len);
+	ctx->u.aes.key_len = key_len;
+	sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len);
+
+	return ret;
+}
+
+static int ccp_aes_cmac_cra_init(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
+	struct crypto_cipher *cipher_tfm;
+
+	ctx->complete = ccp_aes_cmac_complete;
+	ctx->u.aes.key_len = 0;
+
+	crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_aes_cmac_req_ctx));
+
+	cipher_tfm = crypto_alloc_cipher("aes", 0,
+					 CRYPTO_ALG_ASYNC |
+					 CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(cipher_tfm)) {
+		pr_warn("could not load aes cipher driver\n");
+		return PTR_ERR(cipher_tfm);
+	}
+	ctx->u.aes.tfm_cipher = cipher_tfm;
+
+	return 0;
+}
+
+static void ccp_aes_cmac_cra_exit(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if (ctx->u.aes.tfm_cipher)
+		crypto_free_cipher(ctx->u.aes.tfm_cipher);
+	ctx->u.aes.tfm_cipher = NULL;
+}
+
+int ccp_register_aes_cmac_algs(struct list_head *head)
+{
+	struct ccp_crypto_ahash_alg *ccp_alg;
+	struct ahash_alg *alg;
+	struct hash_alg_common *halg;
+	struct crypto_alg *base;
+	int ret;
+
+	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
+	if (!ccp_alg)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&ccp_alg->entry);
+	ccp_alg->mode = CCP_AES_MODE_CMAC;
+
+	alg = &ccp_alg->alg;
+	alg->init = ccp_aes_cmac_init;
+	alg->update = ccp_aes_cmac_update;
+	alg->final = ccp_aes_cmac_final;
+	alg->finup = ccp_aes_cmac_finup;
+	alg->digest = ccp_aes_cmac_digest;
+	alg->setkey = ccp_aes_cmac_setkey;
+
+	halg = &alg->halg;
+	halg->digestsize = AES_BLOCK_SIZE;
+
+	base = &halg->base;
+	snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "cmac(aes)");
+	snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "cmac-aes-ccp");
+	base->cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC |
+			  CRYPTO_ALG_KERN_DRIVER_ONLY |
+			  CRYPTO_ALG_NEED_FALLBACK;
+	base->cra_blocksize = AES_BLOCK_SIZE;
+	base->cra_ctxsize = sizeof(struct ccp_ctx);
+	base->cra_priority = CCP_CRA_PRIORITY;
+	base->cra_type = &crypto_ahash_type;
+	base->cra_init = ccp_aes_cmac_cra_init;
+	base->cra_exit = ccp_aes_cmac_cra_exit;
+	base->cra_module = THIS_MODULE;
+
+	ret = crypto_register_ahash(alg);
+	if (ret) {
+		pr_err("%s ahash algorithm registration error (%d)\n",
+		       base->cra_name, ret);
+		kfree(ccp_alg);
+		return ret;
+	}
+
+	list_add(&ccp_alg->entry, head);
+
+	return 0;
+}
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
new file mode 100644
index 000000000..52c7395cb
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
@@ -0,0 +1,277 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) AES XTS crypto API support
+ *
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/scatterlist.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+#include <crypto/scatterwalk.h>
+
+#include "ccp-crypto.h"
+
+struct ccp_aes_xts_def {
+	const char *name;
+	const char *drv_name;
+};
+
+static struct ccp_aes_xts_def aes_xts_algs[] = {
+	{
+		.name		= "xts(aes)",
+		.drv_name	= "xts-aes-ccp",
+	},
+};
+
+struct ccp_unit_size_map {
+	unsigned int size;
+	u32 value;
+};
+
+static struct ccp_unit_size_map unit_size_map[] = {
+	{
+		.size	= 4096,
+		.value	= CCP_XTS_AES_UNIT_SIZE_4096,
+	},
+	{
+		.size	= 2048,
+		.value	= CCP_XTS_AES_UNIT_SIZE_2048,
+	},
+	{
+		.size	= 1024,
+		.value	= CCP_XTS_AES_UNIT_SIZE_1024,
+	},
+	{
+		.size	= 512,
+		.value	= CCP_XTS_AES_UNIT_SIZE_512,
+	},
+	{
+		.size	= 256,
+		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
+	},
+	{
+		.size	= 128,
+		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
+	},
+	{
+		.size	= 64,
+		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
+	},
+	{
+		.size	= 32,
+		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
+	},
+	{
+		.size	= 16,
+		.value	= CCP_XTS_AES_UNIT_SIZE_16,
+	},
+	{
+		.size	= 1,
+		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
+	},
+};
+
+static int ccp_aes_xts_complete(struct crypto_async_request *async_req, int ret)
+{
+	struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
+	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+
+	if (ret)
+		return ret;
+
+	memcpy(req->info, rctx->iv, AES_BLOCK_SIZE);
+
+	return 0;
+}
+
+static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+			      unsigned int key_len)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm));
+
+	/* Only support 128-bit AES key with a 128-bit Tweak key,
+	 * otherwise use the fallback
+	 */
+	switch (key_len) {
+	case AES_KEYSIZE_128 * 2:
+		memcpy(ctx->u.aes.key, key, key_len);
+		break;
+	}
+	ctx->u.aes.key_len = key_len / 2;
+	sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len);
+
+	return crypto_ablkcipher_setkey(ctx->u.aes.tfm_ablkcipher, key,
+					key_len);
+}
+
+static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
+			     unsigned int encrypt)
+{
+	struct crypto_tfm *tfm =
+		crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
+	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	unsigned int unit;
+	int ret;
+
+	if (!ctx->u.aes.key_len)
+		return -EINVAL;
+
+	if (req->nbytes & (AES_BLOCK_SIZE - 1))
+		return -EINVAL;
+
+	if (!req->info)
+		return -EINVAL;
+
+	for (unit = 0; unit < ARRAY_SIZE(unit_size_map); unit++)
+		if (!(req->nbytes & (unit_size_map[unit].size - 1)))
+			break;
+
+	if ((unit_size_map[unit].value == CCP_XTS_AES_UNIT_SIZE__LAST) ||
+	    (ctx->u.aes.key_len != AES_KEYSIZE_128)) {
+		/* Use the fallback to process the request for any
+		 * unsupported unit sizes or key sizes
+		 */
+		ablkcipher_request_set_tfm(req, ctx->u.aes.tfm_ablkcipher);
+		ret = (encrypt) ? crypto_ablkcipher_encrypt(req) :
+				  crypto_ablkcipher_decrypt(req);
+		ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm));
+
+		return ret;
+	}
+
+	memcpy(rctx->iv, req->info, AES_BLOCK_SIZE);
+	sg_init_one(&rctx->iv_sg, rctx->iv, AES_BLOCK_SIZE);
+
+	memset(&rctx->cmd, 0, sizeof(rctx->cmd));
+	INIT_LIST_HEAD(&rctx->cmd.entry);
+	rctx->cmd.engine = CCP_ENGINE_XTS_AES_128;
+	rctx->cmd.u.xts.action = (encrypt) ? CCP_AES_ACTION_ENCRYPT
+					   : CCP_AES_ACTION_DECRYPT;
+	rctx->cmd.u.xts.unit_size = unit_size_map[unit].value;
+	rctx->cmd.u.xts.key = &ctx->u.aes.key_sg;
+	rctx->cmd.u.xts.key_len = ctx->u.aes.key_len;
+	rctx->cmd.u.xts.iv = &rctx->iv_sg;
+	rctx->cmd.u.xts.iv_len = AES_BLOCK_SIZE;
+	rctx->cmd.u.xts.src = req->src;
+	rctx->cmd.u.xts.src_len = req->nbytes;
+	rctx->cmd.u.xts.dst = req->dst;
+
+	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
+
+	return ret;
+}
+
+static int ccp_aes_xts_encrypt(struct ablkcipher_request *req)
+{
+	return ccp_aes_xts_crypt(req, 1);
+}
+
+static int ccp_aes_xts_decrypt(struct ablkcipher_request *req)
+{
+	return ccp_aes_xts_crypt(req, 0);
+}
+
+static int ccp_aes_xts_cra_init(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_ablkcipher *fallback_tfm;
+
+	ctx->complete = ccp_aes_xts_complete;
+	ctx->u.aes.key_len = 0;
+
+	fallback_tfm = crypto_alloc_ablkcipher(crypto_tfm_alg_name(tfm), 0,
+					       CRYPTO_ALG_ASYNC |
+					       CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(fallback_tfm)) {
+		pr_warn("could not load fallback driver %s\n",
+			crypto_tfm_alg_name(tfm));
+		return PTR_ERR(fallback_tfm);
+	}
+	ctx->u.aes.tfm_ablkcipher = fallback_tfm;
+
+	tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx) +
+				      fallback_tfm->base.crt_ablkcipher.reqsize;
+
+	return 0;
+}
+
+static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if (ctx->u.aes.tfm_ablkcipher)
+		crypto_free_ablkcipher(ctx->u.aes.tfm_ablkcipher);
+	ctx->u.aes.tfm_ablkcipher = NULL;
+}
+
+static int ccp_register_aes_xts_alg(struct list_head *head,
+				    const struct ccp_aes_xts_def *def)
+{
+	struct ccp_crypto_ablkcipher_alg *ccp_alg;
+	struct crypto_alg *alg;
+	int ret;
+
+	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
+	if (!ccp_alg)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&ccp_alg->entry);
+
+	alg = &ccp_alg->alg;
+
+	snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+	snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+		 def->drv_name);
+	alg->cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC |
+			 CRYPTO_ALG_KERN_DRIVER_ONLY |
+			 CRYPTO_ALG_NEED_FALLBACK;
+	alg->cra_blocksize = AES_BLOCK_SIZE;
+	alg->cra_ctxsize = sizeof(struct ccp_ctx);
+	alg->cra_priority = CCP_CRA_PRIORITY;
+	alg->cra_type = &crypto_ablkcipher_type;
+	alg->cra_ablkcipher.setkey = ccp_aes_xts_setkey;
+	alg->cra_ablkcipher.encrypt = ccp_aes_xts_encrypt;
+	alg->cra_ablkcipher.decrypt = ccp_aes_xts_decrypt;
+	alg->cra_ablkcipher.min_keysize = AES_MIN_KEY_SIZE * 2;
+	alg->cra_ablkcipher.max_keysize = AES_MAX_KEY_SIZE * 2;
+	alg->cra_ablkcipher.ivsize = AES_BLOCK_SIZE;
+	alg->cra_init = ccp_aes_xts_cra_init;
+	alg->cra_exit = ccp_aes_xts_cra_exit;
+	alg->cra_module = THIS_MODULE;
+
+	ret = crypto_register_alg(alg);
+	if (ret) {
+		pr_err("%s ablkcipher algorithm registration error (%d)\n",
+		       alg->cra_name, ret);
+		kfree(ccp_alg);
+		return ret;
+	}
+
+	list_add(&ccp_alg->entry, head);
+
+	return 0;
+}
+
+int ccp_register_aes_xts_algs(struct list_head *head)
+{
+	int i, ret;
+
+	for (i = 0; i < ARRAY_SIZE(aes_xts_algs); i++) {
+		ret = ccp_register_aes_xts_alg(head, &aes_xts_algs[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c
new file mode 100644
index 000000000..7984f9108
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-aes.c
@@ -0,0 +1,368 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) AES crypto API support
+ *
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/scatterlist.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+#include <crypto/ctr.h>
+#include <crypto/scatterwalk.h>
+
+#include "ccp-crypto.h"
+
+static int ccp_aes_complete(struct crypto_async_request *async_req, int ret)
+{
+	struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
+	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+
+	if (ret)
+		return ret;
+
+	if (ctx->u.aes.mode != CCP_AES_MODE_ECB)
+		memcpy(req->info, rctx->iv, AES_BLOCK_SIZE);
+
+	return 0;
+}
+
+static int ccp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+			  unsigned int key_len)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm));
+	struct ccp_crypto_ablkcipher_alg *alg =
+		ccp_crypto_ablkcipher_alg(crypto_ablkcipher_tfm(tfm));
+
+	switch (key_len) {
+	case AES_KEYSIZE_128:
+		ctx->u.aes.type = CCP_AES_TYPE_128;
+		break;
+	case AES_KEYSIZE_192:
+		ctx->u.aes.type = CCP_AES_TYPE_192;
+		break;
+	case AES_KEYSIZE_256:
+		ctx->u.aes.type = CCP_AES_TYPE_256;
+		break;
+	default:
+		crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	ctx->u.aes.mode = alg->mode;
+	ctx->u.aes.key_len = key_len;
+
+	memcpy(ctx->u.aes.key, key, key_len);
+	sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len);
+
+	return 0;
+}
+
+static int ccp_aes_crypt(struct ablkcipher_request *req, bool encrypt)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct scatterlist *iv_sg = NULL;
+	unsigned int iv_len = 0;
+	int ret;
+
+	if (!ctx->u.aes.key_len)
+		return -EINVAL;
+
+	if (((ctx->u.aes.mode == CCP_AES_MODE_ECB) ||
+	     (ctx->u.aes.mode == CCP_AES_MODE_CBC) ||
+	     (ctx->u.aes.mode == CCP_AES_MODE_CFB)) &&
+	    (req->nbytes & (AES_BLOCK_SIZE - 1)))
+		return -EINVAL;
+
+	if (ctx->u.aes.mode != CCP_AES_MODE_ECB) {
+		if (!req->info)
+			return -EINVAL;
+
+		memcpy(rctx->iv, req->info, AES_BLOCK_SIZE);
+		iv_sg = &rctx->iv_sg;
+		iv_len = AES_BLOCK_SIZE;
+		sg_init_one(iv_sg, rctx->iv, iv_len);
+	}
+
+	memset(&rctx->cmd, 0, sizeof(rctx->cmd));
+	INIT_LIST_HEAD(&rctx->cmd.entry);
+	rctx->cmd.engine = CCP_ENGINE_AES;
+	rctx->cmd.u.aes.type = ctx->u.aes.type;
+	rctx->cmd.u.aes.mode = ctx->u.aes.mode;
+	rctx->cmd.u.aes.action =
+		(encrypt) ? CCP_AES_ACTION_ENCRYPT : CCP_AES_ACTION_DECRYPT;
+	rctx->cmd.u.aes.key = &ctx->u.aes.key_sg;
+	rctx->cmd.u.aes.key_len = ctx->u.aes.key_len;
+	rctx->cmd.u.aes.iv = iv_sg;
+	rctx->cmd.u.aes.iv_len = iv_len;
+	rctx->cmd.u.aes.src = req->src;
+	rctx->cmd.u.aes.src_len = req->nbytes;
+	rctx->cmd.u.aes.dst = req->dst;
+
+	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
+
+	return ret;
+}
+
+static int ccp_aes_encrypt(struct ablkcipher_request *req)
+{
+	return ccp_aes_crypt(req, true);
+}
+
+static int ccp_aes_decrypt(struct ablkcipher_request *req)
+{
+	return ccp_aes_crypt(req, false);
+}
+
+static int ccp_aes_cra_init(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	ctx->complete = ccp_aes_complete;
+	ctx->u.aes.key_len = 0;
+
+	tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx);
+
+	return 0;
+}
+
+static void ccp_aes_cra_exit(struct crypto_tfm *tfm)
+{
+}
+
+static int ccp_aes_rfc3686_complete(struct crypto_async_request *async_req,
+				    int ret)
+{
+	struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
+	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+
+	/* Restore the original pointer */
+	req->info = rctx->rfc3686_info;
+
+	return ccp_aes_complete(async_req, ret);
+}
+
+static int ccp_aes_rfc3686_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+				  unsigned int key_len)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm));
+
+	if (key_len < CTR_RFC3686_NONCE_SIZE)
+		return -EINVAL;
+
+	key_len -= CTR_RFC3686_NONCE_SIZE;
+	memcpy(ctx->u.aes.nonce, key + key_len, CTR_RFC3686_NONCE_SIZE);
+
+	return ccp_aes_setkey(tfm, key, key_len);
+}
+
+static int ccp_aes_rfc3686_crypt(struct ablkcipher_request *req, bool encrypt)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	u8 *iv;
+
+	/* Initialize the CTR block */
+	iv = rctx->rfc3686_iv;
+	memcpy(iv, ctx->u.aes.nonce, CTR_RFC3686_NONCE_SIZE);
+
+	iv += CTR_RFC3686_NONCE_SIZE;
+	memcpy(iv, req->info, CTR_RFC3686_IV_SIZE);
+
+	iv += CTR_RFC3686_IV_SIZE;
+	*(__be32 *)iv = cpu_to_be32(1);
+
+	/* Point to the new IV */
+	rctx->rfc3686_info = req->info;
+	req->info = rctx->rfc3686_iv;
+
+	return ccp_aes_crypt(req, encrypt);
+}
+
+static int ccp_aes_rfc3686_encrypt(struct ablkcipher_request *req)
+{
+	return ccp_aes_rfc3686_crypt(req, true);
+}
+
+static int ccp_aes_rfc3686_decrypt(struct ablkcipher_request *req)
+{
+	return ccp_aes_rfc3686_crypt(req, false);
+}
+
+static int ccp_aes_rfc3686_cra_init(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	ctx->complete = ccp_aes_rfc3686_complete;
+	ctx->u.aes.key_len = 0;
+
+	tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx);
+
+	return 0;
+}
+
+static void ccp_aes_rfc3686_cra_exit(struct crypto_tfm *tfm)
+{
+}
+
+static struct crypto_alg ccp_aes_defaults = {
+	.cra_flags	= CRYPTO_ALG_TYPE_ABLKCIPHER |
+			  CRYPTO_ALG_ASYNC |
+			  CRYPTO_ALG_KERN_DRIVER_ONLY |
+			  CRYPTO_ALG_NEED_FALLBACK,
+	.cra_blocksize	= AES_BLOCK_SIZE,
+	.cra_ctxsize	= sizeof(struct ccp_ctx),
+	.cra_priority	= CCP_CRA_PRIORITY,
+	.cra_type	= &crypto_ablkcipher_type,
+	.cra_init	= ccp_aes_cra_init,
+	.cra_exit	= ccp_aes_cra_exit,
+	.cra_module	= THIS_MODULE,
+	.cra_ablkcipher	= {
+		.setkey		= ccp_aes_setkey,
+		.encrypt	= ccp_aes_encrypt,
+		.decrypt	= ccp_aes_decrypt,
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+	},
+};
+
+static struct crypto_alg ccp_aes_rfc3686_defaults = {
+	.cra_flags	= CRYPTO_ALG_TYPE_ABLKCIPHER |
+			   CRYPTO_ALG_ASYNC |
+			   CRYPTO_ALG_KERN_DRIVER_ONLY |
+			   CRYPTO_ALG_NEED_FALLBACK,
+	.cra_blocksize	= CTR_RFC3686_BLOCK_SIZE,
+	.cra_ctxsize	= sizeof(struct ccp_ctx),
+	.cra_priority	= CCP_CRA_PRIORITY,
+	.cra_type	= &crypto_ablkcipher_type,
+	.cra_init	= ccp_aes_rfc3686_cra_init,
+	.cra_exit	= ccp_aes_rfc3686_cra_exit,
+	.cra_module	= THIS_MODULE,
+	.cra_ablkcipher	= {
+		.setkey		= ccp_aes_rfc3686_setkey,
+		.encrypt	= ccp_aes_rfc3686_encrypt,
+		.decrypt	= ccp_aes_rfc3686_decrypt,
+		.min_keysize	= AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+	},
+};
+
+struct ccp_aes_def {
+	enum ccp_aes_mode mode;
+	const char *name;
+	const char *driver_name;
+	unsigned int blocksize;
+	unsigned int ivsize;
+	struct crypto_alg *alg_defaults;
+};
+
+static struct ccp_aes_def aes_algs[] = {
+	{
+		.mode		= CCP_AES_MODE_ECB,
+		.name		= "ecb(aes)",
+		.driver_name	= "ecb-aes-ccp",
+		.blocksize	= AES_BLOCK_SIZE,
+		.ivsize		= 0,
+		.alg_defaults	= &ccp_aes_defaults,
+	},
+	{
+		.mode		= CCP_AES_MODE_CBC,
+		.name		= "cbc(aes)",
+		.driver_name	= "cbc-aes-ccp",
+		.blocksize	= AES_BLOCK_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.alg_defaults	= &ccp_aes_defaults,
+	},
+	{
+		.mode		= CCP_AES_MODE_CFB,
+		.name		= "cfb(aes)",
+		.driver_name	= "cfb-aes-ccp",
+		.blocksize	= AES_BLOCK_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.alg_defaults	= &ccp_aes_defaults,
+	},
+	{
+		.mode		= CCP_AES_MODE_OFB,
+		.name		= "ofb(aes)",
+		.driver_name	= "ofb-aes-ccp",
+		.blocksize	= 1,
+		.ivsize		= AES_BLOCK_SIZE,
+		.alg_defaults	= &ccp_aes_defaults,
+	},
+	{
+		.mode		= CCP_AES_MODE_CTR,
+		.name		= "ctr(aes)",
+		.driver_name	= "ctr-aes-ccp",
+		.blocksize	= 1,
+		.ivsize		= AES_BLOCK_SIZE,
+		.alg_defaults	= &ccp_aes_defaults,
+	},
+	{
+		.mode		= CCP_AES_MODE_CTR,
+		.name		= "rfc3686(ctr(aes))",
+		.driver_name	= "rfc3686-ctr-aes-ccp",
+		.blocksize	= 1,
+		.ivsize		= CTR_RFC3686_IV_SIZE,
+		.alg_defaults	= &ccp_aes_rfc3686_defaults,
+	},
+};
+
+static int ccp_register_aes_alg(struct list_head *head,
+				const struct ccp_aes_def *def)
+{
+	struct ccp_crypto_ablkcipher_alg *ccp_alg;
+	struct crypto_alg *alg;
+	int ret;
+
+	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
+	if (!ccp_alg)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&ccp_alg->entry);
+
+	ccp_alg->mode = def->mode;
+
+	/* Copy the defaults and override as necessary */
+	alg = &ccp_alg->alg;
+	*alg = *def->alg_defaults;
+	snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+	snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+		 def->driver_name);
+	alg->cra_blocksize = def->blocksize;
+	alg->cra_ablkcipher.ivsize = def->ivsize;
+
+	ret = crypto_register_alg(alg);
+	if (ret) {
+		pr_err("%s ablkcipher algorithm registration error (%d)\n",
+		       alg->cra_name, ret);
+		kfree(ccp_alg);
+		return ret;
+	}
+
+	list_add(&ccp_alg->entry, head);
+
+	return 0;
+}
+
+int ccp_register_aes_algs(struct list_head *head)
+{
+	int i, ret;
+
+	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
+		ret = ccp_register_aes_alg(head, &aes_algs[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
new file mode 100644
index 000000000..bdec01ec6
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -0,0 +1,391 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) crypto API support
+ *
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/ccp.h>
+#include <linux/scatterlist.h>
+#include <crypto/internal/hash.h>
+
+#include "ccp-crypto.h"
+
+MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0.0");
+MODULE_DESCRIPTION("AMD Cryptographic Coprocessor crypto API support");
+
+static unsigned int aes_disable;
+module_param(aes_disable, uint, 0444);
+MODULE_PARM_DESC(aes_disable, "Disable use of AES - any non-zero value");
+
+static unsigned int sha_disable;
+module_param(sha_disable, uint, 0444);
+MODULE_PARM_DESC(sha_disable, "Disable use of SHA - any non-zero value");
+
+/* List heads for the supported algorithms */
+static LIST_HEAD(hash_algs);
+static LIST_HEAD(cipher_algs);
+
+/* For any tfm, requests for that tfm must be returned on the order
+ * received.  With multiple queues available, the CCP can process more
+ * than one cmd at a time.  Therefore we must maintain a cmd list to insure
+ * the proper ordering of requests on a given tfm.
+ */
+struct ccp_crypto_queue {
+	struct list_head cmds;
+	struct list_head *backlog;
+	unsigned int cmd_count;
+};
+
+#define CCP_CRYPTO_MAX_QLEN	100
+
+static struct ccp_crypto_queue req_queue;
+static spinlock_t req_queue_lock;
+
+struct ccp_crypto_cmd {
+	struct list_head entry;
+
+	struct ccp_cmd *cmd;
+
+	/* Save the crypto_tfm and crypto_async_request addresses
+	 * separately to avoid any reference to a possibly invalid
+	 * crypto_async_request structure after invoking the request
+	 * callback
+	 */
+	struct crypto_async_request *req;
+	struct crypto_tfm *tfm;
+
+	/* Used for held command processing to determine state */
+	int ret;
+};
+
+struct ccp_crypto_cpu {
+	struct work_struct work;
+	struct completion completion;
+	struct ccp_crypto_cmd *crypto_cmd;
+	int err;
+};
+
+static inline bool ccp_crypto_success(int err)
+{
+	if (err && (err != -EINPROGRESS) && (err != -EBUSY))
+		return false;
+
+	return true;
+}
+
+static struct ccp_crypto_cmd *ccp_crypto_cmd_complete(
+	struct ccp_crypto_cmd *crypto_cmd, struct ccp_crypto_cmd **backlog)
+{
+	struct ccp_crypto_cmd *held = NULL, *tmp;
+	unsigned long flags;
+
+	*backlog = NULL;
+
+	spin_lock_irqsave(&req_queue_lock, flags);
+
+	/* Held cmds will be after the current cmd in the queue so start
+	 * searching for a cmd with a matching tfm for submission.
+	 */
+	tmp = crypto_cmd;
+	list_for_each_entry_continue(tmp, &req_queue.cmds, entry) {
+		if (crypto_cmd->tfm != tmp->tfm)
+			continue;
+		held = tmp;
+		break;
+	}
+
+	/* Process the backlog:
+	 *   Because cmds can be executed from any point in the cmd list
+	 *   special precautions have to be taken when handling the backlog.
+	 */
+	if (req_queue.backlog != &req_queue.cmds) {
+		/* Skip over this cmd if it is the next backlog cmd */
+		if (req_queue.backlog == &crypto_cmd->entry)
+			req_queue.backlog = crypto_cmd->entry.next;
+
+		*backlog = container_of(req_queue.backlog,
+					struct ccp_crypto_cmd, entry);
+		req_queue.backlog = req_queue.backlog->next;
+
+		/* Skip over this cmd if it is now the next backlog cmd */
+		if (req_queue.backlog == &crypto_cmd->entry)
+			req_queue.backlog = crypto_cmd->entry.next;
+	}
+
+	/* Remove the cmd entry from the list of cmds */
+	req_queue.cmd_count--;
+	list_del(&crypto_cmd->entry);
+
+	spin_unlock_irqrestore(&req_queue_lock, flags);
+
+	return held;
+}
+
+static void ccp_crypto_complete(void *data, int err)
+{
+	struct ccp_crypto_cmd *crypto_cmd = data;
+	struct ccp_crypto_cmd *held, *next, *backlog;
+	struct crypto_async_request *req = crypto_cmd->req;
+	struct ccp_ctx *ctx = crypto_tfm_ctx(req->tfm);
+	int ret;
+
+	if (err == -EINPROGRESS) {
+		/* Only propagate the -EINPROGRESS if necessary */
+		if (crypto_cmd->ret == -EBUSY) {
+			crypto_cmd->ret = -EINPROGRESS;
+			req->complete(req, -EINPROGRESS);
+		}
+
+		return;
+	}
+
+	/* Operation has completed - update the queue before invoking
+	 * the completion callbacks and retrieve the next cmd (cmd with
+	 * a matching tfm) that can be submitted to the CCP.
+	 */
+	held = ccp_crypto_cmd_complete(crypto_cmd, &backlog);
+	if (backlog) {
+		backlog->ret = -EINPROGRESS;
+		backlog->req->complete(backlog->req, -EINPROGRESS);
+	}
+
+	/* Transition the state from -EBUSY to -EINPROGRESS first */
+	if (crypto_cmd->ret == -EBUSY)
+		req->complete(req, -EINPROGRESS);
+
+	/* Completion callbacks */
+	ret = err;
+	if (ctx->complete)
+		ret = ctx->complete(req, ret);
+	req->complete(req, ret);
+
+	/* Submit the next cmd */
+	while (held) {
+		/* Since we have already queued the cmd, we must indicate that
+		 * we can backlog so as not to "lose" this request.
+		 */
+		held->cmd->flags |= CCP_CMD_MAY_BACKLOG;
+		ret = ccp_enqueue_cmd(held->cmd);
+		if (ccp_crypto_success(ret))
+			break;
+
+		/* Error occurred, report it and get the next entry */
+		ctx = crypto_tfm_ctx(held->req->tfm);
+		if (ctx->complete)
+			ret = ctx->complete(held->req, ret);
+		held->req->complete(held->req, ret);
+
+		next = ccp_crypto_cmd_complete(held, &backlog);
+		if (backlog) {
+			backlog->ret = -EINPROGRESS;
+			backlog->req->complete(backlog->req, -EINPROGRESS);
+		}
+
+		kfree(held);
+		held = next;
+	}
+
+	kfree(crypto_cmd);
+}
+
+static int ccp_crypto_enqueue_cmd(struct ccp_crypto_cmd *crypto_cmd)
+{
+	struct ccp_crypto_cmd *active = NULL, *tmp;
+	unsigned long flags;
+	bool free_cmd = true;
+	int ret;
+
+	spin_lock_irqsave(&req_queue_lock, flags);
+
+	/* Check if the cmd can/should be queued */
+	if (req_queue.cmd_count >= CCP_CRYPTO_MAX_QLEN) {
+		ret = -EBUSY;
+		if (!(crypto_cmd->cmd->flags & CCP_CMD_MAY_BACKLOG))
+			goto e_lock;
+	}
+
+	/* Look for an entry with the same tfm.  If there is a cmd
+	 * with the same tfm in the list then the current cmd cannot
+	 * be submitted to the CCP yet.
+	 */
+	list_for_each_entry(tmp, &req_queue.cmds, entry) {
+		if (crypto_cmd->tfm != tmp->tfm)
+			continue;
+		active = tmp;
+		break;
+	}
+
+	ret = -EINPROGRESS;
+	if (!active) {
+		ret = ccp_enqueue_cmd(crypto_cmd->cmd);
+		if (!ccp_crypto_success(ret))
+			goto e_lock;	/* Error, don't queue it */
+		if ((ret == -EBUSY) &&
+		    !(crypto_cmd->cmd->flags & CCP_CMD_MAY_BACKLOG))
+			goto e_lock;	/* Not backlogging, don't queue it */
+	}
+
+	if (req_queue.cmd_count >= CCP_CRYPTO_MAX_QLEN) {
+		ret = -EBUSY;
+		if (req_queue.backlog == &req_queue.cmds)
+			req_queue.backlog = &crypto_cmd->entry;
+	}
+	crypto_cmd->ret = ret;
+
+	req_queue.cmd_count++;
+	list_add_tail(&crypto_cmd->entry, &req_queue.cmds);
+
+	free_cmd = false;
+
+e_lock:
+	spin_unlock_irqrestore(&req_queue_lock, flags);
+
+	if (free_cmd)
+		kfree(crypto_cmd);
+
+	return ret;
+}
+
+/**
+ * ccp_crypto_enqueue_request - queue an crypto async request for processing
+ *				by the CCP
+ *
+ * @req: crypto_async_request struct to be processed
+ * @cmd: ccp_cmd struct to be sent to the CCP
+ */
+int ccp_crypto_enqueue_request(struct crypto_async_request *req,
+			       struct ccp_cmd *cmd)
+{
+	struct ccp_crypto_cmd *crypto_cmd;
+	gfp_t gfp;
+
+	gfp = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
+
+	crypto_cmd = kzalloc(sizeof(*crypto_cmd), gfp);
+	if (!crypto_cmd)
+		return -ENOMEM;
+
+	/* The tfm pointer must be saved and not referenced from the
+	 * crypto_async_request (req) pointer because it is used after
+	 * completion callback for the request and the req pointer
+	 * might not be valid anymore.
+	 */
+	crypto_cmd->cmd = cmd;
+	crypto_cmd->req = req;
+	crypto_cmd->tfm = req->tfm;
+
+	cmd->callback = ccp_crypto_complete;
+	cmd->data = crypto_cmd;
+
+	if (req->flags & CRYPTO_TFM_REQ_MAY_BACKLOG)
+		cmd->flags |= CCP_CMD_MAY_BACKLOG;
+	else
+		cmd->flags &= ~CCP_CMD_MAY_BACKLOG;
+
+	return ccp_crypto_enqueue_cmd(crypto_cmd);
+}
+
+struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table,
+					    struct scatterlist *sg_add)
+{
+	struct scatterlist *sg, *sg_last = NULL;
+
+	for (sg = table->sgl; sg; sg = sg_next(sg))
+		if (!sg_page(sg))
+			break;
+	BUG_ON(!sg);
+
+	for (; sg && sg_add; sg = sg_next(sg), sg_add = sg_next(sg_add)) {
+		sg_set_page(sg, sg_page(sg_add), sg_add->length,
+			    sg_add->offset);
+		sg_last = sg;
+	}
+	BUG_ON(sg_add);
+
+	return sg_last;
+}
+
+static int ccp_register_algs(void)
+{
+	int ret;
+
+	if (!aes_disable) {
+		ret = ccp_register_aes_algs(&cipher_algs);
+		if (ret)
+			return ret;
+
+		ret = ccp_register_aes_cmac_algs(&hash_algs);
+		if (ret)
+			return ret;
+
+		ret = ccp_register_aes_xts_algs(&cipher_algs);
+		if (ret)
+			return ret;
+	}
+
+	if (!sha_disable) {
+		ret = ccp_register_sha_algs(&hash_algs);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void ccp_unregister_algs(void)
+{
+	struct ccp_crypto_ahash_alg *ahash_alg, *ahash_tmp;
+	struct ccp_crypto_ablkcipher_alg *ablk_alg, *ablk_tmp;
+
+	list_for_each_entry_safe(ahash_alg, ahash_tmp, &hash_algs, entry) {
+		crypto_unregister_ahash(&ahash_alg->alg);
+		list_del(&ahash_alg->entry);
+		kfree(ahash_alg);
+	}
+
+	list_for_each_entry_safe(ablk_alg, ablk_tmp, &cipher_algs, entry) {
+		crypto_unregister_alg(&ablk_alg->alg);
+		list_del(&ablk_alg->entry);
+		kfree(ablk_alg);
+	}
+}
+
+static int ccp_crypto_init(void)
+{
+	int ret;
+
+	ret = ccp_present();
+	if (ret)
+		return ret;
+
+	spin_lock_init(&req_queue_lock);
+	INIT_LIST_HEAD(&req_queue.cmds);
+	req_queue.backlog = &req_queue.cmds;
+	req_queue.cmd_count = 0;
+
+	ret = ccp_register_algs();
+	if (ret)
+		ccp_unregister_algs();
+
+	return ret;
+}
+
+static void ccp_crypto_exit(void)
+{
+	ccp_unregister_algs();
+}
+
+module_init(ccp_crypto_init);
+module_exit(ccp_crypto_exit);
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
new file mode 100644
index 000000000..507b34e0c
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -0,0 +1,438 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) SHA crypto API support
+ *
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/scatterlist.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/scatterwalk.h>
+
+#include "ccp-crypto.h"
+
+static int ccp_sha_complete(struct crypto_async_request *async_req, int ret)
+{
+	struct ahash_request *req = ahash_request_cast(async_req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
+	unsigned int digest_size = crypto_ahash_digestsize(tfm);
+
+	if (ret)
+		goto e_free;
+
+	if (rctx->hash_rem) {
+		/* Save remaining data to buffer */
+		unsigned int offset = rctx->nbytes - rctx->hash_rem;
+
+		scatterwalk_map_and_copy(rctx->buf, rctx->src,
+					 offset, rctx->hash_rem, 0);
+		rctx->buf_count = rctx->hash_rem;
+	} else {
+		rctx->buf_count = 0;
+	}
+
+	/* Update result area if supplied */
+	if (req->result)
+		memcpy(req->result, rctx->ctx, digest_size);
+
+e_free:
+	sg_free_table(&rctx->data_sg);
+
+	return ret;
+}
+
+static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes,
+			     unsigned int final)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct ccp_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
+	struct scatterlist *sg;
+	unsigned int block_size =
+		crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
+	unsigned int sg_count;
+	gfp_t gfp;
+	u64 len;
+	int ret;
+
+	len = (u64)rctx->buf_count + (u64)nbytes;
+
+	if (!final && (len <= block_size)) {
+		scatterwalk_map_and_copy(rctx->buf + rctx->buf_count, req->src,
+					 0, nbytes, 0);
+		rctx->buf_count += nbytes;
+
+		return 0;
+	}
+
+	rctx->src = req->src;
+	rctx->nbytes = nbytes;
+
+	rctx->final = final;
+	rctx->hash_rem = final ? 0 : len & (block_size - 1);
+	rctx->hash_cnt = len - rctx->hash_rem;
+	if (!final && !rctx->hash_rem) {
+		/* CCP can't do zero length final, so keep some data around */
+		rctx->hash_cnt -= block_size;
+		rctx->hash_rem = block_size;
+	}
+
+	/* Initialize the context scatterlist */
+	sg_init_one(&rctx->ctx_sg, rctx->ctx, sizeof(rctx->ctx));
+
+	sg = NULL;
+	if (rctx->buf_count && nbytes) {
+		/* Build the data scatterlist table - allocate enough entries
+		 * for both data pieces (buffer and input data)
+		 */
+		gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+			GFP_KERNEL : GFP_ATOMIC;
+		sg_count = sg_nents(req->src) + 1;
+		ret = sg_alloc_table(&rctx->data_sg, sg_count, gfp);
+		if (ret)
+			return ret;
+
+		sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count);
+		sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->buf_sg);
+		sg = ccp_crypto_sg_table_add(&rctx->data_sg, req->src);
+		sg_mark_end(sg);
+
+		sg = rctx->data_sg.sgl;
+	} else if (rctx->buf_count) {
+		sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count);
+
+		sg = &rctx->buf_sg;
+	} else if (nbytes) {
+		sg = req->src;
+	}
+
+	rctx->msg_bits += (rctx->hash_cnt << 3);	/* Total in bits */
+
+	memset(&rctx->cmd, 0, sizeof(rctx->cmd));
+	INIT_LIST_HEAD(&rctx->cmd.entry);
+	rctx->cmd.engine = CCP_ENGINE_SHA;
+	rctx->cmd.u.sha.type = rctx->type;
+	rctx->cmd.u.sha.ctx = &rctx->ctx_sg;
+	rctx->cmd.u.sha.ctx_len = sizeof(rctx->ctx);
+	rctx->cmd.u.sha.src = sg;
+	rctx->cmd.u.sha.src_len = rctx->hash_cnt;
+	rctx->cmd.u.sha.opad = ctx->u.sha.key_len ?
+		&ctx->u.sha.opad_sg : NULL;
+	rctx->cmd.u.sha.opad_len = ctx->u.sha.key_len ?
+		ctx->u.sha.opad_count : 0;
+	rctx->cmd.u.sha.first = rctx->first;
+	rctx->cmd.u.sha.final = rctx->final;
+	rctx->cmd.u.sha.msg_bits = rctx->msg_bits;
+
+	rctx->first = 0;
+
+	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
+
+	return ret;
+}
+
+static int ccp_sha_init(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct ccp_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
+	struct ccp_crypto_ahash_alg *alg =
+		ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm));
+	unsigned int block_size =
+		crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
+
+	memset(rctx, 0, sizeof(*rctx));
+
+	rctx->type = alg->type;
+	rctx->first = 1;
+
+	if (ctx->u.sha.key_len) {
+		/* Buffer the HMAC key for first update */
+		memcpy(rctx->buf, ctx->u.sha.ipad, block_size);
+		rctx->buf_count = block_size;
+	}
+
+	return 0;
+}
+
+static int ccp_sha_update(struct ahash_request *req)
+{
+	return ccp_do_sha_update(req, req->nbytes, 0);
+}
+
+static int ccp_sha_final(struct ahash_request *req)
+{
+	return ccp_do_sha_update(req, 0, 1);
+}
+
+static int ccp_sha_finup(struct ahash_request *req)
+{
+	return ccp_do_sha_update(req, req->nbytes, 1);
+}
+
+static int ccp_sha_digest(struct ahash_request *req)
+{
+	int ret;
+
+	ret = ccp_sha_init(req);
+	if (ret)
+		return ret;
+
+	return ccp_sha_finup(req);
+}
+
+static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
+			  unsigned int key_len)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+	struct crypto_shash *shash = ctx->u.sha.hmac_tfm;
+
+	SHASH_DESC_ON_STACK(sdesc, shash);
+
+	unsigned int block_size = crypto_shash_blocksize(shash);
+	unsigned int digest_size = crypto_shash_digestsize(shash);
+	int i, ret;
+
+	/* Set to zero until complete */
+	ctx->u.sha.key_len = 0;
+
+	/* Clear key area to provide zero padding for keys smaller
+	 * than the block size
+	 */
+	memset(ctx->u.sha.key, 0, sizeof(ctx->u.sha.key));
+
+	if (key_len > block_size) {
+		/* Must hash the input key */
+		sdesc->tfm = shash;
+		sdesc->flags = crypto_ahash_get_flags(tfm) &
+			CRYPTO_TFM_REQ_MAY_SLEEP;
+
+		ret = crypto_shash_digest(sdesc, key, key_len,
+					  ctx->u.sha.key);
+		if (ret) {
+			crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+			return -EINVAL;
+		}
+
+		key_len = digest_size;
+	} else {
+		memcpy(ctx->u.sha.key, key, key_len);
+	}
+
+	for (i = 0; i < block_size; i++) {
+		ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ 0x36;
+		ctx->u.sha.opad[i] = ctx->u.sha.key[i] ^ 0x5c;
+	}
+
+	sg_init_one(&ctx->u.sha.opad_sg, ctx->u.sha.opad, block_size);
+	ctx->u.sha.opad_count = block_size;
+
+	ctx->u.sha.key_len = key_len;
+
+	return 0;
+}
+
+static int ccp_sha_cra_init(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
+
+	ctx->complete = ccp_sha_complete;
+	ctx->u.sha.key_len = 0;
+
+	crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_sha_req_ctx));
+
+	return 0;
+}
+
+static void ccp_sha_cra_exit(struct crypto_tfm *tfm)
+{
+}
+
+static int ccp_hmac_sha_cra_init(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct ccp_crypto_ahash_alg *alg = ccp_crypto_ahash_alg(tfm);
+	struct crypto_shash *hmac_tfm;
+
+	hmac_tfm = crypto_alloc_shash(alg->child_alg, 0, 0);
+	if (IS_ERR(hmac_tfm)) {
+		pr_warn("could not load driver %s need for HMAC support\n",
+			alg->child_alg);
+		return PTR_ERR(hmac_tfm);
+	}
+
+	ctx->u.sha.hmac_tfm = hmac_tfm;
+
+	return ccp_sha_cra_init(tfm);
+}
+
+static void ccp_hmac_sha_cra_exit(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if (ctx->u.sha.hmac_tfm)
+		crypto_free_shash(ctx->u.sha.hmac_tfm);
+
+	ccp_sha_cra_exit(tfm);
+}
+
+struct ccp_sha_def {
+	const char *name;
+	const char *drv_name;
+	enum ccp_sha_type type;
+	u32 digest_size;
+	u32 block_size;
+};
+
+static struct ccp_sha_def sha_algs[] = {
+	{
+		.name		= "sha1",
+		.drv_name	= "sha1-ccp",
+		.type		= CCP_SHA_TYPE_1,
+		.digest_size	= SHA1_DIGEST_SIZE,
+		.block_size	= SHA1_BLOCK_SIZE,
+	},
+	{
+		.name		= "sha224",
+		.drv_name	= "sha224-ccp",
+		.type		= CCP_SHA_TYPE_224,
+		.digest_size	= SHA224_DIGEST_SIZE,
+		.block_size	= SHA224_BLOCK_SIZE,
+	},
+	{
+		.name		= "sha256",
+		.drv_name	= "sha256-ccp",
+		.type		= CCP_SHA_TYPE_256,
+		.digest_size	= SHA256_DIGEST_SIZE,
+		.block_size	= SHA256_BLOCK_SIZE,
+	},
+};
+
+static int ccp_register_hmac_alg(struct list_head *head,
+				 const struct ccp_sha_def *def,
+				 const struct ccp_crypto_ahash_alg *base_alg)
+{
+	struct ccp_crypto_ahash_alg *ccp_alg;
+	struct ahash_alg *alg;
+	struct hash_alg_common *halg;
+	struct crypto_alg *base;
+	int ret;
+
+	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
+	if (!ccp_alg)
+		return -ENOMEM;
+
+	/* Copy the base algorithm and only change what's necessary */
+	*ccp_alg = *base_alg;
+	INIT_LIST_HEAD(&ccp_alg->entry);
+
+	strncpy(ccp_alg->child_alg, def->name, CRYPTO_MAX_ALG_NAME);
+
+	alg = &ccp_alg->alg;
+	alg->setkey = ccp_sha_setkey;
+
+	halg = &alg->halg;
+
+	base = &halg->base;
+	snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", def->name);
+	snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "hmac-%s",
+		 def->drv_name);
+	base->cra_init = ccp_hmac_sha_cra_init;
+	base->cra_exit = ccp_hmac_sha_cra_exit;
+
+	ret = crypto_register_ahash(alg);
+	if (ret) {
+		pr_err("%s ahash algorithm registration error (%d)\n",
+		       base->cra_name, ret);
+		kfree(ccp_alg);
+		return ret;
+	}
+
+	list_add(&ccp_alg->entry, head);
+
+	return ret;
+}
+
+static int ccp_register_sha_alg(struct list_head *head,
+				const struct ccp_sha_def *def)
+{
+	struct ccp_crypto_ahash_alg *ccp_alg;
+	struct ahash_alg *alg;
+	struct hash_alg_common *halg;
+	struct crypto_alg *base;
+	int ret;
+
+	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
+	if (!ccp_alg)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&ccp_alg->entry);
+
+	ccp_alg->type = def->type;
+
+	alg = &ccp_alg->alg;
+	alg->init = ccp_sha_init;
+	alg->update = ccp_sha_update;
+	alg->final = ccp_sha_final;
+	alg->finup = ccp_sha_finup;
+	alg->digest = ccp_sha_digest;
+
+	halg = &alg->halg;
+	halg->digestsize = def->digest_size;
+
+	base = &halg->base;
+	snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+	snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+		 def->drv_name);
+	base->cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC |
+			  CRYPTO_ALG_KERN_DRIVER_ONLY |
+			  CRYPTO_ALG_NEED_FALLBACK;
+	base->cra_blocksize = def->block_size;
+	base->cra_ctxsize = sizeof(struct ccp_ctx);
+	base->cra_priority = CCP_CRA_PRIORITY;
+	base->cra_type = &crypto_ahash_type;
+	base->cra_init = ccp_sha_cra_init;
+	base->cra_exit = ccp_sha_cra_exit;
+	base->cra_module = THIS_MODULE;
+
+	ret = crypto_register_ahash(alg);
+	if (ret) {
+		pr_err("%s ahash algorithm registration error (%d)\n",
+		       base->cra_name, ret);
+		kfree(ccp_alg);
+		return ret;
+	}
+
+	list_add(&ccp_alg->entry, head);
+
+	ret = ccp_register_hmac_alg(head, def, ccp_alg);
+
+	return ret;
+}
+
+int ccp_register_sha_algs(struct list_head *head)
+{
+	int i, ret;
+
+	for (i = 0; i < ARRAY_SIZE(sha_algs); i++) {
+		ret = ccp_register_sha_alg(head, &sha_algs[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h
new file mode 100644
index 000000000..76a96f0f4
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto.h
@@ -0,0 +1,194 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) crypto API support
+ *
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __CCP_CRYPTO_H__
+#define __CCP_CRYPTO_H__
+
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/pci.h>
+#include <linux/ccp.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+#include <crypto/ctr.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+
+#define CCP_CRA_PRIORITY	300
+
+struct ccp_crypto_ablkcipher_alg {
+	struct list_head entry;
+
+	u32 mode;
+
+	struct crypto_alg alg;
+};
+
+struct ccp_crypto_ahash_alg {
+	struct list_head entry;
+
+	const __be32 *init;
+	u32 type;
+	u32 mode;
+
+	/* Child algorithm used for HMAC, CMAC, etc */
+	char child_alg[CRYPTO_MAX_ALG_NAME];
+
+	struct ahash_alg alg;
+};
+
+static inline struct ccp_crypto_ablkcipher_alg *
+	ccp_crypto_ablkcipher_alg(struct crypto_tfm *tfm)
+{
+	struct crypto_alg *alg = tfm->__crt_alg;
+
+	return container_of(alg, struct ccp_crypto_ablkcipher_alg, alg);
+}
+
+static inline struct ccp_crypto_ahash_alg *
+	ccp_crypto_ahash_alg(struct crypto_tfm *tfm)
+{
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct ahash_alg *ahash_alg;
+
+	ahash_alg = container_of(alg, struct ahash_alg, halg.base);
+
+	return container_of(ahash_alg, struct ccp_crypto_ahash_alg, alg);
+}
+
+/***** AES related defines *****/
+struct ccp_aes_ctx {
+	/* Fallback cipher for XTS with unsupported unit sizes */
+	struct crypto_ablkcipher *tfm_ablkcipher;
+
+	/* Cipher used to generate CMAC K1/K2 keys */
+	struct crypto_cipher *tfm_cipher;
+
+	enum ccp_engine engine;
+	enum ccp_aes_type type;
+	enum ccp_aes_mode mode;
+
+	struct scatterlist key_sg;
+	unsigned int key_len;
+	u8 key[AES_MAX_KEY_SIZE];
+
+	u8 nonce[CTR_RFC3686_NONCE_SIZE];
+
+	/* CMAC key structures */
+	struct scatterlist k1_sg;
+	struct scatterlist k2_sg;
+	unsigned int kn_len;
+	u8 k1[AES_BLOCK_SIZE];
+	u8 k2[AES_BLOCK_SIZE];
+};
+
+struct ccp_aes_req_ctx {
+	struct scatterlist iv_sg;
+	u8 iv[AES_BLOCK_SIZE];
+
+	/* Fields used for RFC3686 requests */
+	u8 *rfc3686_info;
+	u8 rfc3686_iv[AES_BLOCK_SIZE];
+
+	struct ccp_cmd cmd;
+};
+
+struct ccp_aes_cmac_req_ctx {
+	unsigned int null_msg;
+	unsigned int final;
+
+	struct scatterlist *src;
+	unsigned int nbytes;
+
+	u64 hash_cnt;
+	unsigned int hash_rem;
+
+	struct sg_table data_sg;
+
+	struct scatterlist iv_sg;
+	u8 iv[AES_BLOCK_SIZE];
+
+	struct scatterlist buf_sg;
+	unsigned int buf_count;
+	u8 buf[AES_BLOCK_SIZE];
+
+	struct scatterlist pad_sg;
+	unsigned int pad_count;
+	u8 pad[AES_BLOCK_SIZE];
+
+	struct ccp_cmd cmd;
+};
+
+/***** SHA related defines *****/
+#define MAX_SHA_CONTEXT_SIZE	SHA256_DIGEST_SIZE
+#define MAX_SHA_BLOCK_SIZE	SHA256_BLOCK_SIZE
+
+struct ccp_sha_ctx {
+	struct scatterlist opad_sg;
+	unsigned int opad_count;
+
+	unsigned int key_len;
+	u8 key[MAX_SHA_BLOCK_SIZE];
+	u8 ipad[MAX_SHA_BLOCK_SIZE];
+	u8 opad[MAX_SHA_BLOCK_SIZE];
+	struct crypto_shash *hmac_tfm;
+};
+
+struct ccp_sha_req_ctx {
+	enum ccp_sha_type type;
+
+	u64 msg_bits;
+
+	unsigned int first;
+	unsigned int final;
+
+	struct scatterlist *src;
+	unsigned int nbytes;
+
+	u64 hash_cnt;
+	unsigned int hash_rem;
+
+	struct sg_table data_sg;
+
+	struct scatterlist ctx_sg;
+	u8 ctx[MAX_SHA_CONTEXT_SIZE];
+
+	struct scatterlist buf_sg;
+	unsigned int buf_count;
+	u8 buf[MAX_SHA_BLOCK_SIZE];
+
+	/* CCP driver command */
+	struct ccp_cmd cmd;
+};
+
+/***** Common Context Structure *****/
+struct ccp_ctx {
+	int (*complete)(struct crypto_async_request *req, int ret);
+
+	union {
+		struct ccp_aes_ctx aes;
+		struct ccp_sha_ctx sha;
+	} u;
+};
+
+int ccp_crypto_enqueue_request(struct crypto_async_request *req,
+			       struct ccp_cmd *cmd);
+struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table,
+					    struct scatterlist *sg_add);
+
+int ccp_register_aes_algs(struct list_head *head);
+int ccp_register_aes_cmac_algs(struct list_head *head);
+int ccp_register_aes_xts_algs(struct list_head *head);
+int ccp_register_sha_algs(struct list_head *head);
+
+#endif
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
new file mode 100644
index 000000000..861bacc1b
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -0,0 +1,654 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/hw_random.h>
+#include <linux/cpu.h>
+#ifdef CONFIG_X86
+#include <asm/cpu_device_id.h>
+#endif
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+
+MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0.0");
+MODULE_DESCRIPTION("AMD Cryptographic Coprocessor driver");
+
+struct ccp_tasklet_data {
+	struct completion completion;
+	struct ccp_cmd *cmd;
+};
+
+static struct ccp_device *ccp_dev;
+static inline struct ccp_device *ccp_get_device(void)
+{
+	return ccp_dev;
+}
+
+static inline void ccp_add_device(struct ccp_device *ccp)
+{
+	ccp_dev = ccp;
+}
+
+static inline void ccp_del_device(struct ccp_device *ccp)
+{
+	ccp_dev = NULL;
+}
+
+/**
+ * ccp_present - check if a CCP device is present
+ *
+ * Returns zero if a CCP device is present, -ENODEV otherwise.
+ */
+int ccp_present(void)
+{
+	if (ccp_get_device())
+		return 0;
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(ccp_present);
+
+/**
+ * ccp_enqueue_cmd - queue an operation for processing by the CCP
+ *
+ * @cmd: ccp_cmd struct to be processed
+ *
+ * Queue a cmd to be processed by the CCP. If queueing the cmd
+ * would exceed the defined length of the cmd queue the cmd will
+ * only be queued if the CCP_CMD_MAY_BACKLOG flag is set and will
+ * result in a return code of -EBUSY.
+ *
+ * The callback routine specified in the ccp_cmd struct will be
+ * called to notify the caller of completion (if the cmd was not
+ * backlogged) or advancement out of the backlog. If the cmd has
+ * advanced out of the backlog the "err" value of the callback
+ * will be -EINPROGRESS. Any other "err" value during callback is
+ * the result of the operation.
+ *
+ * The cmd has been successfully queued if:
+ *   the return code is -EINPROGRESS or
+ *   the return code is -EBUSY and CCP_CMD_MAY_BACKLOG flag is set
+ */
+int ccp_enqueue_cmd(struct ccp_cmd *cmd)
+{
+	struct ccp_device *ccp = ccp_get_device();
+	unsigned long flags;
+	unsigned int i;
+	int ret;
+
+	if (!ccp)
+		return -ENODEV;
+
+	/* Caller must supply a callback routine */
+	if (!cmd->callback)
+		return -EINVAL;
+
+	cmd->ccp = ccp;
+
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
+
+	i = ccp->cmd_q_count;
+
+	if (ccp->cmd_count >= MAX_CMD_QLEN) {
+		ret = -EBUSY;
+		if (cmd->flags & CCP_CMD_MAY_BACKLOG)
+			list_add_tail(&cmd->entry, &ccp->backlog);
+	} else {
+		ret = -EINPROGRESS;
+		ccp->cmd_count++;
+		list_add_tail(&cmd->entry, &ccp->cmd);
+
+		/* Find an idle queue */
+		if (!ccp->suspending) {
+			for (i = 0; i < ccp->cmd_q_count; i++) {
+				if (ccp->cmd_q[i].active)
+					continue;
+
+				break;
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
+	/* If we found an idle queue, wake it up */
+	if (i < ccp->cmd_q_count)
+		wake_up_process(ccp->cmd_q[i].kthread);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ccp_enqueue_cmd);
+
+static void ccp_do_cmd_backlog(struct work_struct *work)
+{
+	struct ccp_cmd *cmd = container_of(work, struct ccp_cmd, work);
+	struct ccp_device *ccp = cmd->ccp;
+	unsigned long flags;
+	unsigned int i;
+
+	cmd->callback(cmd->data, -EINPROGRESS);
+
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
+
+	ccp->cmd_count++;
+	list_add_tail(&cmd->entry, &ccp->cmd);
+
+	/* Find an idle queue */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		if (ccp->cmd_q[i].active)
+			continue;
+
+		break;
+	}
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
+	/* If we found an idle queue, wake it up */
+	if (i < ccp->cmd_q_count)
+		wake_up_process(ccp->cmd_q[i].kthread);
+}
+
+static struct ccp_cmd *ccp_dequeue_cmd(struct ccp_cmd_queue *cmd_q)
+{
+	struct ccp_device *ccp = cmd_q->ccp;
+	struct ccp_cmd *cmd = NULL;
+	struct ccp_cmd *backlog = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
+
+	cmd_q->active = 0;
+
+	if (ccp->suspending) {
+		cmd_q->suspended = 1;
+
+		spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+		wake_up_interruptible(&ccp->suspend_queue);
+
+		return NULL;
+	}
+
+	if (ccp->cmd_count) {
+		cmd_q->active = 1;
+
+		cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
+		list_del(&cmd->entry);
+
+		ccp->cmd_count--;
+	}
+
+	if (!list_empty(&ccp->backlog)) {
+		backlog = list_first_entry(&ccp->backlog, struct ccp_cmd,
+					   entry);
+		list_del(&backlog->entry);
+	}
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
+	if (backlog) {
+		INIT_WORK(&backlog->work, ccp_do_cmd_backlog);
+		schedule_work(&backlog->work);
+	}
+
+	return cmd;
+}
+
+static void ccp_do_cmd_complete(unsigned long data)
+{
+	struct ccp_tasklet_data *tdata = (struct ccp_tasklet_data *)data;
+	struct ccp_cmd *cmd = tdata->cmd;
+
+	cmd->callback(cmd->data, cmd->ret);
+	complete(&tdata->completion);
+}
+
+static int ccp_cmd_queue_thread(void *data)
+{
+	struct ccp_cmd_queue *cmd_q = (struct ccp_cmd_queue *)data;
+	struct ccp_cmd *cmd;
+	struct ccp_tasklet_data tdata;
+	struct tasklet_struct tasklet;
+
+	tasklet_init(&tasklet, ccp_do_cmd_complete, (unsigned long)&tdata);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop()) {
+		schedule();
+
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		cmd = ccp_dequeue_cmd(cmd_q);
+		if (!cmd)
+			continue;
+
+		__set_current_state(TASK_RUNNING);
+
+		/* Execute the command */
+		cmd->ret = ccp_run_cmd(cmd_q, cmd);
+
+		/* Schedule the completion callback */
+		tdata.cmd = cmd;
+		init_completion(&tdata.completion);
+		tasklet_schedule(&tasklet);
+		wait_for_completion(&tdata.completion);
+	}
+
+	__set_current_state(TASK_RUNNING);
+
+	return 0;
+}
+
+static int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+	struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng);
+	u32 trng_value;
+	int len = min_t(int, sizeof(trng_value), max);
+
+	/*
+	 * Locking is provided by the caller so we can update device
+	 * hwrng-related fields safely
+	 */
+	trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG);
+	if (!trng_value) {
+		/* Zero is returned if not data is available or if a
+		 * bad-entropy error is present. Assume an error if
+		 * we exceed TRNG_RETRIES reads of zero.
+		 */
+		if (ccp->hwrng_retries++ > TRNG_RETRIES)
+			return -EIO;
+
+		return 0;
+	}
+
+	/* Reset the counter and save the rng value */
+	ccp->hwrng_retries = 0;
+	memcpy(data, &trng_value, len);
+
+	return len;
+}
+
+/**
+ * ccp_alloc_struct - allocate and initialize the ccp_device struct
+ *
+ * @dev: device struct of the CCP
+ */
+struct ccp_device *ccp_alloc_struct(struct device *dev)
+{
+	struct ccp_device *ccp;
+
+	ccp = devm_kzalloc(dev, sizeof(*ccp), GFP_KERNEL);
+	if (!ccp)
+		return NULL;
+	ccp->dev = dev;
+
+	INIT_LIST_HEAD(&ccp->cmd);
+	INIT_LIST_HEAD(&ccp->backlog);
+
+	spin_lock_init(&ccp->cmd_lock);
+	mutex_init(&ccp->req_mutex);
+	mutex_init(&ccp->ksb_mutex);
+	ccp->ksb_count = KSB_COUNT;
+	ccp->ksb_start = 0;
+
+	return ccp;
+}
+
+/**
+ * ccp_init - initialize the CCP device
+ *
+ * @ccp: ccp_device struct
+ */
+int ccp_init(struct ccp_device *ccp)
+{
+	struct device *dev = ccp->dev;
+	struct ccp_cmd_queue *cmd_q;
+	struct dma_pool *dma_pool;
+	char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
+	unsigned int qmr, qim, i;
+	int ret;
+
+	/* Find available queues */
+	qim = 0;
+	qmr = ioread32(ccp->io_regs + Q_MASK_REG);
+	for (i = 0; i < MAX_HW_QUEUES; i++) {
+		if (!(qmr & (1 << i)))
+			continue;
+
+		/* Allocate a dma pool for this queue */
+		snprintf(dma_pool_name, sizeof(dma_pool_name), "ccp_q%d", i);
+		dma_pool = dma_pool_create(dma_pool_name, dev,
+					   CCP_DMAPOOL_MAX_SIZE,
+					   CCP_DMAPOOL_ALIGN, 0);
+		if (!dma_pool) {
+			dev_err(dev, "unable to allocate dma pool\n");
+			ret = -ENOMEM;
+			goto e_pool;
+		}
+
+		cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
+		ccp->cmd_q_count++;
+
+		cmd_q->ccp = ccp;
+		cmd_q->id = i;
+		cmd_q->dma_pool = dma_pool;
+
+		/* Reserve 2 KSB regions for the queue */
+		cmd_q->ksb_key = KSB_START + ccp->ksb_start++;
+		cmd_q->ksb_ctx = KSB_START + ccp->ksb_start++;
+		ccp->ksb_count -= 2;
+
+		/* Preset some register values and masks that are queue
+		 * number dependent
+		 */
+		cmd_q->reg_status = ccp->io_regs + CMD_Q_STATUS_BASE +
+				    (CMD_Q_STATUS_INCR * i);
+		cmd_q->reg_int_status = ccp->io_regs + CMD_Q_INT_STATUS_BASE +
+					(CMD_Q_STATUS_INCR * i);
+		cmd_q->int_ok = 1 << (i * 2);
+		cmd_q->int_err = 1 << ((i * 2) + 1);
+
+		cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
+
+		init_waitqueue_head(&cmd_q->int_queue);
+
+		/* Build queue interrupt mask (two interrupts per queue) */
+		qim |= cmd_q->int_ok | cmd_q->int_err;
+
+#ifdef CONFIG_ARM64
+		/* For arm64 set the recommended queue cache settings */
+		iowrite32(ccp->axcache, ccp->io_regs + CMD_Q_CACHE_BASE +
+			  (CMD_Q_CACHE_INC * i));
+#endif
+
+		dev_dbg(dev, "queue #%u available\n", i);
+	}
+	if (ccp->cmd_q_count == 0) {
+		dev_notice(dev, "no command queues available\n");
+		ret = -EIO;
+		goto e_pool;
+	}
+	dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);
+
+	/* Disable and clear interrupts until ready */
+	iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		ioread32(cmd_q->reg_int_status);
+		ioread32(cmd_q->reg_status);
+	}
+	iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);
+
+	/* Request an irq */
+	ret = ccp->get_irq(ccp);
+	if (ret) {
+		dev_err(dev, "unable to allocate an IRQ\n");
+		goto e_pool;
+	}
+
+	/* Initialize the queues used to wait for KSB space and suspend */
+	init_waitqueue_head(&ccp->ksb_queue);
+	init_waitqueue_head(&ccp->suspend_queue);
+
+	/* Create a kthread for each queue */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		struct task_struct *kthread;
+
+		cmd_q = &ccp->cmd_q[i];
+
+		kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
+					 "ccp-q%u", cmd_q->id);
+		if (IS_ERR(kthread)) {
+			dev_err(dev, "error creating queue thread (%ld)\n",
+				PTR_ERR(kthread));
+			ret = PTR_ERR(kthread);
+			goto e_kthread;
+		}
+
+		cmd_q->kthread = kthread;
+		wake_up_process(kthread);
+	}
+
+	/* Register the RNG */
+	ccp->hwrng.name = "ccp-rng";
+	ccp->hwrng.read = ccp_trng_read;
+	ret = hwrng_register(&ccp->hwrng);
+	if (ret) {
+		dev_err(dev, "error registering hwrng (%d)\n", ret);
+		goto e_kthread;
+	}
+
+	/* Make the device struct available before enabling interrupts */
+	ccp_add_device(ccp);
+
+	/* Enable interrupts */
+	iowrite32(qim, ccp->io_regs + IRQ_MASK_REG);
+
+	return 0;
+
+e_kthread:
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		if (ccp->cmd_q[i].kthread)
+			kthread_stop(ccp->cmd_q[i].kthread);
+
+	ccp->free_irq(ccp);
+
+e_pool:
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		dma_pool_destroy(ccp->cmd_q[i].dma_pool);
+
+	return ret;
+}
+
+/**
+ * ccp_destroy - tear down the CCP device
+ *
+ * @ccp: ccp_device struct
+ */
+void ccp_destroy(struct ccp_device *ccp)
+{
+	struct ccp_cmd_queue *cmd_q;
+	struct ccp_cmd *cmd;
+	unsigned int qim, i;
+
+	/* Remove general access to the device struct */
+	ccp_del_device(ccp);
+
+	/* Unregister the RNG */
+	hwrng_unregister(&ccp->hwrng);
+
+	/* Stop the queue kthreads */
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		if (ccp->cmd_q[i].kthread)
+			kthread_stop(ccp->cmd_q[i].kthread);
+
+	/* Build queue interrupt mask (two interrupt masks per queue) */
+	qim = 0;
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+		qim |= cmd_q->int_ok | cmd_q->int_err;
+	}
+
+	/* Disable and clear interrupts */
+	iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		ioread32(cmd_q->reg_int_status);
+		ioread32(cmd_q->reg_status);
+	}
+	iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);
+
+	ccp->free_irq(ccp);
+
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		dma_pool_destroy(ccp->cmd_q[i].dma_pool);
+
+	/* Flush the cmd and backlog queue */
+	while (!list_empty(&ccp->cmd)) {
+		/* Invoke the callback directly with an error code */
+		cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
+		list_del(&cmd->entry);
+		cmd->callback(cmd->data, -ENODEV);
+	}
+	while (!list_empty(&ccp->backlog)) {
+		/* Invoke the callback directly with an error code */
+		cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
+		list_del(&cmd->entry);
+		cmd->callback(cmd->data, -ENODEV);
+	}
+}
+
+/**
+ * ccp_irq_handler - handle interrupts generated by the CCP device
+ *
+ * @irq: the irq associated with the interrupt
+ * @data: the data value supplied when the irq was created
+ */
+irqreturn_t ccp_irq_handler(int irq, void *data)
+{
+	struct device *dev = data;
+	struct ccp_device *ccp = dev_get_drvdata(dev);
+	struct ccp_cmd_queue *cmd_q;
+	u32 q_int, status;
+	unsigned int i;
+
+	status = ioread32(ccp->io_regs + IRQ_STATUS_REG);
+
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		q_int = status & (cmd_q->int_ok | cmd_q->int_err);
+		if (q_int) {
+			cmd_q->int_status = status;
+			cmd_q->q_status = ioread32(cmd_q->reg_status);
+			cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);
+
+			/* On error, only save the first error value */
+			if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error)
+				cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
+
+			cmd_q->int_rcvd = 1;
+
+			/* Acknowledge the interrupt and wake the kthread */
+			iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG);
+			wake_up_interruptible(&cmd_q->int_queue);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_PM
+bool ccp_queues_suspended(struct ccp_device *ccp)
+{
+	unsigned int suspended = 0;
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
+
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		if (ccp->cmd_q[i].suspended)
+			suspended++;
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
+	return ccp->cmd_q_count == suspended;
+}
+#endif
+
+#ifdef CONFIG_X86
+static const struct x86_cpu_id ccp_support[] = {
+	{ X86_VENDOR_AMD, 22, },
+	{ },
+};
+#endif
+
+static int __init ccp_mod_init(void)
+{
+#ifdef CONFIG_X86
+	struct cpuinfo_x86 *cpuinfo = &boot_cpu_data;
+	int ret;
+
+	if (!x86_match_cpu(ccp_support))
+		return -ENODEV;
+
+	switch (cpuinfo->x86) {
+	case 22:
+		if ((cpuinfo->x86_model < 48) || (cpuinfo->x86_model > 63))
+			return -ENODEV;
+
+		ret = ccp_pci_init();
+		if (ret)
+			return ret;
+
+		/* Don't leave the driver loaded if init failed */
+		if (!ccp_get_device()) {
+			ccp_pci_exit();
+			return -ENODEV;
+		}
+
+		return 0;
+
+		break;
+	}
+#endif
+
+#ifdef CONFIG_ARM64
+	int ret;
+
+	ret = ccp_platform_init();
+	if (ret)
+		return ret;
+
+	/* Don't leave the driver loaded if init failed */
+	if (!ccp_get_device()) {
+		ccp_platform_exit();
+		return -ENODEV;
+	}
+
+	return 0;
+#endif
+
+	return -ENODEV;
+}
+
+static void __exit ccp_mod_exit(void)
+{
+#ifdef CONFIG_X86
+	struct cpuinfo_x86 *cpuinfo = &boot_cpu_data;
+
+	switch (cpuinfo->x86) {
+	case 22:
+		ccp_pci_exit();
+		break;
+	}
+#endif
+
+#ifdef CONFIG_ARM64
+	ccp_platform_exit();
+#endif
+}
+
+module_init(ccp_mod_init);
+module_exit(ccp_mod_exit);
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
new file mode 100644
index 000000000..6ff89031f
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -0,0 +1,276 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __CCP_DEV_H__
+#define __CCP_DEV_H__
+
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/dmapool.h>
+#include <linux/hw_random.h>
+#include <linux/bitops.h>
+
+#define MAX_DMAPOOL_NAME_LEN		32
+
+#define MAX_HW_QUEUES			5
+#define MAX_CMD_QLEN			100
+
+#define TRNG_RETRIES			10
+
+#define CACHE_NONE			0x00
+#define CACHE_WB_NO_ALLOC		0xb7
+
+/****** Register Mappings ******/
+#define Q_MASK_REG			0x000
+#define TRNG_OUT_REG			0x00c
+#define IRQ_MASK_REG			0x040
+#define IRQ_STATUS_REG			0x200
+
+#define DEL_CMD_Q_JOB			0x124
+#define DEL_Q_ACTIVE			0x00000200
+#define DEL_Q_ID_SHIFT			6
+
+#define CMD_REQ0			0x180
+#define CMD_REQ_INCR			0x04
+
+#define CMD_Q_STATUS_BASE		0x210
+#define CMD_Q_INT_STATUS_BASE		0x214
+#define CMD_Q_STATUS_INCR		0x20
+
+#define CMD_Q_CACHE_BASE		0x228
+#define CMD_Q_CACHE_INC			0x20
+
+#define CMD_Q_ERROR(__qs)		((__qs) & 0x0000003f)
+#define CMD_Q_DEPTH(__qs)		(((__qs) >> 12) & 0x0000000f)
+
+/****** REQ0 Related Values ******/
+#define REQ0_WAIT_FOR_WRITE		0x00000004
+#define REQ0_INT_ON_COMPLETE		0x00000002
+#define REQ0_STOP_ON_COMPLETE		0x00000001
+
+#define REQ0_CMD_Q_SHIFT		9
+#define REQ0_JOBID_SHIFT		3
+
+/****** REQ1 Related Values ******/
+#define REQ1_PROTECT_SHIFT		27
+#define REQ1_ENGINE_SHIFT		23
+#define REQ1_KEY_KSB_SHIFT		2
+
+#define REQ1_EOM			0x00000002
+#define REQ1_INIT			0x00000001
+
+/* AES Related Values */
+#define REQ1_AES_TYPE_SHIFT		21
+#define REQ1_AES_MODE_SHIFT		18
+#define REQ1_AES_ACTION_SHIFT		17
+#define REQ1_AES_CFB_SIZE_SHIFT		10
+
+/* XTS-AES Related Values */
+#define REQ1_XTS_AES_SIZE_SHIFT		10
+
+/* SHA Related Values */
+#define REQ1_SHA_TYPE_SHIFT		21
+
+/* RSA Related Values */
+#define REQ1_RSA_MOD_SIZE_SHIFT		10
+
+/* Pass-Through Related Values */
+#define REQ1_PT_BW_SHIFT		12
+#define REQ1_PT_BS_SHIFT		10
+
+/* ECC Related Values */
+#define REQ1_ECC_AFFINE_CONVERT		0x00200000
+#define REQ1_ECC_FUNCTION_SHIFT		18
+
+/****** REQ4 Related Values ******/
+#define REQ4_KSB_SHIFT			18
+#define REQ4_MEMTYPE_SHIFT		16
+
+/****** REQ6 Related Values ******/
+#define REQ6_MEMTYPE_SHIFT		16
+
+/****** Key Storage Block ******/
+#define KSB_START			77
+#define KSB_END				127
+#define KSB_COUNT			(KSB_END - KSB_START + 1)
+#define CCP_KSB_BITS			256
+#define CCP_KSB_BYTES			32
+
+#define CCP_JOBID_MASK			0x0000003f
+
+#define CCP_DMAPOOL_MAX_SIZE		64
+#define CCP_DMAPOOL_ALIGN		BIT(5)
+
+#define CCP_REVERSE_BUF_SIZE		64
+
+#define CCP_AES_KEY_KSB_COUNT		1
+#define CCP_AES_CTX_KSB_COUNT		1
+
+#define CCP_XTS_AES_KEY_KSB_COUNT	1
+#define CCP_XTS_AES_CTX_KSB_COUNT	1
+
+#define CCP_SHA_KSB_COUNT		1
+
+#define CCP_RSA_MAX_WIDTH		4096
+
+#define CCP_PASSTHRU_BLOCKSIZE		256
+#define CCP_PASSTHRU_MASKSIZE		32
+#define CCP_PASSTHRU_KSB_COUNT		1
+
+#define CCP_ECC_MODULUS_BYTES		48      /* 384-bits */
+#define CCP_ECC_MAX_OPERANDS		6
+#define CCP_ECC_MAX_OUTPUTS		3
+#define CCP_ECC_SRC_BUF_SIZE		448
+#define CCP_ECC_DST_BUF_SIZE		192
+#define CCP_ECC_OPERAND_SIZE		64
+#define CCP_ECC_OUTPUT_SIZE		64
+#define CCP_ECC_RESULT_OFFSET		60
+#define CCP_ECC_RESULT_SUCCESS		0x0001
+
+struct ccp_device;
+struct ccp_cmd;
+
+struct ccp_cmd_queue {
+	struct ccp_device *ccp;
+
+	/* Queue identifier */
+	u32 id;
+
+	/* Queue dma pool */
+	struct dma_pool *dma_pool;
+
+	/* Queue reserved KSB regions */
+	u32 ksb_key;
+	u32 ksb_ctx;
+
+	/* Queue processing thread */
+	struct task_struct *kthread;
+	unsigned int active;
+	unsigned int suspended;
+
+	/* Number of free command slots available */
+	unsigned int free_slots;
+
+	/* Interrupt masks */
+	u32 int_ok;
+	u32 int_err;
+
+	/* Register addresses for queue */
+	void __iomem *reg_status;
+	void __iomem *reg_int_status;
+
+	/* Status values from job */
+	u32 int_status;
+	u32 q_status;
+	u32 q_int_status;
+	u32 cmd_error;
+
+	/* Interrupt wait queue */
+	wait_queue_head_t int_queue;
+	unsigned int int_rcvd;
+} ____cacheline_aligned;
+
+struct ccp_device {
+	struct device *dev;
+
+	/*
+	 * Bus specific device information
+	 */
+	void *dev_specific;
+	int (*get_irq)(struct ccp_device *ccp);
+	void (*free_irq)(struct ccp_device *ccp);
+	unsigned int irq;
+
+	/*
+	 * I/O area used for device communication. The register mapping
+	 * starts at an offset into the mapped bar.
+	 *   The CMD_REQx registers and the Delete_Cmd_Queue_Job register
+	 *   need to be protected while a command queue thread is accessing
+	 *   them.
+	 */
+	struct mutex req_mutex ____cacheline_aligned;
+	void __iomem *io_map;
+	void __iomem *io_regs;
+
+	/*
+	 * Master lists that all cmds are queued on. Because there can be
+	 * more than one CCP command queue that can process a cmd a separate
+	 * backlog list is neeeded so that the backlog completion call
+	 * completes before the cmd is available for execution.
+	 */
+	spinlock_t cmd_lock ____cacheline_aligned;
+	unsigned int cmd_count;
+	struct list_head cmd;
+	struct list_head backlog;
+
+	/*
+	 * The command queues. These represent the queues available on the
+	 * CCP that are available for processing cmds
+	 */
+	struct ccp_cmd_queue cmd_q[MAX_HW_QUEUES];
+	unsigned int cmd_q_count;
+
+	/*
+	 * Support for the CCP True RNG
+	 */
+	struct hwrng hwrng;
+	unsigned int hwrng_retries;
+
+	/*
+	 * A counter used to generate job-ids for cmds submitted to the CCP
+	 */
+	atomic_t current_id ____cacheline_aligned;
+
+	/*
+	 * The CCP uses key storage blocks (KSB) to maintain context for certain
+	 * operations. To prevent multiple cmds from using the same KSB range
+	 * a command queue reserves a KSB range for the duration of the cmd.
+	 * Each queue, will however, reserve 2 KSB blocks for operations that
+	 * only require single KSB entries (eg. AES context/iv and key) in order
+	 * to avoid allocation contention.  This will reserve at most 10 KSB
+	 * entries, leaving 40 KSB entries available for dynamic allocation.
+	 */
+	struct mutex ksb_mutex ____cacheline_aligned;
+	DECLARE_BITMAP(ksb, KSB_COUNT);
+	wait_queue_head_t ksb_queue;
+	unsigned int ksb_avail;
+	unsigned int ksb_count;
+	u32 ksb_start;
+
+	/* Suspend support */
+	unsigned int suspending;
+	wait_queue_head_t suspend_queue;
+
+	/* DMA caching attribute support */
+	unsigned int axcache;
+};
+
+int ccp_pci_init(void);
+void ccp_pci_exit(void);
+
+int ccp_platform_init(void);
+void ccp_platform_exit(void);
+
+struct ccp_device *ccp_alloc_struct(struct device *dev);
+int ccp_init(struct ccp_device *ccp);
+void ccp_destroy(struct ccp_device *ccp);
+bool ccp_queues_suspended(struct ccp_device *ccp);
+
+irqreturn_t ccp_irq_handler(int irq, void *data);
+
+int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd);
+
+#endif
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
new file mode 100644
index 000000000..71f2e3c89
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -0,0 +1,2126 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/ccp.h>
+#include <linux/scatterlist.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/sha.h>
+
+#include "ccp-dev.h"
+
+enum ccp_memtype {
+	CCP_MEMTYPE_SYSTEM = 0,
+	CCP_MEMTYPE_KSB,
+	CCP_MEMTYPE_LOCAL,
+	CCP_MEMTYPE__LAST,
+};
+
+struct ccp_dma_info {
+	dma_addr_t address;
+	unsigned int offset;
+	unsigned int length;
+	enum dma_data_direction dir;
+};
+
+struct ccp_dm_workarea {
+	struct device *dev;
+	struct dma_pool *dma_pool;
+	unsigned int length;
+
+	u8 *address;
+	struct ccp_dma_info dma;
+};
+
+struct ccp_sg_workarea {
+	struct scatterlist *sg;
+	unsigned int nents;
+	unsigned int length;
+
+	struct scatterlist *dma_sg;
+	struct device *dma_dev;
+	unsigned int dma_count;
+	enum dma_data_direction dma_dir;
+
+	unsigned int sg_used;
+
+	u64 bytes_left;
+};
+
+struct ccp_data {
+	struct ccp_sg_workarea sg_wa;
+	struct ccp_dm_workarea dm_wa;
+};
+
+struct ccp_mem {
+	enum ccp_memtype type;
+	union {
+		struct ccp_dma_info dma;
+		u32 ksb;
+	} u;
+};
+
+struct ccp_aes_op {
+	enum ccp_aes_type type;
+	enum ccp_aes_mode mode;
+	enum ccp_aes_action action;
+};
+
+struct ccp_xts_aes_op {
+	enum ccp_aes_action action;
+	enum ccp_xts_aes_unit_size unit_size;
+};
+
+struct ccp_sha_op {
+	enum ccp_sha_type type;
+	u64 msg_bits;
+};
+
+struct ccp_rsa_op {
+	u32 mod_size;
+	u32 input_len;
+};
+
+struct ccp_passthru_op {
+	enum ccp_passthru_bitwise bit_mod;
+	enum ccp_passthru_byteswap byte_swap;
+};
+
+struct ccp_ecc_op {
+	enum ccp_ecc_function function;
+};
+
+struct ccp_op {
+	struct ccp_cmd_queue *cmd_q;
+
+	u32 jobid;
+	u32 ioc;
+	u32 soc;
+	u32 ksb_key;
+	u32 ksb_ctx;
+	u32 init;
+	u32 eom;
+
+	struct ccp_mem src;
+	struct ccp_mem dst;
+
+	union {
+		struct ccp_aes_op aes;
+		struct ccp_xts_aes_op xts;
+		struct ccp_sha_op sha;
+		struct ccp_rsa_op rsa;
+		struct ccp_passthru_op passthru;
+		struct ccp_ecc_op ecc;
+	} u;
+};
+
+/* SHA initial context values */
+static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
+	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
+	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
+	cpu_to_be32(SHA1_H4), 0, 0, 0,
+};
+
+static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
+	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
+	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
+	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
+	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
+};
+
+static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
+	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
+	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
+	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
+	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
+};
+
+/* The CCP cannot perform zero-length sha operations so the caller
+ * is required to buffer data for the final operation.  However, a
+ * sha operation for a message with a total length of zero is valid
+ * so known values are required to supply the result.
+ */
+static const u8 ccp_sha1_zero[CCP_SHA_CTXSIZE] = {
+	0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
+	0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90,
+	0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ccp_sha224_zero[CCP_SHA_CTXSIZE] = {
+	0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9,
+	0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4,
+	0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a,
+	0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ccp_sha256_zero[CCP_SHA_CTXSIZE] = {
+	0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
+	0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
+	0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
+	0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
+};
+
+static u32 ccp_addr_lo(struct ccp_dma_info *info)
+{
+	return lower_32_bits(info->address + info->offset);
+}
+
+static u32 ccp_addr_hi(struct ccp_dma_info *info)
+{
+	return upper_32_bits(info->address + info->offset) & 0x0000ffff;
+}
+
+static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
+{
+	struct ccp_cmd_queue *cmd_q = op->cmd_q;
+	struct ccp_device *ccp = cmd_q->ccp;
+	void __iomem *cr_addr;
+	u32 cr0, cmd;
+	unsigned int i;
+	int ret = 0;
+
+	/* We could read a status register to see how many free slots
+	 * are actually available, but reading that register resets it
+	 * and you could lose some error information.
+	 */
+	cmd_q->free_slots--;
+
+	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
+	      | (op->jobid << REQ0_JOBID_SHIFT)
+	      | REQ0_WAIT_FOR_WRITE;
+
+	if (op->soc)
+		cr0 |= REQ0_STOP_ON_COMPLETE
+		       | REQ0_INT_ON_COMPLETE;
+
+	if (op->ioc || !cmd_q->free_slots)
+		cr0 |= REQ0_INT_ON_COMPLETE;
+
+	/* Start at CMD_REQ1 */
+	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;
+
+	mutex_lock(&ccp->req_mutex);
+
+	/* Write CMD_REQ1 through CMD_REQx first */
+	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
+		iowrite32(*(cr + i), cr_addr);
+
+	/* Tell the CCP to start */
+	wmb();
+	iowrite32(cr0, ccp->io_regs + CMD_REQ0);
+
+	mutex_unlock(&ccp->req_mutex);
+
+	if (cr0 & REQ0_INT_ON_COMPLETE) {
+		/* Wait for the job to complete */
+		ret = wait_event_interruptible(cmd_q->int_queue,
+					       cmd_q->int_rcvd);
+		if (ret || cmd_q->cmd_error) {
+			/* On error delete all related jobs from the queue */
+			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
+			      | op->jobid;
+
+			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
+
+			if (!ret)
+				ret = -EIO;
+		} else if (op->soc) {
+			/* Delete just head job from the queue on SoC */
+			cmd = DEL_Q_ACTIVE
+			      | (cmd_q->id << DEL_Q_ID_SHIFT)
+			      | op->jobid;
+
+			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
+		}
+
+		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);
+
+		cmd_q->int_rcvd = 0;
+	}
+
+	return ret;
+}
+
+static int ccp_perform_aes(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* Fill out the register contents for REQ1 through REQ6 */
+	cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
+		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
+		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
+		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
+		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
+	cr[1] = op->src.u.dma.length - 1;
+	cr[2] = ccp_addr_lo(&op->src.u.dma);
+	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->src.u.dma);
+	cr[4] = ccp_addr_lo(&op->dst.u.dma);
+	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->dst.u.dma);
+
+	if (op->u.aes.mode == CCP_AES_MODE_CFB)
+		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);
+
+	if (op->eom)
+		cr[0] |= REQ1_EOM;
+
+	if (op->init)
+		cr[0] |= REQ1_INIT;
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+static int ccp_perform_xts_aes(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* Fill out the register contents for REQ1 through REQ6 */
+	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
+		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
+		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
+		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
+	cr[1] = op->src.u.dma.length - 1;
+	cr[2] = ccp_addr_lo(&op->src.u.dma);
+	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->src.u.dma);
+	cr[4] = ccp_addr_lo(&op->dst.u.dma);
+	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->dst.u.dma);
+
+	if (op->eom)
+		cr[0] |= REQ1_EOM;
+
+	if (op->init)
+		cr[0] |= REQ1_INIT;
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+static int ccp_perform_sha(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* Fill out the register contents for REQ1 through REQ6 */
+	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
+		| (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
+		| REQ1_INIT;
+	cr[1] = op->src.u.dma.length - 1;
+	cr[2] = ccp_addr_lo(&op->src.u.dma);
+	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->src.u.dma);
+
+	if (op->eom) {
+		cr[0] |= REQ1_EOM;
+		cr[4] = lower_32_bits(op->u.sha.msg_bits);
+		cr[5] = upper_32_bits(op->u.sha.msg_bits);
+	} else {
+		cr[4] = 0;
+		cr[5] = 0;
+	}
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+static int ccp_perform_rsa(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* Fill out the register contents for REQ1 through REQ6 */
+	cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
+		| (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
+		| (op->ksb_key << REQ1_KEY_KSB_SHIFT)
+		| REQ1_EOM;
+	cr[1] = op->u.rsa.input_len - 1;
+	cr[2] = ccp_addr_lo(&op->src.u.dma);
+	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->src.u.dma);
+	cr[4] = ccp_addr_lo(&op->dst.u.dma);
+	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->dst.u.dma);
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+static int ccp_perform_passthru(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* Fill out the register contents for REQ1 through REQ6 */
+	cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
+		| (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
+		| (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);
+
+	if (op->src.type == CCP_MEMTYPE_SYSTEM)
+		cr[1] = op->src.u.dma.length - 1;
+	else
+		cr[1] = op->dst.u.dma.length - 1;
+
+	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
+		cr[2] = ccp_addr_lo(&op->src.u.dma);
+		cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
+			| ccp_addr_hi(&op->src.u.dma);
+
+		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
+			cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
+	} else {
+		cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
+		cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
+	}
+
+	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
+		cr[4] = ccp_addr_lo(&op->dst.u.dma);
+		cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
+			| ccp_addr_hi(&op->dst.u.dma);
+	} else {
+		cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
+		cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
+	}
+
+	if (op->eom)
+		cr[0] |= REQ1_EOM;
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+static int ccp_perform_ecc(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* Fill out the register contents for REQ1 through REQ6 */
+	cr[0] = REQ1_ECC_AFFINE_CONVERT
+		| (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
+		| (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
+		| REQ1_EOM;
+	cr[1] = op->src.u.dma.length - 1;
+	cr[2] = ccp_addr_lo(&op->src.u.dma);
+	cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->src.u.dma);
+	cr[4] = ccp_addr_lo(&op->dst.u.dma);
+	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->dst.u.dma);
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
+{
+	int start;
+
+	for (;;) {
+		mutex_lock(&ccp->ksb_mutex);
+
+		start = (u32)bitmap_find_next_zero_area(ccp->ksb,
+							ccp->ksb_count,
+							ccp->ksb_start,
+							count, 0);
+		if (start <= ccp->ksb_count) {
+			bitmap_set(ccp->ksb, start, count);
+
+			mutex_unlock(&ccp->ksb_mutex);
+			break;
+		}
+
+		ccp->ksb_avail = 0;
+
+		mutex_unlock(&ccp->ksb_mutex);
+
+		/* Wait for KSB entries to become available */
+		if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
+			return 0;
+	}
+
+	return KSB_START + start;
+}
+
+static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
+			 unsigned int count)
+{
+	if (!start)
+		return;
+
+	mutex_lock(&ccp->ksb_mutex);
+
+	bitmap_clear(ccp->ksb, start - KSB_START, count);
+
+	ccp->ksb_avail = 1;
+
+	mutex_unlock(&ccp->ksb_mutex);
+
+	wake_up_interruptible_all(&ccp->ksb_queue);
+}
+
+static u32 ccp_gen_jobid(struct ccp_device *ccp)
+{
+	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
+}
+
+static void ccp_sg_free(struct ccp_sg_workarea *wa)
+{
+	if (wa->dma_count)
+		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
+
+	wa->dma_count = 0;
+}
+
+static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
+				struct scatterlist *sg, u64 len,
+				enum dma_data_direction dma_dir)
+{
+	memset(wa, 0, sizeof(*wa));
+
+	wa->sg = sg;
+	if (!sg)
+		return 0;
+
+	wa->nents = sg_nents(sg);
+	wa->length = sg->length;
+	wa->bytes_left = len;
+	wa->sg_used = 0;
+
+	if (len == 0)
+		return 0;
+
+	if (dma_dir == DMA_NONE)
+		return 0;
+
+	wa->dma_sg = sg;
+	wa->dma_dev = dev;
+	wa->dma_dir = dma_dir;
+	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
+	if (!wa->dma_count)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
+{
+	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
+
+	if (!wa->sg)
+		return;
+
+	wa->sg_used += nbytes;
+	wa->bytes_left -= nbytes;
+	if (wa->sg_used == wa->sg->length) {
+		wa->sg = sg_next(wa->sg);
+		wa->sg_used = 0;
+	}
+}
+
+static void ccp_dm_free(struct ccp_dm_workarea *wa)
+{
+	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
+		if (wa->address)
+			dma_pool_free(wa->dma_pool, wa->address,
+				      wa->dma.address);
+	} else {
+		if (wa->dma.address)
+			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
+					 wa->dma.dir);
+		kfree(wa->address);
+	}
+
+	wa->address = NULL;
+	wa->dma.address = 0;
+}
+
+static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
+				struct ccp_cmd_queue *cmd_q,
+				unsigned int len,
+				enum dma_data_direction dir)
+{
+	memset(wa, 0, sizeof(*wa));
+
+	if (!len)
+		return 0;
+
+	wa->dev = cmd_q->ccp->dev;
+	wa->length = len;
+
+	if (len <= CCP_DMAPOOL_MAX_SIZE) {
+		wa->dma_pool = cmd_q->dma_pool;
+
+		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
+					     &wa->dma.address);
+		if (!wa->address)
+			return -ENOMEM;
+
+		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
+
+		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
+	} else {
+		wa->address = kzalloc(len, GFP_KERNEL);
+		if (!wa->address)
+			return -ENOMEM;
+
+		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
+						 dir);
+		if (!wa->dma.address)
+			return -ENOMEM;
+
+		wa->dma.length = len;
+	}
+	wa->dma.dir = dir;
+
+	return 0;
+}
+
+static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
+			    struct scatterlist *sg, unsigned int sg_offset,
+			    unsigned int len)
+{
+	WARN_ON(!wa->address);
+
+	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
+				 0);
+}
+
+static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
+			    struct scatterlist *sg, unsigned int sg_offset,
+			    unsigned int len)
+{
+	WARN_ON(!wa->address);
+
+	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
+				 1);
+}
+
+static void ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
+				    struct scatterlist *sg,
+				    unsigned int len, unsigned int se_len,
+				    bool sign_extend)
+{
+	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
+	u8 buffer[CCP_REVERSE_BUF_SIZE];
+
+	BUG_ON(se_len > sizeof(buffer));
+
+	sg_offset = len;
+	dm_offset = 0;
+	nbytes = len;
+	while (nbytes) {
+		ksb_len = min_t(unsigned int, nbytes, se_len);
+		sg_offset -= ksb_len;
+
+		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
+		for (i = 0; i < ksb_len; i++)
+			wa->address[dm_offset + i] = buffer[ksb_len - i - 1];
+
+		dm_offset += ksb_len;
+		nbytes -= ksb_len;
+
+		if ((ksb_len != se_len) && sign_extend) {
+			/* Must sign-extend to nearest sign-extend length */
+			if (wa->address[dm_offset - 1] & 0x80)
+				memset(wa->address + dm_offset, 0xff,
+				       se_len - ksb_len);
+		}
+	}
+}
+
+static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
+				    struct scatterlist *sg,
+				    unsigned int len)
+{
+	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
+	u8 buffer[CCP_REVERSE_BUF_SIZE];
+
+	sg_offset = 0;
+	dm_offset = len;
+	nbytes = len;
+	while (nbytes) {
+		ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
+		dm_offset -= ksb_len;
+
+		for (i = 0; i < ksb_len; i++)
+			buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
+		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);
+
+		sg_offset += ksb_len;
+		nbytes -= ksb_len;
+	}
+}
+
+static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
+{
+	ccp_dm_free(&data->dm_wa);
+	ccp_sg_free(&data->sg_wa);
+}
+
+static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
+			 struct scatterlist *sg, u64 sg_len,
+			 unsigned int dm_len,
+			 enum dma_data_direction dir)
+{
+	int ret;
+
+	memset(data, 0, sizeof(*data));
+
+	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
+				   dir);
+	if (ret)
+		goto e_err;
+
+	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
+	if (ret)
+		goto e_err;
+
+	return 0;
+
+e_err:
+	ccp_free_data(data, cmd_q);
+
+	return ret;
+}
+
+static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
+{
+	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
+	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
+	unsigned int buf_count, nbytes;
+
+	/* Clear the buffer if setting it */
+	if (!from)
+		memset(dm_wa->address, 0, dm_wa->length);
+
+	if (!sg_wa->sg)
+		return 0;
+
+	/* Perform the copy operation
+	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
+	 *   an unsigned int
+	 */
+	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
+	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
+				 nbytes, from);
+
+	/* Update the structures and generate the count */
+	buf_count = 0;
+	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
+		nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
+			     dm_wa->length - buf_count);
+		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
+
+		buf_count += nbytes;
+		ccp_update_sg_workarea(sg_wa, nbytes);
+	}
+
+	return buf_count;
+}
+
+static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
+{
+	return ccp_queue_buf(data, 0);
+}
+
+static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
+{
+	return ccp_queue_buf(data, 1);
+}
+
+static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
+			     struct ccp_op *op, unsigned int block_size,
+			     bool blocksize_op)
+{
+	unsigned int sg_src_len, sg_dst_len, op_len;
+
+	/* The CCP can only DMA from/to one address each per operation. This
+	 * requires that we find the smallest DMA area between the source
+	 * and destination. The resulting len values will always be <= UINT_MAX
+	 * because the dma length is an unsigned int.
+	 */
+	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
+	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
+
+	if (dst) {
+		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
+		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
+		op_len = min(sg_src_len, sg_dst_len);
+	} else {
+		op_len = sg_src_len;
+	}
+
+	/* The data operation length will be at least block_size in length
+	 * or the smaller of available sg room remaining for the source or
+	 * the destination
+	 */
+	op_len = max(op_len, block_size);
+
+	/* Unless we have to buffer data, there's no reason to wait */
+	op->soc = 0;
+
+	if (sg_src_len < block_size) {
+		/* Not enough data in the sg element, so it
+		 * needs to be buffered into a blocksize chunk
+		 */
+		int cp_len = ccp_fill_queue_buf(src);
+
+		op->soc = 1;
+		op->src.u.dma.address = src->dm_wa.dma.address;
+		op->src.u.dma.offset = 0;
+		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
+	} else {
+		/* Enough data in the sg element, but we need to
+		 * adjust for any previously copied data
+		 */
+		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
+		op->src.u.dma.offset = src->sg_wa.sg_used;
+		op->src.u.dma.length = op_len & ~(block_size - 1);
+
+		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
+	}
+
+	if (dst) {
+		if (sg_dst_len < block_size) {
+			/* Not enough room in the sg element or we're on the
+			 * last piece of data (when using padding), so the
+			 * output needs to be buffered into a blocksize chunk
+			 */
+			op->soc = 1;
+			op->dst.u.dma.address = dst->dm_wa.dma.address;
+			op->dst.u.dma.offset = 0;
+			op->dst.u.dma.length = op->src.u.dma.length;
+		} else {
+			/* Enough room in the sg element, but we need to
+			 * adjust for any previously used area
+			 */
+			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
+			op->dst.u.dma.offset = dst->sg_wa.sg_used;
+			op->dst.u.dma.length = op->src.u.dma.length;
+		}
+	}
+}
+
+static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
+			     struct ccp_op *op)
+{
+	op->init = 0;
+
+	if (dst) {
+		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
+			ccp_empty_queue_buf(dst);
+		else
+			ccp_update_sg_workarea(&dst->sg_wa,
+					       op->dst.u.dma.length);
+	}
+}
+
+static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
+				struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
+				u32 byte_swap, bool from)
+{
+	struct ccp_op op;
+
+	memset(&op, 0, sizeof(op));
+
+	op.cmd_q = cmd_q;
+	op.jobid = jobid;
+	op.eom = 1;
+
+	if (from) {
+		op.soc = 1;
+		op.src.type = CCP_MEMTYPE_KSB;
+		op.src.u.ksb = ksb;
+		op.dst.type = CCP_MEMTYPE_SYSTEM;
+		op.dst.u.dma.address = wa->dma.address;
+		op.dst.u.dma.length = wa->length;
+	} else {
+		op.src.type = CCP_MEMTYPE_SYSTEM;
+		op.src.u.dma.address = wa->dma.address;
+		op.src.u.dma.length = wa->length;
+		op.dst.type = CCP_MEMTYPE_KSB;
+		op.dst.u.ksb = ksb;
+	}
+
+	op.u.passthru.byte_swap = byte_swap;
+
+	return ccp_perform_passthru(&op);
+}
+
+static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
+			   struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
+			   u32 byte_swap)
+{
+	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
+}
+
+static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
+			     struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
+			     u32 byte_swap)
+{
+	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
+}
+
+static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
+				struct ccp_cmd *cmd)
+{
+	struct ccp_aes_engine *aes = &cmd->u.aes;
+	struct ccp_dm_workarea key, ctx;
+	struct ccp_data src;
+	struct ccp_op op;
+	unsigned int dm_offset;
+	int ret;
+
+	if (!((aes->key_len == AES_KEYSIZE_128) ||
+	      (aes->key_len == AES_KEYSIZE_192) ||
+	      (aes->key_len == AES_KEYSIZE_256)))
+		return -EINVAL;
+
+	if (aes->src_len & (AES_BLOCK_SIZE - 1))
+		return -EINVAL;
+
+	if (aes->iv_len != AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	if (!aes->key || !aes->iv || !aes->src)
+		return -EINVAL;
+
+	if (aes->cmac_final) {
+		if (aes->cmac_key_len != AES_BLOCK_SIZE)
+			return -EINVAL;
+
+		if (!aes->cmac_key)
+			return -EINVAL;
+	}
+
+	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
+	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
+
+	ret = -EIO;
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+	op.ksb_key = cmd_q->ksb_key;
+	op.ksb_ctx = cmd_q->ksb_ctx;
+	op.init = 1;
+	op.u.aes.type = aes->type;
+	op.u.aes.mode = aes->mode;
+	op.u.aes.action = aes->action;
+
+	/* All supported key sizes fit in a single (32-byte) KSB entry
+	 * and must be in little endian format. Use the 256-bit byte
+	 * swap passthru option to convert from big endian to little
+	 * endian.
+	 */
+	ret = ccp_init_dm_workarea(&key, cmd_q,
+				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
+				   DMA_TO_DEVICE);
+	if (ret)
+		return ret;
+
+	dm_offset = CCP_KSB_BYTES - aes->key_len;
+	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
+	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
+			      CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_key;
+	}
+
+	/* The AES context fits in a single (32-byte) KSB entry and
+	 * must be in little endian format. Use the 256-bit byte swap
+	 * passthru option to convert from big endian to little endian.
+	 */
+	ret = ccp_init_dm_workarea(&ctx, cmd_q,
+				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
+				   DMA_BIDIRECTIONAL);
+	if (ret)
+		goto e_key;
+
+	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
+			      CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_ctx;
+	}
+
+	/* Send data to the CCP AES engine */
+	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
+			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
+	if (ret)
+		goto e_ctx;
+
+	while (src.sg_wa.bytes_left) {
+		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
+		if (aes->cmac_final && !src.sg_wa.bytes_left) {
+			op.eom = 1;
+
+			/* Push the K1/K2 key to the CCP now */
+			ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
+						op.ksb_ctx,
+						CCP_PASSTHRU_BYTESWAP_256BIT);
+			if (ret) {
+				cmd->engine_error = cmd_q->cmd_error;
+				goto e_src;
+			}
+
+			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
+					aes->cmac_key_len);
+			ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
+					      CCP_PASSTHRU_BYTESWAP_256BIT);
+			if (ret) {
+				cmd->engine_error = cmd_q->cmd_error;
+				goto e_src;
+			}
+		}
+
+		ret = ccp_perform_aes(&op);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_src;
+		}
+
+		ccp_process_data(&src, NULL, &op);
+	}
+
+	/* Retrieve the AES context - convert from LE to BE using
+	 * 32-byte (256-bit) byteswapping
+	 */
+	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
+				CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_src;
+	}
+
+	/* ...but we only need AES_BLOCK_SIZE bytes */
+	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+
+e_src:
+	ccp_free_data(&src, cmd_q);
+
+e_ctx:
+	ccp_dm_free(&ctx);
+
+e_key:
+	ccp_dm_free(&key);
+
+	return ret;
+}
+
+static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_aes_engine *aes = &cmd->u.aes;
+	struct ccp_dm_workarea key, ctx;
+	struct ccp_data src, dst;
+	struct ccp_op op;
+	unsigned int dm_offset;
+	bool in_place = false;
+	int ret;
+
+	if (aes->mode == CCP_AES_MODE_CMAC)
+		return ccp_run_aes_cmac_cmd(cmd_q, cmd);
+
+	if (!((aes->key_len == AES_KEYSIZE_128) ||
+	      (aes->key_len == AES_KEYSIZE_192) ||
+	      (aes->key_len == AES_KEYSIZE_256)))
+		return -EINVAL;
+
+	if (((aes->mode == CCP_AES_MODE_ECB) ||
+	     (aes->mode == CCP_AES_MODE_CBC) ||
+	     (aes->mode == CCP_AES_MODE_CFB)) &&
+	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
+		return -EINVAL;
+
+	if (!aes->key || !aes->src || !aes->dst)
+		return -EINVAL;
+
+	if (aes->mode != CCP_AES_MODE_ECB) {
+		if (aes->iv_len != AES_BLOCK_SIZE)
+			return -EINVAL;
+
+		if (!aes->iv)
+			return -EINVAL;
+	}
+
+	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
+	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
+
+	ret = -EIO;
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+	op.ksb_key = cmd_q->ksb_key;
+	op.ksb_ctx = cmd_q->ksb_ctx;
+	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
+	op.u.aes.type = aes->type;
+	op.u.aes.mode = aes->mode;
+	op.u.aes.action = aes->action;
+
+	/* All supported key sizes fit in a single (32-byte) KSB entry
+	 * and must be in little endian format. Use the 256-bit byte
+	 * swap passthru option to convert from big endian to little
+	 * endian.
+	 */
+	ret = ccp_init_dm_workarea(&key, cmd_q,
+				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
+				   DMA_TO_DEVICE);
+	if (ret)
+		return ret;
+
+	dm_offset = CCP_KSB_BYTES - aes->key_len;
+	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
+	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
+			      CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_key;
+	}
+
+	/* The AES context fits in a single (32-byte) KSB entry and
+	 * must be in little endian format. Use the 256-bit byte swap
+	 * passthru option to convert from big endian to little endian.
+	 */
+	ret = ccp_init_dm_workarea(&ctx, cmd_q,
+				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
+				   DMA_BIDIRECTIONAL);
+	if (ret)
+		goto e_key;
+
+	if (aes->mode != CCP_AES_MODE_ECB) {
+		/* Load the AES context - conver to LE */
+		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+		ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
+				      CCP_PASSTHRU_BYTESWAP_256BIT);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_ctx;
+		}
+	}
+
+	/* Prepare the input and output data workareas. For in-place
+	 * operations we need to set the dma direction to BIDIRECTIONAL
+	 * and copy the src workarea to the dst workarea.
+	 */
+	if (sg_virt(aes->src) == sg_virt(aes->dst))
+		in_place = true;
+
+	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
+			    AES_BLOCK_SIZE,
+			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
+	if (ret)
+		goto e_ctx;
+
+	if (in_place) {
+		dst = src;
+	} else {
+		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
+				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
+		if (ret)
+			goto e_src;
+	}
+
+	/* Send data to the CCP AES engine */
+	while (src.sg_wa.bytes_left) {
+		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
+		if (!src.sg_wa.bytes_left) {
+			op.eom = 1;
+
+			/* Since we don't retrieve the AES context in ECB
+			 * mode we have to wait for the operation to complete
+			 * on the last piece of data
+			 */
+			if (aes->mode == CCP_AES_MODE_ECB)
+				op.soc = 1;
+		}
+
+		ret = ccp_perform_aes(&op);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_dst;
+		}
+
+		ccp_process_data(&src, &dst, &op);
+	}
+
+	if (aes->mode != CCP_AES_MODE_ECB) {
+		/* Retrieve the AES context - convert from LE to BE using
+		 * 32-byte (256-bit) byteswapping
+		 */
+		ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
+					CCP_PASSTHRU_BYTESWAP_256BIT);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_dst;
+		}
+
+		/* ...but we only need AES_BLOCK_SIZE bytes */
+		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+	}
+
+e_dst:
+	if (!in_place)
+		ccp_free_data(&dst, cmd_q);
+
+e_src:
+	ccp_free_data(&src, cmd_q);
+
+e_ctx:
+	ccp_dm_free(&ctx);
+
+e_key:
+	ccp_dm_free(&key);
+
+	return ret;
+}
+
+static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
+			       struct ccp_cmd *cmd)
+{
+	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
+	struct ccp_dm_workarea key, ctx;
+	struct ccp_data src, dst;
+	struct ccp_op op;
+	unsigned int unit_size, dm_offset;
+	bool in_place = false;
+	int ret;
+
+	switch (xts->unit_size) {
+	case CCP_XTS_AES_UNIT_SIZE_16:
+		unit_size = 16;
+		break;
+	case CCP_XTS_AES_UNIT_SIZE_512:
+		unit_size = 512;
+		break;
+	case CCP_XTS_AES_UNIT_SIZE_1024:
+		unit_size = 1024;
+		break;
+	case CCP_XTS_AES_UNIT_SIZE_2048:
+		unit_size = 2048;
+		break;
+	case CCP_XTS_AES_UNIT_SIZE_4096:
+		unit_size = 4096;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (xts->key_len != AES_KEYSIZE_128)
+		return -EINVAL;
+
+	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
+		return -EINVAL;
+
+	if (xts->iv_len != AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
+		return -EINVAL;
+
+	BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
+	BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);
+
+	ret = -EIO;
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+	op.ksb_key = cmd_q->ksb_key;
+	op.ksb_ctx = cmd_q->ksb_ctx;
+	op.init = 1;
+	op.u.xts.action = xts->action;
+	op.u.xts.unit_size = xts->unit_size;
+
+	/* All supported key sizes fit in a single (32-byte) KSB entry
+	 * and must be in little endian format. Use the 256-bit byte
+	 * swap passthru option to convert from big endian to little
+	 * endian.
+	 */
+	ret = ccp_init_dm_workarea(&key, cmd_q,
+				   CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
+				   DMA_TO_DEVICE);
+	if (ret)
+		return ret;
+
+	dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
+	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
+	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
+	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
+			      CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_key;
+	}
+
+	/* The AES context fits in a single (32-byte) KSB entry and
+	 * for XTS is already in little endian format so no byte swapping
+	 * is needed.
+	 */
+	ret = ccp_init_dm_workarea(&ctx, cmd_q,
+				   CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
+				   DMA_BIDIRECTIONAL);
+	if (ret)
+		goto e_key;
+
+	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
+	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
+			      CCP_PASSTHRU_BYTESWAP_NOOP);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_ctx;
+	}
+
+	/* Prepare the input and output data workareas. For in-place
+	 * operations we need to set the dma direction to BIDIRECTIONAL
+	 * and copy the src workarea to the dst workarea.
+	 */
+	if (sg_virt(xts->src) == sg_virt(xts->dst))
+		in_place = true;
+
+	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
+			    unit_size,
+			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
+	if (ret)
+		goto e_ctx;
+
+	if (in_place) {
+		dst = src;
+	} else {
+		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
+				    unit_size, DMA_FROM_DEVICE);
+		if (ret)
+			goto e_src;
+	}
+
+	/* Send data to the CCP AES engine */
+	while (src.sg_wa.bytes_left) {
+		ccp_prepare_data(&src, &dst, &op, unit_size, true);
+		if (!src.sg_wa.bytes_left)
+			op.eom = 1;
+
+		ret = ccp_perform_xts_aes(&op);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_dst;
+		}
+
+		ccp_process_data(&src, &dst, &op);
+	}
+
+	/* Retrieve the AES context - convert from LE to BE using
+	 * 32-byte (256-bit) byteswapping
+	 */
+	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
+				CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_dst;
+	}
+
+	/* ...but we only need AES_BLOCK_SIZE bytes */
+	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
+
+e_dst:
+	if (!in_place)
+		ccp_free_data(&dst, cmd_q);
+
+e_src:
+	ccp_free_data(&src, cmd_q);
+
+e_ctx:
+	ccp_dm_free(&ctx);
+
+e_key:
+	ccp_dm_free(&key);
+
+	return ret;
+}
+
+static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_sha_engine *sha = &cmd->u.sha;
+	struct ccp_dm_workarea ctx;
+	struct ccp_data src;
+	struct ccp_op op;
+	int ret;
+
+	if (sha->ctx_len != CCP_SHA_CTXSIZE)
+		return -EINVAL;
+
+	if (!sha->ctx)
+		return -EINVAL;
+
+	if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
+		return -EINVAL;
+
+	if (!sha->src_len) {
+		const u8 *sha_zero;
+
+		/* Not final, just return */
+		if (!sha->final)
+			return 0;
+
+		/* CCP can't do a zero length sha operation so the caller
+		 * must buffer the data.
+		 */
+		if (sha->msg_bits)
+			return -EINVAL;
+
+		/* A sha operation for a message with a total length of zero,
+		 * return known result.
+		 */
+		switch (sha->type) {
+		case CCP_SHA_TYPE_1:
+			sha_zero = ccp_sha1_zero;
+			break;
+		case CCP_SHA_TYPE_224:
+			sha_zero = ccp_sha224_zero;
+			break;
+		case CCP_SHA_TYPE_256:
+			sha_zero = ccp_sha256_zero;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
+					 sha->ctx_len, 1);
+
+		return 0;
+	}
+
+	if (!sha->src)
+		return -EINVAL;
+
+	BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);
+
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+	op.ksb_ctx = cmd_q->ksb_ctx;
+	op.u.sha.type = sha->type;
+	op.u.sha.msg_bits = sha->msg_bits;
+
+	/* The SHA context fits in a single (32-byte) KSB entry and
+	 * must be in little endian format. Use the 256-bit byte swap
+	 * passthru option to convert from big endian to little endian.
+	 */
+	ret = ccp_init_dm_workarea(&ctx, cmd_q,
+				   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
+				   DMA_BIDIRECTIONAL);
+	if (ret)
+		return ret;
+
+	if (sha->first) {
+		const __be32 *init;
+
+		switch (sha->type) {
+		case CCP_SHA_TYPE_1:
+			init = ccp_sha1_init;
+			break;
+		case CCP_SHA_TYPE_224:
+			init = ccp_sha224_init;
+			break;
+		case CCP_SHA_TYPE_256:
+			init = ccp_sha256_init;
+			break;
+		default:
+			ret = -EINVAL;
+			goto e_ctx;
+		}
+		memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
+	} else {
+		ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
+	}
+
+	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
+			      CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_ctx;
+	}
+
+	/* Send data to the CCP SHA engine */
+	ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
+			    CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
+	if (ret)
+		goto e_ctx;
+
+	while (src.sg_wa.bytes_left) {
+		ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
+		if (sha->final && !src.sg_wa.bytes_left)
+			op.eom = 1;
+
+		ret = ccp_perform_sha(&op);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_data;
+		}
+
+		ccp_process_data(&src, NULL, &op);
+	}
+
+	/* Retrieve the SHA context - convert from LE to BE using
+	 * 32-byte (256-bit) byteswapping to BE
+	 */
+	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
+				CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_data;
+	}
+
+	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
+
+	if (sha->final && sha->opad) {
+		/* HMAC operation, recursively perform final SHA */
+		struct ccp_cmd hmac_cmd;
+		struct scatterlist sg;
+		u64 block_size, digest_size;
+		u8 *hmac_buf;
+
+		switch (sha->type) {
+		case CCP_SHA_TYPE_1:
+			block_size = SHA1_BLOCK_SIZE;
+			digest_size = SHA1_DIGEST_SIZE;
+			break;
+		case CCP_SHA_TYPE_224:
+			block_size = SHA224_BLOCK_SIZE;
+			digest_size = SHA224_DIGEST_SIZE;
+			break;
+		case CCP_SHA_TYPE_256:
+			block_size = SHA256_BLOCK_SIZE;
+			digest_size = SHA256_DIGEST_SIZE;
+			break;
+		default:
+			ret = -EINVAL;
+			goto e_data;
+		}
+
+		if (sha->opad_len != block_size) {
+			ret = -EINVAL;
+			goto e_data;
+		}
+
+		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
+		if (!hmac_buf) {
+			ret = -ENOMEM;
+			goto e_data;
+		}
+		sg_init_one(&sg, hmac_buf, block_size + digest_size);
+
+		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
+		memcpy(hmac_buf + block_size, ctx.address, digest_size);
+
+		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
+		hmac_cmd.engine = CCP_ENGINE_SHA;
+		hmac_cmd.u.sha.type = sha->type;
+		hmac_cmd.u.sha.ctx = sha->ctx;
+		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
+		hmac_cmd.u.sha.src = &sg;
+		hmac_cmd.u.sha.src_len = block_size + digest_size;
+		hmac_cmd.u.sha.opad = NULL;
+		hmac_cmd.u.sha.opad_len = 0;
+		hmac_cmd.u.sha.first = 1;
+		hmac_cmd.u.sha.final = 1;
+		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
+
+		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
+		if (ret)
+			cmd->engine_error = hmac_cmd.engine_error;
+
+		kfree(hmac_buf);
+	}
+
+e_data:
+	ccp_free_data(&src, cmd_q);
+
+e_ctx:
+	ccp_dm_free(&ctx);
+
+	return ret;
+}
+
+static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
+	struct ccp_dm_workarea exp, src;
+	struct ccp_data dst;
+	struct ccp_op op;
+	unsigned int ksb_count, i_len, o_len;
+	int ret;
+
+	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
+		return -EINVAL;
+
+	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
+		return -EINVAL;
+
+	/* The RSA modulus must precede the message being acted upon, so
+	 * it must be copied to a DMA area where the message and the
+	 * modulus can be concatenated.  Therefore the input buffer
+	 * length required is twice the output buffer length (which
+	 * must be a multiple of 256-bits).
+	 */
+	o_len = ((rsa->key_size + 255) / 256) * 32;
+	i_len = o_len * 2;
+
+	ksb_count = o_len / CCP_KSB_BYTES;
+
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+	op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
+	if (!op.ksb_key)
+		return -EIO;
+
+	/* The RSA exponent may span multiple (32-byte) KSB entries and must
+	 * be in little endian format. Reverse copy each 32-byte chunk
+	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
+	 * and each byte within that chunk and do not perform any byte swap
+	 * operations on the passthru operation.
+	 */
+	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
+	if (ret)
+		goto e_ksb;
+
+	ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, CCP_KSB_BYTES,
+				false);
+	ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
+			      CCP_PASSTHRU_BYTESWAP_NOOP);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_exp;
+	}
+
+	/* Concatenate the modulus and the message. Both the modulus and
+	 * the operands must be in little endian format.  Since the input
+	 * is in big endian format it must be converted.
+	 */
+	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
+	if (ret)
+		goto e_exp;
+
+	ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, CCP_KSB_BYTES,
+				false);
+	src.address += o_len;	/* Adjust the address for the copy operation */
+	ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, CCP_KSB_BYTES,
+				false);
+	src.address -= o_len;	/* Reset the address to original value */
+
+	/* Prepare the output area for the operation */
+	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
+			    o_len, DMA_FROM_DEVICE);
+	if (ret)
+		goto e_src;
+
+	op.soc = 1;
+	op.src.u.dma.address = src.dma.address;
+	op.src.u.dma.offset = 0;
+	op.src.u.dma.length = i_len;
+	op.dst.u.dma.address = dst.dm_wa.dma.address;
+	op.dst.u.dma.offset = 0;
+	op.dst.u.dma.length = o_len;
+
+	op.u.rsa.mod_size = rsa->key_size;
+	op.u.rsa.input_len = i_len;
+
+	ret = ccp_perform_rsa(&op);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_dst;
+	}
+
+	ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);
+
+e_dst:
+	ccp_free_data(&dst, cmd_q);
+
+e_src:
+	ccp_dm_free(&src);
+
+e_exp:
+	ccp_dm_free(&exp);
+
+e_ksb:
+	ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);
+
+	return ret;
+}
+
+static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
+				struct ccp_cmd *cmd)
+{
+	struct ccp_passthru_engine *pt = &cmd->u.passthru;
+	struct ccp_dm_workarea mask;
+	struct ccp_data src, dst;
+	struct ccp_op op;
+	bool in_place = false;
+	unsigned int i;
+	int ret;
+
+	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
+		return -EINVAL;
+
+	if (!pt->src || !pt->dst)
+		return -EINVAL;
+
+	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
+		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
+			return -EINVAL;
+		if (!pt->mask)
+			return -EINVAL;
+	}
+
+	BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
+
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+
+	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
+		/* Load the mask */
+		op.ksb_key = cmd_q->ksb_key;
+
+		ret = ccp_init_dm_workarea(&mask, cmd_q,
+					   CCP_PASSTHRU_KSB_COUNT *
+					   CCP_KSB_BYTES,
+					   DMA_TO_DEVICE);
+		if (ret)
+			return ret;
+
+		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
+		ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
+				      CCP_PASSTHRU_BYTESWAP_NOOP);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_mask;
+		}
+	}
+
+	/* Prepare the input and output data workareas. For in-place
+	 * operations we need to set the dma direction to BIDIRECTIONAL
+	 * and copy the src workarea to the dst workarea.
+	 */
+	if (sg_virt(pt->src) == sg_virt(pt->dst))
+		in_place = true;
+
+	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
+			    CCP_PASSTHRU_MASKSIZE,
+			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
+	if (ret)
+		goto e_mask;
+
+	if (in_place) {
+		dst = src;
+	} else {
+		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
+				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
+		if (ret)
+			goto e_src;
+	}
+
+	/* Send data to the CCP Passthru engine
+	 *   Because the CCP engine works on a single source and destination
+	 *   dma address at a time, each entry in the source scatterlist
+	 *   (after the dma_map_sg call) must be less than or equal to the
+	 *   (remaining) length in the destination scatterlist entry and the
+	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
+	 */
+	dst.sg_wa.sg_used = 0;
+	for (i = 1; i <= src.sg_wa.dma_count; i++) {
+		if (!dst.sg_wa.sg ||
+		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
+			ret = -EINVAL;
+			goto e_dst;
+		}
+
+		if (i == src.sg_wa.dma_count) {
+			op.eom = 1;
+			op.soc = 1;
+		}
+
+		op.src.type = CCP_MEMTYPE_SYSTEM;
+		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
+		op.src.u.dma.offset = 0;
+		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
+
+		op.dst.type = CCP_MEMTYPE_SYSTEM;
+		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
+		op.dst.u.dma.offset = dst.sg_wa.sg_used;
+		op.dst.u.dma.length = op.src.u.dma.length;
+
+		ret = ccp_perform_passthru(&op);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_dst;
+		}
+
+		dst.sg_wa.sg_used += src.sg_wa.sg->length;
+		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
+			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
+			dst.sg_wa.sg_used = 0;
+		}
+		src.sg_wa.sg = sg_next(src.sg_wa.sg);
+	}
+
+e_dst:
+	if (!in_place)
+		ccp_free_data(&dst, cmd_q);
+
+e_src:
+	ccp_free_data(&src, cmd_q);
+
+e_mask:
+	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
+		ccp_dm_free(&mask);
+
+	return ret;
+}
+
+static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
+	struct ccp_dm_workarea src, dst;
+	struct ccp_op op;
+	int ret;
+	u8 *save;
+
+	if (!ecc->u.mm.operand_1 ||
+	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
+		return -EINVAL;
+
+	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
+		if (!ecc->u.mm.operand_2 ||
+		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
+			return -EINVAL;
+
+	if (!ecc->u.mm.result ||
+	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
+		return -EINVAL;
+
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+
+	/* Concatenate the modulus and the operands. Both the modulus and
+	 * the operands must be in little endian format.  Since the input
+	 * is in big endian format it must be converted and placed in a
+	 * fixed length buffer.
+	 */
+	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
+				   DMA_TO_DEVICE);
+	if (ret)
+		return ret;
+
+	/* Save the workarea address since it is updated in order to perform
+	 * the concatenation
+	 */
+	save = src.address;
+
+	/* Copy the ECC modulus */
+	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
+				CCP_ECC_OPERAND_SIZE, false);
+	src.address += CCP_ECC_OPERAND_SIZE;
+
+	/* Copy the first operand */
+	ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
+				ecc->u.mm.operand_1_len,
+				CCP_ECC_OPERAND_SIZE, false);
+	src.address += CCP_ECC_OPERAND_SIZE;
+
+	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
+		/* Copy the second operand */
+		ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
+					ecc->u.mm.operand_2_len,
+					CCP_ECC_OPERAND_SIZE, false);
+		src.address += CCP_ECC_OPERAND_SIZE;
+	}
+
+	/* Restore the workarea address */
+	src.address = save;
+
+	/* Prepare the output area for the operation */
+	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
+				   DMA_FROM_DEVICE);
+	if (ret)
+		goto e_src;
+
+	op.soc = 1;
+	op.src.u.dma.address = src.dma.address;
+	op.src.u.dma.offset = 0;
+	op.src.u.dma.length = src.length;
+	op.dst.u.dma.address = dst.dma.address;
+	op.dst.u.dma.offset = 0;
+	op.dst.u.dma.length = dst.length;
+
+	op.u.ecc.function = cmd->u.ecc.function;
+
+	ret = ccp_perform_ecc(&op);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_dst;
+	}
+
+	ecc->ecc_result = le16_to_cpup(
+		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
+	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
+		ret = -EIO;
+		goto e_dst;
+	}
+
+	/* Save the ECC result */
+	ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);
+
+e_dst:
+	ccp_dm_free(&dst);
+
+e_src:
+	ccp_dm_free(&src);
+
+	return ret;
+}
+
+static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
+	struct ccp_dm_workarea src, dst;
+	struct ccp_op op;
+	int ret;
+	u8 *save;
+
+	if (!ecc->u.pm.point_1.x ||
+	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
+	    !ecc->u.pm.point_1.y ||
+	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
+		return -EINVAL;
+
+	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
+		if (!ecc->u.pm.point_2.x ||
+		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
+		    !ecc->u.pm.point_2.y ||
+		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
+			return -EINVAL;
+	} else {
+		if (!ecc->u.pm.domain_a ||
+		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
+			return -EINVAL;
+
+		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
+			if (!ecc->u.pm.scalar ||
+			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
+				return -EINVAL;
+	}
+
+	if (!ecc->u.pm.result.x ||
+	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
+	    !ecc->u.pm.result.y ||
+	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
+		return -EINVAL;
+
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+
+	/* Concatenate the modulus and the operands. Both the modulus and
+	 * the operands must be in little endian format.  Since the input
+	 * is in big endian format it must be converted and placed in a
+	 * fixed length buffer.
+	 */
+	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
+				   DMA_TO_DEVICE);
+	if (ret)
+		return ret;
+
+	/* Save the workarea address since it is updated in order to perform
+	 * the concatenation
+	 */
+	save = src.address;
+
+	/* Copy the ECC modulus */
+	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
+				CCP_ECC_OPERAND_SIZE, false);
+	src.address += CCP_ECC_OPERAND_SIZE;
+
+	/* Copy the first point X and Y coordinate */
+	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
+				ecc->u.pm.point_1.x_len,
+				CCP_ECC_OPERAND_SIZE, false);
+	src.address += CCP_ECC_OPERAND_SIZE;
+	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
+				ecc->u.pm.point_1.y_len,
+				CCP_ECC_OPERAND_SIZE, false);
+	src.address += CCP_ECC_OPERAND_SIZE;
+
+	/* Set the first point Z coordianate to 1 */
+	*src.address = 0x01;
+	src.address += CCP_ECC_OPERAND_SIZE;
+
+	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
+		/* Copy the second point X and Y coordinate */
+		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
+					ecc->u.pm.point_2.x_len,
+					CCP_ECC_OPERAND_SIZE, false);
+		src.address += CCP_ECC_OPERAND_SIZE;
+		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
+					ecc->u.pm.point_2.y_len,
+					CCP_ECC_OPERAND_SIZE, false);
+		src.address += CCP_ECC_OPERAND_SIZE;
+
+		/* Set the second point Z coordianate to 1 */
+		*src.address = 0x01;
+		src.address += CCP_ECC_OPERAND_SIZE;
+	} else {
+		/* Copy the Domain "a" parameter */
+		ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
+					ecc->u.pm.domain_a_len,
+					CCP_ECC_OPERAND_SIZE, false);
+		src.address += CCP_ECC_OPERAND_SIZE;
+
+		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
+			/* Copy the scalar value */
+			ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
+						ecc->u.pm.scalar_len,
+						CCP_ECC_OPERAND_SIZE, false);
+			src.address += CCP_ECC_OPERAND_SIZE;
+		}
+	}
+
+	/* Restore the workarea address */
+	src.address = save;
+
+	/* Prepare the output area for the operation */
+	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
+				   DMA_FROM_DEVICE);
+	if (ret)
+		goto e_src;
+
+	op.soc = 1;
+	op.src.u.dma.address = src.dma.address;
+	op.src.u.dma.offset = 0;
+	op.src.u.dma.length = src.length;
+	op.dst.u.dma.address = dst.dma.address;
+	op.dst.u.dma.offset = 0;
+	op.dst.u.dma.length = dst.length;
+
+	op.u.ecc.function = cmd->u.ecc.function;
+
+	ret = ccp_perform_ecc(&op);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_dst;
+	}
+
+	ecc->ecc_result = le16_to_cpup(
+		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
+	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
+		ret = -EIO;
+		goto e_dst;
+	}
+
+	/* Save the workarea address since it is updated as we walk through
+	 * to copy the point math result
+	 */
+	save = dst.address;
+
+	/* Save the ECC result X and Y coordinates */
+	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
+				CCP_ECC_MODULUS_BYTES);
+	dst.address += CCP_ECC_OUTPUT_SIZE;
+	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
+				CCP_ECC_MODULUS_BYTES);
+	dst.address += CCP_ECC_OUTPUT_SIZE;
+
+	/* Restore the workarea address */
+	dst.address = save;
+
+e_dst:
+	ccp_dm_free(&dst);
+
+e_src:
+	ccp_dm_free(&src);
+
+	return ret;
+}
+
+static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
+
+	ecc->ecc_result = 0;
+
+	if (!ecc->mod ||
+	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
+		return -EINVAL;
+
+	switch (ecc->function) {
+	case CCP_ECC_FUNCTION_MMUL_384BIT:
+	case CCP_ECC_FUNCTION_MADD_384BIT:
+	case CCP_ECC_FUNCTION_MINV_384BIT:
+		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
+
+	case CCP_ECC_FUNCTION_PADD_384BIT:
+	case CCP_ECC_FUNCTION_PMUL_384BIT:
+	case CCP_ECC_FUNCTION_PDBL_384BIT:
+		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
+
+	default:
+		return -EINVAL;
+	}
+}
+
+int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	int ret;
+
+	cmd->engine_error = 0;
+	cmd_q->cmd_error = 0;
+	cmd_q->int_rcvd = 0;
+	cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
+
+	switch (cmd->engine) {
+	case CCP_ENGINE_AES:
+		ret = ccp_run_aes_cmd(cmd_q, cmd);
+		break;
+	case CCP_ENGINE_XTS_AES_128:
+		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
+		break;
+	case CCP_ENGINE_SHA:
+		ret = ccp_run_sha_cmd(cmd_q, cmd);
+		break;
+	case CCP_ENGINE_RSA:
+		ret = ccp_run_rsa_cmd(cmd_q, cmd);
+		break;
+	case CCP_ENGINE_PASSTHRU:
+		ret = ccp_run_passthru_cmd(cmd_q, cmd);
+		break;
+	case CCP_ENGINE_ECC:
+		ret = ccp_run_ecc_cmd(cmd_q, cmd);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c
new file mode 100644
index 000000000..af190d479
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-pci.c
@@ -0,0 +1,340 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/dma-mapping.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+
+#define IO_BAR				2
+#define IO_OFFSET			0x20000
+
+#define MSIX_VECTORS			2
+
+struct ccp_msix {
+	u32 vector;
+	char name[16];
+};
+
+struct ccp_pci {
+	int msix_count;
+	struct ccp_msix msix[MSIX_VECTORS];
+};
+
+static int ccp_get_msix_irqs(struct ccp_device *ccp)
+{
+	struct ccp_pci *ccp_pci = ccp->dev_specific;
+	struct device *dev = ccp->dev;
+	struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+	struct msix_entry msix_entry[MSIX_VECTORS];
+	unsigned int name_len = sizeof(ccp_pci->msix[0].name) - 1;
+	int v, ret;
+
+	for (v = 0; v < ARRAY_SIZE(msix_entry); v++)
+		msix_entry[v].entry = v;
+
+	ret = pci_enable_msix_range(pdev, msix_entry, 1, v);
+	if (ret < 0)
+		return ret;
+
+	ccp_pci->msix_count = ret;
+	for (v = 0; v < ccp_pci->msix_count; v++) {
+		/* Set the interrupt names and request the irqs */
+		snprintf(ccp_pci->msix[v].name, name_len, "ccp-%u", v);
+		ccp_pci->msix[v].vector = msix_entry[v].vector;
+		ret = request_irq(ccp_pci->msix[v].vector, ccp_irq_handler,
+				  0, ccp_pci->msix[v].name, dev);
+		if (ret) {
+			dev_notice(dev, "unable to allocate MSI-X IRQ (%d)\n",
+				   ret);
+			goto e_irq;
+		}
+	}
+
+	return 0;
+
+e_irq:
+	while (v--)
+		free_irq(ccp_pci->msix[v].vector, dev);
+
+	pci_disable_msix(pdev);
+
+	ccp_pci->msix_count = 0;
+
+	return ret;
+}
+
+static int ccp_get_msi_irq(struct ccp_device *ccp)
+{
+	struct device *dev = ccp->dev;
+	struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+	int ret;
+
+	ret = pci_enable_msi(pdev);
+	if (ret)
+		return ret;
+
+	ccp->irq = pdev->irq;
+	ret = request_irq(ccp->irq, ccp_irq_handler, 0, "ccp", dev);
+	if (ret) {
+		dev_notice(dev, "unable to allocate MSI IRQ (%d)\n", ret);
+		goto e_msi;
+	}
+
+	return 0;
+
+e_msi:
+	pci_disable_msi(pdev);
+
+	return ret;
+}
+
+static int ccp_get_irqs(struct ccp_device *ccp)
+{
+	struct device *dev = ccp->dev;
+	int ret;
+
+	ret = ccp_get_msix_irqs(ccp);
+	if (!ret)
+		return 0;
+
+	/* Couldn't get MSI-X vectors, try MSI */
+	dev_notice(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
+	ret = ccp_get_msi_irq(ccp);
+	if (!ret)
+		return 0;
+
+	/* Couldn't get MSI interrupt */
+	dev_notice(dev, "could not enable MSI (%d)\n", ret);
+
+	return ret;
+}
+
+static void ccp_free_irqs(struct ccp_device *ccp)
+{
+	struct ccp_pci *ccp_pci = ccp->dev_specific;
+	struct device *dev = ccp->dev;
+	struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+
+	if (ccp_pci->msix_count) {
+		while (ccp_pci->msix_count--)
+			free_irq(ccp_pci->msix[ccp_pci->msix_count].vector,
+				 dev);
+		pci_disable_msix(pdev);
+	} else {
+		free_irq(ccp->irq, dev);
+		pci_disable_msi(pdev);
+	}
+}
+
+static int ccp_find_mmio_area(struct ccp_device *ccp)
+{
+	struct device *dev = ccp->dev;
+	struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+	resource_size_t io_len;
+	unsigned long io_flags;
+
+	io_flags = pci_resource_flags(pdev, IO_BAR);
+	io_len = pci_resource_len(pdev, IO_BAR);
+	if ((io_flags & IORESOURCE_MEM) && (io_len >= (IO_OFFSET + 0x800)))
+		return IO_BAR;
+
+	return -EIO;
+}
+
+static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct ccp_device *ccp;
+	struct ccp_pci *ccp_pci;
+	struct device *dev = &pdev->dev;
+	unsigned int bar;
+	int ret;
+
+	ret = -ENOMEM;
+	ccp = ccp_alloc_struct(dev);
+	if (!ccp)
+		goto e_err;
+
+	ccp_pci = devm_kzalloc(dev, sizeof(*ccp_pci), GFP_KERNEL);
+	if (!ccp_pci)
+		goto e_err;
+
+	ccp->dev_specific = ccp_pci;
+	ccp->get_irq = ccp_get_irqs;
+	ccp->free_irq = ccp_free_irqs;
+
+	ret = pci_request_regions(pdev, "ccp");
+	if (ret) {
+		dev_err(dev, "pci_request_regions failed (%d)\n", ret);
+		goto e_err;
+	}
+
+	ret = pci_enable_device(pdev);
+	if (ret) {
+		dev_err(dev, "pci_enable_device failed (%d)\n", ret);
+		goto e_regions;
+	}
+
+	pci_set_master(pdev);
+
+	ret = ccp_find_mmio_area(ccp);
+	if (ret < 0)
+		goto e_device;
+	bar = ret;
+
+	ret = -EIO;
+	ccp->io_map = pci_iomap(pdev, bar, 0);
+	if (!ccp->io_map) {
+		dev_err(dev, "pci_iomap failed\n");
+		goto e_device;
+	}
+	ccp->io_regs = ccp->io_map + IO_OFFSET;
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
+	if (ret) {
+		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+		if (ret) {
+			dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n",
+				ret);
+			goto e_iomap;
+		}
+	}
+
+	dev_set_drvdata(dev, ccp);
+
+	ret = ccp_init(ccp);
+	if (ret)
+		goto e_iomap;
+
+	dev_notice(dev, "enabled\n");
+
+	return 0;
+
+e_iomap:
+	pci_iounmap(pdev, ccp->io_map);
+
+e_device:
+	pci_disable_device(pdev);
+
+e_regions:
+	pci_release_regions(pdev);
+
+e_err:
+	dev_notice(dev, "initialization failed\n");
+	return ret;
+}
+
+static void ccp_pci_remove(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ccp_device *ccp = dev_get_drvdata(dev);
+
+	if (!ccp)
+		return;
+
+	ccp_destroy(ccp);
+
+	pci_iounmap(pdev, ccp->io_map);
+
+	pci_disable_device(pdev);
+
+	pci_release_regions(pdev);
+
+	dev_notice(dev, "disabled\n");
+}
+
+#ifdef CONFIG_PM
+static int ccp_pci_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct device *dev = &pdev->dev;
+	struct ccp_device *ccp = dev_get_drvdata(dev);
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
+
+	ccp->suspending = 1;
+
+	/* Wake all the queue kthreads to prepare for suspend */
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		wake_up_process(ccp->cmd_q[i].kthread);
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
+	/* Wait for all queue kthreads to say they're done */
+	while (!ccp_queues_suspended(ccp))
+		wait_event_interruptible(ccp->suspend_queue,
+					 ccp_queues_suspended(ccp));
+
+	return 0;
+}
+
+static int ccp_pci_resume(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ccp_device *ccp = dev_get_drvdata(dev);
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
+
+	ccp->suspending = 0;
+
+	/* Wake up all the kthreads */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		ccp->cmd_q[i].suspended = 0;
+		wake_up_process(ccp->cmd_q[i].kthread);
+	}
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
+	return 0;
+}
+#endif
+
+static const struct pci_device_id ccp_pci_table[] = {
+	{ PCI_VDEVICE(AMD, 0x1537), },
+	/* Last entry must be zero */
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, ccp_pci_table);
+
+static struct pci_driver ccp_pci_driver = {
+	.name = "AMD Cryptographic Coprocessor",
+	.id_table = ccp_pci_table,
+	.probe = ccp_pci_probe,
+	.remove = ccp_pci_remove,
+#ifdef CONFIG_PM
+	.suspend = ccp_pci_suspend,
+	.resume = ccp_pci_resume,
+#endif
+};
+
+int ccp_pci_init(void)
+{
+	return pci_register_driver(&ccp_pci_driver);
+}
+
+void ccp_pci_exit(void)
+{
+	pci_unregister_driver(&ccp_pci_driver);
+}
diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c
new file mode 100644
index 000000000..b1c20b2b5
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-platform.c
@@ -0,0 +1,314 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2014 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/ioport.h>
+#include <linux/dma-mapping.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/ccp.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/acpi.h>
+
+#include "ccp-dev.h"
+
+struct ccp_platform {
+	int use_acpi;
+	int coherent;
+};
+
+static int ccp_get_irq(struct ccp_device *ccp)
+{
+	struct device *dev = ccp->dev;
+	struct platform_device *pdev = container_of(dev,
+					struct platform_device, dev);
+	int ret;
+
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0)
+		return ret;
+
+	ccp->irq = ret;
+	ret = request_irq(ccp->irq, ccp_irq_handler, 0, "ccp", dev);
+	if (ret) {
+		dev_notice(dev, "unable to allocate IRQ (%d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int ccp_get_irqs(struct ccp_device *ccp)
+{
+	struct device *dev = ccp->dev;
+	int ret;
+
+	ret = ccp_get_irq(ccp);
+	if (!ret)
+		return 0;
+
+	/* Couldn't get an interrupt */
+	dev_notice(dev, "could not enable interrupts (%d)\n", ret);
+
+	return ret;
+}
+
+static void ccp_free_irqs(struct ccp_device *ccp)
+{
+	struct device *dev = ccp->dev;
+
+	free_irq(ccp->irq, dev);
+}
+
+static struct resource *ccp_find_mmio_area(struct ccp_device *ccp)
+{
+	struct device *dev = ccp->dev;
+	struct platform_device *pdev = container_of(dev,
+					struct platform_device, dev);
+	struct resource *ior;
+
+	ior = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (ior && (resource_size(ior) >= 0x800))
+		return ior;
+
+	return NULL;
+}
+
+#ifdef CONFIG_ACPI
+static int ccp_acpi_support(struct ccp_device *ccp)
+{
+	struct ccp_platform *ccp_platform = ccp->dev_specific;
+	struct acpi_device *adev = ACPI_COMPANION(ccp->dev);
+	acpi_handle handle;
+	acpi_status status;
+	unsigned long long data;
+	int cca;
+
+	/* Retrieve the device cache coherency value */
+	handle = adev->handle;
+	do {
+		status = acpi_evaluate_integer(handle, "_CCA", NULL, &data);
+		if (!ACPI_FAILURE(status)) {
+			cca = data;
+			break;
+		}
+	} while (!ACPI_FAILURE(status));
+
+	if (ACPI_FAILURE(status)) {
+		dev_err(ccp->dev, "error obtaining acpi coherency value\n");
+		return -EINVAL;
+	}
+
+	ccp_platform->coherent = !!cca;
+
+	return 0;
+}
+#else	/* CONFIG_ACPI */
+static int ccp_acpi_support(struct ccp_device *ccp)
+{
+	return -EINVAL;
+}
+#endif
+
+#ifdef CONFIG_OF
+static int ccp_of_support(struct ccp_device *ccp)
+{
+	struct ccp_platform *ccp_platform = ccp->dev_specific;
+
+	ccp_platform->coherent = of_dma_is_coherent(ccp->dev->of_node);
+
+	return 0;
+}
+#else
+static int ccp_of_support(struct ccp_device *ccp)
+{
+	return -EINVAL;
+}
+#endif
+
+static int ccp_platform_probe(struct platform_device *pdev)
+{
+	struct ccp_device *ccp;
+	struct ccp_platform *ccp_platform;
+	struct device *dev = &pdev->dev;
+	struct acpi_device *adev = ACPI_COMPANION(dev);
+	struct resource *ior;
+	int ret;
+
+	ret = -ENOMEM;
+	ccp = ccp_alloc_struct(dev);
+	if (!ccp)
+		goto e_err;
+
+	ccp_platform = devm_kzalloc(dev, sizeof(*ccp_platform), GFP_KERNEL);
+	if (!ccp_platform)
+		goto e_err;
+
+	ccp->dev_specific = ccp_platform;
+	ccp->get_irq = ccp_get_irqs;
+	ccp->free_irq = ccp_free_irqs;
+
+	ccp_platform->use_acpi = (!adev || acpi_disabled) ? 0 : 1;
+
+	ior = ccp_find_mmio_area(ccp);
+	ccp->io_map = devm_ioremap_resource(dev, ior);
+	if (IS_ERR(ccp->io_map)) {
+		ret = PTR_ERR(ccp->io_map);
+		goto e_err;
+	}
+	ccp->io_regs = ccp->io_map;
+
+	if (!dev->dma_mask)
+		dev->dma_mask = &dev->coherent_dma_mask;
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
+	if (ret) {
+		dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret);
+		goto e_err;
+	}
+
+	if (ccp_platform->use_acpi)
+		ret = ccp_acpi_support(ccp);
+	else
+		ret = ccp_of_support(ccp);
+	if (ret)
+		goto e_err;
+
+	if (ccp_platform->coherent)
+		ccp->axcache = CACHE_WB_NO_ALLOC;
+	else
+		ccp->axcache = CACHE_NONE;
+
+	dev_set_drvdata(dev, ccp);
+
+	ret = ccp_init(ccp);
+	if (ret)
+		goto e_err;
+
+	dev_notice(dev, "enabled\n");
+
+	return 0;
+
+e_err:
+	dev_notice(dev, "initialization failed\n");
+	return ret;
+}
+
+static int ccp_platform_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ccp_device *ccp = dev_get_drvdata(dev);
+
+	ccp_destroy(ccp);
+
+	dev_notice(dev, "disabled\n");
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int ccp_platform_suspend(struct platform_device *pdev,
+				pm_message_t state)
+{
+	struct device *dev = &pdev->dev;
+	struct ccp_device *ccp = dev_get_drvdata(dev);
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
+
+	ccp->suspending = 1;
+
+	/* Wake all the queue kthreads to prepare for suspend */
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		wake_up_process(ccp->cmd_q[i].kthread);
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
+	/* Wait for all queue kthreads to say they're done */
+	while (!ccp_queues_suspended(ccp))
+		wait_event_interruptible(ccp->suspend_queue,
+					 ccp_queues_suspended(ccp));
+
+	return 0;
+}
+
+static int ccp_platform_resume(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ccp_device *ccp = dev_get_drvdata(dev);
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
+
+	ccp->suspending = 0;
+
+	/* Wake up all the kthreads */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		ccp->cmd_q[i].suspended = 0;
+		wake_up_process(ccp->cmd_q[i].kthread);
+	}
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id ccp_acpi_match[] = {
+	{ "AMDI0C00", 0 },
+	{ },
+};
+#endif
+
+#ifdef CONFIG_OF
+static const struct of_device_id ccp_of_match[] = {
+	{ .compatible = "amd,ccp-seattle-v1a" },
+	{ },
+};
+#endif
+
+static struct platform_driver ccp_platform_driver = {
+	.driver = {
+		.name = "AMD Cryptographic Coprocessor",
+#ifdef CONFIG_ACPI
+		.acpi_match_table = ccp_acpi_match,
+#endif
+#ifdef CONFIG_OF
+		.of_match_table = ccp_of_match,
+#endif
+	},
+	.probe = ccp_platform_probe,
+	.remove = ccp_platform_remove,
+#ifdef CONFIG_PM
+	.suspend = ccp_platform_suspend,
+	.resume = ccp_platform_resume,
+#endif
+};
+
+int ccp_platform_init(void)
+{
+	return platform_driver_register(&ccp_platform_driver);
+}
+
+void ccp_platform_exit(void)
+{
+	platform_driver_unregister(&ccp_platform_driver);
+}