diff options
Diffstat (limited to 'drivers/crypto/ccp')
-rw-r--r-- | drivers/crypto/ccp/Kconfig | 24 | ||||
-rw-r--r-- | drivers/crypto/ccp/Makefile | 10 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-crypto-aes-cmac.c | 367 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-crypto-aes-xts.c | 277 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-crypto-aes.c | 368 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-crypto-main.c | 391 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-crypto-sha.c | 438 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-crypto.h | 194 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-dev.c | 654 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-dev.h | 276 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-ops.c | 2126 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-pci.c | 340 | ||||
-rw-r--r-- | drivers/crypto/ccp/ccp-platform.c | 314 |
13 files changed, 5779 insertions, 0 deletions
diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig new file mode 100644 index 000000000..7639ffc36 --- /dev/null +++ b/drivers/crypto/ccp/Kconfig @@ -0,0 +1,24 @@ +config CRYPTO_DEV_CCP_DD + tristate "Cryptographic Coprocessor device driver" + depends on CRYPTO_DEV_CCP + default m + select HW_RANDOM + help + Provides the interface to use the AMD Cryptographic Coprocessor + which can be used to accelerate or offload encryption operations + such as SHA, AES and more. If you choose 'M' here, this module + will be called ccp. + +config CRYPTO_DEV_CCP_CRYPTO + tristate "Encryption and hashing acceleration support" + depends on CRYPTO_DEV_CCP_DD + default m + select CRYPTO_ALGAPI + select CRYPTO_HASH + select CRYPTO_BLKCIPHER + select CRYPTO_AUTHENC + help + Support for using the cryptographic API with the AMD Cryptographic + Coprocessor. This module supports acceleration and offload of SHA + and AES algorithms. If you choose 'M' here, this module will be + called ccp_crypto. diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile new file mode 100644 index 000000000..55a1f3951 --- /dev/null +++ b/drivers/crypto/ccp/Makefile @@ -0,0 +1,10 @@ +obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o +ccp-objs := ccp-dev.o ccp-ops.o ccp-platform.o +ccp-$(CONFIG_PCI) += ccp-pci.o + +obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o +ccp-crypto-objs := ccp-crypto-main.o \ + ccp-crypto-aes.o \ + ccp-crypto-aes-cmac.o \ + ccp-crypto-aes-xts.o \ + ccp-crypto-sha.o diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c new file mode 100644 index 000000000..ea7e84469 --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c @@ -0,0 +1,367 @@ +/* + * AMD Cryptographic Coprocessor (CCP) AES CMAC crypto API support + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/scatterlist.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <crypto/aes.h> +#include <crypto/hash.h> +#include <crypto/internal/hash.h> +#include <crypto/scatterwalk.h> + +#include "ccp-crypto.h" + +static int ccp_aes_cmac_complete(struct crypto_async_request *async_req, + int ret) +{ + struct ahash_request *req = ahash_request_cast(async_req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + unsigned int digest_size = crypto_ahash_digestsize(tfm); + + if (ret) + goto e_free; + + if (rctx->hash_rem) { + /* Save remaining data to buffer */ + unsigned int offset = rctx->nbytes - rctx->hash_rem; + + scatterwalk_map_and_copy(rctx->buf, rctx->src, + offset, rctx->hash_rem, 0); + rctx->buf_count = rctx->hash_rem; + } else { + rctx->buf_count = 0; + } + + /* Update result area if supplied */ + if (req->result) + memcpy(req->result, rctx->iv, digest_size); + +e_free: + sg_free_table(&rctx->data_sg); + + return ret; +} + +static int ccp_do_cmac_update(struct ahash_request *req, unsigned int nbytes, + unsigned int final) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + struct scatterlist *sg, *cmac_key_sg = NULL; + unsigned int block_size = + crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); + unsigned int need_pad, sg_count; + gfp_t gfp; + u64 len; + int ret; + + if (!ctx->u.aes.key_len) + return -EINVAL; + + if (nbytes) + rctx->null_msg = 0; + + len = (u64)rctx->buf_count + (u64)nbytes; + + if (!final && (len <= block_size)) { + scatterwalk_map_and_copy(rctx->buf + rctx->buf_count, req->src, + 0, nbytes, 0); + rctx->buf_count += nbytes; + + return 0; + } + + rctx->src = req->src; + rctx->nbytes = nbytes; + + rctx->final = final; + rctx->hash_rem = final ? 0 : len & (block_size - 1); + rctx->hash_cnt = len - rctx->hash_rem; + if (!final && !rctx->hash_rem) { + /* CCP can't do zero length final, so keep some data around */ + rctx->hash_cnt -= block_size; + rctx->hash_rem = block_size; + } + + if (final && (rctx->null_msg || (len & (block_size - 1)))) + need_pad = 1; + else + need_pad = 0; + + sg_init_one(&rctx->iv_sg, rctx->iv, sizeof(rctx->iv)); + + /* Build the data scatterlist table - allocate enough entries for all + * possible data pieces (buffer, input data, padding) + */ + sg_count = (nbytes) ? sg_nents(req->src) + 2 : 2; + gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? + GFP_KERNEL : GFP_ATOMIC; + ret = sg_alloc_table(&rctx->data_sg, sg_count, gfp); + if (ret) + return ret; + + sg = NULL; + if (rctx->buf_count) { + sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count); + sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->buf_sg); + } + + if (nbytes) + sg = ccp_crypto_sg_table_add(&rctx->data_sg, req->src); + + if (need_pad) { + int pad_length = block_size - (len & (block_size - 1)); + + rctx->hash_cnt += pad_length; + + memset(rctx->pad, 0, sizeof(rctx->pad)); + rctx->pad[0] = 0x80; + sg_init_one(&rctx->pad_sg, rctx->pad, pad_length); + sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->pad_sg); + } + if (sg) { + sg_mark_end(sg); + sg = rctx->data_sg.sgl; + } + + /* Initialize the K1/K2 scatterlist */ + if (final) + cmac_key_sg = (need_pad) ? &ctx->u.aes.k2_sg + : &ctx->u.aes.k1_sg; + + memset(&rctx->cmd, 0, sizeof(rctx->cmd)); + INIT_LIST_HEAD(&rctx->cmd.entry); + rctx->cmd.engine = CCP_ENGINE_AES; + rctx->cmd.u.aes.type = ctx->u.aes.type; + rctx->cmd.u.aes.mode = ctx->u.aes.mode; + rctx->cmd.u.aes.action = CCP_AES_ACTION_ENCRYPT; + rctx->cmd.u.aes.key = &ctx->u.aes.key_sg; + rctx->cmd.u.aes.key_len = ctx->u.aes.key_len; + rctx->cmd.u.aes.iv = &rctx->iv_sg; + rctx->cmd.u.aes.iv_len = AES_BLOCK_SIZE; + rctx->cmd.u.aes.src = sg; + rctx->cmd.u.aes.src_len = rctx->hash_cnt; + rctx->cmd.u.aes.dst = NULL; + rctx->cmd.u.aes.cmac_key = cmac_key_sg; + rctx->cmd.u.aes.cmac_key_len = ctx->u.aes.kn_len; + rctx->cmd.u.aes.cmac_final = final; + + ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); + + return ret; +} + +static int ccp_aes_cmac_init(struct ahash_request *req) +{ + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + + memset(rctx, 0, sizeof(*rctx)); + + rctx->null_msg = 1; + + return 0; +} + +static int ccp_aes_cmac_update(struct ahash_request *req) +{ + return ccp_do_cmac_update(req, req->nbytes, 0); +} + +static int ccp_aes_cmac_final(struct ahash_request *req) +{ + return ccp_do_cmac_update(req, 0, 1); +} + +static int ccp_aes_cmac_finup(struct ahash_request *req) +{ + return ccp_do_cmac_update(req, req->nbytes, 1); +} + +static int ccp_aes_cmac_digest(struct ahash_request *req) +{ + int ret; + + ret = ccp_aes_cmac_init(req); + if (ret) + return ret; + + return ccp_aes_cmac_finup(req); +} + +static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int key_len) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); + struct ccp_crypto_ahash_alg *alg = + ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm)); + u64 k0_hi, k0_lo, k1_hi, k1_lo, k2_hi, k2_lo; + u64 rb_hi = 0x00, rb_lo = 0x87; + __be64 *gk; + int ret; + + switch (key_len) { + case AES_KEYSIZE_128: + ctx->u.aes.type = CCP_AES_TYPE_128; + break; + case AES_KEYSIZE_192: + ctx->u.aes.type = CCP_AES_TYPE_192; + break; + case AES_KEYSIZE_256: + ctx->u.aes.type = CCP_AES_TYPE_256; + break; + default: + crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + ctx->u.aes.mode = alg->mode; + + /* Set to zero until complete */ + ctx->u.aes.key_len = 0; + + /* Set the key for the AES cipher used to generate the keys */ + ret = crypto_cipher_setkey(ctx->u.aes.tfm_cipher, key, key_len); + if (ret) + return ret; + + /* Encrypt a block of zeroes - use key area in context */ + memset(ctx->u.aes.key, 0, sizeof(ctx->u.aes.key)); + crypto_cipher_encrypt_one(ctx->u.aes.tfm_cipher, ctx->u.aes.key, + ctx->u.aes.key); + + /* Generate K1 and K2 */ + k0_hi = be64_to_cpu(*((__be64 *)ctx->u.aes.key)); + k0_lo = be64_to_cpu(*((__be64 *)ctx->u.aes.key + 1)); + + k1_hi = (k0_hi << 1) | (k0_lo >> 63); + k1_lo = k0_lo << 1; + if (ctx->u.aes.key[0] & 0x80) { + k1_hi ^= rb_hi; + k1_lo ^= rb_lo; + } + gk = (__be64 *)ctx->u.aes.k1; + *gk = cpu_to_be64(k1_hi); + gk++; + *gk = cpu_to_be64(k1_lo); + + k2_hi = (k1_hi << 1) | (k1_lo >> 63); + k2_lo = k1_lo << 1; + if (ctx->u.aes.k1[0] & 0x80) { + k2_hi ^= rb_hi; + k2_lo ^= rb_lo; + } + gk = (__be64 *)ctx->u.aes.k2; + *gk = cpu_to_be64(k2_hi); + gk++; + *gk = cpu_to_be64(k2_lo); + + ctx->u.aes.kn_len = sizeof(ctx->u.aes.k1); + sg_init_one(&ctx->u.aes.k1_sg, ctx->u.aes.k1, sizeof(ctx->u.aes.k1)); + sg_init_one(&ctx->u.aes.k2_sg, ctx->u.aes.k2, sizeof(ctx->u.aes.k2)); + + /* Save the supplied key */ + memset(ctx->u.aes.key, 0, sizeof(ctx->u.aes.key)); + memcpy(ctx->u.aes.key, key, key_len); + ctx->u.aes.key_len = key_len; + sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); + + return ret; +} + +static int ccp_aes_cmac_cra_init(struct crypto_tfm *tfm) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); + struct crypto_cipher *cipher_tfm; + + ctx->complete = ccp_aes_cmac_complete; + ctx->u.aes.key_len = 0; + + crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_aes_cmac_req_ctx)); + + cipher_tfm = crypto_alloc_cipher("aes", 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(cipher_tfm)) { + pr_warn("could not load aes cipher driver\n"); + return PTR_ERR(cipher_tfm); + } + ctx->u.aes.tfm_cipher = cipher_tfm; + + return 0; +} + +static void ccp_aes_cmac_cra_exit(struct crypto_tfm *tfm) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->u.aes.tfm_cipher) + crypto_free_cipher(ctx->u.aes.tfm_cipher); + ctx->u.aes.tfm_cipher = NULL; +} + +int ccp_register_aes_cmac_algs(struct list_head *head) +{ + struct ccp_crypto_ahash_alg *ccp_alg; + struct ahash_alg *alg; + struct hash_alg_common *halg; + struct crypto_alg *base; + int ret; + + ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); + if (!ccp_alg) + return -ENOMEM; + + INIT_LIST_HEAD(&ccp_alg->entry); + ccp_alg->mode = CCP_AES_MODE_CMAC; + + alg = &ccp_alg->alg; + alg->init = ccp_aes_cmac_init; + alg->update = ccp_aes_cmac_update; + alg->final = ccp_aes_cmac_final; + alg->finup = ccp_aes_cmac_finup; + alg->digest = ccp_aes_cmac_digest; + alg->setkey = ccp_aes_cmac_setkey; + + halg = &alg->halg; + halg->digestsize = AES_BLOCK_SIZE; + + base = &halg->base; + snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "cmac(aes)"); + snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "cmac-aes-ccp"); + base->cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK; + base->cra_blocksize = AES_BLOCK_SIZE; + base->cra_ctxsize = sizeof(struct ccp_ctx); + base->cra_priority = CCP_CRA_PRIORITY; + base->cra_type = &crypto_ahash_type; + base->cra_init = ccp_aes_cmac_cra_init; + base->cra_exit = ccp_aes_cmac_cra_exit; + base->cra_module = THIS_MODULE; + + ret = crypto_register_ahash(alg); + if (ret) { + pr_err("%s ahash algorithm registration error (%d)\n", + base->cra_name, ret); + kfree(ccp_alg); + return ret; + } + + list_add(&ccp_alg->entry, head); + + return 0; +} diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c new file mode 100644 index 000000000..52c7395cb --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c @@ -0,0 +1,277 @@ +/* + * AMD Cryptographic Coprocessor (CCP) AES XTS crypto API support + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/scatterlist.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <crypto/aes.h> +#include <crypto/scatterwalk.h> + +#include "ccp-crypto.h" + +struct ccp_aes_xts_def { + const char *name; + const char *drv_name; +}; + +static struct ccp_aes_xts_def aes_xts_algs[] = { + { + .name = "xts(aes)", + .drv_name = "xts-aes-ccp", + }, +}; + +struct ccp_unit_size_map { + unsigned int size; + u32 value; +}; + +static struct ccp_unit_size_map unit_size_map[] = { + { + .size = 4096, + .value = CCP_XTS_AES_UNIT_SIZE_4096, + }, + { + .size = 2048, + .value = CCP_XTS_AES_UNIT_SIZE_2048, + }, + { + .size = 1024, + .value = CCP_XTS_AES_UNIT_SIZE_1024, + }, + { + .size = 512, + .value = CCP_XTS_AES_UNIT_SIZE_512, + }, + { + .size = 256, + .value = CCP_XTS_AES_UNIT_SIZE__LAST, + }, + { + .size = 128, + .value = CCP_XTS_AES_UNIT_SIZE__LAST, + }, + { + .size = 64, + .value = CCP_XTS_AES_UNIT_SIZE__LAST, + }, + { + .size = 32, + .value = CCP_XTS_AES_UNIT_SIZE__LAST, + }, + { + .size = 16, + .value = CCP_XTS_AES_UNIT_SIZE_16, + }, + { + .size = 1, + .value = CCP_XTS_AES_UNIT_SIZE__LAST, + }, +}; + +static int ccp_aes_xts_complete(struct crypto_async_request *async_req, int ret) +{ + struct ablkcipher_request *req = ablkcipher_request_cast(async_req); + struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); + + if (ret) + return ret; + + memcpy(req->info, rctx->iv, AES_BLOCK_SIZE); + + return 0; +} + +static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm)); + + /* Only support 128-bit AES key with a 128-bit Tweak key, + * otherwise use the fallback + */ + switch (key_len) { + case AES_KEYSIZE_128 * 2: + memcpy(ctx->u.aes.key, key, key_len); + break; + } + ctx->u.aes.key_len = key_len / 2; + sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); + + return crypto_ablkcipher_setkey(ctx->u.aes.tfm_ablkcipher, key, + key_len); +} + +static int ccp_aes_xts_crypt(struct ablkcipher_request *req, + unsigned int encrypt) +{ + struct crypto_tfm *tfm = + crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); + struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); + unsigned int unit; + int ret; + + if (!ctx->u.aes.key_len) + return -EINVAL; + + if (req->nbytes & (AES_BLOCK_SIZE - 1)) + return -EINVAL; + + if (!req->info) + return -EINVAL; + + for (unit = 0; unit < ARRAY_SIZE(unit_size_map); unit++) + if (!(req->nbytes & (unit_size_map[unit].size - 1))) + break; + + if ((unit_size_map[unit].value == CCP_XTS_AES_UNIT_SIZE__LAST) || + (ctx->u.aes.key_len != AES_KEYSIZE_128)) { + /* Use the fallback to process the request for any + * unsupported unit sizes or key sizes + */ + ablkcipher_request_set_tfm(req, ctx->u.aes.tfm_ablkcipher); + ret = (encrypt) ? crypto_ablkcipher_encrypt(req) : + crypto_ablkcipher_decrypt(req); + ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + + return ret; + } + + memcpy(rctx->iv, req->info, AES_BLOCK_SIZE); + sg_init_one(&rctx->iv_sg, rctx->iv, AES_BLOCK_SIZE); + + memset(&rctx->cmd, 0, sizeof(rctx->cmd)); + INIT_LIST_HEAD(&rctx->cmd.entry); + rctx->cmd.engine = CCP_ENGINE_XTS_AES_128; + rctx->cmd.u.xts.action = (encrypt) ? CCP_AES_ACTION_ENCRYPT + : CCP_AES_ACTION_DECRYPT; + rctx->cmd.u.xts.unit_size = unit_size_map[unit].value; + rctx->cmd.u.xts.key = &ctx->u.aes.key_sg; + rctx->cmd.u.xts.key_len = ctx->u.aes.key_len; + rctx->cmd.u.xts.iv = &rctx->iv_sg; + rctx->cmd.u.xts.iv_len = AES_BLOCK_SIZE; + rctx->cmd.u.xts.src = req->src; + rctx->cmd.u.xts.src_len = req->nbytes; + rctx->cmd.u.xts.dst = req->dst; + + ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); + + return ret; +} + +static int ccp_aes_xts_encrypt(struct ablkcipher_request *req) +{ + return ccp_aes_xts_crypt(req, 1); +} + +static int ccp_aes_xts_decrypt(struct ablkcipher_request *req) +{ + return ccp_aes_xts_crypt(req, 0); +} + +static int ccp_aes_xts_cra_init(struct crypto_tfm *tfm) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_ablkcipher *fallback_tfm; + + ctx->complete = ccp_aes_xts_complete; + ctx->u.aes.key_len = 0; + + fallback_tfm = crypto_alloc_ablkcipher(crypto_tfm_alg_name(tfm), 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback_tfm)) { + pr_warn("could not load fallback driver %s\n", + crypto_tfm_alg_name(tfm)); + return PTR_ERR(fallback_tfm); + } + ctx->u.aes.tfm_ablkcipher = fallback_tfm; + + tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx) + + fallback_tfm->base.crt_ablkcipher.reqsize; + + return 0; +} + +static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->u.aes.tfm_ablkcipher) + crypto_free_ablkcipher(ctx->u.aes.tfm_ablkcipher); + ctx->u.aes.tfm_ablkcipher = NULL; +} + +static int ccp_register_aes_xts_alg(struct list_head *head, + const struct ccp_aes_xts_def *def) +{ + struct ccp_crypto_ablkcipher_alg *ccp_alg; + struct crypto_alg *alg; + int ret; + + ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); + if (!ccp_alg) + return -ENOMEM; + + INIT_LIST_HEAD(&ccp_alg->entry); + + alg = &ccp_alg->alg; + + snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); + snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + def->drv_name); + alg->cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK; + alg->cra_blocksize = AES_BLOCK_SIZE; + alg->cra_ctxsize = sizeof(struct ccp_ctx); + alg->cra_priority = CCP_CRA_PRIORITY; + alg->cra_type = &crypto_ablkcipher_type; + alg->cra_ablkcipher.setkey = ccp_aes_xts_setkey; + alg->cra_ablkcipher.encrypt = ccp_aes_xts_encrypt; + alg->cra_ablkcipher.decrypt = ccp_aes_xts_decrypt; + alg->cra_ablkcipher.min_keysize = AES_MIN_KEY_SIZE * 2; + alg->cra_ablkcipher.max_keysize = AES_MAX_KEY_SIZE * 2; + alg->cra_ablkcipher.ivsize = AES_BLOCK_SIZE; + alg->cra_init = ccp_aes_xts_cra_init; + alg->cra_exit = ccp_aes_xts_cra_exit; + alg->cra_module = THIS_MODULE; + + ret = crypto_register_alg(alg); + if (ret) { + pr_err("%s ablkcipher algorithm registration error (%d)\n", + alg->cra_name, ret); + kfree(ccp_alg); + return ret; + } + + list_add(&ccp_alg->entry, head); + + return 0; +} + +int ccp_register_aes_xts_algs(struct list_head *head) +{ + int i, ret; + + for (i = 0; i < ARRAY_SIZE(aes_xts_algs); i++) { + ret = ccp_register_aes_xts_alg(head, &aes_xts_algs[i]); + if (ret) + return ret; + } + + return 0; +} diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c new file mode 100644 index 000000000..7984f9108 --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-aes.c @@ -0,0 +1,368 @@ +/* + * AMD Cryptographic Coprocessor (CCP) AES crypto API support + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/scatterlist.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <crypto/aes.h> +#include <crypto/ctr.h> +#include <crypto/scatterwalk.h> + +#include "ccp-crypto.h" + +static int ccp_aes_complete(struct crypto_async_request *async_req, int ret) +{ + struct ablkcipher_request *req = ablkcipher_request_cast(async_req); + struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); + + if (ret) + return ret; + + if (ctx->u.aes.mode != CCP_AES_MODE_ECB) + memcpy(req->info, rctx->iv, AES_BLOCK_SIZE); + + return 0; +} + +static int ccp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm)); + struct ccp_crypto_ablkcipher_alg *alg = + ccp_crypto_ablkcipher_alg(crypto_ablkcipher_tfm(tfm)); + + switch (key_len) { + case AES_KEYSIZE_128: + ctx->u.aes.type = CCP_AES_TYPE_128; + break; + case AES_KEYSIZE_192: + ctx->u.aes.type = CCP_AES_TYPE_192; + break; + case AES_KEYSIZE_256: + ctx->u.aes.type = CCP_AES_TYPE_256; + break; + default: + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + ctx->u.aes.mode = alg->mode; + ctx->u.aes.key_len = key_len; + + memcpy(ctx->u.aes.key, key, key_len); + sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); + + return 0; +} + +static int ccp_aes_crypt(struct ablkcipher_request *req, bool encrypt) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); + struct scatterlist *iv_sg = NULL; + unsigned int iv_len = 0; + int ret; + + if (!ctx->u.aes.key_len) + return -EINVAL; + + if (((ctx->u.aes.mode == CCP_AES_MODE_ECB) || + (ctx->u.aes.mode == CCP_AES_MODE_CBC) || + (ctx->u.aes.mode == CCP_AES_MODE_CFB)) && + (req->nbytes & (AES_BLOCK_SIZE - 1))) + return -EINVAL; + + if (ctx->u.aes.mode != CCP_AES_MODE_ECB) { + if (!req->info) + return -EINVAL; + + memcpy(rctx->iv, req->info, AES_BLOCK_SIZE); + iv_sg = &rctx->iv_sg; + iv_len = AES_BLOCK_SIZE; + sg_init_one(iv_sg, rctx->iv, iv_len); + } + + memset(&rctx->cmd, 0, sizeof(rctx->cmd)); + INIT_LIST_HEAD(&rctx->cmd.entry); + rctx->cmd.engine = CCP_ENGINE_AES; + rctx->cmd.u.aes.type = ctx->u.aes.type; + rctx->cmd.u.aes.mode = ctx->u.aes.mode; + rctx->cmd.u.aes.action = + (encrypt) ? CCP_AES_ACTION_ENCRYPT : CCP_AES_ACTION_DECRYPT; + rctx->cmd.u.aes.key = &ctx->u.aes.key_sg; + rctx->cmd.u.aes.key_len = ctx->u.aes.key_len; + rctx->cmd.u.aes.iv = iv_sg; + rctx->cmd.u.aes.iv_len = iv_len; + rctx->cmd.u.aes.src = req->src; + rctx->cmd.u.aes.src_len = req->nbytes; + rctx->cmd.u.aes.dst = req->dst; + + ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); + + return ret; +} + +static int ccp_aes_encrypt(struct ablkcipher_request *req) +{ + return ccp_aes_crypt(req, true); +} + +static int ccp_aes_decrypt(struct ablkcipher_request *req) +{ + return ccp_aes_crypt(req, false); +} + +static int ccp_aes_cra_init(struct crypto_tfm *tfm) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->complete = ccp_aes_complete; + ctx->u.aes.key_len = 0; + + tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx); + + return 0; +} + +static void ccp_aes_cra_exit(struct crypto_tfm *tfm) +{ +} + +static int ccp_aes_rfc3686_complete(struct crypto_async_request *async_req, + int ret) +{ + struct ablkcipher_request *req = ablkcipher_request_cast(async_req); + struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); + + /* Restore the original pointer */ + req->info = rctx->rfc3686_info; + + return ccp_aes_complete(async_req, ret); +} + +static int ccp_aes_rfc3686_setkey(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm)); + + if (key_len < CTR_RFC3686_NONCE_SIZE) + return -EINVAL; + + key_len -= CTR_RFC3686_NONCE_SIZE; + memcpy(ctx->u.aes.nonce, key + key_len, CTR_RFC3686_NONCE_SIZE); + + return ccp_aes_setkey(tfm, key, key_len); +} + +static int ccp_aes_rfc3686_crypt(struct ablkcipher_request *req, bool encrypt) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); + u8 *iv; + + /* Initialize the CTR block */ + iv = rctx->rfc3686_iv; + memcpy(iv, ctx->u.aes.nonce, CTR_RFC3686_NONCE_SIZE); + + iv += CTR_RFC3686_NONCE_SIZE; + memcpy(iv, req->info, CTR_RFC3686_IV_SIZE); + + iv += CTR_RFC3686_IV_SIZE; + *(__be32 *)iv = cpu_to_be32(1); + + /* Point to the new IV */ + rctx->rfc3686_info = req->info; + req->info = rctx->rfc3686_iv; + + return ccp_aes_crypt(req, encrypt); +} + +static int ccp_aes_rfc3686_encrypt(struct ablkcipher_request *req) +{ + return ccp_aes_rfc3686_crypt(req, true); +} + +static int ccp_aes_rfc3686_decrypt(struct ablkcipher_request *req) +{ + return ccp_aes_rfc3686_crypt(req, false); +} + +static int ccp_aes_rfc3686_cra_init(struct crypto_tfm *tfm) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->complete = ccp_aes_rfc3686_complete; + ctx->u.aes.key_len = 0; + + tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx); + + return 0; +} + +static void ccp_aes_rfc3686_cra_exit(struct crypto_tfm *tfm) +{ +} + +static struct crypto_alg ccp_aes_defaults = { + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ccp_ctx), + .cra_priority = CCP_CRA_PRIORITY, + .cra_type = &crypto_ablkcipher_type, + .cra_init = ccp_aes_cra_init, + .cra_exit = ccp_aes_cra_exit, + .cra_module = THIS_MODULE, + .cra_ablkcipher = { + .setkey = ccp_aes_setkey, + .encrypt = ccp_aes_encrypt, + .decrypt = ccp_aes_decrypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + }, +}; + +static struct crypto_alg ccp_aes_rfc3686_defaults = { + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = CTR_RFC3686_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ccp_ctx), + .cra_priority = CCP_CRA_PRIORITY, + .cra_type = &crypto_ablkcipher_type, + .cra_init = ccp_aes_rfc3686_cra_init, + .cra_exit = ccp_aes_rfc3686_cra_exit, + .cra_module = THIS_MODULE, + .cra_ablkcipher = { + .setkey = ccp_aes_rfc3686_setkey, + .encrypt = ccp_aes_rfc3686_encrypt, + .decrypt = ccp_aes_rfc3686_decrypt, + .min_keysize = AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE, + .max_keysize = AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE, + }, +}; + +struct ccp_aes_def { + enum ccp_aes_mode mode; + const char *name; + const char *driver_name; + unsigned int blocksize; + unsigned int ivsize; + struct crypto_alg *alg_defaults; +}; + +static struct ccp_aes_def aes_algs[] = { + { + .mode = CCP_AES_MODE_ECB, + .name = "ecb(aes)", + .driver_name = "ecb-aes-ccp", + .blocksize = AES_BLOCK_SIZE, + .ivsize = 0, + .alg_defaults = &ccp_aes_defaults, + }, + { + .mode = CCP_AES_MODE_CBC, + .name = "cbc(aes)", + .driver_name = "cbc-aes-ccp", + .blocksize = AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .alg_defaults = &ccp_aes_defaults, + }, + { + .mode = CCP_AES_MODE_CFB, + .name = "cfb(aes)", + .driver_name = "cfb-aes-ccp", + .blocksize = AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .alg_defaults = &ccp_aes_defaults, + }, + { + .mode = CCP_AES_MODE_OFB, + .name = "ofb(aes)", + .driver_name = "ofb-aes-ccp", + .blocksize = 1, + .ivsize = AES_BLOCK_SIZE, + .alg_defaults = &ccp_aes_defaults, + }, + { + .mode = CCP_AES_MODE_CTR, + .name = "ctr(aes)", + .driver_name = "ctr-aes-ccp", + .blocksize = 1, + .ivsize = AES_BLOCK_SIZE, + .alg_defaults = &ccp_aes_defaults, + }, + { + .mode = CCP_AES_MODE_CTR, + .name = "rfc3686(ctr(aes))", + .driver_name = "rfc3686-ctr-aes-ccp", + .blocksize = 1, + .ivsize = CTR_RFC3686_IV_SIZE, + .alg_defaults = &ccp_aes_rfc3686_defaults, + }, +}; + +static int ccp_register_aes_alg(struct list_head *head, + const struct ccp_aes_def *def) +{ + struct ccp_crypto_ablkcipher_alg *ccp_alg; + struct crypto_alg *alg; + int ret; + + ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); + if (!ccp_alg) + return -ENOMEM; + + INIT_LIST_HEAD(&ccp_alg->entry); + + ccp_alg->mode = def->mode; + + /* Copy the defaults and override as necessary */ + alg = &ccp_alg->alg; + *alg = *def->alg_defaults; + snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); + snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + def->driver_name); + alg->cra_blocksize = def->blocksize; + alg->cra_ablkcipher.ivsize = def->ivsize; + + ret = crypto_register_alg(alg); + if (ret) { + pr_err("%s ablkcipher algorithm registration error (%d)\n", + alg->cra_name, ret); + kfree(ccp_alg); + return ret; + } + + list_add(&ccp_alg->entry, head); + + return 0; +} + +int ccp_register_aes_algs(struct list_head *head) +{ + int i, ret; + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + ret = ccp_register_aes_alg(head, &aes_algs[i]); + if (ret) + return ret; + } + + return 0; +} diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c new file mode 100644 index 000000000..bdec01ec6 --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-main.c @@ -0,0 +1,391 @@ +/* + * AMD Cryptographic Coprocessor (CCP) crypto API support + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/ccp.h> +#include <linux/scatterlist.h> +#include <crypto/internal/hash.h> + +#include "ccp-crypto.h" + +MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.0.0"); +MODULE_DESCRIPTION("AMD Cryptographic Coprocessor crypto API support"); + +static unsigned int aes_disable; +module_param(aes_disable, uint, 0444); +MODULE_PARM_DESC(aes_disable, "Disable use of AES - any non-zero value"); + +static unsigned int sha_disable; +module_param(sha_disable, uint, 0444); +MODULE_PARM_DESC(sha_disable, "Disable use of SHA - any non-zero value"); + +/* List heads for the supported algorithms */ +static LIST_HEAD(hash_algs); +static LIST_HEAD(cipher_algs); + +/* For any tfm, requests for that tfm must be returned on the order + * received. With multiple queues available, the CCP can process more + * than one cmd at a time. Therefore we must maintain a cmd list to insure + * the proper ordering of requests on a given tfm. + */ +struct ccp_crypto_queue { + struct list_head cmds; + struct list_head *backlog; + unsigned int cmd_count; +}; + +#define CCP_CRYPTO_MAX_QLEN 100 + +static struct ccp_crypto_queue req_queue; +static spinlock_t req_queue_lock; + +struct ccp_crypto_cmd { + struct list_head entry; + + struct ccp_cmd *cmd; + + /* Save the crypto_tfm and crypto_async_request addresses + * separately to avoid any reference to a possibly invalid + * crypto_async_request structure after invoking the request + * callback + */ + struct crypto_async_request *req; + struct crypto_tfm *tfm; + + /* Used for held command processing to determine state */ + int ret; +}; + +struct ccp_crypto_cpu { + struct work_struct work; + struct completion completion; + struct ccp_crypto_cmd *crypto_cmd; + int err; +}; + +static inline bool ccp_crypto_success(int err) +{ + if (err && (err != -EINPROGRESS) && (err != -EBUSY)) + return false; + + return true; +} + +static struct ccp_crypto_cmd *ccp_crypto_cmd_complete( + struct ccp_crypto_cmd *crypto_cmd, struct ccp_crypto_cmd **backlog) +{ + struct ccp_crypto_cmd *held = NULL, *tmp; + unsigned long flags; + + *backlog = NULL; + + spin_lock_irqsave(&req_queue_lock, flags); + + /* Held cmds will be after the current cmd in the queue so start + * searching for a cmd with a matching tfm for submission. + */ + tmp = crypto_cmd; + list_for_each_entry_continue(tmp, &req_queue.cmds, entry) { + if (crypto_cmd->tfm != tmp->tfm) + continue; + held = tmp; + break; + } + + /* Process the backlog: + * Because cmds can be executed from any point in the cmd list + * special precautions have to be taken when handling the backlog. + */ + if (req_queue.backlog != &req_queue.cmds) { + /* Skip over this cmd if it is the next backlog cmd */ + if (req_queue.backlog == &crypto_cmd->entry) + req_queue.backlog = crypto_cmd->entry.next; + + *backlog = container_of(req_queue.backlog, + struct ccp_crypto_cmd, entry); + req_queue.backlog = req_queue.backlog->next; + + /* Skip over this cmd if it is now the next backlog cmd */ + if (req_queue.backlog == &crypto_cmd->entry) + req_queue.backlog = crypto_cmd->entry.next; + } + + /* Remove the cmd entry from the list of cmds */ + req_queue.cmd_count--; + list_del(&crypto_cmd->entry); + + spin_unlock_irqrestore(&req_queue_lock, flags); + + return held; +} + +static void ccp_crypto_complete(void *data, int err) +{ + struct ccp_crypto_cmd *crypto_cmd = data; + struct ccp_crypto_cmd *held, *next, *backlog; + struct crypto_async_request *req = crypto_cmd->req; + struct ccp_ctx *ctx = crypto_tfm_ctx(req->tfm); + int ret; + + if (err == -EINPROGRESS) { + /* Only propagate the -EINPROGRESS if necessary */ + if (crypto_cmd->ret == -EBUSY) { + crypto_cmd->ret = -EINPROGRESS; + req->complete(req, -EINPROGRESS); + } + + return; + } + + /* Operation has completed - update the queue before invoking + * the completion callbacks and retrieve the next cmd (cmd with + * a matching tfm) that can be submitted to the CCP. + */ + held = ccp_crypto_cmd_complete(crypto_cmd, &backlog); + if (backlog) { + backlog->ret = -EINPROGRESS; + backlog->req->complete(backlog->req, -EINPROGRESS); + } + + /* Transition the state from -EBUSY to -EINPROGRESS first */ + if (crypto_cmd->ret == -EBUSY) + req->complete(req, -EINPROGRESS); + + /* Completion callbacks */ + ret = err; + if (ctx->complete) + ret = ctx->complete(req, ret); + req->complete(req, ret); + + /* Submit the next cmd */ + while (held) { + /* Since we have already queued the cmd, we must indicate that + * we can backlog so as not to "lose" this request. + */ + held->cmd->flags |= CCP_CMD_MAY_BACKLOG; + ret = ccp_enqueue_cmd(held->cmd); + if (ccp_crypto_success(ret)) + break; + + /* Error occurred, report it and get the next entry */ + ctx = crypto_tfm_ctx(held->req->tfm); + if (ctx->complete) + ret = ctx->complete(held->req, ret); + held->req->complete(held->req, ret); + + next = ccp_crypto_cmd_complete(held, &backlog); + if (backlog) { + backlog->ret = -EINPROGRESS; + backlog->req->complete(backlog->req, -EINPROGRESS); + } + + kfree(held); + held = next; + } + + kfree(crypto_cmd); +} + +static int ccp_crypto_enqueue_cmd(struct ccp_crypto_cmd *crypto_cmd) +{ + struct ccp_crypto_cmd *active = NULL, *tmp; + unsigned long flags; + bool free_cmd = true; + int ret; + + spin_lock_irqsave(&req_queue_lock, flags); + + /* Check if the cmd can/should be queued */ + if (req_queue.cmd_count >= CCP_CRYPTO_MAX_QLEN) { + ret = -EBUSY; + if (!(crypto_cmd->cmd->flags & CCP_CMD_MAY_BACKLOG)) + goto e_lock; + } + + /* Look for an entry with the same tfm. If there is a cmd + * with the same tfm in the list then the current cmd cannot + * be submitted to the CCP yet. + */ + list_for_each_entry(tmp, &req_queue.cmds, entry) { + if (crypto_cmd->tfm != tmp->tfm) + continue; + active = tmp; + break; + } + + ret = -EINPROGRESS; + if (!active) { + ret = ccp_enqueue_cmd(crypto_cmd->cmd); + if (!ccp_crypto_success(ret)) + goto e_lock; /* Error, don't queue it */ + if ((ret == -EBUSY) && + !(crypto_cmd->cmd->flags & CCP_CMD_MAY_BACKLOG)) + goto e_lock; /* Not backlogging, don't queue it */ + } + + if (req_queue.cmd_count >= CCP_CRYPTO_MAX_QLEN) { + ret = -EBUSY; + if (req_queue.backlog == &req_queue.cmds) + req_queue.backlog = &crypto_cmd->entry; + } + crypto_cmd->ret = ret; + + req_queue.cmd_count++; + list_add_tail(&crypto_cmd->entry, &req_queue.cmds); + + free_cmd = false; + +e_lock: + spin_unlock_irqrestore(&req_queue_lock, flags); + + if (free_cmd) + kfree(crypto_cmd); + + return ret; +} + +/** + * ccp_crypto_enqueue_request - queue an crypto async request for processing + * by the CCP + * + * @req: crypto_async_request struct to be processed + * @cmd: ccp_cmd struct to be sent to the CCP + */ +int ccp_crypto_enqueue_request(struct crypto_async_request *req, + struct ccp_cmd *cmd) +{ + struct ccp_crypto_cmd *crypto_cmd; + gfp_t gfp; + + gfp = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; + + crypto_cmd = kzalloc(sizeof(*crypto_cmd), gfp); + if (!crypto_cmd) + return -ENOMEM; + + /* The tfm pointer must be saved and not referenced from the + * crypto_async_request (req) pointer because it is used after + * completion callback for the request and the req pointer + * might not be valid anymore. + */ + crypto_cmd->cmd = cmd; + crypto_cmd->req = req; + crypto_cmd->tfm = req->tfm; + + cmd->callback = ccp_crypto_complete; + cmd->data = crypto_cmd; + + if (req->flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + cmd->flags |= CCP_CMD_MAY_BACKLOG; + else + cmd->flags &= ~CCP_CMD_MAY_BACKLOG; + + return ccp_crypto_enqueue_cmd(crypto_cmd); +} + +struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table, + struct scatterlist *sg_add) +{ + struct scatterlist *sg, *sg_last = NULL; + + for (sg = table->sgl; sg; sg = sg_next(sg)) + if (!sg_page(sg)) + break; + BUG_ON(!sg); + + for (; sg && sg_add; sg = sg_next(sg), sg_add = sg_next(sg_add)) { + sg_set_page(sg, sg_page(sg_add), sg_add->length, + sg_add->offset); + sg_last = sg; + } + BUG_ON(sg_add); + + return sg_last; +} + +static int ccp_register_algs(void) +{ + int ret; + + if (!aes_disable) { + ret = ccp_register_aes_algs(&cipher_algs); + if (ret) + return ret; + + ret = ccp_register_aes_cmac_algs(&hash_algs); + if (ret) + return ret; + + ret = ccp_register_aes_xts_algs(&cipher_algs); + if (ret) + return ret; + } + + if (!sha_disable) { + ret = ccp_register_sha_algs(&hash_algs); + if (ret) + return ret; + } + + return 0; +} + +static void ccp_unregister_algs(void) +{ + struct ccp_crypto_ahash_alg *ahash_alg, *ahash_tmp; + struct ccp_crypto_ablkcipher_alg *ablk_alg, *ablk_tmp; + + list_for_each_entry_safe(ahash_alg, ahash_tmp, &hash_algs, entry) { + crypto_unregister_ahash(&ahash_alg->alg); + list_del(&ahash_alg->entry); + kfree(ahash_alg); + } + + list_for_each_entry_safe(ablk_alg, ablk_tmp, &cipher_algs, entry) { + crypto_unregister_alg(&ablk_alg->alg); + list_del(&ablk_alg->entry); + kfree(ablk_alg); + } +} + +static int ccp_crypto_init(void) +{ + int ret; + + ret = ccp_present(); + if (ret) + return ret; + + spin_lock_init(&req_queue_lock); + INIT_LIST_HEAD(&req_queue.cmds); + req_queue.backlog = &req_queue.cmds; + req_queue.cmd_count = 0; + + ret = ccp_register_algs(); + if (ret) + ccp_unregister_algs(); + + return ret; +} + +static void ccp_crypto_exit(void) +{ + ccp_unregister_algs(); +} + +module_init(ccp_crypto_init); +module_exit(ccp_crypto_exit); diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c new file mode 100644 index 000000000..507b34e0c --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -0,0 +1,438 @@ +/* + * AMD Cryptographic Coprocessor (CCP) SHA crypto API support + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/scatterlist.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <crypto/hash.h> +#include <crypto/internal/hash.h> +#include <crypto/sha.h> +#include <crypto/scatterwalk.h> + +#include "ccp-crypto.h" + +static int ccp_sha_complete(struct crypto_async_request *async_req, int ret) +{ + struct ahash_request *req = ahash_request_cast(async_req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + unsigned int digest_size = crypto_ahash_digestsize(tfm); + + if (ret) + goto e_free; + + if (rctx->hash_rem) { + /* Save remaining data to buffer */ + unsigned int offset = rctx->nbytes - rctx->hash_rem; + + scatterwalk_map_and_copy(rctx->buf, rctx->src, + offset, rctx->hash_rem, 0); + rctx->buf_count = rctx->hash_rem; + } else { + rctx->buf_count = 0; + } + + /* Update result area if supplied */ + if (req->result) + memcpy(req->result, rctx->ctx, digest_size); + +e_free: + sg_free_table(&rctx->data_sg); + + return ret; +} + +static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes, + unsigned int final) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); + struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + struct scatterlist *sg; + unsigned int block_size = + crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); + unsigned int sg_count; + gfp_t gfp; + u64 len; + int ret; + + len = (u64)rctx->buf_count + (u64)nbytes; + + if (!final && (len <= block_size)) { + scatterwalk_map_and_copy(rctx->buf + rctx->buf_count, req->src, + 0, nbytes, 0); + rctx->buf_count += nbytes; + + return 0; + } + + rctx->src = req->src; + rctx->nbytes = nbytes; + + rctx->final = final; + rctx->hash_rem = final ? 0 : len & (block_size - 1); + rctx->hash_cnt = len - rctx->hash_rem; + if (!final && !rctx->hash_rem) { + /* CCP can't do zero length final, so keep some data around */ + rctx->hash_cnt -= block_size; + rctx->hash_rem = block_size; + } + + /* Initialize the context scatterlist */ + sg_init_one(&rctx->ctx_sg, rctx->ctx, sizeof(rctx->ctx)); + + sg = NULL; + if (rctx->buf_count && nbytes) { + /* Build the data scatterlist table - allocate enough entries + * for both data pieces (buffer and input data) + */ + gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? + GFP_KERNEL : GFP_ATOMIC; + sg_count = sg_nents(req->src) + 1; + ret = sg_alloc_table(&rctx->data_sg, sg_count, gfp); + if (ret) + return ret; + + sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count); + sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->buf_sg); + sg = ccp_crypto_sg_table_add(&rctx->data_sg, req->src); + sg_mark_end(sg); + + sg = rctx->data_sg.sgl; + } else if (rctx->buf_count) { + sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count); + + sg = &rctx->buf_sg; + } else if (nbytes) { + sg = req->src; + } + + rctx->msg_bits += (rctx->hash_cnt << 3); /* Total in bits */ + + memset(&rctx->cmd, 0, sizeof(rctx->cmd)); + INIT_LIST_HEAD(&rctx->cmd.entry); + rctx->cmd.engine = CCP_ENGINE_SHA; + rctx->cmd.u.sha.type = rctx->type; + rctx->cmd.u.sha.ctx = &rctx->ctx_sg; + rctx->cmd.u.sha.ctx_len = sizeof(rctx->ctx); + rctx->cmd.u.sha.src = sg; + rctx->cmd.u.sha.src_len = rctx->hash_cnt; + rctx->cmd.u.sha.opad = ctx->u.sha.key_len ? + &ctx->u.sha.opad_sg : NULL; + rctx->cmd.u.sha.opad_len = ctx->u.sha.key_len ? + ctx->u.sha.opad_count : 0; + rctx->cmd.u.sha.first = rctx->first; + rctx->cmd.u.sha.final = rctx->final; + rctx->cmd.u.sha.msg_bits = rctx->msg_bits; + + rctx->first = 0; + + ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); + + return ret; +} + +static int ccp_sha_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); + struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_crypto_ahash_alg *alg = + ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm)); + unsigned int block_size = + crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); + + memset(rctx, 0, sizeof(*rctx)); + + rctx->type = alg->type; + rctx->first = 1; + + if (ctx->u.sha.key_len) { + /* Buffer the HMAC key for first update */ + memcpy(rctx->buf, ctx->u.sha.ipad, block_size); + rctx->buf_count = block_size; + } + + return 0; +} + +static int ccp_sha_update(struct ahash_request *req) +{ + return ccp_do_sha_update(req, req->nbytes, 0); +} + +static int ccp_sha_final(struct ahash_request *req) +{ + return ccp_do_sha_update(req, 0, 1); +} + +static int ccp_sha_finup(struct ahash_request *req) +{ + return ccp_do_sha_update(req, req->nbytes, 1); +} + +static int ccp_sha_digest(struct ahash_request *req) +{ + int ret; + + ret = ccp_sha_init(req); + if (ret) + return ret; + + return ccp_sha_finup(req); +} + +static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int key_len) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); + struct crypto_shash *shash = ctx->u.sha.hmac_tfm; + + SHASH_DESC_ON_STACK(sdesc, shash); + + unsigned int block_size = crypto_shash_blocksize(shash); + unsigned int digest_size = crypto_shash_digestsize(shash); + int i, ret; + + /* Set to zero until complete */ + ctx->u.sha.key_len = 0; + + /* Clear key area to provide zero padding for keys smaller + * than the block size + */ + memset(ctx->u.sha.key, 0, sizeof(ctx->u.sha.key)); + + if (key_len > block_size) { + /* Must hash the input key */ + sdesc->tfm = shash; + sdesc->flags = crypto_ahash_get_flags(tfm) & + CRYPTO_TFM_REQ_MAY_SLEEP; + + ret = crypto_shash_digest(sdesc, key, key_len, + ctx->u.sha.key); + if (ret) { + crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + key_len = digest_size; + } else { + memcpy(ctx->u.sha.key, key, key_len); + } + + for (i = 0; i < block_size; i++) { + ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ 0x36; + ctx->u.sha.opad[i] = ctx->u.sha.key[i] ^ 0x5c; + } + + sg_init_one(&ctx->u.sha.opad_sg, ctx->u.sha.opad, block_size); + ctx->u.sha.opad_count = block_size; + + ctx->u.sha.key_len = key_len; + + return 0; +} + +static int ccp_sha_cra_init(struct crypto_tfm *tfm) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); + + ctx->complete = ccp_sha_complete; + ctx->u.sha.key_len = 0; + + crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_sha_req_ctx)); + + return 0; +} + +static void ccp_sha_cra_exit(struct crypto_tfm *tfm) +{ +} + +static int ccp_hmac_sha_cra_init(struct crypto_tfm *tfm) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + struct ccp_crypto_ahash_alg *alg = ccp_crypto_ahash_alg(tfm); + struct crypto_shash *hmac_tfm; + + hmac_tfm = crypto_alloc_shash(alg->child_alg, 0, 0); + if (IS_ERR(hmac_tfm)) { + pr_warn("could not load driver %s need for HMAC support\n", + alg->child_alg); + return PTR_ERR(hmac_tfm); + } + + ctx->u.sha.hmac_tfm = hmac_tfm; + + return ccp_sha_cra_init(tfm); +} + +static void ccp_hmac_sha_cra_exit(struct crypto_tfm *tfm) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->u.sha.hmac_tfm) + crypto_free_shash(ctx->u.sha.hmac_tfm); + + ccp_sha_cra_exit(tfm); +} + +struct ccp_sha_def { + const char *name; + const char *drv_name; + enum ccp_sha_type type; + u32 digest_size; + u32 block_size; +}; + +static struct ccp_sha_def sha_algs[] = { + { + .name = "sha1", + .drv_name = "sha1-ccp", + .type = CCP_SHA_TYPE_1, + .digest_size = SHA1_DIGEST_SIZE, + .block_size = SHA1_BLOCK_SIZE, + }, + { + .name = "sha224", + .drv_name = "sha224-ccp", + .type = CCP_SHA_TYPE_224, + .digest_size = SHA224_DIGEST_SIZE, + .block_size = SHA224_BLOCK_SIZE, + }, + { + .name = "sha256", + .drv_name = "sha256-ccp", + .type = CCP_SHA_TYPE_256, + .digest_size = SHA256_DIGEST_SIZE, + .block_size = SHA256_BLOCK_SIZE, + }, +}; + +static int ccp_register_hmac_alg(struct list_head *head, + const struct ccp_sha_def *def, + const struct ccp_crypto_ahash_alg *base_alg) +{ + struct ccp_crypto_ahash_alg *ccp_alg; + struct ahash_alg *alg; + struct hash_alg_common *halg; + struct crypto_alg *base; + int ret; + + ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); + if (!ccp_alg) + return -ENOMEM; + + /* Copy the base algorithm and only change what's necessary */ + *ccp_alg = *base_alg; + INIT_LIST_HEAD(&ccp_alg->entry); + + strncpy(ccp_alg->child_alg, def->name, CRYPTO_MAX_ALG_NAME); + + alg = &ccp_alg->alg; + alg->setkey = ccp_sha_setkey; + + halg = &alg->halg; + + base = &halg->base; + snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", def->name); + snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "hmac-%s", + def->drv_name); + base->cra_init = ccp_hmac_sha_cra_init; + base->cra_exit = ccp_hmac_sha_cra_exit; + + ret = crypto_register_ahash(alg); + if (ret) { + pr_err("%s ahash algorithm registration error (%d)\n", + base->cra_name, ret); + kfree(ccp_alg); + return ret; + } + + list_add(&ccp_alg->entry, head); + + return ret; +} + +static int ccp_register_sha_alg(struct list_head *head, + const struct ccp_sha_def *def) +{ + struct ccp_crypto_ahash_alg *ccp_alg; + struct ahash_alg *alg; + struct hash_alg_common *halg; + struct crypto_alg *base; + int ret; + + ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); + if (!ccp_alg) + return -ENOMEM; + + INIT_LIST_HEAD(&ccp_alg->entry); + + ccp_alg->type = def->type; + + alg = &ccp_alg->alg; + alg->init = ccp_sha_init; + alg->update = ccp_sha_update; + alg->final = ccp_sha_final; + alg->finup = ccp_sha_finup; + alg->digest = ccp_sha_digest; + + halg = &alg->halg; + halg->digestsize = def->digest_size; + + base = &halg->base; + snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); + snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + def->drv_name); + base->cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK; + base->cra_blocksize = def->block_size; + base->cra_ctxsize = sizeof(struct ccp_ctx); + base->cra_priority = CCP_CRA_PRIORITY; + base->cra_type = &crypto_ahash_type; + base->cra_init = ccp_sha_cra_init; + base->cra_exit = ccp_sha_cra_exit; + base->cra_module = THIS_MODULE; + + ret = crypto_register_ahash(alg); + if (ret) { + pr_err("%s ahash algorithm registration error (%d)\n", + base->cra_name, ret); + kfree(ccp_alg); + return ret; + } + + list_add(&ccp_alg->entry, head); + + ret = ccp_register_hmac_alg(head, def, ccp_alg); + + return ret; +} + +int ccp_register_sha_algs(struct list_head *head) +{ + int i, ret; + + for (i = 0; i < ARRAY_SIZE(sha_algs); i++) { + ret = ccp_register_sha_alg(head, &sha_algs[i]); + if (ret) + return ret; + } + + return 0; +} diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h new file mode 100644 index 000000000..76a96f0f4 --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto.h @@ -0,0 +1,194 @@ +/* + * AMD Cryptographic Coprocessor (CCP) crypto API support + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __CCP_CRYPTO_H__ +#define __CCP_CRYPTO_H__ + +#include <linux/list.h> +#include <linux/wait.h> +#include <linux/pci.h> +#include <linux/ccp.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <crypto/aes.h> +#include <crypto/ctr.h> +#include <crypto/hash.h> +#include <crypto/sha.h> + +#define CCP_CRA_PRIORITY 300 + +struct ccp_crypto_ablkcipher_alg { + struct list_head entry; + + u32 mode; + + struct crypto_alg alg; +}; + +struct ccp_crypto_ahash_alg { + struct list_head entry; + + const __be32 *init; + u32 type; + u32 mode; + + /* Child algorithm used for HMAC, CMAC, etc */ + char child_alg[CRYPTO_MAX_ALG_NAME]; + + struct ahash_alg alg; +}; + +static inline struct ccp_crypto_ablkcipher_alg * + ccp_crypto_ablkcipher_alg(struct crypto_tfm *tfm) +{ + struct crypto_alg *alg = tfm->__crt_alg; + + return container_of(alg, struct ccp_crypto_ablkcipher_alg, alg); +} + +static inline struct ccp_crypto_ahash_alg * + ccp_crypto_ahash_alg(struct crypto_tfm *tfm) +{ + struct crypto_alg *alg = tfm->__crt_alg; + struct ahash_alg *ahash_alg; + + ahash_alg = container_of(alg, struct ahash_alg, halg.base); + + return container_of(ahash_alg, struct ccp_crypto_ahash_alg, alg); +} + +/***** AES related defines *****/ +struct ccp_aes_ctx { + /* Fallback cipher for XTS with unsupported unit sizes */ + struct crypto_ablkcipher *tfm_ablkcipher; + + /* Cipher used to generate CMAC K1/K2 keys */ + struct crypto_cipher *tfm_cipher; + + enum ccp_engine engine; + enum ccp_aes_type type; + enum ccp_aes_mode mode; + + struct scatterlist key_sg; + unsigned int key_len; + u8 key[AES_MAX_KEY_SIZE]; + + u8 nonce[CTR_RFC3686_NONCE_SIZE]; + + /* CMAC key structures */ + struct scatterlist k1_sg; + struct scatterlist k2_sg; + unsigned int kn_len; + u8 k1[AES_BLOCK_SIZE]; + u8 k2[AES_BLOCK_SIZE]; +}; + +struct ccp_aes_req_ctx { + struct scatterlist iv_sg; + u8 iv[AES_BLOCK_SIZE]; + + /* Fields used for RFC3686 requests */ + u8 *rfc3686_info; + u8 rfc3686_iv[AES_BLOCK_SIZE]; + + struct ccp_cmd cmd; +}; + +struct ccp_aes_cmac_req_ctx { + unsigned int null_msg; + unsigned int final; + + struct scatterlist *src; + unsigned int nbytes; + + u64 hash_cnt; + unsigned int hash_rem; + + struct sg_table data_sg; + + struct scatterlist iv_sg; + u8 iv[AES_BLOCK_SIZE]; + + struct scatterlist buf_sg; + unsigned int buf_count; + u8 buf[AES_BLOCK_SIZE]; + + struct scatterlist pad_sg; + unsigned int pad_count; + u8 pad[AES_BLOCK_SIZE]; + + struct ccp_cmd cmd; +}; + +/***** SHA related defines *****/ +#define MAX_SHA_CONTEXT_SIZE SHA256_DIGEST_SIZE +#define MAX_SHA_BLOCK_SIZE SHA256_BLOCK_SIZE + +struct ccp_sha_ctx { + struct scatterlist opad_sg; + unsigned int opad_count; + + unsigned int key_len; + u8 key[MAX_SHA_BLOCK_SIZE]; + u8 ipad[MAX_SHA_BLOCK_SIZE]; + u8 opad[MAX_SHA_BLOCK_SIZE]; + struct crypto_shash *hmac_tfm; +}; + +struct ccp_sha_req_ctx { + enum ccp_sha_type type; + + u64 msg_bits; + + unsigned int first; + unsigned int final; + + struct scatterlist *src; + unsigned int nbytes; + + u64 hash_cnt; + unsigned int hash_rem; + + struct sg_table data_sg; + + struct scatterlist ctx_sg; + u8 ctx[MAX_SHA_CONTEXT_SIZE]; + + struct scatterlist buf_sg; + unsigned int buf_count; + u8 buf[MAX_SHA_BLOCK_SIZE]; + + /* CCP driver command */ + struct ccp_cmd cmd; +}; + +/***** Common Context Structure *****/ +struct ccp_ctx { + int (*complete)(struct crypto_async_request *req, int ret); + + union { + struct ccp_aes_ctx aes; + struct ccp_sha_ctx sha; + } u; +}; + +int ccp_crypto_enqueue_request(struct crypto_async_request *req, + struct ccp_cmd *cmd); +struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table, + struct scatterlist *sg_add); + +int ccp_register_aes_algs(struct list_head *head); +int ccp_register_aes_cmac_algs(struct list_head *head); +int ccp_register_aes_xts_algs(struct list_head *head); +int ccp_register_sha_algs(struct list_head *head); + +#endif diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c new file mode 100644 index 000000000..861bacc1b --- /dev/null +++ b/drivers/crypto/ccp/ccp-dev.c @@ -0,0 +1,654 @@ +/* + * AMD Cryptographic Coprocessor (CCP) driver + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/delay.h> +#include <linux/hw_random.h> +#include <linux/cpu.h> +#ifdef CONFIG_X86 +#include <asm/cpu_device_id.h> +#endif +#include <linux/ccp.h> + +#include "ccp-dev.h" + +MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.0.0"); +MODULE_DESCRIPTION("AMD Cryptographic Coprocessor driver"); + +struct ccp_tasklet_data { + struct completion completion; + struct ccp_cmd *cmd; +}; + +static struct ccp_device *ccp_dev; +static inline struct ccp_device *ccp_get_device(void) +{ + return ccp_dev; +} + +static inline void ccp_add_device(struct ccp_device *ccp) +{ + ccp_dev = ccp; +} + +static inline void ccp_del_device(struct ccp_device *ccp) +{ + ccp_dev = NULL; +} + +/** + * ccp_present - check if a CCP device is present + * + * Returns zero if a CCP device is present, -ENODEV otherwise. + */ +int ccp_present(void) +{ + if (ccp_get_device()) + return 0; + + return -ENODEV; +} +EXPORT_SYMBOL_GPL(ccp_present); + +/** + * ccp_enqueue_cmd - queue an operation for processing by the CCP + * + * @cmd: ccp_cmd struct to be processed + * + * Queue a cmd to be processed by the CCP. If queueing the cmd + * would exceed the defined length of the cmd queue the cmd will + * only be queued if the CCP_CMD_MAY_BACKLOG flag is set and will + * result in a return code of -EBUSY. + * + * The callback routine specified in the ccp_cmd struct will be + * called to notify the caller of completion (if the cmd was not + * backlogged) or advancement out of the backlog. If the cmd has + * advanced out of the backlog the "err" value of the callback + * will be -EINPROGRESS. Any other "err" value during callback is + * the result of the operation. + * + * The cmd has been successfully queued if: + * the return code is -EINPROGRESS or + * the return code is -EBUSY and CCP_CMD_MAY_BACKLOG flag is set + */ +int ccp_enqueue_cmd(struct ccp_cmd *cmd) +{ + struct ccp_device *ccp = ccp_get_device(); + unsigned long flags; + unsigned int i; + int ret; + + if (!ccp) + return -ENODEV; + + /* Caller must supply a callback routine */ + if (!cmd->callback) + return -EINVAL; + + cmd->ccp = ccp; + + spin_lock_irqsave(&ccp->cmd_lock, flags); + + i = ccp->cmd_q_count; + + if (ccp->cmd_count >= MAX_CMD_QLEN) { + ret = -EBUSY; + if (cmd->flags & CCP_CMD_MAY_BACKLOG) + list_add_tail(&cmd->entry, &ccp->backlog); + } else { + ret = -EINPROGRESS; + ccp->cmd_count++; + list_add_tail(&cmd->entry, &ccp->cmd); + + /* Find an idle queue */ + if (!ccp->suspending) { + for (i = 0; i < ccp->cmd_q_count; i++) { + if (ccp->cmd_q[i].active) + continue; + + break; + } + } + } + + spin_unlock_irqrestore(&ccp->cmd_lock, flags); + + /* If we found an idle queue, wake it up */ + if (i < ccp->cmd_q_count) + wake_up_process(ccp->cmd_q[i].kthread); + + return ret; +} +EXPORT_SYMBOL_GPL(ccp_enqueue_cmd); + +static void ccp_do_cmd_backlog(struct work_struct *work) +{ + struct ccp_cmd *cmd = container_of(work, struct ccp_cmd, work); + struct ccp_device *ccp = cmd->ccp; + unsigned long flags; + unsigned int i; + + cmd->callback(cmd->data, -EINPROGRESS); + + spin_lock_irqsave(&ccp->cmd_lock, flags); + + ccp->cmd_count++; + list_add_tail(&cmd->entry, &ccp->cmd); + + /* Find an idle queue */ + for (i = 0; i < ccp->cmd_q_count; i++) { + if (ccp->cmd_q[i].active) + continue; + + break; + } + + spin_unlock_irqrestore(&ccp->cmd_lock, flags); + + /* If we found an idle queue, wake it up */ + if (i < ccp->cmd_q_count) + wake_up_process(ccp->cmd_q[i].kthread); +} + +static struct ccp_cmd *ccp_dequeue_cmd(struct ccp_cmd_queue *cmd_q) +{ + struct ccp_device *ccp = cmd_q->ccp; + struct ccp_cmd *cmd = NULL; + struct ccp_cmd *backlog = NULL; + unsigned long flags; + + spin_lock_irqsave(&ccp->cmd_lock, flags); + + cmd_q->active = 0; + + if (ccp->suspending) { + cmd_q->suspended = 1; + + spin_unlock_irqrestore(&ccp->cmd_lock, flags); + wake_up_interruptible(&ccp->suspend_queue); + + return NULL; + } + + if (ccp->cmd_count) { + cmd_q->active = 1; + + cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry); + list_del(&cmd->entry); + + ccp->cmd_count--; + } + + if (!list_empty(&ccp->backlog)) { + backlog = list_first_entry(&ccp->backlog, struct ccp_cmd, + entry); + list_del(&backlog->entry); + } + + spin_unlock_irqrestore(&ccp->cmd_lock, flags); + + if (backlog) { + INIT_WORK(&backlog->work, ccp_do_cmd_backlog); + schedule_work(&backlog->work); + } + + return cmd; +} + +static void ccp_do_cmd_complete(unsigned long data) +{ + struct ccp_tasklet_data *tdata = (struct ccp_tasklet_data *)data; + struct ccp_cmd *cmd = tdata->cmd; + + cmd->callback(cmd->data, cmd->ret); + complete(&tdata->completion); +} + +static int ccp_cmd_queue_thread(void *data) +{ + struct ccp_cmd_queue *cmd_q = (struct ccp_cmd_queue *)data; + struct ccp_cmd *cmd; + struct ccp_tasklet_data tdata; + struct tasklet_struct tasklet; + + tasklet_init(&tasklet, ccp_do_cmd_complete, (unsigned long)&tdata); + + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + schedule(); + + set_current_state(TASK_INTERRUPTIBLE); + + cmd = ccp_dequeue_cmd(cmd_q); + if (!cmd) + continue; + + __set_current_state(TASK_RUNNING); + + /* Execute the command */ + cmd->ret = ccp_run_cmd(cmd_q, cmd); + + /* Schedule the completion callback */ + tdata.cmd = cmd; + init_completion(&tdata.completion); + tasklet_schedule(&tasklet); + wait_for_completion(&tdata.completion); + } + + __set_current_state(TASK_RUNNING); + + return 0; +} + +static int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait) +{ + struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng); + u32 trng_value; + int len = min_t(int, sizeof(trng_value), max); + + /* + * Locking is provided by the caller so we can update device + * hwrng-related fields safely + */ + trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG); + if (!trng_value) { + /* Zero is returned if not data is available or if a + * bad-entropy error is present. Assume an error if + * we exceed TRNG_RETRIES reads of zero. + */ + if (ccp->hwrng_retries++ > TRNG_RETRIES) + return -EIO; + + return 0; + } + + /* Reset the counter and save the rng value */ + ccp->hwrng_retries = 0; + memcpy(data, &trng_value, len); + + return len; +} + +/** + * ccp_alloc_struct - allocate and initialize the ccp_device struct + * + * @dev: device struct of the CCP + */ +struct ccp_device *ccp_alloc_struct(struct device *dev) +{ + struct ccp_device *ccp; + + ccp = devm_kzalloc(dev, sizeof(*ccp), GFP_KERNEL); + if (!ccp) + return NULL; + ccp->dev = dev; + + INIT_LIST_HEAD(&ccp->cmd); + INIT_LIST_HEAD(&ccp->backlog); + + spin_lock_init(&ccp->cmd_lock); + mutex_init(&ccp->req_mutex); + mutex_init(&ccp->ksb_mutex); + ccp->ksb_count = KSB_COUNT; + ccp->ksb_start = 0; + + return ccp; +} + +/** + * ccp_init - initialize the CCP device + * + * @ccp: ccp_device struct + */ +int ccp_init(struct ccp_device *ccp) +{ + struct device *dev = ccp->dev; + struct ccp_cmd_queue *cmd_q; + struct dma_pool *dma_pool; + char dma_pool_name[MAX_DMAPOOL_NAME_LEN]; + unsigned int qmr, qim, i; + int ret; + + /* Find available queues */ + qim = 0; + qmr = ioread32(ccp->io_regs + Q_MASK_REG); + for (i = 0; i < MAX_HW_QUEUES; i++) { + if (!(qmr & (1 << i))) + continue; + + /* Allocate a dma pool for this queue */ + snprintf(dma_pool_name, sizeof(dma_pool_name), "ccp_q%d", i); + dma_pool = dma_pool_create(dma_pool_name, dev, + CCP_DMAPOOL_MAX_SIZE, + CCP_DMAPOOL_ALIGN, 0); + if (!dma_pool) { + dev_err(dev, "unable to allocate dma pool\n"); + ret = -ENOMEM; + goto e_pool; + } + + cmd_q = &ccp->cmd_q[ccp->cmd_q_count]; + ccp->cmd_q_count++; + + cmd_q->ccp = ccp; + cmd_q->id = i; + cmd_q->dma_pool = dma_pool; + + /* Reserve 2 KSB regions for the queue */ + cmd_q->ksb_key = KSB_START + ccp->ksb_start++; + cmd_q->ksb_ctx = KSB_START + ccp->ksb_start++; + ccp->ksb_count -= 2; + + /* Preset some register values and masks that are queue + * number dependent + */ + cmd_q->reg_status = ccp->io_regs + CMD_Q_STATUS_BASE + + (CMD_Q_STATUS_INCR * i); + cmd_q->reg_int_status = ccp->io_regs + CMD_Q_INT_STATUS_BASE + + (CMD_Q_STATUS_INCR * i); + cmd_q->int_ok = 1 << (i * 2); + cmd_q->int_err = 1 << ((i * 2) + 1); + + cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status)); + + init_waitqueue_head(&cmd_q->int_queue); + + /* Build queue interrupt mask (two interrupts per queue) */ + qim |= cmd_q->int_ok | cmd_q->int_err; + +#ifdef CONFIG_ARM64 + /* For arm64 set the recommended queue cache settings */ + iowrite32(ccp->axcache, ccp->io_regs + CMD_Q_CACHE_BASE + + (CMD_Q_CACHE_INC * i)); +#endif + + dev_dbg(dev, "queue #%u available\n", i); + } + if (ccp->cmd_q_count == 0) { + dev_notice(dev, "no command queues available\n"); + ret = -EIO; + goto e_pool; + } + dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count); + + /* Disable and clear interrupts until ready */ + iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG); + for (i = 0; i < ccp->cmd_q_count; i++) { + cmd_q = &ccp->cmd_q[i]; + + ioread32(cmd_q->reg_int_status); + ioread32(cmd_q->reg_status); + } + iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG); + + /* Request an irq */ + ret = ccp->get_irq(ccp); + if (ret) { + dev_err(dev, "unable to allocate an IRQ\n"); + goto e_pool; + } + + /* Initialize the queues used to wait for KSB space and suspend */ + init_waitqueue_head(&ccp->ksb_queue); + init_waitqueue_head(&ccp->suspend_queue); + + /* Create a kthread for each queue */ + for (i = 0; i < ccp->cmd_q_count; i++) { + struct task_struct *kthread; + + cmd_q = &ccp->cmd_q[i]; + + kthread = kthread_create(ccp_cmd_queue_thread, cmd_q, + "ccp-q%u", cmd_q->id); + if (IS_ERR(kthread)) { + dev_err(dev, "error creating queue thread (%ld)\n", + PTR_ERR(kthread)); + ret = PTR_ERR(kthread); + goto e_kthread; + } + + cmd_q->kthread = kthread; + wake_up_process(kthread); + } + + /* Register the RNG */ + ccp->hwrng.name = "ccp-rng"; + ccp->hwrng.read = ccp_trng_read; + ret = hwrng_register(&ccp->hwrng); + if (ret) { + dev_err(dev, "error registering hwrng (%d)\n", ret); + goto e_kthread; + } + + /* Make the device struct available before enabling interrupts */ + ccp_add_device(ccp); + + /* Enable interrupts */ + iowrite32(qim, ccp->io_regs + IRQ_MASK_REG); + + return 0; + +e_kthread: + for (i = 0; i < ccp->cmd_q_count; i++) + if (ccp->cmd_q[i].kthread) + kthread_stop(ccp->cmd_q[i].kthread); + + ccp->free_irq(ccp); + +e_pool: + for (i = 0; i < ccp->cmd_q_count; i++) + dma_pool_destroy(ccp->cmd_q[i].dma_pool); + + return ret; +} + +/** + * ccp_destroy - tear down the CCP device + * + * @ccp: ccp_device struct + */ +void ccp_destroy(struct ccp_device *ccp) +{ + struct ccp_cmd_queue *cmd_q; + struct ccp_cmd *cmd; + unsigned int qim, i; + + /* Remove general access to the device struct */ + ccp_del_device(ccp); + + /* Unregister the RNG */ + hwrng_unregister(&ccp->hwrng); + + /* Stop the queue kthreads */ + for (i = 0; i < ccp->cmd_q_count; i++) + if (ccp->cmd_q[i].kthread) + kthread_stop(ccp->cmd_q[i].kthread); + + /* Build queue interrupt mask (two interrupt masks per queue) */ + qim = 0; + for (i = 0; i < ccp->cmd_q_count; i++) { + cmd_q = &ccp->cmd_q[i]; + qim |= cmd_q->int_ok | cmd_q->int_err; + } + + /* Disable and clear interrupts */ + iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG); + for (i = 0; i < ccp->cmd_q_count; i++) { + cmd_q = &ccp->cmd_q[i]; + + ioread32(cmd_q->reg_int_status); + ioread32(cmd_q->reg_status); + } + iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG); + + ccp->free_irq(ccp); + + for (i = 0; i < ccp->cmd_q_count; i++) + dma_pool_destroy(ccp->cmd_q[i].dma_pool); + + /* Flush the cmd and backlog queue */ + while (!list_empty(&ccp->cmd)) { + /* Invoke the callback directly with an error code */ + cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry); + list_del(&cmd->entry); + cmd->callback(cmd->data, -ENODEV); + } + while (!list_empty(&ccp->backlog)) { + /* Invoke the callback directly with an error code */ + cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry); + list_del(&cmd->entry); + cmd->callback(cmd->data, -ENODEV); + } +} + +/** + * ccp_irq_handler - handle interrupts generated by the CCP device + * + * @irq: the irq associated with the interrupt + * @data: the data value supplied when the irq was created + */ +irqreturn_t ccp_irq_handler(int irq, void *data) +{ + struct device *dev = data; + struct ccp_device *ccp = dev_get_drvdata(dev); + struct ccp_cmd_queue *cmd_q; + u32 q_int, status; + unsigned int i; + + status = ioread32(ccp->io_regs + IRQ_STATUS_REG); + + for (i = 0; i < ccp->cmd_q_count; i++) { + cmd_q = &ccp->cmd_q[i]; + + q_int = status & (cmd_q->int_ok | cmd_q->int_err); + if (q_int) { + cmd_q->int_status = status; + cmd_q->q_status = ioread32(cmd_q->reg_status); + cmd_q->q_int_status = ioread32(cmd_q->reg_int_status); + + /* On error, only save the first error value */ + if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error) + cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status); + + cmd_q->int_rcvd = 1; + + /* Acknowledge the interrupt and wake the kthread */ + iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG); + wake_up_interruptible(&cmd_q->int_queue); + } + } + + return IRQ_HANDLED; +} + +#ifdef CONFIG_PM +bool ccp_queues_suspended(struct ccp_device *ccp) +{ + unsigned int suspended = 0; + unsigned long flags; + unsigned int i; + + spin_lock_irqsave(&ccp->cmd_lock, flags); + + for (i = 0; i < ccp->cmd_q_count; i++) + if (ccp->cmd_q[i].suspended) + suspended++; + + spin_unlock_irqrestore(&ccp->cmd_lock, flags); + + return ccp->cmd_q_count == suspended; +} +#endif + +#ifdef CONFIG_X86 +static const struct x86_cpu_id ccp_support[] = { + { X86_VENDOR_AMD, 22, }, + { }, +}; +#endif + +static int __init ccp_mod_init(void) +{ +#ifdef CONFIG_X86 + struct cpuinfo_x86 *cpuinfo = &boot_cpu_data; + int ret; + + if (!x86_match_cpu(ccp_support)) + return -ENODEV; + + switch (cpuinfo->x86) { + case 22: + if ((cpuinfo->x86_model < 48) || (cpuinfo->x86_model > 63)) + return -ENODEV; + + ret = ccp_pci_init(); + if (ret) + return ret; + + /* Don't leave the driver loaded if init failed */ + if (!ccp_get_device()) { + ccp_pci_exit(); + return -ENODEV; + } + + return 0; + + break; + } +#endif + +#ifdef CONFIG_ARM64 + int ret; + + ret = ccp_platform_init(); + if (ret) + return ret; + + /* Don't leave the driver loaded if init failed */ + if (!ccp_get_device()) { + ccp_platform_exit(); + return -ENODEV; + } + + return 0; +#endif + + return -ENODEV; +} + +static void __exit ccp_mod_exit(void) +{ +#ifdef CONFIG_X86 + struct cpuinfo_x86 *cpuinfo = &boot_cpu_data; + + switch (cpuinfo->x86) { + case 22: + ccp_pci_exit(); + break; + } +#endif + +#ifdef CONFIG_ARM64 + ccp_platform_exit(); +#endif +} + +module_init(ccp_mod_init); +module_exit(ccp_mod_exit); diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h new file mode 100644 index 000000000..6ff89031f --- /dev/null +++ b/drivers/crypto/ccp/ccp-dev.h @@ -0,0 +1,276 @@ +/* + * AMD Cryptographic Coprocessor (CCP) driver + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __CCP_DEV_H__ +#define __CCP_DEV_H__ + +#include <linux/device.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/wait.h> +#include <linux/dmapool.h> +#include <linux/hw_random.h> +#include <linux/bitops.h> + +#define MAX_DMAPOOL_NAME_LEN 32 + +#define MAX_HW_QUEUES 5 +#define MAX_CMD_QLEN 100 + +#define TRNG_RETRIES 10 + +#define CACHE_NONE 0x00 +#define CACHE_WB_NO_ALLOC 0xb7 + +/****** Register Mappings ******/ +#define Q_MASK_REG 0x000 +#define TRNG_OUT_REG 0x00c +#define IRQ_MASK_REG 0x040 +#define IRQ_STATUS_REG 0x200 + +#define DEL_CMD_Q_JOB 0x124 +#define DEL_Q_ACTIVE 0x00000200 +#define DEL_Q_ID_SHIFT 6 + +#define CMD_REQ0 0x180 +#define CMD_REQ_INCR 0x04 + +#define CMD_Q_STATUS_BASE 0x210 +#define CMD_Q_INT_STATUS_BASE 0x214 +#define CMD_Q_STATUS_INCR 0x20 + +#define CMD_Q_CACHE_BASE 0x228 +#define CMD_Q_CACHE_INC 0x20 + +#define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f) +#define CMD_Q_DEPTH(__qs) (((__qs) >> 12) & 0x0000000f) + +/****** REQ0 Related Values ******/ +#define REQ0_WAIT_FOR_WRITE 0x00000004 +#define REQ0_INT_ON_COMPLETE 0x00000002 +#define REQ0_STOP_ON_COMPLETE 0x00000001 + +#define REQ0_CMD_Q_SHIFT 9 +#define REQ0_JOBID_SHIFT 3 + +/****** REQ1 Related Values ******/ +#define REQ1_PROTECT_SHIFT 27 +#define REQ1_ENGINE_SHIFT 23 +#define REQ1_KEY_KSB_SHIFT 2 + +#define REQ1_EOM 0x00000002 +#define REQ1_INIT 0x00000001 + +/* AES Related Values */ +#define REQ1_AES_TYPE_SHIFT 21 +#define REQ1_AES_MODE_SHIFT 18 +#define REQ1_AES_ACTION_SHIFT 17 +#define REQ1_AES_CFB_SIZE_SHIFT 10 + +/* XTS-AES Related Values */ +#define REQ1_XTS_AES_SIZE_SHIFT 10 + +/* SHA Related Values */ +#define REQ1_SHA_TYPE_SHIFT 21 + +/* RSA Related Values */ +#define REQ1_RSA_MOD_SIZE_SHIFT 10 + +/* Pass-Through Related Values */ +#define REQ1_PT_BW_SHIFT 12 +#define REQ1_PT_BS_SHIFT 10 + +/* ECC Related Values */ +#define REQ1_ECC_AFFINE_CONVERT 0x00200000 +#define REQ1_ECC_FUNCTION_SHIFT 18 + +/****** REQ4 Related Values ******/ +#define REQ4_KSB_SHIFT 18 +#define REQ4_MEMTYPE_SHIFT 16 + +/****** REQ6 Related Values ******/ +#define REQ6_MEMTYPE_SHIFT 16 + +/****** Key Storage Block ******/ +#define KSB_START 77 +#define KSB_END 127 +#define KSB_COUNT (KSB_END - KSB_START + 1) +#define CCP_KSB_BITS 256 +#define CCP_KSB_BYTES 32 + +#define CCP_JOBID_MASK 0x0000003f + +#define CCP_DMAPOOL_MAX_SIZE 64 +#define CCP_DMAPOOL_ALIGN BIT(5) + +#define CCP_REVERSE_BUF_SIZE 64 + +#define CCP_AES_KEY_KSB_COUNT 1 +#define CCP_AES_CTX_KSB_COUNT 1 + +#define CCP_XTS_AES_KEY_KSB_COUNT 1 +#define CCP_XTS_AES_CTX_KSB_COUNT 1 + +#define CCP_SHA_KSB_COUNT 1 + +#define CCP_RSA_MAX_WIDTH 4096 + +#define CCP_PASSTHRU_BLOCKSIZE 256 +#define CCP_PASSTHRU_MASKSIZE 32 +#define CCP_PASSTHRU_KSB_COUNT 1 + +#define CCP_ECC_MODULUS_BYTES 48 /* 384-bits */ +#define CCP_ECC_MAX_OPERANDS 6 +#define CCP_ECC_MAX_OUTPUTS 3 +#define CCP_ECC_SRC_BUF_SIZE 448 +#define CCP_ECC_DST_BUF_SIZE 192 +#define CCP_ECC_OPERAND_SIZE 64 +#define CCP_ECC_OUTPUT_SIZE 64 +#define CCP_ECC_RESULT_OFFSET 60 +#define CCP_ECC_RESULT_SUCCESS 0x0001 + +struct ccp_device; +struct ccp_cmd; + +struct ccp_cmd_queue { + struct ccp_device *ccp; + + /* Queue identifier */ + u32 id; + + /* Queue dma pool */ + struct dma_pool *dma_pool; + + /* Queue reserved KSB regions */ + u32 ksb_key; + u32 ksb_ctx; + + /* Queue processing thread */ + struct task_struct *kthread; + unsigned int active; + unsigned int suspended; + + /* Number of free command slots available */ + unsigned int free_slots; + + /* Interrupt masks */ + u32 int_ok; + u32 int_err; + + /* Register addresses for queue */ + void __iomem *reg_status; + void __iomem *reg_int_status; + + /* Status values from job */ + u32 int_status; + u32 q_status; + u32 q_int_status; + u32 cmd_error; + + /* Interrupt wait queue */ + wait_queue_head_t int_queue; + unsigned int int_rcvd; +} ____cacheline_aligned; + +struct ccp_device { + struct device *dev; + + /* + * Bus specific device information + */ + void *dev_specific; + int (*get_irq)(struct ccp_device *ccp); + void (*free_irq)(struct ccp_device *ccp); + unsigned int irq; + + /* + * I/O area used for device communication. The register mapping + * starts at an offset into the mapped bar. + * The CMD_REQx registers and the Delete_Cmd_Queue_Job register + * need to be protected while a command queue thread is accessing + * them. + */ + struct mutex req_mutex ____cacheline_aligned; + void __iomem *io_map; + void __iomem *io_regs; + + /* + * Master lists that all cmds are queued on. Because there can be + * more than one CCP command queue that can process a cmd a separate + * backlog list is neeeded so that the backlog completion call + * completes before the cmd is available for execution. + */ + spinlock_t cmd_lock ____cacheline_aligned; + unsigned int cmd_count; + struct list_head cmd; + struct list_head backlog; + + /* + * The command queues. These represent the queues available on the + * CCP that are available for processing cmds + */ + struct ccp_cmd_queue cmd_q[MAX_HW_QUEUES]; + unsigned int cmd_q_count; + + /* + * Support for the CCP True RNG + */ + struct hwrng hwrng; + unsigned int hwrng_retries; + + /* + * A counter used to generate job-ids for cmds submitted to the CCP + */ + atomic_t current_id ____cacheline_aligned; + + /* + * The CCP uses key storage blocks (KSB) to maintain context for certain + * operations. To prevent multiple cmds from using the same KSB range + * a command queue reserves a KSB range for the duration of the cmd. + * Each queue, will however, reserve 2 KSB blocks for operations that + * only require single KSB entries (eg. AES context/iv and key) in order + * to avoid allocation contention. This will reserve at most 10 KSB + * entries, leaving 40 KSB entries available for dynamic allocation. + */ + struct mutex ksb_mutex ____cacheline_aligned; + DECLARE_BITMAP(ksb, KSB_COUNT); + wait_queue_head_t ksb_queue; + unsigned int ksb_avail; + unsigned int ksb_count; + u32 ksb_start; + + /* Suspend support */ + unsigned int suspending; + wait_queue_head_t suspend_queue; + + /* DMA caching attribute support */ + unsigned int axcache; +}; + +int ccp_pci_init(void); +void ccp_pci_exit(void); + +int ccp_platform_init(void); +void ccp_platform_exit(void); + +struct ccp_device *ccp_alloc_struct(struct device *dev); +int ccp_init(struct ccp_device *ccp); +void ccp_destroy(struct ccp_device *ccp); +bool ccp_queues_suspended(struct ccp_device *ccp); + +irqreturn_t ccp_irq_handler(int irq, void *data); + +int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd); + +#endif diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c new file mode 100644 index 000000000..71f2e3c89 --- /dev/null +++ b/drivers/crypto/ccp/ccp-ops.c @@ -0,0 +1,2126 @@ +/* + * AMD Cryptographic Coprocessor (CCP) driver + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/delay.h> +#include <linux/ccp.h> +#include <linux/scatterlist.h> +#include <crypto/scatterwalk.h> +#include <crypto/sha.h> + +#include "ccp-dev.h" + +enum ccp_memtype { + CCP_MEMTYPE_SYSTEM = 0, + CCP_MEMTYPE_KSB, + CCP_MEMTYPE_LOCAL, + CCP_MEMTYPE__LAST, +}; + +struct ccp_dma_info { + dma_addr_t address; + unsigned int offset; + unsigned int length; + enum dma_data_direction dir; +}; + +struct ccp_dm_workarea { + struct device *dev; + struct dma_pool *dma_pool; + unsigned int length; + + u8 *address; + struct ccp_dma_info dma; +}; + +struct ccp_sg_workarea { + struct scatterlist *sg; + unsigned int nents; + unsigned int length; + + struct scatterlist *dma_sg; + struct device *dma_dev; + unsigned int dma_count; + enum dma_data_direction dma_dir; + + unsigned int sg_used; + + u64 bytes_left; +}; + +struct ccp_data { + struct ccp_sg_workarea sg_wa; + struct ccp_dm_workarea dm_wa; +}; + +struct ccp_mem { + enum ccp_memtype type; + union { + struct ccp_dma_info dma; + u32 ksb; + } u; +}; + +struct ccp_aes_op { + enum ccp_aes_type type; + enum ccp_aes_mode mode; + enum ccp_aes_action action; +}; + +struct ccp_xts_aes_op { + enum ccp_aes_action action; + enum ccp_xts_aes_unit_size unit_size; +}; + +struct ccp_sha_op { + enum ccp_sha_type type; + u64 msg_bits; +}; + +struct ccp_rsa_op { + u32 mod_size; + u32 input_len; +}; + +struct ccp_passthru_op { + enum ccp_passthru_bitwise bit_mod; + enum ccp_passthru_byteswap byte_swap; +}; + +struct ccp_ecc_op { + enum ccp_ecc_function function; +}; + +struct ccp_op { + struct ccp_cmd_queue *cmd_q; + + u32 jobid; + u32 ioc; + u32 soc; + u32 ksb_key; + u32 ksb_ctx; + u32 init; + u32 eom; + + struct ccp_mem src; + struct ccp_mem dst; + + union { + struct ccp_aes_op aes; + struct ccp_xts_aes_op xts; + struct ccp_sha_op sha; + struct ccp_rsa_op rsa; + struct ccp_passthru_op passthru; + struct ccp_ecc_op ecc; + } u; +}; + +/* SHA initial context values */ +static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = { + cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1), + cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3), + cpu_to_be32(SHA1_H4), 0, 0, 0, +}; + +static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = { + cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1), + cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3), + cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5), + cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7), +}; + +static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = { + cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1), + cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3), + cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5), + cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7), +}; + +/* The CCP cannot perform zero-length sha operations so the caller + * is required to buffer data for the final operation. However, a + * sha operation for a message with a total length of zero is valid + * so known values are required to supply the result. + */ +static const u8 ccp_sha1_zero[CCP_SHA_CTXSIZE] = { + 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, + 0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90, + 0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +static const u8 ccp_sha224_zero[CCP_SHA_CTXSIZE] = { + 0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9, + 0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4, + 0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a, + 0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00, +}; + +static const u8 ccp_sha256_zero[CCP_SHA_CTXSIZE] = { + 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, + 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, + 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, + 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55, +}; + +static u32 ccp_addr_lo(struct ccp_dma_info *info) +{ + return lower_32_bits(info->address + info->offset); +} + +static u32 ccp_addr_hi(struct ccp_dma_info *info) +{ + return upper_32_bits(info->address + info->offset) & 0x0000ffff; +} + +static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count) +{ + struct ccp_cmd_queue *cmd_q = op->cmd_q; + struct ccp_device *ccp = cmd_q->ccp; + void __iomem *cr_addr; + u32 cr0, cmd; + unsigned int i; + int ret = 0; + + /* We could read a status register to see how many free slots + * are actually available, but reading that register resets it + * and you could lose some error information. + */ + cmd_q->free_slots--; + + cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT) + | (op->jobid << REQ0_JOBID_SHIFT) + | REQ0_WAIT_FOR_WRITE; + + if (op->soc) + cr0 |= REQ0_STOP_ON_COMPLETE + | REQ0_INT_ON_COMPLETE; + + if (op->ioc || !cmd_q->free_slots) + cr0 |= REQ0_INT_ON_COMPLETE; + + /* Start at CMD_REQ1 */ + cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR; + + mutex_lock(&ccp->req_mutex); + + /* Write CMD_REQ1 through CMD_REQx first */ + for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR) + iowrite32(*(cr + i), cr_addr); + + /* Tell the CCP to start */ + wmb(); + iowrite32(cr0, ccp->io_regs + CMD_REQ0); + + mutex_unlock(&ccp->req_mutex); + + if (cr0 & REQ0_INT_ON_COMPLETE) { + /* Wait for the job to complete */ + ret = wait_event_interruptible(cmd_q->int_queue, + cmd_q->int_rcvd); + if (ret || cmd_q->cmd_error) { + /* On error delete all related jobs from the queue */ + cmd = (cmd_q->id << DEL_Q_ID_SHIFT) + | op->jobid; + + iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB); + + if (!ret) + ret = -EIO; + } else if (op->soc) { + /* Delete just head job from the queue on SoC */ + cmd = DEL_Q_ACTIVE + | (cmd_q->id << DEL_Q_ID_SHIFT) + | op->jobid; + + iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB); + } + + cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status); + + cmd_q->int_rcvd = 0; + } + + return ret; +} + +static int ccp_perform_aes(struct ccp_op *op) +{ + u32 cr[6]; + + /* Fill out the register contents for REQ1 through REQ6 */ + cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT) + | (op->u.aes.type << REQ1_AES_TYPE_SHIFT) + | (op->u.aes.mode << REQ1_AES_MODE_SHIFT) + | (op->u.aes.action << REQ1_AES_ACTION_SHIFT) + | (op->ksb_key << REQ1_KEY_KSB_SHIFT); + cr[1] = op->src.u.dma.length - 1; + cr[2] = ccp_addr_lo(&op->src.u.dma); + cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) + | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->src.u.dma); + cr[4] = ccp_addr_lo(&op->dst.u.dma); + cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->dst.u.dma); + + if (op->u.aes.mode == CCP_AES_MODE_CFB) + cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT); + + if (op->eom) + cr[0] |= REQ1_EOM; + + if (op->init) + cr[0] |= REQ1_INIT; + + return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); +} + +static int ccp_perform_xts_aes(struct ccp_op *op) +{ + u32 cr[6]; + + /* Fill out the register contents for REQ1 through REQ6 */ + cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT) + | (op->u.xts.action << REQ1_AES_ACTION_SHIFT) + | (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT) + | (op->ksb_key << REQ1_KEY_KSB_SHIFT); + cr[1] = op->src.u.dma.length - 1; + cr[2] = ccp_addr_lo(&op->src.u.dma); + cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) + | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->src.u.dma); + cr[4] = ccp_addr_lo(&op->dst.u.dma); + cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->dst.u.dma); + + if (op->eom) + cr[0] |= REQ1_EOM; + + if (op->init) + cr[0] |= REQ1_INIT; + + return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); +} + +static int ccp_perform_sha(struct ccp_op *op) +{ + u32 cr[6]; + + /* Fill out the register contents for REQ1 through REQ6 */ + cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT) + | (op->u.sha.type << REQ1_SHA_TYPE_SHIFT) + | REQ1_INIT; + cr[1] = op->src.u.dma.length - 1; + cr[2] = ccp_addr_lo(&op->src.u.dma); + cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) + | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->src.u.dma); + + if (op->eom) { + cr[0] |= REQ1_EOM; + cr[4] = lower_32_bits(op->u.sha.msg_bits); + cr[5] = upper_32_bits(op->u.sha.msg_bits); + } else { + cr[4] = 0; + cr[5] = 0; + } + + return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); +} + +static int ccp_perform_rsa(struct ccp_op *op) +{ + u32 cr[6]; + + /* Fill out the register contents for REQ1 through REQ6 */ + cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT) + | (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT) + | (op->ksb_key << REQ1_KEY_KSB_SHIFT) + | REQ1_EOM; + cr[1] = op->u.rsa.input_len - 1; + cr[2] = ccp_addr_lo(&op->src.u.dma); + cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) + | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->src.u.dma); + cr[4] = ccp_addr_lo(&op->dst.u.dma); + cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->dst.u.dma); + + return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); +} + +static int ccp_perform_passthru(struct ccp_op *op) +{ + u32 cr[6]; + + /* Fill out the register contents for REQ1 through REQ6 */ + cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT) + | (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT) + | (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT); + + if (op->src.type == CCP_MEMTYPE_SYSTEM) + cr[1] = op->src.u.dma.length - 1; + else + cr[1] = op->dst.u.dma.length - 1; + + if (op->src.type == CCP_MEMTYPE_SYSTEM) { + cr[2] = ccp_addr_lo(&op->src.u.dma); + cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->src.u.dma); + + if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP) + cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT); + } else { + cr[2] = op->src.u.ksb * CCP_KSB_BYTES; + cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT); + } + + if (op->dst.type == CCP_MEMTYPE_SYSTEM) { + cr[4] = ccp_addr_lo(&op->dst.u.dma); + cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->dst.u.dma); + } else { + cr[4] = op->dst.u.ksb * CCP_KSB_BYTES; + cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT); + } + + if (op->eom) + cr[0] |= REQ1_EOM; + + return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); +} + +static int ccp_perform_ecc(struct ccp_op *op) +{ + u32 cr[6]; + + /* Fill out the register contents for REQ1 through REQ6 */ + cr[0] = REQ1_ECC_AFFINE_CONVERT + | (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT) + | (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT) + | REQ1_EOM; + cr[1] = op->src.u.dma.length - 1; + cr[2] = ccp_addr_lo(&op->src.u.dma); + cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->src.u.dma); + cr[4] = ccp_addr_lo(&op->dst.u.dma); + cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->dst.u.dma); + + return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); +} + +static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count) +{ + int start; + + for (;;) { + mutex_lock(&ccp->ksb_mutex); + + start = (u32)bitmap_find_next_zero_area(ccp->ksb, + ccp->ksb_count, + ccp->ksb_start, + count, 0); + if (start <= ccp->ksb_count) { + bitmap_set(ccp->ksb, start, count); + + mutex_unlock(&ccp->ksb_mutex); + break; + } + + ccp->ksb_avail = 0; + + mutex_unlock(&ccp->ksb_mutex); + + /* Wait for KSB entries to become available */ + if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail)) + return 0; + } + + return KSB_START + start; +} + +static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start, + unsigned int count) +{ + if (!start) + return; + + mutex_lock(&ccp->ksb_mutex); + + bitmap_clear(ccp->ksb, start - KSB_START, count); + + ccp->ksb_avail = 1; + + mutex_unlock(&ccp->ksb_mutex); + + wake_up_interruptible_all(&ccp->ksb_queue); +} + +static u32 ccp_gen_jobid(struct ccp_device *ccp) +{ + return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK; +} + +static void ccp_sg_free(struct ccp_sg_workarea *wa) +{ + if (wa->dma_count) + dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir); + + wa->dma_count = 0; +} + +static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev, + struct scatterlist *sg, u64 len, + enum dma_data_direction dma_dir) +{ + memset(wa, 0, sizeof(*wa)); + + wa->sg = sg; + if (!sg) + return 0; + + wa->nents = sg_nents(sg); + wa->length = sg->length; + wa->bytes_left = len; + wa->sg_used = 0; + + if (len == 0) + return 0; + + if (dma_dir == DMA_NONE) + return 0; + + wa->dma_sg = sg; + wa->dma_dev = dev; + wa->dma_dir = dma_dir; + wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir); + if (!wa->dma_count) + return -ENOMEM; + + return 0; +} + +static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len) +{ + unsigned int nbytes = min_t(u64, len, wa->bytes_left); + + if (!wa->sg) + return; + + wa->sg_used += nbytes; + wa->bytes_left -= nbytes; + if (wa->sg_used == wa->sg->length) { + wa->sg = sg_next(wa->sg); + wa->sg_used = 0; + } +} + +static void ccp_dm_free(struct ccp_dm_workarea *wa) +{ + if (wa->length <= CCP_DMAPOOL_MAX_SIZE) { + if (wa->address) + dma_pool_free(wa->dma_pool, wa->address, + wa->dma.address); + } else { + if (wa->dma.address) + dma_unmap_single(wa->dev, wa->dma.address, wa->length, + wa->dma.dir); + kfree(wa->address); + } + + wa->address = NULL; + wa->dma.address = 0; +} + +static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa, + struct ccp_cmd_queue *cmd_q, + unsigned int len, + enum dma_data_direction dir) +{ + memset(wa, 0, sizeof(*wa)); + + if (!len) + return 0; + + wa->dev = cmd_q->ccp->dev; + wa->length = len; + + if (len <= CCP_DMAPOOL_MAX_SIZE) { + wa->dma_pool = cmd_q->dma_pool; + + wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL, + &wa->dma.address); + if (!wa->address) + return -ENOMEM; + + wa->dma.length = CCP_DMAPOOL_MAX_SIZE; + + memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE); + } else { + wa->address = kzalloc(len, GFP_KERNEL); + if (!wa->address) + return -ENOMEM; + + wa->dma.address = dma_map_single(wa->dev, wa->address, len, + dir); + if (!wa->dma.address) + return -ENOMEM; + + wa->dma.length = len; + } + wa->dma.dir = dir; + + return 0; +} + +static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset, + struct scatterlist *sg, unsigned int sg_offset, + unsigned int len) +{ + WARN_ON(!wa->address); + + scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len, + 0); +} + +static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset, + struct scatterlist *sg, unsigned int sg_offset, + unsigned int len) +{ + WARN_ON(!wa->address); + + scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len, + 1); +} + +static void ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa, + struct scatterlist *sg, + unsigned int len, unsigned int se_len, + bool sign_extend) +{ + unsigned int nbytes, sg_offset, dm_offset, ksb_len, i; + u8 buffer[CCP_REVERSE_BUF_SIZE]; + + BUG_ON(se_len > sizeof(buffer)); + + sg_offset = len; + dm_offset = 0; + nbytes = len; + while (nbytes) { + ksb_len = min_t(unsigned int, nbytes, se_len); + sg_offset -= ksb_len; + + scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0); + for (i = 0; i < ksb_len; i++) + wa->address[dm_offset + i] = buffer[ksb_len - i - 1]; + + dm_offset += ksb_len; + nbytes -= ksb_len; + + if ((ksb_len != se_len) && sign_extend) { + /* Must sign-extend to nearest sign-extend length */ + if (wa->address[dm_offset - 1] & 0x80) + memset(wa->address + dm_offset, 0xff, + se_len - ksb_len); + } + } +} + +static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa, + struct scatterlist *sg, + unsigned int len) +{ + unsigned int nbytes, sg_offset, dm_offset, ksb_len, i; + u8 buffer[CCP_REVERSE_BUF_SIZE]; + + sg_offset = 0; + dm_offset = len; + nbytes = len; + while (nbytes) { + ksb_len = min_t(unsigned int, nbytes, sizeof(buffer)); + dm_offset -= ksb_len; + + for (i = 0; i < ksb_len; i++) + buffer[ksb_len - i - 1] = wa->address[dm_offset + i]; + scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1); + + sg_offset += ksb_len; + nbytes -= ksb_len; + } +} + +static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q) +{ + ccp_dm_free(&data->dm_wa); + ccp_sg_free(&data->sg_wa); +} + +static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q, + struct scatterlist *sg, u64 sg_len, + unsigned int dm_len, + enum dma_data_direction dir) +{ + int ret; + + memset(data, 0, sizeof(*data)); + + ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len, + dir); + if (ret) + goto e_err; + + ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir); + if (ret) + goto e_err; + + return 0; + +e_err: + ccp_free_data(data, cmd_q); + + return ret; +} + +static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from) +{ + struct ccp_sg_workarea *sg_wa = &data->sg_wa; + struct ccp_dm_workarea *dm_wa = &data->dm_wa; + unsigned int buf_count, nbytes; + + /* Clear the buffer if setting it */ + if (!from) + memset(dm_wa->address, 0, dm_wa->length); + + if (!sg_wa->sg) + return 0; + + /* Perform the copy operation + * nbytes will always be <= UINT_MAX because dm_wa->length is + * an unsigned int + */ + nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length); + scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used, + nbytes, from); + + /* Update the structures and generate the count */ + buf_count = 0; + while (sg_wa->bytes_left && (buf_count < dm_wa->length)) { + nbytes = min(sg_wa->sg->length - sg_wa->sg_used, + dm_wa->length - buf_count); + nbytes = min_t(u64, sg_wa->bytes_left, nbytes); + + buf_count += nbytes; + ccp_update_sg_workarea(sg_wa, nbytes); + } + + return buf_count; +} + +static unsigned int ccp_fill_queue_buf(struct ccp_data *data) +{ + return ccp_queue_buf(data, 0); +} + +static unsigned int ccp_empty_queue_buf(struct ccp_data *data) +{ + return ccp_queue_buf(data, 1); +} + +static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst, + struct ccp_op *op, unsigned int block_size, + bool blocksize_op) +{ + unsigned int sg_src_len, sg_dst_len, op_len; + + /* The CCP can only DMA from/to one address each per operation. This + * requires that we find the smallest DMA area between the source + * and destination. The resulting len values will always be <= UINT_MAX + * because the dma length is an unsigned int. + */ + sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used; + sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len); + + if (dst) { + sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used; + sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len); + op_len = min(sg_src_len, sg_dst_len); + } else { + op_len = sg_src_len; + } + + /* The data operation length will be at least block_size in length + * or the smaller of available sg room remaining for the source or + * the destination + */ + op_len = max(op_len, block_size); + + /* Unless we have to buffer data, there's no reason to wait */ + op->soc = 0; + + if (sg_src_len < block_size) { + /* Not enough data in the sg element, so it + * needs to be buffered into a blocksize chunk + */ + int cp_len = ccp_fill_queue_buf(src); + + op->soc = 1; + op->src.u.dma.address = src->dm_wa.dma.address; + op->src.u.dma.offset = 0; + op->src.u.dma.length = (blocksize_op) ? block_size : cp_len; + } else { + /* Enough data in the sg element, but we need to + * adjust for any previously copied data + */ + op->src.u.dma.address = sg_dma_address(src->sg_wa.sg); + op->src.u.dma.offset = src->sg_wa.sg_used; + op->src.u.dma.length = op_len & ~(block_size - 1); + + ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length); + } + + if (dst) { + if (sg_dst_len < block_size) { + /* Not enough room in the sg element or we're on the + * last piece of data (when using padding), so the + * output needs to be buffered into a blocksize chunk + */ + op->soc = 1; + op->dst.u.dma.address = dst->dm_wa.dma.address; + op->dst.u.dma.offset = 0; + op->dst.u.dma.length = op->src.u.dma.length; + } else { + /* Enough room in the sg element, but we need to + * adjust for any previously used area + */ + op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg); + op->dst.u.dma.offset = dst->sg_wa.sg_used; + op->dst.u.dma.length = op->src.u.dma.length; + } + } +} + +static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst, + struct ccp_op *op) +{ + op->init = 0; + + if (dst) { + if (op->dst.u.dma.address == dst->dm_wa.dma.address) + ccp_empty_queue_buf(dst); + else + ccp_update_sg_workarea(&dst->sg_wa, + op->dst.u.dma.length); + } +} + +static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q, + struct ccp_dm_workarea *wa, u32 jobid, u32 ksb, + u32 byte_swap, bool from) +{ + struct ccp_op op; + + memset(&op, 0, sizeof(op)); + + op.cmd_q = cmd_q; + op.jobid = jobid; + op.eom = 1; + + if (from) { + op.soc = 1; + op.src.type = CCP_MEMTYPE_KSB; + op.src.u.ksb = ksb; + op.dst.type = CCP_MEMTYPE_SYSTEM; + op.dst.u.dma.address = wa->dma.address; + op.dst.u.dma.length = wa->length; + } else { + op.src.type = CCP_MEMTYPE_SYSTEM; + op.src.u.dma.address = wa->dma.address; + op.src.u.dma.length = wa->length; + op.dst.type = CCP_MEMTYPE_KSB; + op.dst.u.ksb = ksb; + } + + op.u.passthru.byte_swap = byte_swap; + + return ccp_perform_passthru(&op); +} + +static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q, + struct ccp_dm_workarea *wa, u32 jobid, u32 ksb, + u32 byte_swap) +{ + return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false); +} + +static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q, + struct ccp_dm_workarea *wa, u32 jobid, u32 ksb, + u32 byte_swap) +{ + return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true); +} + +static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, + struct ccp_cmd *cmd) +{ + struct ccp_aes_engine *aes = &cmd->u.aes; + struct ccp_dm_workarea key, ctx; + struct ccp_data src; + struct ccp_op op; + unsigned int dm_offset; + int ret; + + if (!((aes->key_len == AES_KEYSIZE_128) || + (aes->key_len == AES_KEYSIZE_192) || + (aes->key_len == AES_KEYSIZE_256))) + return -EINVAL; + + if (aes->src_len & (AES_BLOCK_SIZE - 1)) + return -EINVAL; + + if (aes->iv_len != AES_BLOCK_SIZE) + return -EINVAL; + + if (!aes->key || !aes->iv || !aes->src) + return -EINVAL; + + if (aes->cmac_final) { + if (aes->cmac_key_len != AES_BLOCK_SIZE) + return -EINVAL; + + if (!aes->cmac_key) + return -EINVAL; + } + + BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1); + BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1); + + ret = -EIO; + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = ccp_gen_jobid(cmd_q->ccp); + op.ksb_key = cmd_q->ksb_key; + op.ksb_ctx = cmd_q->ksb_ctx; + op.init = 1; + op.u.aes.type = aes->type; + op.u.aes.mode = aes->mode; + op.u.aes.action = aes->action; + + /* All supported key sizes fit in a single (32-byte) KSB entry + * and must be in little endian format. Use the 256-bit byte + * swap passthru option to convert from big endian to little + * endian. + */ + ret = ccp_init_dm_workarea(&key, cmd_q, + CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES, + DMA_TO_DEVICE); + if (ret) + return ret; + + dm_offset = CCP_KSB_BYTES - aes->key_len; + ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len); + ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_key; + } + + /* The AES context fits in a single (32-byte) KSB entry and + * must be in little endian format. Use the 256-bit byte swap + * passthru option to convert from big endian to little endian. + */ + ret = ccp_init_dm_workarea(&ctx, cmd_q, + CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES, + DMA_BIDIRECTIONAL); + if (ret) + goto e_key; + + dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; + ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); + ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_ctx; + } + + /* Send data to the CCP AES engine */ + ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len, + AES_BLOCK_SIZE, DMA_TO_DEVICE); + if (ret) + goto e_ctx; + + while (src.sg_wa.bytes_left) { + ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true); + if (aes->cmac_final && !src.sg_wa.bytes_left) { + op.eom = 1; + + /* Push the K1/K2 key to the CCP now */ + ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, + op.ksb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_src; + } + + ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0, + aes->cmac_key_len); + ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_src; + } + } + + ret = ccp_perform_aes(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_src; + } + + ccp_process_data(&src, NULL, &op); + } + + /* Retrieve the AES context - convert from LE to BE using + * 32-byte (256-bit) byteswapping + */ + ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_src; + } + + /* ...but we only need AES_BLOCK_SIZE bytes */ + dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; + ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); + +e_src: + ccp_free_data(&src, cmd_q); + +e_ctx: + ccp_dm_free(&ctx); + +e_key: + ccp_dm_free(&key); + + return ret; +} + +static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + struct ccp_aes_engine *aes = &cmd->u.aes; + struct ccp_dm_workarea key, ctx; + struct ccp_data src, dst; + struct ccp_op op; + unsigned int dm_offset; + bool in_place = false; + int ret; + + if (aes->mode == CCP_AES_MODE_CMAC) + return ccp_run_aes_cmac_cmd(cmd_q, cmd); + + if (!((aes->key_len == AES_KEYSIZE_128) || + (aes->key_len == AES_KEYSIZE_192) || + (aes->key_len == AES_KEYSIZE_256))) + return -EINVAL; + + if (((aes->mode == CCP_AES_MODE_ECB) || + (aes->mode == CCP_AES_MODE_CBC) || + (aes->mode == CCP_AES_MODE_CFB)) && + (aes->src_len & (AES_BLOCK_SIZE - 1))) + return -EINVAL; + + if (!aes->key || !aes->src || !aes->dst) + return -EINVAL; + + if (aes->mode != CCP_AES_MODE_ECB) { + if (aes->iv_len != AES_BLOCK_SIZE) + return -EINVAL; + + if (!aes->iv) + return -EINVAL; + } + + BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1); + BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1); + + ret = -EIO; + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = ccp_gen_jobid(cmd_q->ccp); + op.ksb_key = cmd_q->ksb_key; + op.ksb_ctx = cmd_q->ksb_ctx; + op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1; + op.u.aes.type = aes->type; + op.u.aes.mode = aes->mode; + op.u.aes.action = aes->action; + + /* All supported key sizes fit in a single (32-byte) KSB entry + * and must be in little endian format. Use the 256-bit byte + * swap passthru option to convert from big endian to little + * endian. + */ + ret = ccp_init_dm_workarea(&key, cmd_q, + CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES, + DMA_TO_DEVICE); + if (ret) + return ret; + + dm_offset = CCP_KSB_BYTES - aes->key_len; + ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len); + ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_key; + } + + /* The AES context fits in a single (32-byte) KSB entry and + * must be in little endian format. Use the 256-bit byte swap + * passthru option to convert from big endian to little endian. + */ + ret = ccp_init_dm_workarea(&ctx, cmd_q, + CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES, + DMA_BIDIRECTIONAL); + if (ret) + goto e_key; + + if (aes->mode != CCP_AES_MODE_ECB) { + /* Load the AES context - conver to LE */ + dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; + ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); + ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_ctx; + } + } + + /* Prepare the input and output data workareas. For in-place + * operations we need to set the dma direction to BIDIRECTIONAL + * and copy the src workarea to the dst workarea. + */ + if (sg_virt(aes->src) == sg_virt(aes->dst)) + in_place = true; + + ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len, + AES_BLOCK_SIZE, + in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); + if (ret) + goto e_ctx; + + if (in_place) { + dst = src; + } else { + ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len, + AES_BLOCK_SIZE, DMA_FROM_DEVICE); + if (ret) + goto e_src; + } + + /* Send data to the CCP AES engine */ + while (src.sg_wa.bytes_left) { + ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true); + if (!src.sg_wa.bytes_left) { + op.eom = 1; + + /* Since we don't retrieve the AES context in ECB + * mode we have to wait for the operation to complete + * on the last piece of data + */ + if (aes->mode == CCP_AES_MODE_ECB) + op.soc = 1; + } + + ret = ccp_perform_aes(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + ccp_process_data(&src, &dst, &op); + } + + if (aes->mode != CCP_AES_MODE_ECB) { + /* Retrieve the AES context - convert from LE to BE using + * 32-byte (256-bit) byteswapping + */ + ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + /* ...but we only need AES_BLOCK_SIZE bytes */ + dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; + ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); + } + +e_dst: + if (!in_place) + ccp_free_data(&dst, cmd_q); + +e_src: + ccp_free_data(&src, cmd_q); + +e_ctx: + ccp_dm_free(&ctx); + +e_key: + ccp_dm_free(&key); + + return ret; +} + +static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, + struct ccp_cmd *cmd) +{ + struct ccp_xts_aes_engine *xts = &cmd->u.xts; + struct ccp_dm_workarea key, ctx; + struct ccp_data src, dst; + struct ccp_op op; + unsigned int unit_size, dm_offset; + bool in_place = false; + int ret; + + switch (xts->unit_size) { + case CCP_XTS_AES_UNIT_SIZE_16: + unit_size = 16; + break; + case CCP_XTS_AES_UNIT_SIZE_512: + unit_size = 512; + break; + case CCP_XTS_AES_UNIT_SIZE_1024: + unit_size = 1024; + break; + case CCP_XTS_AES_UNIT_SIZE_2048: + unit_size = 2048; + break; + case CCP_XTS_AES_UNIT_SIZE_4096: + unit_size = 4096; + break; + + default: + return -EINVAL; + } + + if (xts->key_len != AES_KEYSIZE_128) + return -EINVAL; + + if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1))) + return -EINVAL; + + if (xts->iv_len != AES_BLOCK_SIZE) + return -EINVAL; + + if (!xts->key || !xts->iv || !xts->src || !xts->dst) + return -EINVAL; + + BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1); + BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1); + + ret = -EIO; + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = ccp_gen_jobid(cmd_q->ccp); + op.ksb_key = cmd_q->ksb_key; + op.ksb_ctx = cmd_q->ksb_ctx; + op.init = 1; + op.u.xts.action = xts->action; + op.u.xts.unit_size = xts->unit_size; + + /* All supported key sizes fit in a single (32-byte) KSB entry + * and must be in little endian format. Use the 256-bit byte + * swap passthru option to convert from big endian to little + * endian. + */ + ret = ccp_init_dm_workarea(&key, cmd_q, + CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES, + DMA_TO_DEVICE); + if (ret) + return ret; + + dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128; + ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len); + ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len); + ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_key; + } + + /* The AES context fits in a single (32-byte) KSB entry and + * for XTS is already in little endian format so no byte swapping + * is needed. + */ + ret = ccp_init_dm_workarea(&ctx, cmd_q, + CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES, + DMA_BIDIRECTIONAL); + if (ret) + goto e_key; + + ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len); + ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, + CCP_PASSTHRU_BYTESWAP_NOOP); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_ctx; + } + + /* Prepare the input and output data workareas. For in-place + * operations we need to set the dma direction to BIDIRECTIONAL + * and copy the src workarea to the dst workarea. + */ + if (sg_virt(xts->src) == sg_virt(xts->dst)) + in_place = true; + + ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len, + unit_size, + in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); + if (ret) + goto e_ctx; + + if (in_place) { + dst = src; + } else { + ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len, + unit_size, DMA_FROM_DEVICE); + if (ret) + goto e_src; + } + + /* Send data to the CCP AES engine */ + while (src.sg_wa.bytes_left) { + ccp_prepare_data(&src, &dst, &op, unit_size, true); + if (!src.sg_wa.bytes_left) + op.eom = 1; + + ret = ccp_perform_xts_aes(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + ccp_process_data(&src, &dst, &op); + } + + /* Retrieve the AES context - convert from LE to BE using + * 32-byte (256-bit) byteswapping + */ + ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + /* ...but we only need AES_BLOCK_SIZE bytes */ + dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; + ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len); + +e_dst: + if (!in_place) + ccp_free_data(&dst, cmd_q); + +e_src: + ccp_free_data(&src, cmd_q); + +e_ctx: + ccp_dm_free(&ctx); + +e_key: + ccp_dm_free(&key); + + return ret; +} + +static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + struct ccp_sha_engine *sha = &cmd->u.sha; + struct ccp_dm_workarea ctx; + struct ccp_data src; + struct ccp_op op; + int ret; + + if (sha->ctx_len != CCP_SHA_CTXSIZE) + return -EINVAL; + + if (!sha->ctx) + return -EINVAL; + + if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1))) + return -EINVAL; + + if (!sha->src_len) { + const u8 *sha_zero; + + /* Not final, just return */ + if (!sha->final) + return 0; + + /* CCP can't do a zero length sha operation so the caller + * must buffer the data. + */ + if (sha->msg_bits) + return -EINVAL; + + /* A sha operation for a message with a total length of zero, + * return known result. + */ + switch (sha->type) { + case CCP_SHA_TYPE_1: + sha_zero = ccp_sha1_zero; + break; + case CCP_SHA_TYPE_224: + sha_zero = ccp_sha224_zero; + break; + case CCP_SHA_TYPE_256: + sha_zero = ccp_sha256_zero; + break; + default: + return -EINVAL; + } + + scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0, + sha->ctx_len, 1); + + return 0; + } + + if (!sha->src) + return -EINVAL; + + BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1); + + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = ccp_gen_jobid(cmd_q->ccp); + op.ksb_ctx = cmd_q->ksb_ctx; + op.u.sha.type = sha->type; + op.u.sha.msg_bits = sha->msg_bits; + + /* The SHA context fits in a single (32-byte) KSB entry and + * must be in little endian format. Use the 256-bit byte swap + * passthru option to convert from big endian to little endian. + */ + ret = ccp_init_dm_workarea(&ctx, cmd_q, + CCP_SHA_KSB_COUNT * CCP_KSB_BYTES, + DMA_BIDIRECTIONAL); + if (ret) + return ret; + + if (sha->first) { + const __be32 *init; + + switch (sha->type) { + case CCP_SHA_TYPE_1: + init = ccp_sha1_init; + break; + case CCP_SHA_TYPE_224: + init = ccp_sha224_init; + break; + case CCP_SHA_TYPE_256: + init = ccp_sha256_init; + break; + default: + ret = -EINVAL; + goto e_ctx; + } + memcpy(ctx.address, init, CCP_SHA_CTXSIZE); + } else { + ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len); + } + + ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_ctx; + } + + /* Send data to the CCP SHA engine */ + ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len, + CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE); + if (ret) + goto e_ctx; + + while (src.sg_wa.bytes_left) { + ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false); + if (sha->final && !src.sg_wa.bytes_left) + op.eom = 1; + + ret = ccp_perform_sha(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_data; + } + + ccp_process_data(&src, NULL, &op); + } + + /* Retrieve the SHA context - convert from LE to BE using + * 32-byte (256-bit) byteswapping to BE + */ + ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_data; + } + + ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len); + + if (sha->final && sha->opad) { + /* HMAC operation, recursively perform final SHA */ + struct ccp_cmd hmac_cmd; + struct scatterlist sg; + u64 block_size, digest_size; + u8 *hmac_buf; + + switch (sha->type) { + case CCP_SHA_TYPE_1: + block_size = SHA1_BLOCK_SIZE; + digest_size = SHA1_DIGEST_SIZE; + break; + case CCP_SHA_TYPE_224: + block_size = SHA224_BLOCK_SIZE; + digest_size = SHA224_DIGEST_SIZE; + break; + case CCP_SHA_TYPE_256: + block_size = SHA256_BLOCK_SIZE; + digest_size = SHA256_DIGEST_SIZE; + break; + default: + ret = -EINVAL; + goto e_data; + } + + if (sha->opad_len != block_size) { + ret = -EINVAL; + goto e_data; + } + + hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL); + if (!hmac_buf) { + ret = -ENOMEM; + goto e_data; + } + sg_init_one(&sg, hmac_buf, block_size + digest_size); + + scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0); + memcpy(hmac_buf + block_size, ctx.address, digest_size); + + memset(&hmac_cmd, 0, sizeof(hmac_cmd)); + hmac_cmd.engine = CCP_ENGINE_SHA; + hmac_cmd.u.sha.type = sha->type; + hmac_cmd.u.sha.ctx = sha->ctx; + hmac_cmd.u.sha.ctx_len = sha->ctx_len; + hmac_cmd.u.sha.src = &sg; + hmac_cmd.u.sha.src_len = block_size + digest_size; + hmac_cmd.u.sha.opad = NULL; + hmac_cmd.u.sha.opad_len = 0; + hmac_cmd.u.sha.first = 1; + hmac_cmd.u.sha.final = 1; + hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3; + + ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd); + if (ret) + cmd->engine_error = hmac_cmd.engine_error; + + kfree(hmac_buf); + } + +e_data: + ccp_free_data(&src, cmd_q); + +e_ctx: + ccp_dm_free(&ctx); + + return ret; +} + +static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + struct ccp_rsa_engine *rsa = &cmd->u.rsa; + struct ccp_dm_workarea exp, src; + struct ccp_data dst; + struct ccp_op op; + unsigned int ksb_count, i_len, o_len; + int ret; + + if (rsa->key_size > CCP_RSA_MAX_WIDTH) + return -EINVAL; + + if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst) + return -EINVAL; + + /* The RSA modulus must precede the message being acted upon, so + * it must be copied to a DMA area where the message and the + * modulus can be concatenated. Therefore the input buffer + * length required is twice the output buffer length (which + * must be a multiple of 256-bits). + */ + o_len = ((rsa->key_size + 255) / 256) * 32; + i_len = o_len * 2; + + ksb_count = o_len / CCP_KSB_BYTES; + + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = ccp_gen_jobid(cmd_q->ccp); + op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count); + if (!op.ksb_key) + return -EIO; + + /* The RSA exponent may span multiple (32-byte) KSB entries and must + * be in little endian format. Reverse copy each 32-byte chunk + * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk) + * and each byte within that chunk and do not perform any byte swap + * operations on the passthru operation. + */ + ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE); + if (ret) + goto e_ksb; + + ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, CCP_KSB_BYTES, + false); + ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key, + CCP_PASSTHRU_BYTESWAP_NOOP); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_exp; + } + + /* Concatenate the modulus and the message. Both the modulus and + * the operands must be in little endian format. Since the input + * is in big endian format it must be converted. + */ + ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE); + if (ret) + goto e_exp; + + ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, CCP_KSB_BYTES, + false); + src.address += o_len; /* Adjust the address for the copy operation */ + ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, CCP_KSB_BYTES, + false); + src.address -= o_len; /* Reset the address to original value */ + + /* Prepare the output area for the operation */ + ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len, + o_len, DMA_FROM_DEVICE); + if (ret) + goto e_src; + + op.soc = 1; + op.src.u.dma.address = src.dma.address; + op.src.u.dma.offset = 0; + op.src.u.dma.length = i_len; + op.dst.u.dma.address = dst.dm_wa.dma.address; + op.dst.u.dma.offset = 0; + op.dst.u.dma.length = o_len; + + op.u.rsa.mod_size = rsa->key_size; + op.u.rsa.input_len = i_len; + + ret = ccp_perform_rsa(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len); + +e_dst: + ccp_free_data(&dst, cmd_q); + +e_src: + ccp_dm_free(&src); + +e_exp: + ccp_dm_free(&exp); + +e_ksb: + ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count); + + return ret; +} + +static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, + struct ccp_cmd *cmd) +{ + struct ccp_passthru_engine *pt = &cmd->u.passthru; + struct ccp_dm_workarea mask; + struct ccp_data src, dst; + struct ccp_op op; + bool in_place = false; + unsigned int i; + int ret; + + if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1))) + return -EINVAL; + + if (!pt->src || !pt->dst) + return -EINVAL; + + if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { + if (pt->mask_len != CCP_PASSTHRU_MASKSIZE) + return -EINVAL; + if (!pt->mask) + return -EINVAL; + } + + BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1); + + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = ccp_gen_jobid(cmd_q->ccp); + + if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { + /* Load the mask */ + op.ksb_key = cmd_q->ksb_key; + + ret = ccp_init_dm_workarea(&mask, cmd_q, + CCP_PASSTHRU_KSB_COUNT * + CCP_KSB_BYTES, + DMA_TO_DEVICE); + if (ret) + return ret; + + ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len); + ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key, + CCP_PASSTHRU_BYTESWAP_NOOP); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_mask; + } + } + + /* Prepare the input and output data workareas. For in-place + * operations we need to set the dma direction to BIDIRECTIONAL + * and copy the src workarea to the dst workarea. + */ + if (sg_virt(pt->src) == sg_virt(pt->dst)) + in_place = true; + + ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len, + CCP_PASSTHRU_MASKSIZE, + in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); + if (ret) + goto e_mask; + + if (in_place) { + dst = src; + } else { + ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len, + CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE); + if (ret) + goto e_src; + } + + /* Send data to the CCP Passthru engine + * Because the CCP engine works on a single source and destination + * dma address at a time, each entry in the source scatterlist + * (after the dma_map_sg call) must be less than or equal to the + * (remaining) length in the destination scatterlist entry and the + * length must be a multiple of CCP_PASSTHRU_BLOCKSIZE + */ + dst.sg_wa.sg_used = 0; + for (i = 1; i <= src.sg_wa.dma_count; i++) { + if (!dst.sg_wa.sg || + (dst.sg_wa.sg->length < src.sg_wa.sg->length)) { + ret = -EINVAL; + goto e_dst; + } + + if (i == src.sg_wa.dma_count) { + op.eom = 1; + op.soc = 1; + } + + op.src.type = CCP_MEMTYPE_SYSTEM; + op.src.u.dma.address = sg_dma_address(src.sg_wa.sg); + op.src.u.dma.offset = 0; + op.src.u.dma.length = sg_dma_len(src.sg_wa.sg); + + op.dst.type = CCP_MEMTYPE_SYSTEM; + op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg); + op.dst.u.dma.offset = dst.sg_wa.sg_used; + op.dst.u.dma.length = op.src.u.dma.length; + + ret = ccp_perform_passthru(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + dst.sg_wa.sg_used += src.sg_wa.sg->length; + if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) { + dst.sg_wa.sg = sg_next(dst.sg_wa.sg); + dst.sg_wa.sg_used = 0; + } + src.sg_wa.sg = sg_next(src.sg_wa.sg); + } + +e_dst: + if (!in_place) + ccp_free_data(&dst, cmd_q); + +e_src: + ccp_free_data(&src, cmd_q); + +e_mask: + if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) + ccp_dm_free(&mask); + + return ret; +} + +static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + struct ccp_ecc_engine *ecc = &cmd->u.ecc; + struct ccp_dm_workarea src, dst; + struct ccp_op op; + int ret; + u8 *save; + + if (!ecc->u.mm.operand_1 || + (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES)) + return -EINVAL; + + if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) + if (!ecc->u.mm.operand_2 || + (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES)) + return -EINVAL; + + if (!ecc->u.mm.result || + (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES)) + return -EINVAL; + + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = ccp_gen_jobid(cmd_q->ccp); + + /* Concatenate the modulus and the operands. Both the modulus and + * the operands must be in little endian format. Since the input + * is in big endian format it must be converted and placed in a + * fixed length buffer. + */ + ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE, + DMA_TO_DEVICE); + if (ret) + return ret; + + /* Save the workarea address since it is updated in order to perform + * the concatenation + */ + save = src.address; + + /* Copy the ECC modulus */ + ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len, + CCP_ECC_OPERAND_SIZE, false); + src.address += CCP_ECC_OPERAND_SIZE; + + /* Copy the first operand */ + ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1, + ecc->u.mm.operand_1_len, + CCP_ECC_OPERAND_SIZE, false); + src.address += CCP_ECC_OPERAND_SIZE; + + if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) { + /* Copy the second operand */ + ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2, + ecc->u.mm.operand_2_len, + CCP_ECC_OPERAND_SIZE, false); + src.address += CCP_ECC_OPERAND_SIZE; + } + + /* Restore the workarea address */ + src.address = save; + + /* Prepare the output area for the operation */ + ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE, + DMA_FROM_DEVICE); + if (ret) + goto e_src; + + op.soc = 1; + op.src.u.dma.address = src.dma.address; + op.src.u.dma.offset = 0; + op.src.u.dma.length = src.length; + op.dst.u.dma.address = dst.dma.address; + op.dst.u.dma.offset = 0; + op.dst.u.dma.length = dst.length; + + op.u.ecc.function = cmd->u.ecc.function; + + ret = ccp_perform_ecc(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + ecc->ecc_result = le16_to_cpup( + (const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET)); + if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) { + ret = -EIO; + goto e_dst; + } + + /* Save the ECC result */ + ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES); + +e_dst: + ccp_dm_free(&dst); + +e_src: + ccp_dm_free(&src); + + return ret; +} + +static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + struct ccp_ecc_engine *ecc = &cmd->u.ecc; + struct ccp_dm_workarea src, dst; + struct ccp_op op; + int ret; + u8 *save; + + if (!ecc->u.pm.point_1.x || + (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) || + !ecc->u.pm.point_1.y || + (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES)) + return -EINVAL; + + if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) { + if (!ecc->u.pm.point_2.x || + (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) || + !ecc->u.pm.point_2.y || + (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES)) + return -EINVAL; + } else { + if (!ecc->u.pm.domain_a || + (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES)) + return -EINVAL; + + if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) + if (!ecc->u.pm.scalar || + (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES)) + return -EINVAL; + } + + if (!ecc->u.pm.result.x || + (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) || + !ecc->u.pm.result.y || + (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES)) + return -EINVAL; + + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = ccp_gen_jobid(cmd_q->ccp); + + /* Concatenate the modulus and the operands. Both the modulus and + * the operands must be in little endian format. Since the input + * is in big endian format it must be converted and placed in a + * fixed length buffer. + */ + ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE, + DMA_TO_DEVICE); + if (ret) + return ret; + + /* Save the workarea address since it is updated in order to perform + * the concatenation + */ + save = src.address; + + /* Copy the ECC modulus */ + ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len, + CCP_ECC_OPERAND_SIZE, false); + src.address += CCP_ECC_OPERAND_SIZE; + + /* Copy the first point X and Y coordinate */ + ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x, + ecc->u.pm.point_1.x_len, + CCP_ECC_OPERAND_SIZE, false); + src.address += CCP_ECC_OPERAND_SIZE; + ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y, + ecc->u.pm.point_1.y_len, + CCP_ECC_OPERAND_SIZE, false); + src.address += CCP_ECC_OPERAND_SIZE; + + /* Set the first point Z coordianate to 1 */ + *src.address = 0x01; + src.address += CCP_ECC_OPERAND_SIZE; + + if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) { + /* Copy the second point X and Y coordinate */ + ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x, + ecc->u.pm.point_2.x_len, + CCP_ECC_OPERAND_SIZE, false); + src.address += CCP_ECC_OPERAND_SIZE; + ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y, + ecc->u.pm.point_2.y_len, + CCP_ECC_OPERAND_SIZE, false); + src.address += CCP_ECC_OPERAND_SIZE; + + /* Set the second point Z coordianate to 1 */ + *src.address = 0x01; + src.address += CCP_ECC_OPERAND_SIZE; + } else { + /* Copy the Domain "a" parameter */ + ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a, + ecc->u.pm.domain_a_len, + CCP_ECC_OPERAND_SIZE, false); + src.address += CCP_ECC_OPERAND_SIZE; + + if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) { + /* Copy the scalar value */ + ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar, + ecc->u.pm.scalar_len, + CCP_ECC_OPERAND_SIZE, false); + src.address += CCP_ECC_OPERAND_SIZE; + } + } + + /* Restore the workarea address */ + src.address = save; + + /* Prepare the output area for the operation */ + ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE, + DMA_FROM_DEVICE); + if (ret) + goto e_src; + + op.soc = 1; + op.src.u.dma.address = src.dma.address; + op.src.u.dma.offset = 0; + op.src.u.dma.length = src.length; + op.dst.u.dma.address = dst.dma.address; + op.dst.u.dma.offset = 0; + op.dst.u.dma.length = dst.length; + + op.u.ecc.function = cmd->u.ecc.function; + + ret = ccp_perform_ecc(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + ecc->ecc_result = le16_to_cpup( + (const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET)); + if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) { + ret = -EIO; + goto e_dst; + } + + /* Save the workarea address since it is updated as we walk through + * to copy the point math result + */ + save = dst.address; + + /* Save the ECC result X and Y coordinates */ + ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x, + CCP_ECC_MODULUS_BYTES); + dst.address += CCP_ECC_OUTPUT_SIZE; + ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y, + CCP_ECC_MODULUS_BYTES); + dst.address += CCP_ECC_OUTPUT_SIZE; + + /* Restore the workarea address */ + dst.address = save; + +e_dst: + ccp_dm_free(&dst); + +e_src: + ccp_dm_free(&src); + + return ret; +} + +static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + struct ccp_ecc_engine *ecc = &cmd->u.ecc; + + ecc->ecc_result = 0; + + if (!ecc->mod || + (ecc->mod_len > CCP_ECC_MODULUS_BYTES)) + return -EINVAL; + + switch (ecc->function) { + case CCP_ECC_FUNCTION_MMUL_384BIT: + case CCP_ECC_FUNCTION_MADD_384BIT: + case CCP_ECC_FUNCTION_MINV_384BIT: + return ccp_run_ecc_mm_cmd(cmd_q, cmd); + + case CCP_ECC_FUNCTION_PADD_384BIT: + case CCP_ECC_FUNCTION_PMUL_384BIT: + case CCP_ECC_FUNCTION_PDBL_384BIT: + return ccp_run_ecc_pm_cmd(cmd_q, cmd); + + default: + return -EINVAL; + } +} + +int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + int ret; + + cmd->engine_error = 0; + cmd_q->cmd_error = 0; + cmd_q->int_rcvd = 0; + cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status)); + + switch (cmd->engine) { + case CCP_ENGINE_AES: + ret = ccp_run_aes_cmd(cmd_q, cmd); + break; + case CCP_ENGINE_XTS_AES_128: + ret = ccp_run_xts_aes_cmd(cmd_q, cmd); + break; + case CCP_ENGINE_SHA: + ret = ccp_run_sha_cmd(cmd_q, cmd); + break; + case CCP_ENGINE_RSA: + ret = ccp_run_rsa_cmd(cmd_q, cmd); + break; + case CCP_ENGINE_PASSTHRU: + ret = ccp_run_passthru_cmd(cmd_q, cmd); + break; + case CCP_ENGINE_ECC: + ret = ccp_run_ecc_cmd(cmd_q, cmd); + break; + default: + ret = -EINVAL; + } + + return ret; +} diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c new file mode 100644 index 000000000..af190d479 --- /dev/null +++ b/drivers/crypto/ccp/ccp-pci.c @@ -0,0 +1,340 @@ +/* + * AMD Cryptographic Coprocessor (CCP) driver + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/dma-mapping.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/delay.h> +#include <linux/ccp.h> + +#include "ccp-dev.h" + +#define IO_BAR 2 +#define IO_OFFSET 0x20000 + +#define MSIX_VECTORS 2 + +struct ccp_msix { + u32 vector; + char name[16]; +}; + +struct ccp_pci { + int msix_count; + struct ccp_msix msix[MSIX_VECTORS]; +}; + +static int ccp_get_msix_irqs(struct ccp_device *ccp) +{ + struct ccp_pci *ccp_pci = ccp->dev_specific; + struct device *dev = ccp->dev; + struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); + struct msix_entry msix_entry[MSIX_VECTORS]; + unsigned int name_len = sizeof(ccp_pci->msix[0].name) - 1; + int v, ret; + + for (v = 0; v < ARRAY_SIZE(msix_entry); v++) + msix_entry[v].entry = v; + + ret = pci_enable_msix_range(pdev, msix_entry, 1, v); + if (ret < 0) + return ret; + + ccp_pci->msix_count = ret; + for (v = 0; v < ccp_pci->msix_count; v++) { + /* Set the interrupt names and request the irqs */ + snprintf(ccp_pci->msix[v].name, name_len, "ccp-%u", v); + ccp_pci->msix[v].vector = msix_entry[v].vector; + ret = request_irq(ccp_pci->msix[v].vector, ccp_irq_handler, + 0, ccp_pci->msix[v].name, dev); + if (ret) { + dev_notice(dev, "unable to allocate MSI-X IRQ (%d)\n", + ret); + goto e_irq; + } + } + + return 0; + +e_irq: + while (v--) + free_irq(ccp_pci->msix[v].vector, dev); + + pci_disable_msix(pdev); + + ccp_pci->msix_count = 0; + + return ret; +} + +static int ccp_get_msi_irq(struct ccp_device *ccp) +{ + struct device *dev = ccp->dev; + struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); + int ret; + + ret = pci_enable_msi(pdev); + if (ret) + return ret; + + ccp->irq = pdev->irq; + ret = request_irq(ccp->irq, ccp_irq_handler, 0, "ccp", dev); + if (ret) { + dev_notice(dev, "unable to allocate MSI IRQ (%d)\n", ret); + goto e_msi; + } + + return 0; + +e_msi: + pci_disable_msi(pdev); + + return ret; +} + +static int ccp_get_irqs(struct ccp_device *ccp) +{ + struct device *dev = ccp->dev; + int ret; + + ret = ccp_get_msix_irqs(ccp); + if (!ret) + return 0; + + /* Couldn't get MSI-X vectors, try MSI */ + dev_notice(dev, "could not enable MSI-X (%d), trying MSI\n", ret); + ret = ccp_get_msi_irq(ccp); + if (!ret) + return 0; + + /* Couldn't get MSI interrupt */ + dev_notice(dev, "could not enable MSI (%d)\n", ret); + + return ret; +} + +static void ccp_free_irqs(struct ccp_device *ccp) +{ + struct ccp_pci *ccp_pci = ccp->dev_specific; + struct device *dev = ccp->dev; + struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); + + if (ccp_pci->msix_count) { + while (ccp_pci->msix_count--) + free_irq(ccp_pci->msix[ccp_pci->msix_count].vector, + dev); + pci_disable_msix(pdev); + } else { + free_irq(ccp->irq, dev); + pci_disable_msi(pdev); + } +} + +static int ccp_find_mmio_area(struct ccp_device *ccp) +{ + struct device *dev = ccp->dev; + struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); + resource_size_t io_len; + unsigned long io_flags; + + io_flags = pci_resource_flags(pdev, IO_BAR); + io_len = pci_resource_len(pdev, IO_BAR); + if ((io_flags & IORESOURCE_MEM) && (io_len >= (IO_OFFSET + 0x800))) + return IO_BAR; + + return -EIO; +} + +static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct ccp_device *ccp; + struct ccp_pci *ccp_pci; + struct device *dev = &pdev->dev; + unsigned int bar; + int ret; + + ret = -ENOMEM; + ccp = ccp_alloc_struct(dev); + if (!ccp) + goto e_err; + + ccp_pci = devm_kzalloc(dev, sizeof(*ccp_pci), GFP_KERNEL); + if (!ccp_pci) + goto e_err; + + ccp->dev_specific = ccp_pci; + ccp->get_irq = ccp_get_irqs; + ccp->free_irq = ccp_free_irqs; + + ret = pci_request_regions(pdev, "ccp"); + if (ret) { + dev_err(dev, "pci_request_regions failed (%d)\n", ret); + goto e_err; + } + + ret = pci_enable_device(pdev); + if (ret) { + dev_err(dev, "pci_enable_device failed (%d)\n", ret); + goto e_regions; + } + + pci_set_master(pdev); + + ret = ccp_find_mmio_area(ccp); + if (ret < 0) + goto e_device; + bar = ret; + + ret = -EIO; + ccp->io_map = pci_iomap(pdev, bar, 0); + if (!ccp->io_map) { + dev_err(dev, "pci_iomap failed\n"); + goto e_device; + } + ccp->io_regs = ccp->io_map + IO_OFFSET; + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); + if (ret) { + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", + ret); + goto e_iomap; + } + } + + dev_set_drvdata(dev, ccp); + + ret = ccp_init(ccp); + if (ret) + goto e_iomap; + + dev_notice(dev, "enabled\n"); + + return 0; + +e_iomap: + pci_iounmap(pdev, ccp->io_map); + +e_device: + pci_disable_device(pdev); + +e_regions: + pci_release_regions(pdev); + +e_err: + dev_notice(dev, "initialization failed\n"); + return ret; +} + +static void ccp_pci_remove(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct ccp_device *ccp = dev_get_drvdata(dev); + + if (!ccp) + return; + + ccp_destroy(ccp); + + pci_iounmap(pdev, ccp->io_map); + + pci_disable_device(pdev); + + pci_release_regions(pdev); + + dev_notice(dev, "disabled\n"); +} + +#ifdef CONFIG_PM +static int ccp_pci_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct device *dev = &pdev->dev; + struct ccp_device *ccp = dev_get_drvdata(dev); + unsigned long flags; + unsigned int i; + + spin_lock_irqsave(&ccp->cmd_lock, flags); + + ccp->suspending = 1; + + /* Wake all the queue kthreads to prepare for suspend */ + for (i = 0; i < ccp->cmd_q_count; i++) + wake_up_process(ccp->cmd_q[i].kthread); + + spin_unlock_irqrestore(&ccp->cmd_lock, flags); + + /* Wait for all queue kthreads to say they're done */ + while (!ccp_queues_suspended(ccp)) + wait_event_interruptible(ccp->suspend_queue, + ccp_queues_suspended(ccp)); + + return 0; +} + +static int ccp_pci_resume(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct ccp_device *ccp = dev_get_drvdata(dev); + unsigned long flags; + unsigned int i; + + spin_lock_irqsave(&ccp->cmd_lock, flags); + + ccp->suspending = 0; + + /* Wake up all the kthreads */ + for (i = 0; i < ccp->cmd_q_count; i++) { + ccp->cmd_q[i].suspended = 0; + wake_up_process(ccp->cmd_q[i].kthread); + } + + spin_unlock_irqrestore(&ccp->cmd_lock, flags); + + return 0; +} +#endif + +static const struct pci_device_id ccp_pci_table[] = { + { PCI_VDEVICE(AMD, 0x1537), }, + /* Last entry must be zero */ + { 0, } +}; +MODULE_DEVICE_TABLE(pci, ccp_pci_table); + +static struct pci_driver ccp_pci_driver = { + .name = "AMD Cryptographic Coprocessor", + .id_table = ccp_pci_table, + .probe = ccp_pci_probe, + .remove = ccp_pci_remove, +#ifdef CONFIG_PM + .suspend = ccp_pci_suspend, + .resume = ccp_pci_resume, +#endif +}; + +int ccp_pci_init(void) +{ + return pci_register_driver(&ccp_pci_driver); +} + +void ccp_pci_exit(void) +{ + pci_unregister_driver(&ccp_pci_driver); +} diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c new file mode 100644 index 000000000..b1c20b2b5 --- /dev/null +++ b/drivers/crypto/ccp/ccp-platform.c @@ -0,0 +1,314 @@ +/* + * AMD Cryptographic Coprocessor (CCP) driver + * + * Copyright (C) 2014 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/platform_device.h> +#include <linux/ioport.h> +#include <linux/dma-mapping.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/delay.h> +#include <linux/ccp.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/acpi.h> + +#include "ccp-dev.h" + +struct ccp_platform { + int use_acpi; + int coherent; +}; + +static int ccp_get_irq(struct ccp_device *ccp) +{ + struct device *dev = ccp->dev; + struct platform_device *pdev = container_of(dev, + struct platform_device, dev); + int ret; + + ret = platform_get_irq(pdev, 0); + if (ret < 0) + return ret; + + ccp->irq = ret; + ret = request_irq(ccp->irq, ccp_irq_handler, 0, "ccp", dev); + if (ret) { + dev_notice(dev, "unable to allocate IRQ (%d)\n", ret); + return ret; + } + + return 0; +} + +static int ccp_get_irqs(struct ccp_device *ccp) +{ + struct device *dev = ccp->dev; + int ret; + + ret = ccp_get_irq(ccp); + if (!ret) + return 0; + + /* Couldn't get an interrupt */ + dev_notice(dev, "could not enable interrupts (%d)\n", ret); + + return ret; +} + +static void ccp_free_irqs(struct ccp_device *ccp) +{ + struct device *dev = ccp->dev; + + free_irq(ccp->irq, dev); +} + +static struct resource *ccp_find_mmio_area(struct ccp_device *ccp) +{ + struct device *dev = ccp->dev; + struct platform_device *pdev = container_of(dev, + struct platform_device, dev); + struct resource *ior; + + ior = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (ior && (resource_size(ior) >= 0x800)) + return ior; + + return NULL; +} + +#ifdef CONFIG_ACPI +static int ccp_acpi_support(struct ccp_device *ccp) +{ + struct ccp_platform *ccp_platform = ccp->dev_specific; + struct acpi_device *adev = ACPI_COMPANION(ccp->dev); + acpi_handle handle; + acpi_status status; + unsigned long long data; + int cca; + + /* Retrieve the device cache coherency value */ + handle = adev->handle; + do { + status = acpi_evaluate_integer(handle, "_CCA", NULL, &data); + if (!ACPI_FAILURE(status)) { + cca = data; + break; + } + } while (!ACPI_FAILURE(status)); + + if (ACPI_FAILURE(status)) { + dev_err(ccp->dev, "error obtaining acpi coherency value\n"); + return -EINVAL; + } + + ccp_platform->coherent = !!cca; + + return 0; +} +#else /* CONFIG_ACPI */ +static int ccp_acpi_support(struct ccp_device *ccp) +{ + return -EINVAL; +} +#endif + +#ifdef CONFIG_OF +static int ccp_of_support(struct ccp_device *ccp) +{ + struct ccp_platform *ccp_platform = ccp->dev_specific; + + ccp_platform->coherent = of_dma_is_coherent(ccp->dev->of_node); + + return 0; +} +#else +static int ccp_of_support(struct ccp_device *ccp) +{ + return -EINVAL; +} +#endif + +static int ccp_platform_probe(struct platform_device *pdev) +{ + struct ccp_device *ccp; + struct ccp_platform *ccp_platform; + struct device *dev = &pdev->dev; + struct acpi_device *adev = ACPI_COMPANION(dev); + struct resource *ior; + int ret; + + ret = -ENOMEM; + ccp = ccp_alloc_struct(dev); + if (!ccp) + goto e_err; + + ccp_platform = devm_kzalloc(dev, sizeof(*ccp_platform), GFP_KERNEL); + if (!ccp_platform) + goto e_err; + + ccp->dev_specific = ccp_platform; + ccp->get_irq = ccp_get_irqs; + ccp->free_irq = ccp_free_irqs; + + ccp_platform->use_acpi = (!adev || acpi_disabled) ? 0 : 1; + + ior = ccp_find_mmio_area(ccp); + ccp->io_map = devm_ioremap_resource(dev, ior); + if (IS_ERR(ccp->io_map)) { + ret = PTR_ERR(ccp->io_map); + goto e_err; + } + ccp->io_regs = ccp->io_map; + + if (!dev->dma_mask) + dev->dma_mask = &dev->coherent_dma_mask; + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); + if (ret) { + dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret); + goto e_err; + } + + if (ccp_platform->use_acpi) + ret = ccp_acpi_support(ccp); + else + ret = ccp_of_support(ccp); + if (ret) + goto e_err; + + if (ccp_platform->coherent) + ccp->axcache = CACHE_WB_NO_ALLOC; + else + ccp->axcache = CACHE_NONE; + + dev_set_drvdata(dev, ccp); + + ret = ccp_init(ccp); + if (ret) + goto e_err; + + dev_notice(dev, "enabled\n"); + + return 0; + +e_err: + dev_notice(dev, "initialization failed\n"); + return ret; +} + +static int ccp_platform_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct ccp_device *ccp = dev_get_drvdata(dev); + + ccp_destroy(ccp); + + dev_notice(dev, "disabled\n"); + + return 0; +} + +#ifdef CONFIG_PM +static int ccp_platform_suspend(struct platform_device *pdev, + pm_message_t state) +{ + struct device *dev = &pdev->dev; + struct ccp_device *ccp = dev_get_drvdata(dev); + unsigned long flags; + unsigned int i; + + spin_lock_irqsave(&ccp->cmd_lock, flags); + + ccp->suspending = 1; + + /* Wake all the queue kthreads to prepare for suspend */ + for (i = 0; i < ccp->cmd_q_count; i++) + wake_up_process(ccp->cmd_q[i].kthread); + + spin_unlock_irqrestore(&ccp->cmd_lock, flags); + + /* Wait for all queue kthreads to say they're done */ + while (!ccp_queues_suspended(ccp)) + wait_event_interruptible(ccp->suspend_queue, + ccp_queues_suspended(ccp)); + + return 0; +} + +static int ccp_platform_resume(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct ccp_device *ccp = dev_get_drvdata(dev); + unsigned long flags; + unsigned int i; + + spin_lock_irqsave(&ccp->cmd_lock, flags); + + ccp->suspending = 0; + + /* Wake up all the kthreads */ + for (i = 0; i < ccp->cmd_q_count; i++) { + ccp->cmd_q[i].suspended = 0; + wake_up_process(ccp->cmd_q[i].kthread); + } + + spin_unlock_irqrestore(&ccp->cmd_lock, flags); + + return 0; +} +#endif + +#ifdef CONFIG_ACPI +static const struct acpi_device_id ccp_acpi_match[] = { + { "AMDI0C00", 0 }, + { }, +}; +#endif + +#ifdef CONFIG_OF +static const struct of_device_id ccp_of_match[] = { + { .compatible = "amd,ccp-seattle-v1a" }, + { }, +}; +#endif + +static struct platform_driver ccp_platform_driver = { + .driver = { + .name = "AMD Cryptographic Coprocessor", +#ifdef CONFIG_ACPI + .acpi_match_table = ccp_acpi_match, +#endif +#ifdef CONFIG_OF + .of_match_table = ccp_of_match, +#endif + }, + .probe = ccp_platform_probe, + .remove = ccp_platform_remove, +#ifdef CONFIG_PM + .suspend = ccp_platform_suspend, + .resume = ccp_platform_resume, +#endif +}; + +int ccp_platform_init(void) +{ + return platform_driver_register(&ccp_platform_driver); +} + +void ccp_platform_exit(void) +{ + platform_driver_unregister(&ccp_platform_driver); +} |