diff options
author | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2016-10-20 00:10:27 -0300 |
---|---|---|
committer | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2016-10-20 00:10:27 -0300 |
commit | d0b2f91bede3bd5e3d24dd6803e56eee959c1797 (patch) | |
tree | 7fee4ab0509879c373c4f2cbd5b8a5be5b4041ee /drivers/crypto | |
parent | e914f8eb445e8f74b00303c19c2ffceaedd16a05 (diff) |
Linux-libre 4.8.2-gnupck-4.8.2-gnu
Diffstat (limited to 'drivers/crypto')
54 files changed, 5159 insertions, 1067 deletions
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index d77ba2f12..1af94e2d1 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -159,6 +159,19 @@ config CRYPTO_GHASH_S390 It is available as of z196. +config CRYPTO_CRC32_S390 + tristate "CRC-32 algorithms" + depends on S390 + select CRYPTO_HASH + select CRC32 + help + Select this option if you want to use hardware accelerated + implementations of CRC algorithms. With this option, you + can optimize the computation of CRC-32 (IEEE 802.3 Ethernet) + and CRC-32C (Castagnoli). + + It is available with IBM z13 or later. + config CRYPTO_DEV_MV_CESA tristate "Marvell's Cryptographic Engine" depends on PLAT_ORION diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c index 95b73968c..10db7df36 100644 --- a/drivers/crypto/bfin_crc.c +++ b/drivers/crypto/bfin_crc.c @@ -588,11 +588,6 @@ static int bfin_crypto_crc_probe(struct platform_device *pdev) crypto_init_queue(&crc->queue, CRC_CCRYPTO_QUEUE_LENGTH); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res == NULL) { - dev_err(&pdev->dev, "Cannot get IORESOURCE_MEM\n"); - return -ENOENT; - } - crc->regs = devm_ioremap_resource(dev, res); if (IS_ERR((void *)crc->regs)) { dev_err(&pdev->dev, "Cannot map CRC IO\n"); diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index 5652a5341..64bf3024b 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -1,6 +1,6 @@ config CRYPTO_DEV_FSL_CAAM tristate "Freescale CAAM-Multicore driver backend" - depends on FSL_SOC || ARCH_MXC + depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE help Enables the driver module for Freescale's Cryptographic Accelerator and Assurance Module (CAAM), also known as the SEC version 4 (SEC4). @@ -99,6 +99,18 @@ config CRYPTO_DEV_FSL_CAAM_AHASH_API To compile this as a module, choose M here: the module will be called caamhash. +config CRYPTO_DEV_FSL_CAAM_PKC_API + tristate "Register public key cryptography implementations with Crypto API" + depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR + default y + select CRYPTO_RSA + help + Selecting this will allow SEC Public key support for RSA. + Supported cryptographic primitives: encryption, decryption, + signature and verification. + To compile this as a module, choose M here: the module + will be called caam_pkc. + config CRYPTO_DEV_FSL_CAAM_RNG_API tristate "Register caam device for hwrng API" depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR @@ -116,10 +128,6 @@ config CRYPTO_DEV_FSL_CAAM_IMX def_bool SOC_IMX6 || SOC_IMX7D depends on CRYPTO_DEV_FSL_CAAM -config CRYPTO_DEV_FSL_CAAM_LE - def_bool CRYPTO_DEV_FSL_CAAM_IMX || SOC_LS1021A - depends on CRYPTO_DEV_FSL_CAAM - config CRYPTO_DEV_FSL_CAAM_DEBUG bool "Enable debug output in CAAM driver" depends on CRYPTO_DEV_FSL_CAAM diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile index 550758a33..08bf5515a 100644 --- a/drivers/crypto/caam/Makefile +++ b/drivers/crypto/caam/Makefile @@ -2,7 +2,7 @@ # Makefile for the CAAM backend and dependent components # ifeq ($(CONFIG_CRYPTO_DEV_FSL_CAAM_DEBUG), y) - EXTRA_CFLAGS := -DDEBUG + ccflags-y := -DDEBUG endif obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam.o @@ -10,6 +10,8 @@ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_JR) += caam_jr.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o +obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API) += caam_pkc.o caam-objs := ctrl.o caam_jr-objs := jr.o key_gen.o error.o +caam_pkc-y := caampkc.o pkc_desc.o diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index e9703f9d1..36365b3ef 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -847,7 +847,7 @@ static int ahash_update_ctx(struct ahash_request *req) *next_buflen, 0); } else { (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= - SEC4_SG_LEN_FIN; + cpu_to_caam32(SEC4_SG_LEN_FIN); } state->current_buf = !state->current_buf; @@ -949,7 +949,8 @@ static int ahash_final_ctx(struct ahash_request *req) state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, buf, state->buf_dma, buflen, last_buflen); - (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= SEC4_SG_LEN_FIN; + (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= + cpu_to_caam32(SEC4_SG_LEN_FIN); edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, sec4_sg_bytes, DMA_TO_DEVICE); diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c new file mode 100644 index 000000000..851015e65 --- /dev/null +++ b/drivers/crypto/caam/caampkc.c @@ -0,0 +1,607 @@ +/* + * caam - Freescale FSL CAAM support for Public Key Cryptography + * + * Copyright 2016 Freescale Semiconductor, Inc. + * + * There is no Shared Descriptor for PKC so that the Job Descriptor must carry + * all the desired key parameters, input and output pointers. + */ +#include "compat.h" +#include "regs.h" +#include "intern.h" +#include "jr.h" +#include "error.h" +#include "desc_constr.h" +#include "sg_sw_sec4.h" +#include "caampkc.h" + +#define DESC_RSA_PUB_LEN (2 * CAAM_CMD_SZ + sizeof(struct rsa_pub_pdb)) +#define DESC_RSA_PRIV_F1_LEN (2 * CAAM_CMD_SZ + \ + sizeof(struct rsa_priv_f1_pdb)) + +static void rsa_io_unmap(struct device *dev, struct rsa_edesc *edesc, + struct akcipher_request *req) +{ + dma_unmap_sg(dev, req->dst, edesc->dst_nents, DMA_FROM_DEVICE); + dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE); + + if (edesc->sec4_sg_bytes) + dma_unmap_single(dev, edesc->sec4_sg_dma, edesc->sec4_sg_bytes, + DMA_TO_DEVICE); +} + +static void rsa_pub_unmap(struct device *dev, struct rsa_edesc *edesc, + struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct rsa_pub_pdb *pdb = &edesc->pdb.pub; + + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->e_dma, key->e_sz, DMA_TO_DEVICE); +} + +static void rsa_priv_f1_unmap(struct device *dev, struct rsa_edesc *edesc, + struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct rsa_priv_f1_pdb *pdb = &edesc->pdb.priv_f1; + + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE); +} + +/* RSA Job Completion handler */ +static void rsa_pub_done(struct device *dev, u32 *desc, u32 err, void *context) +{ + struct akcipher_request *req = context; + struct rsa_edesc *edesc; + + if (err) + caam_jr_strstatus(dev, err); + + edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + + rsa_pub_unmap(dev, edesc, req); + rsa_io_unmap(dev, edesc, req); + kfree(edesc); + + akcipher_request_complete(req, err); +} + +static void rsa_priv_f1_done(struct device *dev, u32 *desc, u32 err, + void *context) +{ + struct akcipher_request *req = context; + struct rsa_edesc *edesc; + + if (err) + caam_jr_strstatus(dev, err); + + edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + + rsa_priv_f1_unmap(dev, edesc, req); + rsa_io_unmap(dev, edesc, req); + kfree(edesc); + + akcipher_request_complete(req, err); +} + +static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req, + size_t desclen) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct device *dev = ctx->dev; + struct rsa_edesc *edesc; + gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; + int sgc; + int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes; + int src_nents, dst_nents; + + src_nents = sg_nents_for_len(req->src, req->src_len); + dst_nents = sg_nents_for_len(req->dst, req->dst_len); + + if (src_nents > 1) + sec4_sg_len = src_nents; + if (dst_nents > 1) + sec4_sg_len += dst_nents; + + sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry); + + /* allocate space for base edesc, hw desc commands and link tables */ + edesc = kzalloc(sizeof(*edesc) + desclen + sec4_sg_bytes, + GFP_DMA | flags); + if (!edesc) + return ERR_PTR(-ENOMEM); + + sgc = dma_map_sg(dev, req->src, src_nents, DMA_TO_DEVICE); + if (unlikely(!sgc)) { + dev_err(dev, "unable to map source\n"); + goto src_fail; + } + + sgc = dma_map_sg(dev, req->dst, dst_nents, DMA_FROM_DEVICE); + if (unlikely(!sgc)) { + dev_err(dev, "unable to map destination\n"); + goto dst_fail; + } + + edesc->sec4_sg = (void *)edesc + sizeof(*edesc) + desclen; + + sec4_sg_index = 0; + if (src_nents > 1) { + sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg, 0); + sec4_sg_index += src_nents; + } + if (dst_nents > 1) + sg_to_sec4_sg_last(req->dst, dst_nents, + edesc->sec4_sg + sec4_sg_index, 0); + + /* Save nents for later use in Job Descriptor */ + edesc->src_nents = src_nents; + edesc->dst_nents = dst_nents; + + if (!sec4_sg_bytes) + return edesc; + + edesc->sec4_sg_dma = dma_map_single(dev, edesc->sec4_sg, + sec4_sg_bytes, DMA_TO_DEVICE); + if (dma_mapping_error(dev, edesc->sec4_sg_dma)) { + dev_err(dev, "unable to map S/G table\n"); + goto sec4_sg_fail; + } + + edesc->sec4_sg_bytes = sec4_sg_bytes; + + return edesc; + +sec4_sg_fail: + dma_unmap_sg(dev, req->dst, dst_nents, DMA_FROM_DEVICE); +dst_fail: + dma_unmap_sg(dev, req->src, src_nents, DMA_TO_DEVICE); +src_fail: + kfree(edesc); + return ERR_PTR(-ENOMEM); +} + +static int set_rsa_pub_pdb(struct akcipher_request *req, + struct rsa_edesc *edesc) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *dev = ctx->dev; + struct rsa_pub_pdb *pdb = &edesc->pdb.pub; + int sec4_sg_index = 0; + + pdb->n_dma = dma_map_single(dev, key->n, key->n_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->n_dma)) { + dev_err(dev, "Unable to map RSA modulus memory\n"); + return -ENOMEM; + } + + pdb->e_dma = dma_map_single(dev, key->e, key->e_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->e_dma)) { + dev_err(dev, "Unable to map RSA public exponent memory\n"); + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + return -ENOMEM; + } + + if (edesc->src_nents > 1) { + pdb->sgf |= RSA_PDB_SGF_F; + pdb->f_dma = edesc->sec4_sg_dma; + sec4_sg_index += edesc->src_nents; + } else { + pdb->f_dma = sg_dma_address(req->src); + } + + if (edesc->dst_nents > 1) { + pdb->sgf |= RSA_PDB_SGF_G; + pdb->g_dma = edesc->sec4_sg_dma + + sec4_sg_index * sizeof(struct sec4_sg_entry); + } else { + pdb->g_dma = sg_dma_address(req->dst); + } + + pdb->sgf |= (key->e_sz << RSA_PDB_E_SHIFT) | key->n_sz; + pdb->f_len = req->src_len; + + return 0; +} + +static int set_rsa_priv_f1_pdb(struct akcipher_request *req, + struct rsa_edesc *edesc) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *dev = ctx->dev; + struct rsa_priv_f1_pdb *pdb = &edesc->pdb.priv_f1; + int sec4_sg_index = 0; + + pdb->n_dma = dma_map_single(dev, key->n, key->n_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->n_dma)) { + dev_err(dev, "Unable to map modulus memory\n"); + return -ENOMEM; + } + + pdb->d_dma = dma_map_single(dev, key->d, key->d_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->d_dma)) { + dev_err(dev, "Unable to map RSA private exponent memory\n"); + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + return -ENOMEM; + } + + if (edesc->src_nents > 1) { + pdb->sgf |= RSA_PRIV_PDB_SGF_G; + pdb->g_dma = edesc->sec4_sg_dma; + sec4_sg_index += edesc->src_nents; + } else { + pdb->g_dma = sg_dma_address(req->src); + } + + if (edesc->dst_nents > 1) { + pdb->sgf |= RSA_PRIV_PDB_SGF_F; + pdb->f_dma = edesc->sec4_sg_dma + + sec4_sg_index * sizeof(struct sec4_sg_entry); + } else { + pdb->f_dma = sg_dma_address(req->dst); + } + + pdb->sgf |= (key->d_sz << RSA_PDB_D_SHIFT) | key->n_sz; + + return 0; +} + +static int caam_rsa_enc(struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *jrdev = ctx->dev; + struct rsa_edesc *edesc; + int ret; + + if (unlikely(!key->n || !key->e)) + return -EINVAL; + + if (req->dst_len < key->n_sz) { + req->dst_len = key->n_sz; + dev_err(jrdev, "Output buffer length less than parameter n\n"); + return -EOVERFLOW; + } + + /* Allocate extended descriptor */ + edesc = rsa_edesc_alloc(req, DESC_RSA_PUB_LEN); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + /* Set RSA Encrypt Protocol Data Block */ + ret = set_rsa_pub_pdb(req, edesc); + if (ret) + goto init_fail; + + /* Initialize Job Descriptor */ + init_rsa_pub_desc(edesc->hw_desc, &edesc->pdb.pub); + + ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_pub_done, req); + if (!ret) + return -EINPROGRESS; + + rsa_pub_unmap(jrdev, edesc, req); + +init_fail: + rsa_io_unmap(jrdev, edesc, req); + kfree(edesc); + return ret; +} + +static int caam_rsa_dec(struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *jrdev = ctx->dev; + struct rsa_edesc *edesc; + int ret; + + if (unlikely(!key->n || !key->d)) + return -EINVAL; + + if (req->dst_len < key->n_sz) { + req->dst_len = key->n_sz; + dev_err(jrdev, "Output buffer length less than parameter n\n"); + return -EOVERFLOW; + } + + /* Allocate extended descriptor */ + edesc = rsa_edesc_alloc(req, DESC_RSA_PRIV_F1_LEN); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + /* Set RSA Decrypt Protocol Data Block - Private Key Form #1 */ + ret = set_rsa_priv_f1_pdb(req, edesc); + if (ret) + goto init_fail; + + /* Initialize Job Descriptor */ + init_rsa_priv_f1_desc(edesc->hw_desc, &edesc->pdb.priv_f1); + + ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f1_done, req); + if (!ret) + return -EINPROGRESS; + + rsa_priv_f1_unmap(jrdev, edesc, req); + +init_fail: + rsa_io_unmap(jrdev, edesc, req); + kfree(edesc); + return ret; +} + +static void caam_rsa_free_key(struct caam_rsa_key *key) +{ + kzfree(key->d); + kfree(key->e); + kfree(key->n); + key->d = NULL; + key->e = NULL; + key->n = NULL; + key->d_sz = 0; + key->e_sz = 0; + key->n_sz = 0; +} + +/** + * caam_read_raw_data - Read a raw byte stream as a positive integer. + * The function skips buffer's leading zeros, copies the remained data + * to a buffer allocated in the GFP_DMA | GFP_KERNEL zone and returns + * the address of the new buffer. + * + * @buf : The data to read + * @nbytes: The amount of data to read + */ +static inline u8 *caam_read_raw_data(const u8 *buf, size_t *nbytes) +{ + u8 *val; + + while (!*buf && *nbytes) { + buf++; + (*nbytes)--; + } + + val = kzalloc(*nbytes, GFP_DMA | GFP_KERNEL); + if (!val) + return NULL; + + memcpy(val, buf, *nbytes); + + return val; +} + +static int caam_rsa_check_key_length(unsigned int len) +{ + if (len > 4096) + return -EINVAL; + return 0; +} + +static int caam_rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct rsa_key raw_key = {0}; + struct caam_rsa_key *rsa_key = &ctx->key; + int ret; + + /* Free the old RSA key if any */ + caam_rsa_free_key(rsa_key); + + ret = rsa_parse_pub_key(&raw_key, key, keylen); + if (ret) + return ret; + + /* Copy key in DMA zone */ + rsa_key->e = kzalloc(raw_key.e_sz, GFP_DMA | GFP_KERNEL); + if (!rsa_key->e) + goto err; + + /* + * Skip leading zeros and copy the positive integer to a buffer + * allocated in the GFP_DMA | GFP_KERNEL zone. The decryption descriptor + * expects a positive integer for the RSA modulus and uses its length as + * decryption output length. + */ + rsa_key->n = caam_read_raw_data(raw_key.n, &raw_key.n_sz); + if (!rsa_key->n) + goto err; + + if (caam_rsa_check_key_length(raw_key.n_sz << 3)) { + caam_rsa_free_key(rsa_key); + return -EINVAL; + } + + rsa_key->e_sz = raw_key.e_sz; + rsa_key->n_sz = raw_key.n_sz; + + memcpy(rsa_key->e, raw_key.e, raw_key.e_sz); + + return 0; +err: + caam_rsa_free_key(rsa_key); + return -ENOMEM; +} + +static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct rsa_key raw_key = {0}; + struct caam_rsa_key *rsa_key = &ctx->key; + int ret; + + /* Free the old RSA key if any */ + caam_rsa_free_key(rsa_key); + + ret = rsa_parse_priv_key(&raw_key, key, keylen); + if (ret) + return ret; + + /* Copy key in DMA zone */ + rsa_key->d = kzalloc(raw_key.d_sz, GFP_DMA | GFP_KERNEL); + if (!rsa_key->d) + goto err; + + rsa_key->e = kzalloc(raw_key.e_sz, GFP_DMA | GFP_KERNEL); + if (!rsa_key->e) + goto err; + + /* + * Skip leading zeros and copy the positive integer to a buffer + * allocated in the GFP_DMA | GFP_KERNEL zone. The decryption descriptor + * expects a positive integer for the RSA modulus and uses its length as + * decryption output length. + */ + rsa_key->n = caam_read_raw_data(raw_key.n, &raw_key.n_sz); + if (!rsa_key->n) + goto err; + + if (caam_rsa_check_key_length(raw_key.n_sz << 3)) { + caam_rsa_free_key(rsa_key); + return -EINVAL; + } + + rsa_key->d_sz = raw_key.d_sz; + rsa_key->e_sz = raw_key.e_sz; + rsa_key->n_sz = raw_key.n_sz; + + memcpy(rsa_key->d, raw_key.d, raw_key.d_sz); + memcpy(rsa_key->e, raw_key.e, raw_key.e_sz); + + return 0; + +err: + caam_rsa_free_key(rsa_key); + return -ENOMEM; +} + +static int caam_rsa_max_size(struct crypto_akcipher *tfm) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + + return (key->n) ? key->n_sz : -EINVAL; +} + +/* Per session pkc's driver context creation function */ +static int caam_rsa_init_tfm(struct crypto_akcipher *tfm) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + + ctx->dev = caam_jr_alloc(); + + if (IS_ERR(ctx->dev)) { + dev_err(ctx->dev, "Job Ring Device allocation for transform failed\n"); + return PTR_ERR(ctx->dev); + } + + return 0; +} + +/* Per session pkc's driver context cleanup function */ +static void caam_rsa_exit_tfm(struct crypto_akcipher *tfm) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + + caam_rsa_free_key(key); + caam_jr_free(ctx->dev); +} + +static struct akcipher_alg caam_rsa = { + .encrypt = caam_rsa_enc, + .decrypt = caam_rsa_dec, + .sign = caam_rsa_dec, + .verify = caam_rsa_enc, + .set_pub_key = caam_rsa_set_pub_key, + .set_priv_key = caam_rsa_set_priv_key, + .max_size = caam_rsa_max_size, + .init = caam_rsa_init_tfm, + .exit = caam_rsa_exit_tfm, + .base = { + .cra_name = "rsa", + .cra_driver_name = "rsa-caam", + .cra_priority = 3000, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct caam_rsa_ctx), + }, +}; + +/* Public Key Cryptography module initialization handler */ +static int __init caam_pkc_init(void) +{ + struct device_node *dev_node; + struct platform_device *pdev; + struct device *ctrldev; + struct caam_drv_private *priv; + u32 cha_inst, pk_inst; + int err; + + dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0"); + if (!dev_node) { + dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0"); + if (!dev_node) + return -ENODEV; + } + + pdev = of_find_device_by_node(dev_node); + if (!pdev) { + of_node_put(dev_node); + return -ENODEV; + } + + ctrldev = &pdev->dev; + priv = dev_get_drvdata(ctrldev); + of_node_put(dev_node); + + /* + * If priv is NULL, it's probably because the caam driver wasn't + * properly initialized (e.g. RNG4 init failed). Thus, bail out here. + */ + if (!priv) + return -ENODEV; + + /* Determine public key hardware accelerator presence. */ + cha_inst = rd_reg32(&priv->ctrl->perfmon.cha_num_ls); + pk_inst = (cha_inst & CHA_ID_LS_PK_MASK) >> CHA_ID_LS_PK_SHIFT; + + /* Do not register algorithms if PKHA is not present. */ + if (!pk_inst) + return -ENODEV; + + err = crypto_register_akcipher(&caam_rsa); + if (err) + dev_warn(ctrldev, "%s alg registration failed\n", + caam_rsa.base.cra_driver_name); + else + dev_info(ctrldev, "caam pkc algorithms registered in /proc/crypto\n"); + + return err; +} + +static void __exit caam_pkc_exit(void) +{ + crypto_unregister_akcipher(&caam_rsa); +} + +module_init(caam_pkc_init); +module_exit(caam_pkc_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("FSL CAAM support for PKC functions of crypto API"); +MODULE_AUTHOR("Freescale Semiconductor"); diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h new file mode 100644 index 000000000..f595d159b --- /dev/null +++ b/drivers/crypto/caam/caampkc.h @@ -0,0 +1,70 @@ +/* + * caam - Freescale FSL CAAM support for Public Key Cryptography descriptors + * + * Copyright 2016 Freescale Semiconductor, Inc. + * + * There is no Shared Descriptor for PKC so that the Job Descriptor must carry + * all the desired key parameters, input and output pointers. + */ + +#ifndef _PKC_DESC_H_ +#define _PKC_DESC_H_ +#include "compat.h" +#include "pdb.h" + +/** + * caam_rsa_key - CAAM RSA key structure. Keys are allocated in DMA zone. + * @n : RSA modulus raw byte stream + * @e : RSA public exponent raw byte stream + * @d : RSA private exponent raw byte stream + * @n_sz : length in bytes of RSA modulus n + * @e_sz : length in bytes of RSA public exponent + * @d_sz : length in bytes of RSA private exponent + */ +struct caam_rsa_key { + u8 *n; + u8 *e; + u8 *d; + size_t n_sz; + size_t e_sz; + size_t d_sz; +}; + +/** + * caam_rsa_ctx - per session context. + * @key : RSA key in DMA zone + * @dev : device structure + */ +struct caam_rsa_ctx { + struct caam_rsa_key key; + struct device *dev; +}; + +/** + * rsa_edesc - s/w-extended rsa descriptor + * @src_nents : number of segments in input scatterlist + * @dst_nents : number of segments in output scatterlist + * @sec4_sg_bytes : length of h/w link table + * @sec4_sg_dma : dma address of h/w link table + * @sec4_sg : pointer to h/w link table + * @pdb : specific RSA Protocol Data Block (PDB) + * @hw_desc : descriptor followed by link tables if any + */ +struct rsa_edesc { + int src_nents; + int dst_nents; + int sec4_sg_bytes; + dma_addr_t sec4_sg_dma; + struct sec4_sg_entry *sec4_sg; + union { + struct rsa_pub_pdb pub; + struct rsa_priv_f1_pdb priv_f1; + } pdb; + u32 hw_desc[]; +}; + +/* Descriptor construction primitives. */ +void init_rsa_pub_desc(u32 *desc, struct rsa_pub_pdb *pdb); +void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb); + +#endif diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h index b6955ecdf..7149cd249 100644 --- a/drivers/crypto/caam/compat.h +++ b/drivers/crypto/caam/compat.h @@ -35,8 +35,11 @@ #include <crypto/md5.h> #include <crypto/internal/aead.h> #include <crypto/authenc.h> +#include <crypto/akcipher.h> #include <crypto/scatterwalk.h> #include <crypto/internal/skcipher.h> #include <crypto/internal/hash.h> +#include <crypto/internal/rsa.h> +#include <crypto/internal/akcipher.h> #endif /* !defined(CAAM_COMPAT_H) */ diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 5ad5f3009..0ec112ee5 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -15,6 +15,9 @@ #include "desc_constr.h" #include "error.h" +bool caam_little_end; +EXPORT_SYMBOL(caam_little_end); + /* * i.MX targets tend to have clock control subsystems that can * enable/disable clocking to our device. @@ -106,7 +109,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, if (ctrlpriv->virt_en == 1) { - setbits32(&ctrl->deco_rsr, DECORSR_JR0); + clrsetbits_32(&ctrl->deco_rsr, 0, DECORSR_JR0); while (!(rd_reg32(&ctrl->deco_rsr) & DECORSR_VALID) && --timeout) @@ -115,7 +118,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, timeout = 100000; } - setbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); + clrsetbits_32(&ctrl->deco_rq, 0, DECORR_RQD0ENABLE); while (!(rd_reg32(&ctrl->deco_rq) & DECORR_DEN0) && --timeout) @@ -123,12 +126,12 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, if (!timeout) { dev_err(ctrldev, "failed to acquire DECO 0\n"); - clrbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); + clrsetbits_32(&ctrl->deco_rq, DECORR_RQD0ENABLE, 0); return -ENODEV; } for (i = 0; i < desc_len(desc); i++) - wr_reg32(&deco->descbuf[i], *(desc + i)); + wr_reg32(&deco->descbuf[i], caam32_to_cpu(*(desc + i))); flags = DECO_JQCR_WHL; /* @@ -139,7 +142,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, flags |= DECO_JQCR_FOUR; /* Instruct the DECO to execute it */ - setbits32(&deco->jr_ctl_hi, flags); + clrsetbits_32(&deco->jr_ctl_hi, 0, flags); timeout = 10000000; do { @@ -158,10 +161,10 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, DECO_OP_STATUS_HI_ERR_MASK; if (ctrlpriv->virt_en == 1) - clrbits32(&ctrl->deco_rsr, DECORSR_JR0); + clrsetbits_32(&ctrl->deco_rsr, DECORSR_JR0, 0); /* Mark the DECO as free */ - clrbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); + clrsetbits_32(&ctrl->deco_rq, DECORR_RQD0ENABLE, 0); if (!timeout) return -EAGAIN; @@ -349,7 +352,7 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) r4tst = &ctrl->r4tst[0]; /* put RNG4 into program mode */ - setbits32(&r4tst->rtmctl, RTMCTL_PRGM); + clrsetbits_32(&r4tst->rtmctl, 0, RTMCTL_PRGM); /* * Performance-wise, it does not make sense to @@ -363,7 +366,7 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) >> RTSDCTL_ENT_DLY_SHIFT; if (ent_delay <= val) { /* put RNG4 into run mode */ - clrbits32(&r4tst->rtmctl, RTMCTL_PRGM); + clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, 0); return; } @@ -381,9 +384,9 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) * select raw sampling in both entropy shifter * and statistical checker */ - setbits32(&val, RTMCTL_SAMP_MODE_RAW_ES_SC); + clrsetbits_32(&val, 0, RTMCTL_SAMP_MODE_RAW_ES_SC); /* put RNG4 into run mode */ - clrbits32(&val, RTMCTL_PRGM); + clrsetbits_32(&val, RTMCTL_PRGM, 0); /* write back the control register */ wr_reg32(&r4tst->rtmctl, val); } @@ -406,6 +409,23 @@ int caam_get_era(void) } EXPORT_SYMBOL(caam_get_era); +#ifdef CONFIG_DEBUG_FS +static int caam_debugfs_u64_get(void *data, u64 *val) +{ + *val = caam64_to_cpu(*(u64 *)data); + return 0; +} + +static int caam_debugfs_u32_get(void *data, u64 *val) +{ + *val = caam32_to_cpu(*(u32 *)data); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u32_ro, caam_debugfs_u32_get, NULL, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u64_ro, caam_debugfs_u64_get, NULL, "%llu\n"); +#endif + /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { @@ -504,6 +524,10 @@ static int caam_probe(struct platform_device *pdev) ret = -ENOMEM; goto disable_caam_emi_slow; } + + caam_little_end = !(bool)(rd_reg32(&ctrl->perfmon.status) & + (CSTA_PLEND | CSTA_ALT_PLEND)); + /* Finding the page size for using the CTPR_MS register */ comp_params = rd_reg32(&ctrl->perfmon.comp_parms_ms); pg_size = (comp_params & CTPR_MS_PG_SZ_MASK) >> CTPR_MS_PG_SZ_SHIFT; @@ -559,9 +583,9 @@ static int caam_probe(struct platform_device *pdev) } if (ctrlpriv->virt_en == 1) - setbits32(&ctrl->jrstart, JRSTART_JR0_START | - JRSTART_JR1_START | JRSTART_JR2_START | - JRSTART_JR3_START); + clrsetbits_32(&ctrl->jrstart, 0, JRSTART_JR0_START | + JRSTART_JR1_START | JRSTART_JR2_START | + JRSTART_JR3_START); if (sizeof(dma_addr_t) == sizeof(u64)) if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) @@ -693,7 +717,7 @@ static int caam_probe(struct platform_device *pdev) ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_IFMASK; /* Enable RDB bit so that RNG works faster */ - setbits32(&ctrl->scfgr, SCFGR_RDBENABLE); + clrsetbits_32(&ctrl->scfgr, 0, SCFGR_RDBENABLE); } /* NOTE: RTIC detection ought to go here, around Si time */ @@ -719,48 +743,59 @@ static int caam_probe(struct platform_device *pdev) ctrlpriv->ctl = debugfs_create_dir("ctl", ctrlpriv->dfs_root); /* Controller-level - performance monitor counters */ + ctrlpriv->ctl_rq_dequeued = - debugfs_create_u64("rq_dequeued", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->req_dequeued); + debugfs_create_file("rq_dequeued", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->req_dequeued, + &caam_fops_u64_ro); ctrlpriv->ctl_ob_enc_req = - debugfs_create_u64("ob_rq_encrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ob_enc_req); + debugfs_create_file("ob_rq_encrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ob_enc_req, + &caam_fops_u64_ro); ctrlpriv->ctl_ib_dec_req = - debugfs_create_u64("ib_rq_decrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ib_dec_req); + debugfs_create_file("ib_rq_decrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ib_dec_req, + &caam_fops_u64_ro); ctrlpriv->ctl_ob_enc_bytes = - debugfs_create_u64("ob_bytes_encrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ob_enc_bytes); + debugfs_create_file("ob_bytes_encrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ob_enc_bytes, + &caam_fops_u64_ro); ctrlpriv->ctl_ob_prot_bytes = - debugfs_create_u64("ob_bytes_protected", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ob_prot_bytes); + debugfs_create_file("ob_bytes_protected", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ob_prot_bytes, + &caam_fops_u64_ro); ctrlpriv->ctl_ib_dec_bytes = - debugfs_create_u64("ib_bytes_decrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ib_dec_bytes); + debugfs_create_file("ib_bytes_decrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ib_dec_bytes, + &caam_fops_u64_ro); ctrlpriv->ctl_ib_valid_bytes = - debugfs_create_u64("ib_bytes_validated", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ib_valid_bytes); + debugfs_create_file("ib_bytes_validated", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ib_valid_bytes, + &caam_fops_u64_ro); /* Controller level - global status values */ ctrlpriv->ctl_faultaddr = - debugfs_create_u64("fault_addr", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->faultaddr); + debugfs_create_file("fault_addr", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->faultaddr, + &caam_fops_u32_ro); ctrlpriv->ctl_faultdetail = - debugfs_create_u32("fault_detail", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->faultdetail); + debugfs_create_file("fault_detail", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->faultdetail, + &caam_fops_u32_ro); ctrlpriv->ctl_faultstatus = - debugfs_create_u32("fault_status", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->status); + debugfs_create_file("fault_status", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->status, + &caam_fops_u32_ro); /* Internal covering keys (useful in non-secure mode only) */ ctrlpriv->ctl_kek_wrap.data = &ctrlpriv->ctrl->kek[0]; diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index 1e93c6af2..26427c11a 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -20,19 +20,18 @@ #define SEC4_SG_BPID_MASK 0x000000ff #define SEC4_SG_BPID_SHIFT 16 #define SEC4_SG_LEN_MASK 0x3fffffff /* Excludes EXT and FINAL */ -#define SEC4_SG_OFFS_MASK 0x00001fff +#define SEC4_SG_OFFSET_MASK 0x00001fff struct sec4_sg_entry { -#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX +#if !defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && \ + defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX) u32 rsvd1; dma_addr_t ptr; #else u64 ptr; #endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_IMX */ u32 len; - u8 rsvd2; - u8 buf_pool_id; - u16 offset; + u32 bpid_offset; }; /* Max size of any CAAM descriptor in 32-bit words, inclusive of header */ @@ -454,6 +453,8 @@ struct sec4_sg_entry { #define OP_PCLID_PUBLICKEYPAIR (0x14 << OP_PCLID_SHIFT) #define OP_PCLID_DSASIGN (0x15 << OP_PCLID_SHIFT) #define OP_PCLID_DSAVERIFY (0x16 << OP_PCLID_SHIFT) +#define OP_PCLID_RSAENC_PUBKEY (0x18 << OP_PCLID_SHIFT) +#define OP_PCLID_RSADEC_PRVKEY (0x19 << OP_PCLID_SHIFT) /* Assuming OP_TYPE = OP_TYPE_DECAP_PROTOCOL/ENCAP_PROTOCOL */ #define OP_PCLID_IPSEC (0x01 << OP_PCLID_SHIFT) diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index 98d07de24..d3869b95e 100644 --- a/drivers/crypto/caam/desc_constr.h +++ b/drivers/crypto/caam/desc_constr.h @@ -5,6 +5,7 @@ */ #include "desc.h" +#include "regs.h" #define IMMEDIATE (1 << 23) #define CAAM_CMD_SZ sizeof(u32) @@ -30,9 +31,11 @@ LDST_SRCDST_WORD_DECOCTRL | \ (LDOFF_ENABLE_AUTO_NFIFO << LDST_OFFSET_SHIFT)) +extern bool caam_little_end; + static inline int desc_len(u32 *desc) { - return *desc & HDR_DESCLEN_MASK; + return caam32_to_cpu(*desc) & HDR_DESCLEN_MASK; } static inline int desc_bytes(void *desc) @@ -52,7 +55,7 @@ static inline void *sh_desc_pdb(u32 *desc) static inline void init_desc(u32 *desc, u32 options) { - *desc = (options | HDR_ONE) + 1; + *desc = cpu_to_caam32((options | HDR_ONE) + 1); } static inline void init_sh_desc(u32 *desc, u32 options) @@ -74,13 +77,21 @@ static inline void init_job_desc(u32 *desc, u32 options) init_desc(desc, CMD_DESC_HDR | options); } +static inline void init_job_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes) +{ + u32 pdb_len = (pdb_bytes + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ; + + init_job_desc(desc, (((pdb_len + 1) << HDR_START_IDX_SHIFT)) | options); +} + static inline void append_ptr(u32 *desc, dma_addr_t ptr) { dma_addr_t *offset = (dma_addr_t *)desc_end(desc); - *offset = ptr; + *offset = cpu_to_caam_dma(ptr); - (*desc) += CAAM_PTR_SZ / CAAM_CMD_SZ; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + + CAAM_PTR_SZ / CAAM_CMD_SZ); } static inline void init_job_desc_shared(u32 *desc, dma_addr_t ptr, int len, @@ -99,16 +110,17 @@ static inline void append_data(u32 *desc, void *data, int len) if (len) /* avoid sparse warning: memcpy with byte count of 0 */ memcpy(offset, data, len); - (*desc) += (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + + (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ); } static inline void append_cmd(u32 *desc, u32 command) { u32 *cmd = desc_end(desc); - *cmd = command; + *cmd = cpu_to_caam32(command); - (*desc)++; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + 1); } #define append_u32 append_cmd @@ -117,16 +129,22 @@ static inline void append_u64(u32 *desc, u64 data) { u32 *offset = desc_end(desc); - *offset = upper_32_bits(data); - *(++offset) = lower_32_bits(data); + /* Only 32-bit alignment is guaranteed in descriptor buffer */ + if (caam_little_end) { + *offset = cpu_to_caam32(lower_32_bits(data)); + *(++offset) = cpu_to_caam32(upper_32_bits(data)); + } else { + *offset = cpu_to_caam32(upper_32_bits(data)); + *(++offset) = cpu_to_caam32(lower_32_bits(data)); + } - (*desc) += 2; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + 2); } /* Write command without affecting header, and return pointer to next word */ static inline u32 *write_cmd(u32 *desc, u32 command) { - *desc = command; + *desc = cpu_to_caam32(command); return desc + 1; } @@ -168,14 +186,17 @@ APPEND_CMD_RET(move, MOVE) static inline void set_jump_tgt_here(u32 *desc, u32 *jump_cmd) { - *jump_cmd = *jump_cmd | (desc_len(desc) - (jump_cmd - desc)); + *jump_cmd = cpu_to_caam32(caam32_to_cpu(*jump_cmd) | + (desc_len(desc) - (jump_cmd - desc))); } static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd) { - *move_cmd &= ~MOVE_OFFSET_MASK; - *move_cmd = *move_cmd | ((desc_len(desc) << (MOVE_OFFSET_SHIFT + 2)) & - MOVE_OFFSET_MASK); + u32 val = caam32_to_cpu(*move_cmd); + + val &= ~MOVE_OFFSET_MASK; + val |= (desc_len(desc) << (MOVE_OFFSET_SHIFT + 2)) & MOVE_OFFSET_MASK; + *move_cmd = cpu_to_caam32(val); } #define APPEND_CMD(cmd, op) \ diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 5ef4be22e..a81f551ac 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -31,7 +31,7 @@ static int caam_reset_hw_jr(struct device *dev) * mask interrupts since we are going to poll * for reset completion status */ - setbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, 0, JRCFG_IMSK); /* initiate flush (required prior to reset) */ wr_reg32(&jrp->rregs->jrcommand, JRCR_RESET); @@ -57,7 +57,7 @@ static int caam_reset_hw_jr(struct device *dev) } /* unmask interrupts */ - clrbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0); return 0; } @@ -147,7 +147,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev) } /* mask valid interrupts */ - setbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, 0, JRCFG_IMSK); /* Have valid interrupt at this point, just ACK and trigger */ wr_reg32(&jrp->rregs->jrintstatus, irqstate); @@ -182,7 +182,7 @@ static void caam_jr_dequeue(unsigned long devarg) sw_idx = (tail + i) & (JOBR_DEPTH - 1); if (jrp->outring[hw_idx].desc == - jrp->entinfo[sw_idx].desc_addr_dma) + caam_dma_to_cpu(jrp->entinfo[sw_idx].desc_addr_dma)) break; /* found */ } /* we should never fail to find a matching descriptor */ @@ -200,7 +200,7 @@ static void caam_jr_dequeue(unsigned long devarg) usercall = jrp->entinfo[sw_idx].callbk; userarg = jrp->entinfo[sw_idx].cbkarg; userdesc = jrp->entinfo[sw_idx].desc_addr_virt; - userstatus = jrp->outring[hw_idx].jrstatus; + userstatus = caam32_to_cpu(jrp->outring[hw_idx].jrstatus); /* * Make sure all information from the job has been obtained @@ -236,7 +236,7 @@ static void caam_jr_dequeue(unsigned long devarg) } /* reenable / unmask IRQs */ - clrbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0); } /** @@ -330,7 +330,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, int head, tail, desc_size; dma_addr_t desc_dma; - desc_size = (*desc & HDR_JD_LENGTH_MASK) * sizeof(u32); + desc_size = (caam32_to_cpu(*desc) & HDR_JD_LENGTH_MASK) * sizeof(u32); desc_dma = dma_map_single(dev, desc, desc_size, DMA_TO_DEVICE); if (dma_mapping_error(dev, desc_dma)) { dev_err(dev, "caam_jr_enqueue(): can't map jobdesc\n"); @@ -356,7 +356,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, head_entry->cbkarg = areq; head_entry->desc_addr_dma = desc_dma; - jrp->inpring[jrp->inp_ring_write_index] = desc_dma; + jrp->inpring[jrp->inp_ring_write_index] = cpu_to_caam_dma(desc_dma); /* * Guarantee that the descriptor's DMA address has been written to @@ -444,9 +444,9 @@ static int caam_jr_init(struct device *dev) spin_lock_init(&jrp->outlock); /* Select interrupt coalescing parameters */ - setbits32(&jrp->rregs->rconfig_lo, JOBR_INTC | - (JOBR_INTC_COUNT_THLD << JRCFG_ICDCT_SHIFT) | - (JOBR_INTC_TIME_THLD << JRCFG_ICTT_SHIFT)); + clrsetbits_32(&jrp->rregs->rconfig_lo, 0, JOBR_INTC | + (JOBR_INTC_COUNT_THLD << JRCFG_ICDCT_SHIFT) | + (JOBR_INTC_TIME_THLD << JRCFG_ICTT_SHIFT)); return 0; diff --git a/drivers/crypto/caam/pdb.h b/drivers/crypto/caam/pdb.h index 3a87c0cf8..aaa00dd1c 100644 --- a/drivers/crypto/caam/pdb.h +++ b/drivers/crypto/caam/pdb.h @@ -1,18 +1,19 @@ /* * CAAM Protocol Data Block (PDB) definition header file * - * Copyright 2008-2012 Freescale Semiconductor, Inc. + * Copyright 2008-2016 Freescale Semiconductor, Inc. * */ #ifndef CAAM_PDB_H #define CAAM_PDB_H +#include "compat.h" /* * PDB- IPSec ESP Header Modification Options */ -#define PDBHMO_ESP_DECAP_SHIFT 12 -#define PDBHMO_ESP_ENCAP_SHIFT 4 +#define PDBHMO_ESP_DECAP_SHIFT 28 +#define PDBHMO_ESP_ENCAP_SHIFT 28 /* * Encap and Decap - Decrement TTL (Hop Limit) - Based on the value of the * Options Byte IP version (IPvsn) field: @@ -32,12 +33,23 @@ */ #define PDBHMO_ESP_DFBIT (0x04 << PDBHMO_ESP_ENCAP_SHIFT) +#define PDBNH_ESP_ENCAP_SHIFT 16 +#define PDBNH_ESP_ENCAP_MASK (0xff << PDBNH_ESP_ENCAP_SHIFT) + +#define PDBHDRLEN_ESP_DECAP_SHIFT 16 +#define PDBHDRLEN_MASK (0x0fff << PDBHDRLEN_ESP_DECAP_SHIFT) + +#define PDB_NH_OFFSET_SHIFT 8 +#define PDB_NH_OFFSET_MASK (0xff << PDB_NH_OFFSET_SHIFT) + /* * PDB - IPSec ESP Encap/Decap Options */ #define PDBOPTS_ESP_ARSNONE 0x00 /* no antireplay window */ #define PDBOPTS_ESP_ARS32 0x40 /* 32-entry antireplay window */ +#define PDBOPTS_ESP_ARS128 0x80 /* 128-entry antireplay window */ #define PDBOPTS_ESP_ARS64 0xc0 /* 64-entry antireplay window */ +#define PDBOPTS_ESP_ARS_MASK 0xc0 /* antireplay window mask */ #define PDBOPTS_ESP_IVSRC 0x20 /* IV comes from internal random gen */ #define PDBOPTS_ESP_ESN 0x10 /* extended sequence included */ #define PDBOPTS_ESP_OUTFMT 0x08 /* output only decapsulation (decap) */ @@ -54,35 +66,73 @@ /* * General IPSec encap/decap PDB definitions */ + +/** + * ipsec_encap_cbc - PDB part for IPsec CBC encapsulation + * @iv: 16-byte array initialization vector + */ struct ipsec_encap_cbc { - u32 iv[4]; + u8 iv[16]; }; +/** + * ipsec_encap_ctr - PDB part for IPsec CTR encapsulation + * @ctr_nonce: 4-byte array nonce + * @ctr_initial: initial count constant + * @iv: initialization vector + */ struct ipsec_encap_ctr { - u32 ctr_nonce; + u8 ctr_nonce[4]; u32 ctr_initial; - u32 iv[2]; + u64 iv; }; +/** + * ipsec_encap_ccm - PDB part for IPsec CCM encapsulation + * @salt: 3-byte array salt (lower 24 bits) + * @ccm_opt: CCM algorithm options - MSB-LSB description: + * b0_flags (8b) - CCM B0; use 0x5B for 8-byte ICV, 0x6B for 12-byte ICV, + * 0x7B for 16-byte ICV (cf. RFC4309, RFC3610) + * ctr_flags (8b) - counter flags; constant equal to 0x3 + * ctr_initial (16b) - initial count constant + * @iv: initialization vector + */ struct ipsec_encap_ccm { - u32 salt; /* lower 24 bits */ - u8 b0_flags; - u8 ctr_flags; - u16 ctr_initial; - u32 iv[2]; + u8 salt[4]; + u32 ccm_opt; + u64 iv; }; +/** + * ipsec_encap_gcm - PDB part for IPsec GCM encapsulation + * @salt: 3-byte array salt (lower 24 bits) + * @rsvd: reserved, do not use + * @iv: initialization vector + */ struct ipsec_encap_gcm { - u32 salt; /* lower 24 bits */ + u8 salt[4]; u32 rsvd1; - u32 iv[2]; + u64 iv; }; +/** + * ipsec_encap_pdb - PDB for IPsec encapsulation + * @options: MSB-LSB description + * hmo (header manipulation options) - 4b + * reserved - 4b + * next header - 8b + * next header offset - 8b + * option flags (depend on selected algorithm) - 8b + * @seq_num_ext_hi: (optional) IPsec Extended Sequence Number (ESN) + * @seq_num: IPsec sequence number + * @spi: IPsec SPI (Security Parameters Index) + * @ip_hdr_len: optional IP Header length (in bytes) + * reserved - 16b + * Opt. IP Hdr Len - 16b + * @ip_hdr: optional IP Header content + */ struct ipsec_encap_pdb { - u8 hmo_rsvd; - u8 ip_nh; - u8 ip_nh_offset; - u8 options; + u32 options; u32 seq_num_ext_hi; u32 seq_num; union { @@ -92,36 +142,65 @@ struct ipsec_encap_pdb { struct ipsec_encap_gcm gcm; }; u32 spi; - u16 rsvd1; - u16 ip_hdr_len; - u32 ip_hdr[0]; /* optional IP Header content */ + u32 ip_hdr_len; + u32 ip_hdr[0]; }; +/** + * ipsec_decap_cbc - PDB part for IPsec CBC decapsulation + * @rsvd: reserved, do not use + */ struct ipsec_decap_cbc { u32 rsvd[2]; }; +/** + * ipsec_decap_ctr - PDB part for IPsec CTR decapsulation + * @ctr_nonce: 4-byte array nonce + * @ctr_initial: initial count constant + */ struct ipsec_decap_ctr { - u32 salt; + u8 ctr_nonce[4]; u32 ctr_initial; }; +/** + * ipsec_decap_ccm - PDB part for IPsec CCM decapsulation + * @salt: 3-byte salt (lower 24 bits) + * @ccm_opt: CCM algorithm options - MSB-LSB description: + * b0_flags (8b) - CCM B0; use 0x5B for 8-byte ICV, 0x6B for 12-byte ICV, + * 0x7B for 16-byte ICV (cf. RFC4309, RFC3610) + * ctr_flags (8b) - counter flags; constant equal to 0x3 + * ctr_initial (16b) - initial count constant + */ struct ipsec_decap_ccm { - u32 salt; - u8 iv_flags; - u8 ctr_flags; - u16 ctr_initial; + u8 salt[4]; + u32 ccm_opt; }; +/** + * ipsec_decap_gcm - PDB part for IPsec GCN decapsulation + * @salt: 4-byte salt + * @rsvd: reserved, do not use + */ struct ipsec_decap_gcm { - u32 salt; + u8 salt[4]; u32 resvd; }; +/** + * ipsec_decap_pdb - PDB for IPsec decapsulation + * @options: MSB-LSB description + * hmo (header manipulation options) - 4b + * IP header length - 12b + * next header offset - 8b + * option flags (depend on selected algorithm) - 8b + * @seq_num_ext_hi: (optional) IPsec Extended Sequence Number (ESN) + * @seq_num: IPsec sequence number + * @anti_replay: Anti-replay window; size depends on ARS (option flags) + */ struct ipsec_decap_pdb { - u16 hmo_ip_hdr_len; - u8 ip_nh_offset; - u8 options; + u32 options; union { struct ipsec_decap_cbc cbc; struct ipsec_decap_ctr ctr; @@ -130,8 +209,7 @@ struct ipsec_decap_pdb { }; u32 seq_num_ext_hi; u32 seq_num; - u32 anti_replay[2]; - u32 end_index[0]; + __be32 anti_replay[4]; }; /* @@ -399,4 +477,52 @@ struct dsa_verify_pdb { u8 *ab; /* only used if ECC processing */ }; +/* RSA Protocol Data Block */ +#define RSA_PDB_SGF_SHIFT 28 +#define RSA_PDB_E_SHIFT 12 +#define RSA_PDB_E_MASK (0xFFF << RSA_PDB_E_SHIFT) +#define RSA_PDB_D_SHIFT 12 +#define RSA_PDB_D_MASK (0xFFF << RSA_PDB_D_SHIFT) + +#define RSA_PDB_SGF_F (0x8 << RSA_PDB_SGF_SHIFT) +#define RSA_PDB_SGF_G (0x4 << RSA_PDB_SGF_SHIFT) +#define RSA_PRIV_PDB_SGF_F (0x4 << RSA_PDB_SGF_SHIFT) +#define RSA_PRIV_PDB_SGF_G (0x8 << RSA_PDB_SGF_SHIFT) + +#define RSA_PRIV_KEY_FRM_1 0 + +/** + * RSA Encrypt Protocol Data Block + * @sgf: scatter-gather field + * @f_dma: dma address of input data + * @g_dma: dma address of encrypted output data + * @n_dma: dma address of RSA modulus + * @e_dma: dma address of RSA public exponent + * @f_len: length in octets of the input data + */ +struct rsa_pub_pdb { + u32 sgf; + dma_addr_t f_dma; + dma_addr_t g_dma; + dma_addr_t n_dma; + dma_addr_t e_dma; + u32 f_len; +} __packed; + +/** + * RSA Decrypt PDB - Private Key Form #1 + * @sgf: scatter-gather field + * @g_dma: dma address of encrypted input data + * @f_dma: dma address of output data + * @n_dma: dma address of RSA modulus + * @d_dma: dma address of RSA private exponent + */ +struct rsa_priv_f1_pdb { + u32 sgf; + dma_addr_t g_dma; + dma_addr_t f_dma; + dma_addr_t n_dma; + dma_addr_t d_dma; +} __packed; + #endif diff --git a/drivers/crypto/caam/pkc_desc.c b/drivers/crypto/caam/pkc_desc.c new file mode 100644 index 000000000..4e4183e61 --- /dev/null +++ b/drivers/crypto/caam/pkc_desc.c @@ -0,0 +1,36 @@ +/* + * caam - Freescale FSL CAAM support for Public Key Cryptography descriptors + * + * Copyright 2016 Freescale Semiconductor, Inc. + * + * There is no Shared Descriptor for PKC so that the Job Descriptor must carry + * all the desired key parameters, input and output pointers. + */ +#include "caampkc.h" +#include "desc_constr.h" + +/* Descriptor for RSA Public operation */ +void init_rsa_pub_desc(u32 *desc, struct rsa_pub_pdb *pdb) +{ + init_job_desc_pdb(desc, 0, sizeof(*pdb)); + append_cmd(desc, pdb->sgf); + append_ptr(desc, pdb->f_dma); + append_ptr(desc, pdb->g_dma); + append_ptr(desc, pdb->n_dma); + append_ptr(desc, pdb->e_dma); + append_cmd(desc, pdb->f_len); + append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSAENC_PUBKEY); +} + +/* Descriptor for RSA Private operation - Private Key Form #1 */ +void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb) +{ + init_job_desc_pdb(desc, 0, sizeof(*pdb)); + append_cmd(desc, pdb->sgf); + append_ptr(desc, pdb->g_dma); + append_ptr(desc, pdb->f_dma); + append_ptr(desc, pdb->n_dma); + append_ptr(desc, pdb->d_dma); + append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSADEC_PRVKEY | + RSA_PRIV_KEY_FRM_1); +} diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 0ba9c4059..b3c5016f6 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -8,6 +8,7 @@ #define REGS_H #include <linux/types.h> +#include <linux/bitops.h> #include <linux/io.h> /* @@ -65,46 +66,56 @@ * */ -#ifdef CONFIG_ARM -/* These are common macros for Power, put here for ARM */ -#define setbits32(_addr, _v) writel((readl(_addr) | (_v)), (_addr)) -#define clrbits32(_addr, _v) writel((readl(_addr) & ~(_v)), (_addr)) +extern bool caam_little_end; -#define out_arch(type, endian, a, v) __raw_write##type(cpu_to_##endian(v), a) -#define in_arch(type, endian, a) endian##_to_cpu(__raw_read##type(a)) +#define caam_to_cpu(len) \ +static inline u##len caam##len ## _to_cpu(u##len val) \ +{ \ + if (caam_little_end) \ + return le##len ## _to_cpu(val); \ + else \ + return be##len ## _to_cpu(val); \ +} -#define out_le32(a, v) out_arch(l, le32, a, v) -#define in_le32(a) in_arch(l, le32, a) +#define cpu_to_caam(len) \ +static inline u##len cpu_to_caam##len(u##len val) \ +{ \ + if (caam_little_end) \ + return cpu_to_le##len(val); \ + else \ + return cpu_to_be##len(val); \ +} -#define out_be32(a, v) out_arch(l, be32, a, v) -#define in_be32(a) in_arch(l, be32, a) +caam_to_cpu(16) +caam_to_cpu(32) +caam_to_cpu(64) +cpu_to_caam(16) +cpu_to_caam(32) +cpu_to_caam(64) -#define clrsetbits(type, addr, clear, set) \ - out_##type((addr), (in_##type(addr) & ~(clear)) | (set)) +static inline void wr_reg32(void __iomem *reg, u32 data) +{ + if (caam_little_end) + iowrite32(data, reg); + else + iowrite32be(data, reg); +} -#define clrsetbits_be32(addr, clear, set) clrsetbits(be32, addr, clear, set) -#define clrsetbits_le32(addr, clear, set) clrsetbits(le32, addr, clear, set) -#endif +static inline u32 rd_reg32(void __iomem *reg) +{ + if (caam_little_end) + return ioread32(reg); -#ifdef __BIG_ENDIAN -#define wr_reg32(reg, data) out_be32(reg, data) -#define rd_reg32(reg) in_be32(reg) -#define clrsetbits_32(addr, clear, set) clrsetbits_be32(addr, clear, set) -#ifdef CONFIG_64BIT -#define wr_reg64(reg, data) out_be64(reg, data) -#define rd_reg64(reg) in_be64(reg) -#endif -#else -#ifdef __LITTLE_ENDIAN -#define wr_reg32(reg, data) __raw_writel(data, reg) -#define rd_reg32(reg) __raw_readl(reg) -#define clrsetbits_32(addr, clear, set) clrsetbits_le32(addr, clear, set) -#ifdef CONFIG_64BIT -#define wr_reg64(reg, data) __raw_writeq(data, reg) -#define rd_reg64(reg) __raw_readq(reg) -#endif -#endif -#endif + return ioread32be(reg); +} + +static inline void clrsetbits_32(void __iomem *reg, u32 clear, u32 set) +{ + if (caam_little_end) + iowrite32((ioread32(reg) & ~clear) | set, reg); + else + iowrite32be((ioread32be(reg) & ~clear) | set, reg); +} /* * The only users of these wr/rd_reg64 functions is the Job Ring (JR). @@ -123,29 +134,67 @@ * base + 0x0000 : least-significant 32 bits * base + 0x0004 : most-significant 32 bits */ +#ifdef CONFIG_64BIT +static inline void wr_reg64(void __iomem *reg, u64 data) +{ + if (caam_little_end) + iowrite64(data, reg); + else + iowrite64be(data, reg); +} -#ifndef CONFIG_64BIT -#if !defined(CONFIG_CRYPTO_DEV_FSL_CAAM_LE) || \ - defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX) -#define REG64_MS32(reg) ((u32 __iomem *)(reg)) -#define REG64_LS32(reg) ((u32 __iomem *)(reg) + 1) -#else -#define REG64_MS32(reg) ((u32 __iomem *)(reg) + 1) -#define REG64_LS32(reg) ((u32 __iomem *)(reg)) -#endif - -static inline void wr_reg64(u64 __iomem *reg, u64 data) +static inline u64 rd_reg64(void __iomem *reg) { - wr_reg32(REG64_MS32(reg), data >> 32); - wr_reg32(REG64_LS32(reg), data); + if (caam_little_end) + return ioread64(reg); + else + return ioread64be(reg); } -static inline u64 rd_reg64(u64 __iomem *reg) +#else /* CONFIG_64BIT */ +static inline void wr_reg64(void __iomem *reg, u64 data) { - return ((u64)rd_reg32(REG64_MS32(reg)) << 32 | - (u64)rd_reg32(REG64_LS32(reg))); +#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX + if (caam_little_end) { + wr_reg32((u32 __iomem *)(reg) + 1, data >> 32); + wr_reg32((u32 __iomem *)(reg), data); + } else +#endif + { + wr_reg32((u32 __iomem *)(reg), data >> 32); + wr_reg32((u32 __iomem *)(reg) + 1, data); + } } + +static inline u64 rd_reg64(void __iomem *reg) +{ +#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX + if (caam_little_end) + return ((u64)rd_reg32((u32 __iomem *)(reg) + 1) << 32 | + (u64)rd_reg32((u32 __iomem *)(reg))); + else #endif + return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 | + (u64)rd_reg32((u32 __iomem *)(reg) + 1)); +} +#endif /* CONFIG_64BIT */ + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT +#ifdef CONFIG_SOC_IMX7D +#define cpu_to_caam_dma(value) \ + (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \ + (u64)cpu_to_caam32(upper_32_bits(value))) +#define caam_dma_to_cpu(value) \ + (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | \ + (u64)caam32_to_cpu(upper_32_bits(value))) +#else +#define cpu_to_caam_dma(value) cpu_to_caam64(value) +#define caam_dma_to_cpu(value) caam64_to_cpu(value) +#endif /* CONFIG_SOC_IMX7D */ +#else +#define cpu_to_caam_dma(value) cpu_to_caam32(value) +#define caam_dma_to_cpu(value) caam32_to_cpu(value) +#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */ /* * jr_outentry @@ -249,6 +298,8 @@ struct caam_perfmon { u32 faultliodn; /* FALR - Fault Address LIODN */ u32 faultdetail; /* FADR - Fault Addr Detail */ u32 rsvd2; +#define CSTA_PLEND BIT(10) +#define CSTA_ALT_PLEND BIT(18) u32 status; /* CSTA - CAAM Status */ u64 rsvd3; diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h index 12ec6616e..19dc64fed 100644 --- a/drivers/crypto/caam/sg_sw_sec4.h +++ b/drivers/crypto/caam/sg_sw_sec4.h @@ -5,18 +5,19 @@ * */ +#include "regs.h" + struct sec4_sg_entry; /* * convert single dma address to h/w link table format */ static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr, - dma_addr_t dma, u32 len, u32 offset) + dma_addr_t dma, u32 len, u16 offset) { - sec4_sg_ptr->ptr = dma; - sec4_sg_ptr->len = len; - sec4_sg_ptr->buf_pool_id = 0; - sec4_sg_ptr->offset = offset; + sec4_sg_ptr->ptr = cpu_to_caam_dma(dma); + sec4_sg_ptr->len = cpu_to_caam32(len); + sec4_sg_ptr->bpid_offset = cpu_to_caam32(offset & SEC4_SG_OFFSET_MASK); #ifdef DEBUG print_hex_dump(KERN_ERR, "sec4_sg_ptr@: ", DUMP_PREFIX_ADDRESS, 16, 4, sec4_sg_ptr, @@ -30,7 +31,7 @@ static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr, */ static inline struct sec4_sg_entry * sg_to_sec4_sg(struct scatterlist *sg, int sg_count, - struct sec4_sg_entry *sec4_sg_ptr, u32 offset) + struct sec4_sg_entry *sec4_sg_ptr, u16 offset) { while (sg_count) { dma_to_sec4_sg_one(sec4_sg_ptr, sg_dma_address(sg), @@ -48,10 +49,10 @@ sg_to_sec4_sg(struct scatterlist *sg, int sg_count, */ static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count, struct sec4_sg_entry *sec4_sg_ptr, - u32 offset) + u16 offset) { sec4_sg_ptr = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset); - sec4_sg_ptr->len |= SEC4_SG_LEN_FIN; + sec4_sg_ptr->len |= cpu_to_caam32(SEC4_SG_LEN_FIN); } static inline struct sec4_sg_entry *sg_to_sec4_sg_len( diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c index 0d0d4529e..58a4244b4 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c @@ -14,9 +14,8 @@ #include <linux/sched.h> #include <linux/delay.h> #include <linux/scatterlist.h> -#include <linux/crypto.h> -#include <crypto/algapi.h> #include <crypto/aes.h> +#include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include "ccp-crypto.h" @@ -110,15 +109,12 @@ static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key, ctx->u.aes.key_len = key_len / 2; sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); - return crypto_ablkcipher_setkey(ctx->u.aes.tfm_ablkcipher, key, - key_len); + return crypto_skcipher_setkey(ctx->u.aes.tfm_skcipher, key, key_len); } static int ccp_aes_xts_crypt(struct ablkcipher_request *req, unsigned int encrypt) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); unsigned int unit; @@ -146,14 +142,19 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req, if ((unit_size == CCP_XTS_AES_UNIT_SIZE__LAST) || (ctx->u.aes.key_len != AES_KEYSIZE_128)) { + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->u.aes.tfm_skcipher); + /* Use the fallback to process the request for any * unsupported unit sizes or key sizes */ - ablkcipher_request_set_tfm(req, ctx->u.aes.tfm_ablkcipher); - ret = (encrypt) ? crypto_ablkcipher_encrypt(req) : - crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); - + skcipher_request_set_tfm(subreq, ctx->u.aes.tfm_skcipher); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + ret = encrypt ? crypto_skcipher_encrypt(subreq) : + crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return ret; } @@ -192,23 +193,21 @@ static int ccp_aes_xts_decrypt(struct ablkcipher_request *req) static int ccp_aes_xts_cra_init(struct crypto_tfm *tfm) { struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); - struct crypto_ablkcipher *fallback_tfm; + struct crypto_skcipher *fallback_tfm; ctx->complete = ccp_aes_xts_complete; ctx->u.aes.key_len = 0; - fallback_tfm = crypto_alloc_ablkcipher(crypto_tfm_alg_name(tfm), 0, - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK); + fallback_tfm = crypto_alloc_skcipher("xts(aes)", 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(fallback_tfm)) { - pr_warn("could not load fallback driver %s\n", - crypto_tfm_alg_name(tfm)); + pr_warn("could not load fallback driver xts(aes)\n"); return PTR_ERR(fallback_tfm); } - ctx->u.aes.tfm_ablkcipher = fallback_tfm; + ctx->u.aes.tfm_skcipher = fallback_tfm; - tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx) + - fallback_tfm->base.crt_ablkcipher.reqsize; + tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx); return 0; } @@ -217,9 +216,7 @@ static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm) { struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->u.aes.tfm_ablkcipher) - crypto_free_ablkcipher(ctx->u.aes.tfm_ablkcipher); - ctx->u.aes.tfm_ablkcipher = NULL; + crypto_free_skcipher(ctx->u.aes.tfm_skcipher); } static int ccp_register_aes_xts_alg(struct list_head *head, diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h index a326ec20b..8335b32e8 100644 --- a/drivers/crypto/ccp/ccp-crypto.h +++ b/drivers/crypto/ccp/ccp-crypto.h @@ -17,7 +17,6 @@ #include <linux/wait.h> #include <linux/pci.h> #include <linux/ccp.h> -#include <linux/crypto.h> #include <crypto/algapi.h> #include <crypto/aes.h> #include <crypto/ctr.h> @@ -69,7 +68,7 @@ static inline struct ccp_crypto_ahash_alg * /***** AES related defines *****/ struct ccp_aes_ctx { /* Fallback cipher for XTS with unsupported unit sizes */ - struct crypto_ablkcipher *tfm_ablkcipher; + struct crypto_skcipher *tfm_skcipher; /* Cipher used to generate CMAC K1/K2 keys */ struct crypto_cipher *tfm_cipher; diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index e8ef9fd24..d64af8625 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -31,22 +31,42 @@ #include "cesa.h" +/* Limit of the crypto queue before reaching the backlog */ +#define CESA_CRYPTO_DEFAULT_MAX_QLEN 128 + static int allhwsupport = !IS_ENABLED(CONFIG_CRYPTO_DEV_MV_CESA); module_param_named(allhwsupport, allhwsupport, int, 0444); MODULE_PARM_DESC(allhwsupport, "Enable support for all hardware (even it if overlaps with the mv_cesa driver)"); struct mv_cesa_dev *cesa_dev; -static void mv_cesa_dequeue_req_unlocked(struct mv_cesa_engine *engine) +struct crypto_async_request * +mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine, + struct crypto_async_request **backlog) { - struct crypto_async_request *req, *backlog; + struct crypto_async_request *req; + + *backlog = crypto_get_backlog(&engine->queue); + req = crypto_dequeue_request(&engine->queue); + + if (!req) + return NULL; + + return req; +} + +static void mv_cesa_rearm_engine(struct mv_cesa_engine *engine) +{ + struct crypto_async_request *req = NULL, *backlog = NULL; struct mv_cesa_ctx *ctx; - spin_lock_bh(&cesa_dev->lock); - backlog = crypto_get_backlog(&cesa_dev->queue); - req = crypto_dequeue_request(&cesa_dev->queue); - engine->req = req; - spin_unlock_bh(&cesa_dev->lock); + + spin_lock_bh(&engine->lock); + if (!engine->req) { + req = mv_cesa_dequeue_req_locked(engine, &backlog); + engine->req = req; + } + spin_unlock_bh(&engine->lock); if (!req) return; @@ -55,8 +75,47 @@ static void mv_cesa_dequeue_req_unlocked(struct mv_cesa_engine *engine) backlog->complete(backlog, -EINPROGRESS); ctx = crypto_tfm_ctx(req->tfm); - ctx->ops->prepare(req, engine); ctx->ops->step(req); + + return; +} + +static int mv_cesa_std_process(struct mv_cesa_engine *engine, u32 status) +{ + struct crypto_async_request *req; + struct mv_cesa_ctx *ctx; + int res; + + req = engine->req; + ctx = crypto_tfm_ctx(req->tfm); + res = ctx->ops->process(req, status); + + if (res == 0) { + ctx->ops->complete(req); + mv_cesa_engine_enqueue_complete_request(engine, req); + } else if (res == -EINPROGRESS) { + ctx->ops->step(req); + } + + return res; +} + +static int mv_cesa_int_process(struct mv_cesa_engine *engine, u32 status) +{ + if (engine->chain.first && engine->chain.last) + return mv_cesa_tdma_process(engine, status); + + return mv_cesa_std_process(engine, status); +} + +static inline void +mv_cesa_complete_req(struct mv_cesa_ctx *ctx, struct crypto_async_request *req, + int res) +{ + ctx->ops->cleanup(req); + local_bh_disable(); + req->complete(req, res); + local_bh_enable(); } static irqreturn_t mv_cesa_int(int irq, void *priv) @@ -83,49 +142,55 @@ static irqreturn_t mv_cesa_int(int irq, void *priv) writel(~status, engine->regs + CESA_SA_FPGA_INT_STATUS); writel(~status, engine->regs + CESA_SA_INT_STATUS); + /* Process fetched requests */ + res = mv_cesa_int_process(engine, status & mask); ret = IRQ_HANDLED; + spin_lock_bh(&engine->lock); req = engine->req; + if (res != -EINPROGRESS) + engine->req = NULL; spin_unlock_bh(&engine->lock); - if (req) { - ctx = crypto_tfm_ctx(req->tfm); - res = ctx->ops->process(req, status & mask); - if (res != -EINPROGRESS) { - spin_lock_bh(&engine->lock); - engine->req = NULL; - mv_cesa_dequeue_req_unlocked(engine); - spin_unlock_bh(&engine->lock); - ctx->ops->cleanup(req); - local_bh_disable(); - req->complete(req, res); - local_bh_enable(); - } else { - ctx->ops->step(req); - } + + ctx = crypto_tfm_ctx(req->tfm); + + if (res && res != -EINPROGRESS) + mv_cesa_complete_req(ctx, req, res); + + /* Launch the next pending request */ + mv_cesa_rearm_engine(engine); + + /* Iterate over the complete queue */ + while (true) { + req = mv_cesa_engine_dequeue_complete_request(engine); + if (!req) + break; + + mv_cesa_complete_req(ctx, req, 0); } } return ret; } -int mv_cesa_queue_req(struct crypto_async_request *req) +int mv_cesa_queue_req(struct crypto_async_request *req, + struct mv_cesa_req *creq) { int ret; - int i; + struct mv_cesa_engine *engine = creq->engine; - spin_lock_bh(&cesa_dev->lock); - ret = crypto_enqueue_request(&cesa_dev->queue, req); - spin_unlock_bh(&cesa_dev->lock); + spin_lock_bh(&engine->lock); + ret = crypto_enqueue_request(&engine->queue, req); + if ((mv_cesa_req_get_type(creq) == CESA_DMA_REQ) && + (ret == -EINPROGRESS || + (ret == -EBUSY && req->flags & CRYPTO_TFM_REQ_MAY_BACKLOG))) + mv_cesa_tdma_chain(engine, creq); + spin_unlock_bh(&engine->lock); if (ret != -EINPROGRESS) return ret; - for (i = 0; i < cesa_dev->caps->nengines; i++) { - spin_lock_bh(&cesa_dev->engines[i].lock); - if (!cesa_dev->engines[i].req) - mv_cesa_dequeue_req_unlocked(&cesa_dev->engines[i]); - spin_unlock_bh(&cesa_dev->engines[i].lock); - } + mv_cesa_rearm_engine(engine); return -EINPROGRESS; } @@ -309,6 +374,10 @@ static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa) if (!dma->padding_pool) return -ENOMEM; + dma->iv_pool = dmam_pool_create("cesa_iv", dev, 16, 1, 0); + if (!dma->iv_pool) + return -ENOMEM; + cesa->dma = dma; return 0; @@ -416,7 +485,7 @@ static int mv_cesa_probe(struct platform_device *pdev) return -ENOMEM; spin_lock_init(&cesa->lock); - crypto_init_queue(&cesa->queue, 50); + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); cesa->regs = devm_ioremap_resource(dev, res); if (IS_ERR(cesa->regs)) @@ -489,6 +558,10 @@ static int mv_cesa_probe(struct platform_device *pdev) engine); if (ret) goto err_cleanup; + + crypto_init_queue(&engine->queue, CESA_CRYPTO_DEFAULT_MAX_QLEN); + atomic_set(&engine->load, 0); + INIT_LIST_HEAD(&engine->complete_queue); } cesa_dev = cesa; diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index 74071e45a..e423d33de 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -271,10 +271,13 @@ struct mv_cesa_op_ctx { /* TDMA descriptor flags */ #define CESA_TDMA_DST_IN_SRAM BIT(31) #define CESA_TDMA_SRC_IN_SRAM BIT(30) -#define CESA_TDMA_TYPE_MSK GENMASK(29, 0) +#define CESA_TDMA_END_OF_REQ BIT(29) +#define CESA_TDMA_BREAK_CHAIN BIT(28) +#define CESA_TDMA_TYPE_MSK GENMASK(27, 0) #define CESA_TDMA_DUMMY 0 #define CESA_TDMA_DATA 1 #define CESA_TDMA_OP 2 +#define CESA_TDMA_IV 3 /** * struct mv_cesa_tdma_desc - TDMA descriptor @@ -390,6 +393,7 @@ struct mv_cesa_dev_dma { struct dma_pool *op_pool; struct dma_pool *cache_pool; struct dma_pool *padding_pool; + struct dma_pool *iv_pool; }; /** @@ -398,7 +402,6 @@ struct mv_cesa_dev_dma { * @regs: device registers * @sram_size: usable SRAM size * @lock: device lock - * @queue: crypto request queue * @engines: array of engines * @dma: dma pools * @@ -410,7 +413,6 @@ struct mv_cesa_dev { struct device *dev; unsigned int sram_size; spinlock_t lock; - struct crypto_queue queue; struct mv_cesa_engine *engines; struct mv_cesa_dev_dma *dma; }; @@ -429,6 +431,11 @@ struct mv_cesa_dev { * @int_mask: interrupt mask cache * @pool: memory pool pointing to the memory region reserved in * SRAM + * @queue: fifo of the pending crypto requests + * @load: engine load counter, useful for load balancing + * @chain: list of the current tdma descriptors being processed + * by this engine. + * @complete_queue: fifo of the processed requests by the engine * * Structure storing CESA engine information. */ @@ -444,23 +451,27 @@ struct mv_cesa_engine { size_t max_req_len; u32 int_mask; struct gen_pool *pool; + struct crypto_queue queue; + atomic_t load; + struct mv_cesa_tdma_chain chain; + struct list_head complete_queue; }; /** * struct mv_cesa_req_ops - CESA request operations - * @prepare: prepare a request to be executed on the specified engine * @process: process a request chunk result (should return 0 if the * operation, -EINPROGRESS if it needs more steps or an error * code) * @step: launch the crypto operation on the next chunk * @cleanup: cleanup the crypto request (release associated data) + * @complete: complete the request, i.e copy result or context from sram when + * needed. */ struct mv_cesa_req_ops { - void (*prepare)(struct crypto_async_request *req, - struct mv_cesa_engine *engine); int (*process)(struct crypto_async_request *req, u32 status); void (*step)(struct crypto_async_request *req); void (*cleanup)(struct crypto_async_request *req); + void (*complete)(struct crypto_async_request *req); }; /** @@ -507,21 +518,11 @@ enum mv_cesa_req_type { /** * struct mv_cesa_req - CESA request - * @type: request type * @engine: engine associated with this request + * @chain: list of tdma descriptors associated with this request */ struct mv_cesa_req { - enum mv_cesa_req_type type; struct mv_cesa_engine *engine; -}; - -/** - * struct mv_cesa_tdma_req - CESA TDMA request - * @base: base information - * @chain: TDMA chain - */ -struct mv_cesa_tdma_req { - struct mv_cesa_req base; struct mv_cesa_tdma_chain chain; }; @@ -538,13 +539,11 @@ struct mv_cesa_sg_std_iter { /** * struct mv_cesa_ablkcipher_std_req - cipher standard request - * @base: base information * @op: operation context * @offset: current operation offset * @size: size of the crypto operation */ struct mv_cesa_ablkcipher_std_req { - struct mv_cesa_req base; struct mv_cesa_op_ctx op; unsigned int offset; unsigned int size; @@ -558,34 +557,27 @@ struct mv_cesa_ablkcipher_std_req { * @dst_nents: number of entries in the dest sg list */ struct mv_cesa_ablkcipher_req { - union { - struct mv_cesa_req base; - struct mv_cesa_tdma_req dma; - struct mv_cesa_ablkcipher_std_req std; - } req; + struct mv_cesa_req base; + struct mv_cesa_ablkcipher_std_req std; int src_nents; int dst_nents; }; /** * struct mv_cesa_ahash_std_req - standard hash request - * @base: base information * @offset: current operation offset */ struct mv_cesa_ahash_std_req { - struct mv_cesa_req base; unsigned int offset; }; /** * struct mv_cesa_ahash_dma_req - DMA hash request - * @base: base information * @padding: padding buffer * @padding_dma: DMA address of the padding buffer * @cache_dma: DMA address of the cache buffer */ struct mv_cesa_ahash_dma_req { - struct mv_cesa_tdma_req base; u8 *padding; dma_addr_t padding_dma; u8 *cache; @@ -604,8 +596,8 @@ struct mv_cesa_ahash_dma_req { * @state: hash state */ struct mv_cesa_ahash_req { + struct mv_cesa_req base; union { - struct mv_cesa_req base; struct mv_cesa_ahash_dma_req dma; struct mv_cesa_ahash_std_req std; } req; @@ -623,6 +615,35 @@ struct mv_cesa_ahash_req { extern struct mv_cesa_dev *cesa_dev; + +static inline void +mv_cesa_engine_enqueue_complete_request(struct mv_cesa_engine *engine, + struct crypto_async_request *req) +{ + list_add_tail(&req->list, &engine->complete_queue); +} + +static inline struct crypto_async_request * +mv_cesa_engine_dequeue_complete_request(struct mv_cesa_engine *engine) +{ + struct crypto_async_request *req; + + req = list_first_entry_or_null(&engine->complete_queue, + struct crypto_async_request, + list); + if (req) + list_del(&req->list); + + return req; +} + + +static inline enum mv_cesa_req_type +mv_cesa_req_get_type(struct mv_cesa_req *req) +{ + return req->chain.first ? CESA_DMA_REQ : CESA_STD_REQ; +} + static inline void mv_cesa_update_op_cfg(struct mv_cesa_op_ctx *op, u32 cfg, u32 mask) { @@ -695,7 +716,32 @@ static inline bool mv_cesa_mac_op_is_first_frag(const struct mv_cesa_op_ctx *op) CESA_SA_DESC_CFG_FIRST_FRAG; } -int mv_cesa_queue_req(struct crypto_async_request *req); +int mv_cesa_queue_req(struct crypto_async_request *req, + struct mv_cesa_req *creq); + +struct crypto_async_request * +mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine, + struct crypto_async_request **backlog); + +static inline struct mv_cesa_engine *mv_cesa_select_engine(int weight) +{ + int i; + u32 min_load = U32_MAX; + struct mv_cesa_engine *selected = NULL; + + for (i = 0; i < cesa_dev->caps->nengines; i++) { + struct mv_cesa_engine *engine = cesa_dev->engines + i; + u32 load = atomic_read(&engine->load); + if (load < min_load) { + min_load = load; + selected = engine; + } + } + + atomic_add(weight, &selected->load); + + return selected; +} /* * Helper function that indicates whether a crypto request needs to be @@ -765,9 +811,9 @@ static inline bool mv_cesa_req_dma_iter_next_op(struct mv_cesa_dma_iter *iter) return iter->op_len; } -void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq); +void mv_cesa_dma_step(struct mv_cesa_req *dreq); -static inline int mv_cesa_dma_process(struct mv_cesa_tdma_req *dreq, +static inline int mv_cesa_dma_process(struct mv_cesa_req *dreq, u32 status) { if (!(status & CESA_SA_INT_ACC0_IDMA_DONE)) @@ -779,10 +825,13 @@ static inline int mv_cesa_dma_process(struct mv_cesa_tdma_req *dreq, return 0; } -void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq, +void mv_cesa_dma_prepare(struct mv_cesa_req *dreq, struct mv_cesa_engine *engine); +void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq); +void mv_cesa_tdma_chain(struct mv_cesa_engine *engine, + struct mv_cesa_req *dreq); +int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status); -void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq); static inline void mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain) @@ -790,6 +839,9 @@ mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain) memset(chain, 0, sizeof(*chain)); } +int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, + u32 size, u32 flags, gfp_t gfp_flags); + struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain, const struct mv_cesa_op_ctx *op_templ, bool skip_ctx, diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index dcf1fceb9..d19dc9614 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -70,25 +70,28 @@ mv_cesa_ablkcipher_dma_cleanup(struct ablkcipher_request *req) dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_BIDIRECTIONAL); } - mv_cesa_dma_cleanup(&creq->req.dma); + mv_cesa_dma_cleanup(&creq->base); } static inline void mv_cesa_ablkcipher_cleanup(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ablkcipher_dma_cleanup(req); } static void mv_cesa_ablkcipher_std_step(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; + struct mv_cesa_engine *engine = creq->base.engine; size_t len = min_t(size_t, req->nbytes - sreq->offset, CESA_SA_SRAM_PAYLOAD_SIZE); + mv_cesa_adjust_op(engine, &sreq->op); + memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op)); + len = sg_pcopy_to_buffer(req->src, creq->src_nents, engine->sram + CESA_SA_DATA_SRAM_OFFSET, len, sreq->offset); @@ -106,6 +109,8 @@ static void mv_cesa_ablkcipher_std_step(struct ablkcipher_request *req) mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); + BUG_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } @@ -113,8 +118,8 @@ static int mv_cesa_ablkcipher_std_process(struct ablkcipher_request *req, u32 status) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; + struct mv_cesa_engine *engine = creq->base.engine; size_t len; len = sg_pcopy_from_buffer(req->dst, creq->dst_nents, @@ -133,23 +138,12 @@ static int mv_cesa_ablkcipher_process(struct crypto_async_request *req, { struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; - int ret; - - if (creq->req.base.type == CESA_DMA_REQ) - ret = mv_cesa_dma_process(&creq->req.dma, status); - else - ret = mv_cesa_ablkcipher_std_process(ablkreq, status); - - if (ret) - return ret; + struct mv_cesa_req *basereq = &creq->base; - memcpy_fromio(ablkreq->info, - engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, - crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq))); + if (mv_cesa_req_get_type(basereq) == CESA_STD_REQ) + return mv_cesa_ablkcipher_std_process(ablkreq, status); - return 0; + return mv_cesa_dma_process(basereq, status); } static void mv_cesa_ablkcipher_step(struct crypto_async_request *req) @@ -157,8 +151,8 @@ static void mv_cesa_ablkcipher_step(struct crypto_async_request *req) struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); - if (creq->req.base.type == CESA_DMA_REQ) - mv_cesa_dma_step(&creq->req.dma); + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + mv_cesa_dma_step(&creq->base); else mv_cesa_ablkcipher_std_step(ablkreq); } @@ -167,22 +161,19 @@ static inline void mv_cesa_ablkcipher_dma_prepare(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_tdma_req *dreq = &creq->req.dma; + struct mv_cesa_req *basereq = &creq->base; - mv_cesa_dma_prepare(dreq, dreq->base.engine); + mv_cesa_dma_prepare(basereq, basereq->engine); } static inline void mv_cesa_ablkcipher_std_prepare(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; sreq->size = 0; sreq->offset = 0; - mv_cesa_adjust_op(engine, &sreq->op); - memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op)); } static inline void mv_cesa_ablkcipher_prepare(struct crypto_async_request *req, @@ -190,9 +181,9 @@ static inline void mv_cesa_ablkcipher_prepare(struct crypto_async_request *req, { struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); - creq->req.base.engine = engine; + creq->base.engine = engine; - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ablkcipher_dma_prepare(ablkreq); else mv_cesa_ablkcipher_std_prepare(ablkreq); @@ -206,11 +197,34 @@ mv_cesa_ablkcipher_req_cleanup(struct crypto_async_request *req) mv_cesa_ablkcipher_cleanup(ablkreq); } +static void +mv_cesa_ablkcipher_complete(struct crypto_async_request *req) +{ + struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); + struct mv_cesa_engine *engine = creq->base.engine; + unsigned int ivsize; + + atomic_sub(ablkreq->nbytes, &engine->load); + ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq)); + + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) { + struct mv_cesa_req *basereq; + + basereq = &creq->base; + memcpy(ablkreq->info, basereq->chain.last->data, ivsize); + } else { + memcpy_fromio(ablkreq->info, + engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, + ivsize); + } +} + static const struct mv_cesa_req_ops mv_cesa_ablkcipher_req_ops = { .step = mv_cesa_ablkcipher_step, .process = mv_cesa_ablkcipher_process, - .prepare = mv_cesa_ablkcipher_prepare, .cleanup = mv_cesa_ablkcipher_req_cleanup, + .complete = mv_cesa_ablkcipher_complete, }; static int mv_cesa_ablkcipher_cra_init(struct crypto_tfm *tfm) @@ -295,15 +309,14 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; - struct mv_cesa_tdma_req *dreq = &creq->req.dma; + struct mv_cesa_req *basereq = &creq->base; struct mv_cesa_ablkcipher_dma_iter iter; - struct mv_cesa_tdma_chain chain; bool skip_ctx = false; int ret; + unsigned int ivsize; - dreq->base.type = CESA_DMA_REQ; - dreq->chain.first = NULL; - dreq->chain.last = NULL; + basereq->chain.first = NULL; + basereq->chain.last = NULL; if (req->src != req->dst) { ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, @@ -324,13 +337,13 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, return -ENOMEM; } - mv_cesa_tdma_desc_iter_init(&chain); + mv_cesa_tdma_desc_iter_init(&basereq->chain); mv_cesa_ablkcipher_req_iter_init(&iter, req); do { struct mv_cesa_op_ctx *op; - op = mv_cesa_dma_add_op(&chain, op_templ, skip_ctx, flags); + op = mv_cesa_dma_add_op(&basereq->chain, op_templ, skip_ctx, flags); if (IS_ERR(op)) { ret = PTR_ERR(op); goto err_free_tdma; @@ -340,30 +353,38 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, mv_cesa_set_crypt_op_len(op, iter.base.op_len); /* Add input transfers */ - ret = mv_cesa_dma_add_op_transfers(&chain, &iter.base, + ret = mv_cesa_dma_add_op_transfers(&basereq->chain, &iter.base, &iter.src, flags); if (ret) goto err_free_tdma; /* Add dummy desc to launch the crypto operation */ - ret = mv_cesa_dma_add_dummy_launch(&chain, flags); + ret = mv_cesa_dma_add_dummy_launch(&basereq->chain, flags); if (ret) goto err_free_tdma; /* Add output transfers */ - ret = mv_cesa_dma_add_op_transfers(&chain, &iter.base, + ret = mv_cesa_dma_add_op_transfers(&basereq->chain, &iter.base, &iter.dst, flags); if (ret) goto err_free_tdma; } while (mv_cesa_ablkcipher_req_iter_next_op(&iter)); - dreq->chain = chain; + /* Add output data for IV */ + ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req)); + ret = mv_cesa_dma_add_iv_op(&basereq->chain, CESA_SA_CRYPT_IV_SRAM_OFFSET, + ivsize, CESA_TDMA_SRC_IN_SRAM, flags); + + if (ret) + goto err_free_tdma; + + basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ; return 0; err_free_tdma: - mv_cesa_dma_cleanup(dreq); + mv_cesa_dma_cleanup(basereq); if (req->dst != req->src) dma_unmap_sg(cesa_dev->dev, req->dst, creq->dst_nents, DMA_FROM_DEVICE); @@ -380,11 +401,13 @@ mv_cesa_ablkcipher_std_req_init(struct ablkcipher_request *req, const struct mv_cesa_op_ctx *op_templ) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; + struct mv_cesa_req *basereq = &creq->base; - sreq->base.type = CESA_STD_REQ; sreq->op = *op_templ; sreq->skip_ctx = false; + basereq->chain.first = NULL; + basereq->chain.last = NULL; return 0; } @@ -414,7 +437,6 @@ static int mv_cesa_ablkcipher_req_init(struct ablkcipher_request *req, mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_OP_CRYPT_ONLY, CESA_SA_DESC_CFG_OP_MSK); - /* TODO: add a threshold for DMA usage */ if (cesa_dev->caps->has_tdma) ret = mv_cesa_ablkcipher_dma_req_init(req, tmpl); else @@ -423,28 +445,41 @@ static int mv_cesa_ablkcipher_req_init(struct ablkcipher_request *req, return ret; } -static int mv_cesa_des_op(struct ablkcipher_request *req, - struct mv_cesa_op_ctx *tmpl) +static int mv_cesa_ablkcipher_queue_req(struct ablkcipher_request *req, + struct mv_cesa_op_ctx *tmpl) { - struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(req->base.tfm); int ret; - - mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_DES, - CESA_SA_DESC_CFG_CRYPTM_MSK); - - memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES_KEY_SIZE); + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); + struct mv_cesa_engine *engine; ret = mv_cesa_ablkcipher_req_init(req, tmpl); if (ret) return ret; - ret = mv_cesa_queue_req(&req->base); + engine = mv_cesa_select_engine(req->nbytes); + mv_cesa_ablkcipher_prepare(&req->base, engine); + + ret = mv_cesa_queue_req(&req->base, &creq->base); + if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ablkcipher_cleanup(req); return ret; } +static int mv_cesa_des_op(struct ablkcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + + mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_DES, + CESA_SA_DESC_CFG_CRYPTM_MSK); + + memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES_KEY_SIZE); + + return mv_cesa_ablkcipher_queue_req(req, tmpl); +} + static int mv_cesa_ecb_des_encrypt(struct ablkcipher_request *req) { struct mv_cesa_op_ctx tmpl; @@ -547,22 +582,13 @@ static int mv_cesa_des3_op(struct ablkcipher_request *req, struct mv_cesa_op_ctx *tmpl) { struct mv_cesa_des3_ctx *ctx = crypto_tfm_ctx(req->base.tfm); - int ret; mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_3DES, CESA_SA_DESC_CFG_CRYPTM_MSK); memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES3_EDE_KEY_SIZE); - ret = mv_cesa_ablkcipher_req_init(req, tmpl); - if (ret) - return ret; - - ret = mv_cesa_queue_req(&req->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ablkcipher_cleanup(req); - - return ret; + return mv_cesa_ablkcipher_queue_req(req, tmpl); } static int mv_cesa_ecb_des3_ede_encrypt(struct ablkcipher_request *req) @@ -673,7 +699,7 @@ static int mv_cesa_aes_op(struct ablkcipher_request *req, struct mv_cesa_op_ctx *tmpl) { struct mv_cesa_aes_ctx *ctx = crypto_tfm_ctx(req->base.tfm); - int ret, i; + int i; u32 *key; u32 cfg; @@ -696,15 +722,7 @@ static int mv_cesa_aes_op(struct ablkcipher_request *req, CESA_SA_DESC_CFG_CRYPTM_MSK | CESA_SA_DESC_CFG_AES_LEN_MSK); - ret = mv_cesa_ablkcipher_req_init(req, tmpl); - if (ret) - return ret; - - ret = mv_cesa_queue_req(&req->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ablkcipher_cleanup(req); - - return ret; + return mv_cesa_ablkcipher_queue_req(req, tmpl); } static int mv_cesa_ecb_aes_encrypt(struct ablkcipher_request *req) diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 7a5058da9..82e0f4e6e 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -103,14 +103,14 @@ static inline void mv_cesa_ahash_dma_cleanup(struct ahash_request *req) dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); mv_cesa_ahash_dma_free_cache(&creq->req.dma); - mv_cesa_dma_cleanup(&creq->req.dma.base); + mv_cesa_dma_cleanup(&creq->base); } static inline void mv_cesa_ahash_cleanup(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ahash_dma_cleanup(req); } @@ -118,7 +118,7 @@ static void mv_cesa_ahash_last_cleanup(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ahash_dma_last_cleanup(req); } @@ -157,11 +157,23 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_ahash_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_engine *engine = creq->base.engine; struct mv_cesa_op_ctx *op; unsigned int new_cache_ptr = 0; u32 frag_mode; size_t len; + unsigned int digsize; + int i; + + mv_cesa_adjust_op(engine, &creq->op_tmpl); + memcpy_toio(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl)); + + digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); + for (i = 0; i < digsize / 4; i++) + writel_relaxed(creq->state[i], engine->regs + CESA_IVDIG(i)); + + mv_cesa_adjust_op(engine, &creq->op_tmpl); + memcpy_toio(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl)); if (creq->cache_ptr) memcpy_toio(engine->sram + CESA_SA_DATA_SRAM_OFFSET, @@ -237,6 +249,8 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); + BUG_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } @@ -254,20 +268,17 @@ static int mv_cesa_ahash_std_process(struct ahash_request *req, u32 status) static inline void mv_cesa_ahash_dma_prepare(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); - struct mv_cesa_tdma_req *dreq = &creq->req.dma.base; + struct mv_cesa_req *basereq = &creq->base; - mv_cesa_dma_prepare(dreq, dreq->base.engine); + mv_cesa_dma_prepare(basereq, basereq->engine); } static void mv_cesa_ahash_std_prepare(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_ahash_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; sreq->offset = 0; - mv_cesa_adjust_op(engine, &creq->op_tmpl); - memcpy_toio(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl)); } static void mv_cesa_ahash_step(struct crypto_async_request *req) @@ -275,8 +286,8 @@ static void mv_cesa_ahash_step(struct crypto_async_request *req) struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); - if (creq->req.base.type == CESA_DMA_REQ) - mv_cesa_dma_step(&creq->req.dma.base); + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + mv_cesa_dma_step(&creq->base); else mv_cesa_ahash_std_step(ahashreq); } @@ -285,28 +296,25 @@ static int mv_cesa_ahash_process(struct crypto_async_request *req, u32 status) { struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); - struct mv_cesa_engine *engine = creq->req.base.engine; - unsigned int digsize; - int ret, i; - if (creq->req.base.type == CESA_DMA_REQ) - ret = mv_cesa_dma_process(&creq->req.dma.base, status); - else - ret = mv_cesa_ahash_std_process(ahashreq, status); + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + return mv_cesa_dma_process(&creq->base, status); - if (ret == -EINPROGRESS) - return ret; + return mv_cesa_ahash_std_process(ahashreq, status); +} + +static void mv_cesa_ahash_complete(struct crypto_async_request *req) +{ + struct ahash_request *ahashreq = ahash_request_cast(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); + struct mv_cesa_engine *engine = creq->base.engine; + unsigned int digsize; + int i; digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); for (i = 0; i < digsize / 4; i++) creq->state[i] = readl_relaxed(engine->regs + CESA_IVDIG(i)); - if (creq->cache_ptr) - sg_pcopy_to_buffer(ahashreq->src, creq->src_nents, - creq->cache, - creq->cache_ptr, - ahashreq->nbytes - creq->cache_ptr); - if (creq->last_req) { /* * Hardware's MD5 digest is in little endian format, but @@ -325,7 +333,7 @@ static int mv_cesa_ahash_process(struct crypto_async_request *req, u32 status) } } - return ret; + atomic_sub(ahashreq->nbytes, &engine->load); } static void mv_cesa_ahash_prepare(struct crypto_async_request *req, @@ -333,19 +341,13 @@ static void mv_cesa_ahash_prepare(struct crypto_async_request *req, { struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); - unsigned int digsize; - int i; - creq->req.base.engine = engine; + creq->base.engine = engine; - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ahash_dma_prepare(ahashreq); else mv_cesa_ahash_std_prepare(ahashreq); - - digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); - for (i = 0; i < digsize / 4; i++) - writel_relaxed(creq->state[i], engine->regs + CESA_IVDIG(i)); } static void mv_cesa_ahash_req_cleanup(struct crypto_async_request *req) @@ -357,13 +359,19 @@ static void mv_cesa_ahash_req_cleanup(struct crypto_async_request *req) mv_cesa_ahash_last_cleanup(ahashreq); mv_cesa_ahash_cleanup(ahashreq); + + if (creq->cache_ptr) + sg_pcopy_to_buffer(ahashreq->src, creq->src_nents, + creq->cache, + creq->cache_ptr, + ahashreq->nbytes - creq->cache_ptr); } static const struct mv_cesa_req_ops mv_cesa_ahash_req_ops = { .step = mv_cesa_ahash_step, .process = mv_cesa_ahash_process, - .prepare = mv_cesa_ahash_prepare, .cleanup = mv_cesa_ahash_req_cleanup, + .complete = mv_cesa_ahash_complete, }; static int mv_cesa_ahash_init(struct ahash_request *req, @@ -553,15 +561,14 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; - struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma; - struct mv_cesa_tdma_req *dreq = &ahashdreq->base; + struct mv_cesa_req *basereq = &creq->base; struct mv_cesa_ahash_dma_iter iter; struct mv_cesa_op_ctx *op = NULL; unsigned int frag_len; int ret; - dreq->chain.first = NULL; - dreq->chain.last = NULL; + basereq->chain.first = NULL; + basereq->chain.last = NULL; if (creq->src_nents) { ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, @@ -572,14 +579,14 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) } } - mv_cesa_tdma_desc_iter_init(&dreq->chain); + mv_cesa_tdma_desc_iter_init(&basereq->chain); mv_cesa_ahash_req_iter_init(&iter, req); /* * Add the cache (left-over data from a previous block) first. * This will never overflow the SRAM size. */ - ret = mv_cesa_ahash_dma_add_cache(&dreq->chain, &iter, creq, flags); + ret = mv_cesa_ahash_dma_add_cache(&basereq->chain, &iter, creq, flags); if (ret) goto err_free_tdma; @@ -590,7 +597,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) * data. We intentionally do not add the final op block. */ while (true) { - ret = mv_cesa_dma_add_op_transfers(&dreq->chain, + ret = mv_cesa_dma_add_op_transfers(&basereq->chain, &iter.base, &iter.src, flags); if (ret) @@ -601,7 +608,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (!mv_cesa_ahash_req_iter_next_op(&iter)) break; - op = mv_cesa_dma_add_frag(&dreq->chain, &creq->op_tmpl, + op = mv_cesa_dma_add_frag(&basereq->chain, &creq->op_tmpl, frag_len, flags); if (IS_ERR(op)) { ret = PTR_ERR(op); @@ -619,10 +626,10 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) * operation, which depends whether this is the final request. */ if (creq->last_req) - op = mv_cesa_ahash_dma_last_req(&dreq->chain, &iter, creq, + op = mv_cesa_ahash_dma_last_req(&basereq->chain, &iter, creq, frag_len, flags); else if (frag_len) - op = mv_cesa_dma_add_frag(&dreq->chain, &creq->op_tmpl, + op = mv_cesa_dma_add_frag(&basereq->chain, &creq->op_tmpl, frag_len, flags); if (IS_ERR(op)) { @@ -632,7 +639,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (op) { /* Add dummy desc to wait for crypto operation end */ - ret = mv_cesa_dma_add_dummy_end(&dreq->chain, flags); + ret = mv_cesa_dma_add_dummy_end(&basereq->chain, flags); if (ret) goto err_free_tdma; } @@ -643,10 +650,13 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) else creq->cache_ptr = 0; + basereq->chain.last->flags |= (CESA_TDMA_END_OF_REQ | + CESA_TDMA_BREAK_CHAIN); + return 0; err_free_tdma: - mv_cesa_dma_cleanup(dreq); + mv_cesa_dma_cleanup(basereq); dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); err: @@ -660,11 +670,6 @@ static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached) struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); int ret; - if (cesa_dev->caps->has_tdma) - creq->req.base.type = CESA_DMA_REQ; - else - creq->req.base.type = CESA_STD_REQ; - creq->src_nents = sg_nents_for_len(req->src, req->nbytes); if (creq->src_nents < 0) { dev_err(cesa_dev->dev, "Invalid number of src SG"); @@ -678,19 +683,19 @@ static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached) if (*cached) return 0; - if (creq->req.base.type == CESA_DMA_REQ) + if (cesa_dev->caps->has_tdma) ret = mv_cesa_ahash_dma_req_init(req); return ret; } -static int mv_cesa_ahash_update(struct ahash_request *req) +static int mv_cesa_ahash_queue_req(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_engine *engine; bool cached = false; int ret; - creq->len += req->nbytes; ret = mv_cesa_ahash_req_init(req, &cached); if (ret) return ret; @@ -698,61 +703,48 @@ static int mv_cesa_ahash_update(struct ahash_request *req) if (cached) return 0; - ret = mv_cesa_queue_req(&req->base); + engine = mv_cesa_select_engine(req->nbytes); + mv_cesa_ahash_prepare(&req->base, engine); + + ret = mv_cesa_queue_req(&req->base, &creq->base); + if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ahash_cleanup(req); return ret; } +static int mv_cesa_ahash_update(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + + creq->len += req->nbytes; + + return mv_cesa_ahash_queue_req(req); +} + static int mv_cesa_ahash_final(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_op_ctx *tmpl = &creq->op_tmpl; - bool cached = false; - int ret; mv_cesa_set_mac_op_total_len(tmpl, creq->len); creq->last_req = true; req->nbytes = 0; - ret = mv_cesa_ahash_req_init(req, &cached); - if (ret) - return ret; - - if (cached) - return 0; - - ret = mv_cesa_queue_req(&req->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ahash_cleanup(req); - - return ret; + return mv_cesa_ahash_queue_req(req); } static int mv_cesa_ahash_finup(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_op_ctx *tmpl = &creq->op_tmpl; - bool cached = false; - int ret; creq->len += req->nbytes; mv_cesa_set_mac_op_total_len(tmpl, creq->len); creq->last_req = true; - ret = mv_cesa_ahash_req_init(req, &cached); - if (ret) - return ret; - - if (cached) - return 0; - - ret = mv_cesa_queue_req(&req->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ahash_cleanup(req); - - return ret; + return mv_cesa_ahash_queue_req(req); } static int mv_cesa_ahash_export(struct ahash_request *req, void *hash, diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index 0ad8f1ecf..86a065bcc 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -37,9 +37,9 @@ bool mv_cesa_req_dma_iter_next_transfer(struct mv_cesa_dma_iter *iter, return true; } -void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq) +void mv_cesa_dma_step(struct mv_cesa_req *dreq) { - struct mv_cesa_engine *engine = dreq->base.engine; + struct mv_cesa_engine *engine = dreq->engine; writel_relaxed(0, engine->regs + CESA_SA_CFG); @@ -53,19 +53,25 @@ void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq) engine->regs + CESA_SA_CFG); writel_relaxed(dreq->chain.first->cur_dma, engine->regs + CESA_TDMA_NEXT_ADDR); + BUG_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } -void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq) +void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq) { struct mv_cesa_tdma_desc *tdma; for (tdma = dreq->chain.first; tdma;) { struct mv_cesa_tdma_desc *old_tdma = tdma; + u32 type = tdma->flags & CESA_TDMA_TYPE_MSK; - if (tdma->flags & CESA_TDMA_OP) + if (type == CESA_TDMA_OP) dma_pool_free(cesa_dev->dma->op_pool, tdma->op, le32_to_cpu(tdma->src)); + else if (type == CESA_TDMA_IV) + dma_pool_free(cesa_dev->dma->iv_pool, tdma->data, + le32_to_cpu(tdma->dst)); tdma = tdma->next; dma_pool_free(cesa_dev->dma->tdma_desc_pool, old_tdma, @@ -76,7 +82,7 @@ void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq) dreq->chain.last = NULL; } -void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq, +void mv_cesa_dma_prepare(struct mv_cesa_req *dreq, struct mv_cesa_engine *engine) { struct mv_cesa_tdma_desc *tdma; @@ -88,11 +94,97 @@ void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq, if (tdma->flags & CESA_TDMA_SRC_IN_SRAM) tdma->src = cpu_to_le32(tdma->src + engine->sram_dma); - if (tdma->flags & CESA_TDMA_OP) + if ((tdma->flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_OP) mv_cesa_adjust_op(engine, tdma->op); } } +void mv_cesa_tdma_chain(struct mv_cesa_engine *engine, + struct mv_cesa_req *dreq) +{ + if (engine->chain.first == NULL && engine->chain.last == NULL) { + engine->chain.first = dreq->chain.first; + engine->chain.last = dreq->chain.last; + } else { + struct mv_cesa_tdma_desc *last; + + last = engine->chain.last; + last->next = dreq->chain.first; + engine->chain.last = dreq->chain.last; + + if (!(last->flags & CESA_TDMA_BREAK_CHAIN)) + last->next_dma = dreq->chain.first->cur_dma; + } +} + +int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status) +{ + struct crypto_async_request *req = NULL; + struct mv_cesa_tdma_desc *tdma = NULL, *next = NULL; + dma_addr_t tdma_cur; + int res = 0; + + tdma_cur = readl(engine->regs + CESA_TDMA_CUR); + + for (tdma = engine->chain.first; tdma; tdma = next) { + spin_lock_bh(&engine->lock); + next = tdma->next; + spin_unlock_bh(&engine->lock); + + if (tdma->flags & CESA_TDMA_END_OF_REQ) { + struct crypto_async_request *backlog = NULL; + struct mv_cesa_ctx *ctx; + u32 current_status; + + spin_lock_bh(&engine->lock); + /* + * if req is NULL, this means we're processing the + * request in engine->req. + */ + if (!req) + req = engine->req; + else + req = mv_cesa_dequeue_req_locked(engine, + &backlog); + + /* Re-chaining to the next request */ + engine->chain.first = tdma->next; + tdma->next = NULL; + + /* If this is the last request, clear the chain */ + if (engine->chain.first == NULL) + engine->chain.last = NULL; + spin_unlock_bh(&engine->lock); + + ctx = crypto_tfm_ctx(req->tfm); + current_status = (tdma->cur_dma == tdma_cur) ? + status : CESA_SA_INT_ACC0_IDMA_DONE; + res = ctx->ops->process(req, current_status); + ctx->ops->complete(req); + + if (res == 0) + mv_cesa_engine_enqueue_complete_request(engine, + req); + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + } + + if (res || tdma->cur_dma == tdma_cur) + break; + } + + /* Save the last request in error to engine->req, so that the core + * knows which request was fautly */ + if (res) { + spin_lock_bh(&engine->lock); + engine->req = req; + spin_unlock_bh(&engine->lock); + } + + return res; +} + static struct mv_cesa_tdma_desc * mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags) { @@ -117,6 +209,32 @@ mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags) return new_tdma; } +int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, + u32 size, u32 flags, gfp_t gfp_flags) +{ + + struct mv_cesa_tdma_desc *tdma; + u8 *iv; + dma_addr_t dma_handle; + + tdma = mv_cesa_dma_add_desc(chain, gfp_flags); + if (IS_ERR(tdma)) + return PTR_ERR(tdma); + + iv = dma_pool_alloc(cesa_dev->dma->iv_pool, gfp_flags, &dma_handle); + if (!iv) + return -ENOMEM; + + tdma->byte_cnt = cpu_to_le32(size | BIT(31)); + tdma->src = src; + tdma->dst = cpu_to_le32(dma_handle); + tdma->data = iv; + + flags &= (CESA_TDMA_DST_IN_SRAM | CESA_TDMA_SRC_IN_SRAM); + tdma->flags = flags | CESA_TDMA_IV; + return 0; +} + struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain, const struct mv_cesa_op_ctx *op_templ, bool skip_ctx, diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 59ed54e46..625ee50fd 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -11,7 +11,6 @@ * http://www.gnu.org/copyleft/gpl.html */ -#include <linux/crypto.h> #include <linux/dma-mapping.h> #include <linux/interrupt.h> #include <linux/io.h> @@ -25,6 +24,7 @@ #include <crypto/aes.h> #include <crypto/sha.h> #include <crypto/internal/hash.h> +#include <crypto/internal/skcipher.h> #define DCP_MAX_CHANS 4 #define DCP_BUF_SZ PAGE_SIZE @@ -84,7 +84,7 @@ struct dcp_async_ctx { unsigned int hot:1; /* Crypto-specific context */ - struct crypto_ablkcipher *fallback; + struct crypto_skcipher *fallback; unsigned int key_len; uint8_t key[AES_KEYSIZE_128]; }; @@ -374,20 +374,22 @@ static int dcp_chan_thread_aes(void *data) static int mxs_dcp_block_fallback(struct ablkcipher_request *req, int enc) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); - struct dcp_async_ctx *ctx = crypto_ablkcipher_ctx( - crypto_ablkcipher_reqtfm(req)); + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct dcp_async_ctx *ctx = crypto_ablkcipher_ctx(tfm); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); int ret; - ablkcipher_request_set_tfm(req, ctx->fallback); + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); if (enc) - ret = crypto_ablkcipher_encrypt(req); + ret = crypto_skcipher_encrypt(subreq); else - ret = crypto_ablkcipher_decrypt(req); + ret = crypto_skcipher_decrypt(subreq); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + skcipher_request_zero(subreq); return ret; } @@ -453,28 +455,22 @@ static int mxs_dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, return 0; } - /* Check if the key size is supported by kernel at all. */ - if (len != AES_KEYSIZE_192 && len != AES_KEYSIZE_256) { - tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - /* * If the requested AES key size is not supported by the hardware, * but is supported by in-kernel software implementation, we use * software fallback. */ - actx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - actx->fallback->base.crt_flags |= - tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK; + crypto_skcipher_clear_flags(actx->fallback, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(actx->fallback, + tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); - ret = crypto_ablkcipher_setkey(actx->fallback, key, len); + ret = crypto_skcipher_setkey(actx->fallback, key, len); if (!ret) return 0; tfm->base.crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->base.crt_flags |= - actx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK; + tfm->base.crt_flags |= crypto_skcipher_get_flags(actx->fallback) & + CRYPTO_TFM_RES_MASK; return ret; } @@ -484,9 +480,9 @@ static int mxs_dcp_aes_fallback_init(struct crypto_tfm *tfm) const char *name = crypto_tfm_alg_name(tfm); const uint32_t flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK; struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm); - struct crypto_ablkcipher *blk; + struct crypto_skcipher *blk; - blk = crypto_alloc_ablkcipher(name, 0, flags); + blk = crypto_alloc_skcipher(name, 0, flags); if (IS_ERR(blk)) return PTR_ERR(blk); @@ -499,8 +495,7 @@ static void mxs_dcp_aes_fallback_exit(struct crypto_tfm *tfm) { struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm); - crypto_free_ablkcipher(actx->fallback); - actx->fallback = NULL; + crypto_free_skcipher(actx->fallback); } /* diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index ce174d3b8..4ab53a604 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -528,8 +528,6 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) omap_aes_dma_stop(dd); - dmaengine_terminate_all(dd->dma_lch_in); - dmaengine_terminate_all(dd->dma_lch_out); return 0; } @@ -580,10 +578,12 @@ static int omap_aes_copy_sgs(struct omap_aes_dev *dd) sg_init_table(&dd->in_sgl, 1); sg_set_buf(&dd->in_sgl, buf_in, total); dd->in_sg = &dd->in_sgl; + dd->in_sg_len = 1; sg_init_table(&dd->out_sgl, 1); sg_set_buf(&dd->out_sgl, buf_out, total); dd->out_sg = &dd->out_sgl; + dd->out_sg_len = 1; return 0; } @@ -604,7 +604,6 @@ static int omap_aes_prepare_req(struct crypto_engine *engine, crypto_ablkcipher_reqtfm(req)); struct omap_aes_dev *dd = omap_aes_find_dev(ctx); struct omap_aes_reqctx *rctx; - int len; if (!dd) return -ENODEV; @@ -616,6 +615,14 @@ static int omap_aes_prepare_req(struct crypto_engine *engine, dd->in_sg = req->src; dd->out_sg = req->dst; + dd->in_sg_len = sg_nents_for_len(dd->in_sg, dd->total); + if (dd->in_sg_len < 0) + return dd->in_sg_len; + + dd->out_sg_len = sg_nents_for_len(dd->out_sg, dd->total); + if (dd->out_sg_len < 0) + return dd->out_sg_len; + if (omap_aes_check_aligned(dd->in_sg, dd->total) || omap_aes_check_aligned(dd->out_sg, dd->total)) { if (omap_aes_copy_sgs(dd)) @@ -625,11 +632,6 @@ static int omap_aes_prepare_req(struct crypto_engine *engine, dd->sgs_copied = 0; } - len = ALIGN(dd->total, AES_BLOCK_SIZE); - dd->in_sg_len = scatterwalk_bytes_sglen(dd->in_sg, len); - dd->out_sg_len = scatterwalk_bytes_sglen(dd->out_sg, len); - BUG_ON(dd->in_sg_len < 0 || dd->out_sg_len < 0); - rctx = ablkcipher_request_ctx(req); ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); rctx->mode &= FLAGS_MODE_MASK; @@ -1185,17 +1187,19 @@ static int omap_aes_probe(struct platform_device *pdev) spin_unlock(&list_lock); for (i = 0; i < dd->pdata->algs_info_size; i++) { - for (j = 0; j < dd->pdata->algs_info[i].size; j++) { - algp = &dd->pdata->algs_info[i].algs_list[j]; + if (!dd->pdata->algs_info[i].registered) { + for (j = 0; j < dd->pdata->algs_info[i].size; j++) { + algp = &dd->pdata->algs_info[i].algs_list[j]; - pr_debug("reg alg: %s\n", algp->cra_name); - INIT_LIST_HEAD(&algp->cra_list); + pr_debug("reg alg: %s\n", algp->cra_name); + INIT_LIST_HEAD(&algp->cra_list); - err = crypto_register_alg(algp); - if (err) - goto err_algs; + err = crypto_register_alg(algp); + if (err) + goto err_algs; - dd->pdata->algs_info[i].registered++; + dd->pdata->algs_info[i].registered++; + } } } diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index 3eedb0311..5691434ff 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -560,10 +560,12 @@ static int omap_des_copy_sgs(struct omap_des_dev *dd) sg_init_table(&dd->in_sgl, 1); sg_set_buf(&dd->in_sgl, buf_in, dd->total); dd->in_sg = &dd->in_sgl; + dd->in_sg_len = 1; sg_init_table(&dd->out_sgl, 1); sg_set_buf(&dd->out_sgl, buf_out, dd->total); dd->out_sg = &dd->out_sgl; + dd->out_sg_len = 1; return 0; } @@ -595,6 +597,14 @@ static int omap_des_prepare_req(struct crypto_engine *engine, dd->in_sg = req->src; dd->out_sg = req->dst; + dd->in_sg_len = sg_nents_for_len(dd->in_sg, dd->total); + if (dd->in_sg_len < 0) + return dd->in_sg_len; + + dd->out_sg_len = sg_nents_for_len(dd->out_sg, dd->total); + if (dd->out_sg_len < 0) + return dd->out_sg_len; + if (omap_des_copy_needed(dd->in_sg) || omap_des_copy_needed(dd->out_sg)) { if (omap_des_copy_sgs(dd)) @@ -604,10 +614,6 @@ static int omap_des_prepare_req(struct crypto_engine *engine, dd->sgs_copied = 0; } - dd->in_sg_len = scatterwalk_bytes_sglen(dd->in_sg, dd->total); - dd->out_sg_len = scatterwalk_bytes_sglen(dd->out_sg, dd->total); - BUG_ON(dd->in_sg_len < 0 || dd->out_sg_len < 0); - rctx = ablkcipher_request_ctx(req); ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); rctx->mode &= FLAGS_MODE_MASK; diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 63464e86f..7fe4eef12 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -100,6 +100,8 @@ #define DEFAULT_TIMEOUT_INTERVAL HZ +#define DEFAULT_AUTOSUSPEND_DELAY 1000 + /* mostly device flags */ #define FLAGS_BUSY 0 #define FLAGS_FINAL 1 @@ -173,7 +175,7 @@ struct omap_sham_ctx { struct omap_sham_hmac_ctx base[0]; }; -#define OMAP_SHAM_QUEUE_LENGTH 1 +#define OMAP_SHAM_QUEUE_LENGTH 10 struct omap_sham_algs_info { struct ahash_alg *algs_list; @@ -813,7 +815,6 @@ static int omap_sham_update_dma_stop(struct omap_sham_dev *dd) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); - dmaengine_terminate_all(dd->dma_lch); if (ctx->flags & BIT(FLAGS_SG)) { dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE); @@ -999,7 +1000,8 @@ static void omap_sham_finish_req(struct ahash_request *req, int err) dd->flags &= ~(BIT(FLAGS_BUSY) | BIT(FLAGS_FINAL) | BIT(FLAGS_CPU) | BIT(FLAGS_DMA_READY) | BIT(FLAGS_OUTPUT_READY)); - pm_runtime_put(dd->dev); + pm_runtime_mark_last_busy(dd->dev); + pm_runtime_put_autosuspend(dd->dev); if (req->base.complete) req->base.complete(&req->base, err); @@ -1093,7 +1095,7 @@ static int omap_sham_update(struct ahash_request *req) ctx->offset = 0; if (ctx->flags & BIT(FLAGS_FINUP)) { - if ((ctx->digcnt + ctx->bufcnt + ctx->total) < 9) { + if ((ctx->digcnt + ctx->bufcnt + ctx->total) < 240) { /* * OMAP HW accel works only with buffers >= 9 * will switch to bypass in final() @@ -1149,9 +1151,13 @@ static int omap_sham_final(struct ahash_request *req) if (ctx->flags & BIT(FLAGS_ERROR)) return 0; /* uncompleted hash is not needed */ - /* OMAP HW accel works only with buffers >= 9 */ - /* HMAC is always >= 9 because ipad == block size */ - if ((ctx->digcnt + ctx->bufcnt) < 9) + /* + * OMAP HW accel works only with buffers >= 9. + * HMAC is always >= 9 because ipad == block size. + * If buffersize is less than 240, we use fallback SW encoding, + * as using DMA + HW in this case doesn't provide any benefit. + */ + if ((ctx->digcnt + ctx->bufcnt) < 240) return omap_sham_final_shash(req); else if (ctx->bufcnt) return omap_sham_enqueue(req, OP_FINAL); @@ -1328,7 +1334,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "sha1", .cra_driver_name = "omap-sha1", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1351,7 +1357,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "md5", .cra_driver_name = "omap-md5", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1375,7 +1381,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "hmac(sha1)", .cra_driver_name = "omap-hmac-sha1", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1400,7 +1406,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "hmac(md5)", .cra_driver_name = "omap-hmac-md5", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1428,7 +1434,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "sha224", .cra_driver_name = "omap-sha224", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1450,7 +1456,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "sha256", .cra_driver_name = "omap-sha256", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1473,7 +1479,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "hmac(sha224)", .cra_driver_name = "omap-hmac-sha224", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1497,7 +1503,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "hmac(sha256)", .cra_driver_name = "omap-hmac-sha256", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1523,7 +1529,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "sha384", .cra_driver_name = "omap-sha384", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1545,7 +1551,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "sha512", .cra_driver_name = "omap-sha512", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1568,7 +1574,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "hmac(sha384)", .cra_driver_name = "omap-hmac-sha384", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1592,7 +1598,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "hmac(sha512)", .cra_driver_name = "omap-hmac-sha512", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1946,6 +1952,9 @@ static int omap_sham_probe(struct platform_device *pdev) dd->flags |= dd->pdata->flags; + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, DEFAULT_AUTOSUSPEND_DELAY); + pm_runtime_enable(dev); pm_runtime_irq_safe(dev); diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index 3b1c7ecf0..475760988 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -171,7 +171,7 @@ struct spacc_ablk_ctx { * The fallback cipher. If the operation can't be done in hardware, * fallback to a software version. */ - struct crypto_ablkcipher *sw_cipher; + struct crypto_skcipher *sw_cipher; }; /* AEAD cipher context. */ @@ -789,33 +789,35 @@ static int spacc_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key, * request for any other size (192 bits) then we need to do a software * fallback. */ - if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256 && - ctx->sw_cipher) { + if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256) { + if (!ctx->sw_cipher) + return -EINVAL; + /* * Set the fallback transform to use the same request flags as * the hardware transform. */ - ctx->sw_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - ctx->sw_cipher->base.crt_flags |= - cipher->base.crt_flags & CRYPTO_TFM_REQ_MASK; + crypto_skcipher_clear_flags(ctx->sw_cipher, + CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctx->sw_cipher, + cipher->base.crt_flags & + CRYPTO_TFM_REQ_MASK); + + err = crypto_skcipher_setkey(ctx->sw_cipher, key, len); + + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= + crypto_skcipher_get_flags(ctx->sw_cipher) & + CRYPTO_TFM_RES_MASK; - err = crypto_ablkcipher_setkey(ctx->sw_cipher, key, len); if (err) goto sw_setkey_failed; - } else if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256 && - !ctx->sw_cipher) - err = -EINVAL; + } memcpy(ctx->key, key, len); ctx->key_len = len; sw_setkey_failed: - if (err && ctx->sw_cipher) { - tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->crt_flags |= - ctx->sw_cipher->base.crt_flags & CRYPTO_TFM_RES_MASK; - } - return err; } @@ -910,20 +912,21 @@ static int spacc_ablk_do_fallback(struct ablkcipher_request *req, struct crypto_tfm *old_tfm = crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(old_tfm); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->sw_cipher); int err; - if (!ctx->sw_cipher) - return -EINVAL; - /* * Change the request to use the software fallback transform, and once * the ciphering has completed, put the old transform back into the * request. */ - ablkcipher_request_set_tfm(req, ctx->sw_cipher); - err = is_encrypt ? crypto_ablkcipher_encrypt(req) : - crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(old_tfm)); + skcipher_request_set_tfm(subreq, ctx->sw_cipher); + skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = is_encrypt ? crypto_skcipher_encrypt(subreq) : + crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -1015,12 +1018,13 @@ static int spacc_ablk_cra_init(struct crypto_tfm *tfm) ctx->generic.flags = spacc_alg->type; ctx->generic.engine = engine; if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) { - ctx->sw_cipher = crypto_alloc_ablkcipher(alg->cra_name, 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + ctx->sw_cipher = crypto_alloc_skcipher( + alg->cra_name, 0, CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->sw_cipher)) { dev_warn(engine->dev, "failed to allocate fallback for %s\n", alg->cra_name); - ctx->sw_cipher = NULL; + return PTR_ERR(ctx->sw_cipher); } } ctx->generic.key_offs = spacc_alg->key_offs; @@ -1035,9 +1039,7 @@ static void spacc_ablk_cra_exit(struct crypto_tfm *tfm) { struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->sw_cipher) - crypto_free_ablkcipher(ctx->sw_cipher); - ctx->sw_cipher = NULL; + crypto_free_skcipher(ctx->sw_cipher); } static int spacc_ablk_encrypt(struct ablkcipher_request *req) diff --git a/drivers/crypto/qat/Kconfig b/drivers/crypto/qat/Kconfig index 85b44e577..ce3cae40f 100644 --- a/drivers/crypto/qat/Kconfig +++ b/drivers/crypto/qat/Kconfig @@ -4,12 +4,13 @@ config CRYPTO_DEV_QAT select CRYPTO_AUTHENC select CRYPTO_BLKCIPHER select CRYPTO_AKCIPHER + select CRYPTO_DH select CRYPTO_HMAC + select CRYPTO_RSA select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 select FW_LOADER - select ASN1 config CRYPTO_DEV_QAT_DH895xCC tristate "Support for Intel(R) DH895xCC" diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c index c5bd5a9ab..6bc68bc00 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -229,6 +229,7 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->reset_device = adf_reset_flr; hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; } diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c index 879e04cae..618cec360 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c @@ -239,6 +239,7 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->reset_device = adf_reset_flr; hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; } diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile index 5fc3dbb9a..92fb6ffdc 100644 --- a/drivers/crypto/qat/qat_common/Makefile +++ b/drivers/crypto/qat/qat_common/Makefile @@ -1,12 +1,3 @@ -$(obj)/qat_rsapubkey-asn1.o: $(obj)/qat_rsapubkey-asn1.c \ - $(obj)/qat_rsapubkey-asn1.h -$(obj)/qat_rsaprivkey-asn1.o: $(obj)/qat_rsaprivkey-asn1.c \ - $(obj)/qat_rsaprivkey-asn1.h -$(obj)/qat_asym_algs.o: $(obj)/qat_rsapubkey-asn1.h $(obj)/qat_rsaprivkey-asn1.h - -clean-files += qat_rsapubkey-asn1.c qat_rsapubkey-asn1.h -clean-files += qat_rsaprivkey-asn1.c qat_rsaprivkey-asn1.h - obj-$(CONFIG_CRYPTO_DEV_QAT) += intel_qat.o intel_qat-objs := adf_cfg.o \ adf_isr.o \ @@ -20,8 +11,6 @@ intel_qat-objs := adf_cfg.o \ adf_hw_arbiter.o \ qat_crypto.o \ qat_algs.o \ - qat_rsapubkey-asn1.o \ - qat_rsaprivkey-asn1.o \ qat_asym_algs.o \ qat_uclo.o \ qat_hal.o diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index 5a07208ce..e88225365 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -176,6 +176,7 @@ struct adf_hw_device_data { void (*disable_iov)(struct adf_accel_dev *accel_dev); void (*enable_ints)(struct adf_accel_dev *accel_dev); int (*enable_vf2pf_comms)(struct adf_accel_dev *accel_dev); + void (*reset_device)(struct adf_accel_dev *accel_dev); const char *fw_name; const char *fw_mmp_name; uint32_t fuses; diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c index b40d9c8da..2839fccdd 100644 --- a/drivers/crypto/qat/qat_common/adf_aer.c +++ b/drivers/crypto/qat/qat_common/adf_aer.c @@ -82,18 +82,12 @@ struct adf_reset_dev_data { struct work_struct reset_work; }; -void adf_dev_restore(struct adf_accel_dev *accel_dev) +void adf_reset_sbr(struct adf_accel_dev *accel_dev) { struct pci_dev *pdev = accel_to_pci_dev(accel_dev); struct pci_dev *parent = pdev->bus->self; uint16_t bridge_ctl = 0; - if (accel_dev->is_vf) - return; - - dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n", - accel_dev->accel_id); - if (!parent) parent = pdev; @@ -101,6 +95,8 @@ void adf_dev_restore(struct adf_accel_dev *accel_dev) dev_info(&GET_DEV(accel_dev), "Transaction still in progress. Proceeding\n"); + dev_info(&GET_DEV(accel_dev), "Secondary bus reset\n"); + pci_read_config_word(parent, PCI_BRIDGE_CONTROL, &bridge_ctl); bridge_ctl |= PCI_BRIDGE_CTL_BUS_RESET; pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl); @@ -108,8 +104,40 @@ void adf_dev_restore(struct adf_accel_dev *accel_dev) bridge_ctl &= ~PCI_BRIDGE_CTL_BUS_RESET; pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl); msleep(100); - pci_restore_state(pdev); - pci_save_state(pdev); +} +EXPORT_SYMBOL_GPL(adf_reset_sbr); + +void adf_reset_flr(struct adf_accel_dev *accel_dev) +{ + struct pci_dev *pdev = accel_to_pci_dev(accel_dev); + u16 control = 0; + int pos = 0; + + dev_info(&GET_DEV(accel_dev), "Function level reset\n"); + pos = pci_pcie_cap(pdev); + if (!pos) { + dev_err(&GET_DEV(accel_dev), "Restart device failed\n"); + return; + } + pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &control); + control |= PCI_EXP_DEVCTL_BCR_FLR; + pci_write_config_word(pdev, pos + PCI_EXP_DEVCTL, control); + msleep(100); +} +EXPORT_SYMBOL_GPL(adf_reset_flr); + +void adf_dev_restore(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_device = accel_dev->hw_device; + struct pci_dev *pdev = accel_to_pci_dev(accel_dev); + + if (hw_device->reset_device) { + dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n", + accel_dev->accel_id); + hw_device->reset_device(accel_dev); + pci_restore_state(pdev); + pci_save_state(pdev); + } } static void adf_device_reset_worker(struct work_struct *work) @@ -243,7 +271,8 @@ EXPORT_SYMBOL_GPL(adf_disable_aer); int adf_init_aer(void) { - device_reset_wq = create_workqueue("qat_device_reset_wq"); + device_reset_wq = alloc_workqueue("qat_device_reset_wq", + WQ_MEM_RECLAIM, 0); return !device_reset_wq ? -EFAULT : 0; } diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 75faa39bc..980e07475 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -141,6 +141,8 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev); int adf_enable_aer(struct adf_accel_dev *accel_dev, struct pci_driver *adf); void adf_disable_aer(struct adf_accel_dev *accel_dev); +void adf_reset_sbr(struct adf_accel_dev *accel_dev); +void adf_reset_flr(struct adf_accel_dev *accel_dev); void adf_dev_restore(struct adf_accel_dev *accel_dev); int adf_init_aer(void); void adf_exit_aer(void); diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index 4a526e2f1..9320ae1d0 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -292,7 +292,7 @@ EXPORT_SYMBOL_GPL(adf_sriov_configure); int __init adf_init_pf_wq(void) { /* Workqueue for PF2VF responses */ - pf2vf_resp_wq = create_workqueue("qat_pf2vf_resp_wq"); + pf2vf_resp_wq = alloc_workqueue("qat_pf2vf_resp_wq", WQ_MEM_RECLAIM, 0); return !pf2vf_resp_wq ? -ENOMEM : 0; } diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index aa689cabe..bf99e11a3 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -321,7 +321,7 @@ EXPORT_SYMBOL_GPL(adf_vf_isr_resource_alloc); int __init adf_init_vf_wq(void) { - adf_vf_stop_wq = create_workqueue("adf_vf_stop_wq"); + adf_vf_stop_wq = alloc_workqueue("adf_vf_stop_wq", WQ_MEM_RECLAIM, 0); return !adf_vf_stop_wq ? -EFAULT : 0; } diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 4c9deef6a..20f35df8a 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -947,13 +947,13 @@ static int qat_alg_ablkcipher_setkey(struct crypto_ablkcipher *tfm, return 0; out_free_all: - memset(ctx->dec_cd, 0, sizeof(*ctx->enc_cd)); - dma_free_coherent(dev, sizeof(*ctx->enc_cd), + memset(ctx->dec_cd, 0, sizeof(*ctx->dec_cd)); + dma_free_coherent(dev, sizeof(*ctx->dec_cd), ctx->dec_cd, ctx->dec_cd_paddr); ctx->dec_cd = NULL; out_free_enc: - memset(ctx->enc_cd, 0, sizeof(*ctx->dec_cd)); - dma_free_coherent(dev, sizeof(*ctx->dec_cd), + memset(ctx->enc_cd, 0, sizeof(*ctx->enc_cd)); + dma_free_coherent(dev, sizeof(*ctx->enc_cd), ctx->enc_cd, ctx->enc_cd_paddr); ctx->enc_cd = NULL; return -ENOMEM; diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 05f49d4f9..0d35dca2e 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -49,11 +49,12 @@ #include <crypto/internal/rsa.h> #include <crypto/internal/akcipher.h> #include <crypto/akcipher.h> +#include <crypto/kpp.h> +#include <crypto/internal/kpp.h> +#include <crypto/dh.h> #include <linux/dma-mapping.h> #include <linux/fips.h> #include <crypto/scatterwalk.h> -#include "qat_rsapubkey-asn1.h" -#include "qat_rsaprivkey-asn1.h" #include "icp_qat_fw_pke.h" #include "adf_accel_devices.h" #include "adf_transport.h" @@ -75,6 +76,14 @@ struct qat_rsa_input_params { dma_addr_t d; dma_addr_t n; } dec; + struct { + dma_addr_t c; + dma_addr_t p; + dma_addr_t q; + dma_addr_t dp; + dma_addr_t dq; + dma_addr_t qinv; + } dec_crt; u64 in_tab[8]; }; } __packed __aligned(64); @@ -95,71 +104,480 @@ struct qat_rsa_ctx { char *n; char *e; char *d; + char *p; + char *q; + char *dp; + char *dq; + char *qinv; dma_addr_t dma_n; dma_addr_t dma_e; dma_addr_t dma_d; + dma_addr_t dma_p; + dma_addr_t dma_q; + dma_addr_t dma_dp; + dma_addr_t dma_dq; + dma_addr_t dma_qinv; unsigned int key_sz; + bool crt_mode; + struct qat_crypto_instance *inst; +} __packed __aligned(64); + +struct qat_dh_input_params { + union { + struct { + dma_addr_t b; + dma_addr_t xa; + dma_addr_t p; + } in; + struct { + dma_addr_t xa; + dma_addr_t p; + } in_g2; + u64 in_tab[8]; + }; +} __packed __aligned(64); + +struct qat_dh_output_params { + union { + dma_addr_t r; + u64 out_tab[8]; + }; +} __packed __aligned(64); + +struct qat_dh_ctx { + char *g; + char *xa; + char *p; + dma_addr_t dma_g; + dma_addr_t dma_xa; + dma_addr_t dma_p; + unsigned int p_size; + bool g2; struct qat_crypto_instance *inst; } __packed __aligned(64); -struct qat_rsa_request { - struct qat_rsa_input_params in; - struct qat_rsa_output_params out; +struct qat_asym_request { + union { + struct qat_rsa_input_params rsa; + struct qat_dh_input_params dh; + } in; + union { + struct qat_rsa_output_params rsa; + struct qat_dh_output_params dh; + } out; dma_addr_t phy_in; dma_addr_t phy_out; char *src_align; char *dst_align; struct icp_qat_fw_pke_request req; - struct qat_rsa_ctx *ctx; + union { + struct qat_rsa_ctx *rsa; + struct qat_dh_ctx *dh; + } ctx; + union { + struct akcipher_request *rsa; + struct kpp_request *dh; + } areq; int err; + void (*cb)(struct icp_qat_fw_pke_resp *resp); } __aligned(64); -static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) +static void qat_dh_cb(struct icp_qat_fw_pke_resp *resp) { - struct akcipher_request *areq = (void *)(__force long)resp->opaque; - struct qat_rsa_request *req = PTR_ALIGN(akcipher_request_ctx(areq), 64); - struct device *dev = &GET_DEV(req->ctx->inst->accel_dev); + struct qat_asym_request *req = (void *)(__force long)resp->opaque; + struct kpp_request *areq = req->areq.dh; + struct device *dev = &GET_DEV(req->ctx.dh->inst->accel_dev); int err = ICP_QAT_FW_PKE_RESP_PKE_STAT_GET( resp->pke_resp_hdr.comn_resp_flags); err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 0 : -EINVAL; - if (req->src_align) - dma_free_coherent(dev, req->ctx->key_sz, req->src_align, - req->in.enc.m); - else - dma_unmap_single(dev, req->in.enc.m, req->ctx->key_sz, - DMA_TO_DEVICE); + if (areq->src) { + if (req->src_align) + dma_free_coherent(dev, req->ctx.dh->p_size, + req->src_align, req->in.dh.in.b); + else + dma_unmap_single(dev, req->in.dh.in.b, + req->ctx.dh->p_size, DMA_TO_DEVICE); + } - areq->dst_len = req->ctx->key_sz; + areq->dst_len = req->ctx.dh->p_size; if (req->dst_align) { - char *ptr = req->dst_align; + scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, + areq->dst_len, 1); - while (!(*ptr) && areq->dst_len) { - areq->dst_len--; - ptr++; - } + dma_free_coherent(dev, req->ctx.dh->p_size, req->dst_align, + req->out.dh.r); + } else { + dma_unmap_single(dev, req->out.dh.r, req->ctx.dh->p_size, + DMA_FROM_DEVICE); + } - if (areq->dst_len != req->ctx->key_sz) - memmove(req->dst_align, ptr, areq->dst_len); + dma_unmap_single(dev, req->phy_in, sizeof(struct qat_dh_input_params), + DMA_TO_DEVICE); + dma_unmap_single(dev, req->phy_out, + sizeof(struct qat_dh_output_params), + DMA_TO_DEVICE); - scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, - areq->dst_len, 1); + kpp_request_complete(areq, err); +} + +#define PKE_DH_1536 0x390c1a49 +#define PKE_DH_G2_1536 0x2e0b1a3e +#define PKE_DH_2048 0x4d0c1a60 +#define PKE_DH_G2_2048 0x3e0b1a55 +#define PKE_DH_3072 0x510c1a77 +#define PKE_DH_G2_3072 0x3a0b1a6c +#define PKE_DH_4096 0x690c1a8e +#define PKE_DH_G2_4096 0x4a0b1a83 + +static unsigned long qat_dh_fn_id(unsigned int len, bool g2) +{ + unsigned int bitslen = len << 3; + + switch (bitslen) { + case 1536: + return g2 ? PKE_DH_G2_1536 : PKE_DH_1536; + case 2048: + return g2 ? PKE_DH_G2_2048 : PKE_DH_2048; + case 3072: + return g2 ? PKE_DH_G2_3072 : PKE_DH_3072; + case 4096: + return g2 ? PKE_DH_G2_4096 : PKE_DH_4096; + default: + return 0; + }; +} + +static inline struct qat_dh_ctx *qat_dh_get_params(struct crypto_kpp *tfm) +{ + return kpp_tfm_ctx(tfm); +} + +static int qat_dh_compute_value(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct qat_crypto_instance *inst = ctx->inst; + struct device *dev = &GET_DEV(inst->accel_dev); + struct qat_asym_request *qat_req = + PTR_ALIGN(kpp_request_ctx(req), 64); + struct icp_qat_fw_pke_request *msg = &qat_req->req; + int ret, ctr = 0; + int n_input_params = 0; + + if (unlikely(!ctx->xa)) + return -EINVAL; + + if (req->dst_len < ctx->p_size) { + req->dst_len = ctx->p_size; + return -EOVERFLOW; + } + memset(msg, '\0', sizeof(*msg)); + ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr, + ICP_QAT_FW_COMN_REQ_FLAG_SET); + + msg->pke_hdr.cd_pars.func_id = qat_dh_fn_id(ctx->p_size, + !req->src && ctx->g2); + if (unlikely(!msg->pke_hdr.cd_pars.func_id)) + return -EINVAL; + + qat_req->cb = qat_dh_cb; + qat_req->ctx.dh = ctx; + qat_req->areq.dh = req; + msg->pke_hdr.service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_PKE; + msg->pke_hdr.comn_req_flags = + ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT, + QAT_COMN_CD_FLD_TYPE_64BIT_ADR); - dma_free_coherent(dev, req->ctx->key_sz, req->dst_align, - req->out.enc.c); + /* + * If no source is provided use g as base + */ + if (req->src) { + qat_req->in.dh.in.xa = ctx->dma_xa; + qat_req->in.dh.in.p = ctx->dma_p; + n_input_params = 3; } else { - char *ptr = sg_virt(areq->dst); + if (ctx->g2) { + qat_req->in.dh.in_g2.xa = ctx->dma_xa; + qat_req->in.dh.in_g2.p = ctx->dma_p; + n_input_params = 2; + } else { + qat_req->in.dh.in.b = ctx->dma_g; + qat_req->in.dh.in.xa = ctx->dma_xa; + qat_req->in.dh.in.p = ctx->dma_p; + n_input_params = 3; + } + } - while (!(*ptr) && areq->dst_len) { - areq->dst_len--; - ptr++; + ret = -ENOMEM; + if (req->src) { + /* + * src can be of any size in valid range, but HW expects it to + * be the same as modulo p so in case it is different we need + * to allocate a new buf and copy src data. + * In other case we just need to map the user provided buffer. + * Also need to make sure that it is in contiguous buffer. + */ + if (sg_is_last(req->src) && req->src_len == ctx->p_size) { + qat_req->src_align = NULL; + qat_req->in.dh.in.b = dma_map_single(dev, + sg_virt(req->src), + req->src_len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, + qat_req->in.dh.in.b))) + return ret; + + } else { + int shift = ctx->p_size - req->src_len; + + qat_req->src_align = dma_zalloc_coherent(dev, + ctx->p_size, + &qat_req->in.dh.in.b, + GFP_KERNEL); + if (unlikely(!qat_req->src_align)) + return ret; + + scatterwalk_map_and_copy(qat_req->src_align + shift, + req->src, 0, req->src_len, 0); } + } + /* + * dst can be of any size in valid range, but HW expects it to be the + * same as modulo m so in case it is different we need to allocate a + * new buf and copy src data. + * In other case we just need to map the user provided buffer. + * Also need to make sure that it is in contiguous buffer. + */ + if (sg_is_last(req->dst) && req->dst_len == ctx->p_size) { + qat_req->dst_align = NULL; + qat_req->out.dh.r = dma_map_single(dev, sg_virt(req->dst), + req->dst_len, + DMA_FROM_DEVICE); - if (sg_virt(areq->dst) != ptr && areq->dst_len) - memmove(sg_virt(areq->dst), ptr, areq->dst_len); + if (unlikely(dma_mapping_error(dev, qat_req->out.dh.r))) + goto unmap_src; + + } else { + qat_req->dst_align = dma_zalloc_coherent(dev, ctx->p_size, + &qat_req->out.dh.r, + GFP_KERNEL); + if (unlikely(!qat_req->dst_align)) + goto unmap_src; + } - dma_unmap_single(dev, req->out.enc.c, req->ctx->key_sz, + qat_req->in.dh.in_tab[n_input_params] = 0; + qat_req->out.dh.out_tab[1] = 0; + /* Mapping in.in.b or in.in_g2.xa is the same */ + qat_req->phy_in = dma_map_single(dev, &qat_req->in.dh.in.b, + sizeof(struct qat_dh_input_params), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) + goto unmap_dst; + + qat_req->phy_out = dma_map_single(dev, &qat_req->out.dh.r, + sizeof(struct qat_dh_output_params), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) + goto unmap_in_params; + + msg->pke_mid.src_data_addr = qat_req->phy_in; + msg->pke_mid.dest_data_addr = qat_req->phy_out; + msg->pke_mid.opaque = (uint64_t)(__force long)qat_req; + msg->input_param_count = n_input_params; + msg->output_param_count = 1; + + do { + ret = adf_send_message(ctx->inst->pke_tx, (uint32_t *)msg); + } while (ret == -EBUSY && ctr++ < 100); + + if (!ret) + return -EINPROGRESS; + + if (!dma_mapping_error(dev, qat_req->phy_out)) + dma_unmap_single(dev, qat_req->phy_out, + sizeof(struct qat_dh_output_params), + DMA_TO_DEVICE); +unmap_in_params: + if (!dma_mapping_error(dev, qat_req->phy_in)) + dma_unmap_single(dev, qat_req->phy_in, + sizeof(struct qat_dh_input_params), + DMA_TO_DEVICE); +unmap_dst: + if (qat_req->dst_align) + dma_free_coherent(dev, ctx->p_size, qat_req->dst_align, + qat_req->out.dh.r); + else + if (!dma_mapping_error(dev, qat_req->out.dh.r)) + dma_unmap_single(dev, qat_req->out.dh.r, ctx->p_size, + DMA_FROM_DEVICE); +unmap_src: + if (req->src) { + if (qat_req->src_align) + dma_free_coherent(dev, ctx->p_size, qat_req->src_align, + qat_req->in.dh.in.b); + else + if (!dma_mapping_error(dev, qat_req->in.dh.in.b)) + dma_unmap_single(dev, qat_req->in.dh.in.b, + ctx->p_size, + DMA_TO_DEVICE); + } + return ret; +} + +static int qat_dh_check_params_length(unsigned int p_len) +{ + switch (p_len) { + case 1536: + case 2048: + case 3072: + case 4096: + return 0; + } + return -EINVAL; +} + +static int qat_dh_set_params(struct qat_dh_ctx *ctx, struct dh *params) +{ + struct qat_crypto_instance *inst = ctx->inst; + struct device *dev = &GET_DEV(inst->accel_dev); + + if (unlikely(!params->p || !params->g)) + return -EINVAL; + + if (qat_dh_check_params_length(params->p_size << 3)) + return -EINVAL; + + ctx->p_size = params->p_size; + ctx->p = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_p, GFP_KERNEL); + if (!ctx->p) + return -ENOMEM; + memcpy(ctx->p, params->p, ctx->p_size); + + /* If g equals 2 don't copy it */ + if (params->g_size == 1 && *(char *)params->g == 0x02) { + ctx->g2 = true; + return 0; + } + + ctx->g = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_g, GFP_KERNEL); + if (!ctx->g) { + dma_free_coherent(dev, ctx->p_size, ctx->p, ctx->dma_p); + ctx->p = NULL; + return -ENOMEM; + } + memcpy(ctx->g + (ctx->p_size - params->g_size), params->g, + params->g_size); + + return 0; +} + +static void qat_dh_clear_ctx(struct device *dev, struct qat_dh_ctx *ctx) +{ + if (ctx->g) { + dma_free_coherent(dev, ctx->p_size, ctx->g, ctx->dma_g); + ctx->g = NULL; + } + if (ctx->xa) { + dma_free_coherent(dev, ctx->p_size, ctx->xa, ctx->dma_xa); + ctx->xa = NULL; + } + if (ctx->p) { + dma_free_coherent(dev, ctx->p_size, ctx->p, ctx->dma_p); + ctx->p = NULL; + } + ctx->p_size = 0; + ctx->g2 = false; +} + +static int qat_dh_set_secret(struct crypto_kpp *tfm, void *buf, + unsigned int len) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct device *dev = &GET_DEV(ctx->inst->accel_dev); + struct dh params; + int ret; + + if (crypto_dh_decode_key(buf, len, ¶ms) < 0) + return -EINVAL; + + /* Free old secret if any */ + qat_dh_clear_ctx(dev, ctx); + + ret = qat_dh_set_params(ctx, ¶ms); + if (ret < 0) + return ret; + + ctx->xa = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_xa, + GFP_KERNEL); + if (!ctx->xa) { + qat_dh_clear_ctx(dev, ctx); + return -ENOMEM; + } + memcpy(ctx->xa + (ctx->p_size - params.key_size), params.key, + params.key_size); + + return 0; +} + +static int qat_dh_max_size(struct crypto_kpp *tfm) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + + return ctx->p ? ctx->p_size : -EINVAL; +} + +static int qat_dh_init_tfm(struct crypto_kpp *tfm) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct qat_crypto_instance *inst = + qat_crypto_get_instance_node(get_current_node()); + + if (!inst) + return -EINVAL; + + ctx->p_size = 0; + ctx->g2 = false; + ctx->inst = inst; + return 0; +} + +static void qat_dh_exit_tfm(struct crypto_kpp *tfm) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct device *dev = &GET_DEV(ctx->inst->accel_dev); + + qat_dh_clear_ctx(dev, ctx); + qat_crypto_put_instance(ctx->inst); +} + +static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) +{ + struct qat_asym_request *req = (void *)(__force long)resp->opaque; + struct akcipher_request *areq = req->areq.rsa; + struct device *dev = &GET_DEV(req->ctx.rsa->inst->accel_dev); + int err = ICP_QAT_FW_PKE_RESP_PKE_STAT_GET( + resp->pke_resp_hdr.comn_resp_flags); + + err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 0 : -EINVAL; + + if (req->src_align) + dma_free_coherent(dev, req->ctx.rsa->key_sz, req->src_align, + req->in.rsa.enc.m); + else + dma_unmap_single(dev, req->in.rsa.enc.m, req->ctx.rsa->key_sz, + DMA_TO_DEVICE); + + areq->dst_len = req->ctx.rsa->key_sz; + if (req->dst_align) { + scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, + areq->dst_len, 1); + + dma_free_coherent(dev, req->ctx.rsa->key_sz, req->dst_align, + req->out.rsa.enc.c); + } else { + dma_unmap_single(dev, req->out.rsa.enc.c, req->ctx.rsa->key_sz, DMA_FROM_DEVICE); } @@ -175,8 +593,9 @@ static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) void qat_alg_asym_callback(void *_resp) { struct icp_qat_fw_pke_resp *resp = _resp; + struct qat_asym_request *areq = (void *)(__force long)resp->opaque; - qat_rsa_cb(resp); + areq->cb(resp); } #define PKE_RSA_EP_512 0x1c161b21 @@ -237,13 +656,42 @@ static unsigned long qat_rsa_dec_fn_id(unsigned int len) }; } +#define PKE_RSA_DP2_512 0x1c131b57 +#define PKE_RSA_DP2_1024 0x26131c2d +#define PKE_RSA_DP2_1536 0x45111d12 +#define PKE_RSA_DP2_2048 0x59121dfa +#define PKE_RSA_DP2_3072 0x81121ed9 +#define PKE_RSA_DP2_4096 0xb1111fb2 + +static unsigned long qat_rsa_dec_fn_id_crt(unsigned int len) +{ + unsigned int bitslen = len << 3; + + switch (bitslen) { + case 512: + return PKE_RSA_DP2_512; + case 1024: + return PKE_RSA_DP2_1024; + case 1536: + return PKE_RSA_DP2_1536; + case 2048: + return PKE_RSA_DP2_2048; + case 3072: + return PKE_RSA_DP2_3072; + case 4096: + return PKE_RSA_DP2_4096; + default: + return 0; + }; +} + static int qat_rsa_enc(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); - struct qat_rsa_request *qat_req = + struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; int ret, ctr = 0; @@ -262,14 +710,16 @@ static int qat_rsa_enc(struct akcipher_request *req) if (unlikely(!msg->pke_hdr.cd_pars.func_id)) return -EINVAL; - qat_req->ctx = ctx; + qat_req->cb = qat_rsa_cb; + qat_req->ctx.rsa = ctx; + qat_req->areq.rsa = req; msg->pke_hdr.service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_PKE; msg->pke_hdr.comn_req_flags = ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT, QAT_COMN_CD_FLD_TYPE_64BIT_ADR); - qat_req->in.enc.e = ctx->dma_e; - qat_req->in.enc.n = ctx->dma_n; + qat_req->in.rsa.enc.e = ctx->dma_e; + qat_req->in.rsa.enc.n = ctx->dma_n; ret = -ENOMEM; /* @@ -281,16 +731,16 @@ static int qat_rsa_enc(struct akcipher_request *req) */ if (sg_is_last(req->src) && req->src_len == ctx->key_sz) { qat_req->src_align = NULL; - qat_req->in.enc.m = dma_map_single(dev, sg_virt(req->src), + qat_req->in.rsa.enc.m = dma_map_single(dev, sg_virt(req->src), req->src_len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->in.enc.m))) + if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.enc.m))) return ret; } else { int shift = ctx->key_sz - req->src_len; qat_req->src_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->in.enc.m, + &qat_req->in.rsa.enc.m, GFP_KERNEL); if (unlikely(!qat_req->src_align)) return ret; @@ -300,30 +750,30 @@ static int qat_rsa_enc(struct akcipher_request *req) } if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) { qat_req->dst_align = NULL; - qat_req->out.enc.c = dma_map_single(dev, sg_virt(req->dst), - req->dst_len, - DMA_FROM_DEVICE); + qat_req->out.rsa.enc.c = dma_map_single(dev, sg_virt(req->dst), + req->dst_len, + DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->out.enc.c))) + if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.enc.c))) goto unmap_src; } else { qat_req->dst_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->out.enc.c, + &qat_req->out.rsa.enc.c, GFP_KERNEL); if (unlikely(!qat_req->dst_align)) goto unmap_src; } - qat_req->in.in_tab[3] = 0; - qat_req->out.out_tab[1] = 0; - qat_req->phy_in = dma_map_single(dev, &qat_req->in.enc.m, + qat_req->in.rsa.in_tab[3] = 0; + qat_req->out.rsa.out_tab[1] = 0; + qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.enc.m, sizeof(struct qat_rsa_input_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) goto unmap_dst; - qat_req->phy_out = dma_map_single(dev, &qat_req->out.enc.c, + qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.enc.c, sizeof(struct qat_rsa_output_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) @@ -331,7 +781,7 @@ static int qat_rsa_enc(struct akcipher_request *req) msg->pke_mid.src_data_addr = qat_req->phy_in; msg->pke_mid.dest_data_addr = qat_req->phy_out; - msg->pke_mid.opaque = (uint64_t)(__force long)req; + msg->pke_mid.opaque = (uint64_t)(__force long)qat_req; msg->input_param_count = 3; msg->output_param_count = 1; do { @@ -353,19 +803,19 @@ unmap_in_params: unmap_dst: if (qat_req->dst_align) dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align, - qat_req->out.enc.c); + qat_req->out.rsa.enc.c); else - if (!dma_mapping_error(dev, qat_req->out.enc.c)) - dma_unmap_single(dev, qat_req->out.enc.c, ctx->key_sz, - DMA_FROM_DEVICE); + if (!dma_mapping_error(dev, qat_req->out.rsa.enc.c)) + dma_unmap_single(dev, qat_req->out.rsa.enc.c, + ctx->key_sz, DMA_FROM_DEVICE); unmap_src: if (qat_req->src_align) dma_free_coherent(dev, ctx->key_sz, qat_req->src_align, - qat_req->in.enc.m); + qat_req->in.rsa.enc.m); else - if (!dma_mapping_error(dev, qat_req->in.enc.m)) - dma_unmap_single(dev, qat_req->in.enc.m, ctx->key_sz, - DMA_TO_DEVICE); + if (!dma_mapping_error(dev, qat_req->in.rsa.enc.m)) + dma_unmap_single(dev, qat_req->in.rsa.enc.m, + ctx->key_sz, DMA_TO_DEVICE); return ret; } @@ -375,7 +825,7 @@ static int qat_rsa_dec(struct akcipher_request *req) struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); - struct qat_rsa_request *qat_req = + struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; int ret, ctr = 0; @@ -390,18 +840,30 @@ static int qat_rsa_dec(struct akcipher_request *req) memset(msg, '\0', sizeof(*msg)); ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr, ICP_QAT_FW_COMN_REQ_FLAG_SET); - msg->pke_hdr.cd_pars.func_id = qat_rsa_dec_fn_id(ctx->key_sz); + msg->pke_hdr.cd_pars.func_id = ctx->crt_mode ? + qat_rsa_dec_fn_id_crt(ctx->key_sz) : + qat_rsa_dec_fn_id(ctx->key_sz); if (unlikely(!msg->pke_hdr.cd_pars.func_id)) return -EINVAL; - qat_req->ctx = ctx; + qat_req->cb = qat_rsa_cb; + qat_req->ctx.rsa = ctx; + qat_req->areq.rsa = req; msg->pke_hdr.service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_PKE; msg->pke_hdr.comn_req_flags = ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT, QAT_COMN_CD_FLD_TYPE_64BIT_ADR); - qat_req->in.dec.d = ctx->dma_d; - qat_req->in.dec.n = ctx->dma_n; + if (ctx->crt_mode) { + qat_req->in.rsa.dec_crt.p = ctx->dma_p; + qat_req->in.rsa.dec_crt.q = ctx->dma_q; + qat_req->in.rsa.dec_crt.dp = ctx->dma_dp; + qat_req->in.rsa.dec_crt.dq = ctx->dma_dq; + qat_req->in.rsa.dec_crt.qinv = ctx->dma_qinv; + } else { + qat_req->in.rsa.dec.d = ctx->dma_d; + qat_req->in.rsa.dec.n = ctx->dma_n; + } ret = -ENOMEM; /* @@ -413,16 +875,16 @@ static int qat_rsa_dec(struct akcipher_request *req) */ if (sg_is_last(req->src) && req->src_len == ctx->key_sz) { qat_req->src_align = NULL; - qat_req->in.dec.c = dma_map_single(dev, sg_virt(req->src), + qat_req->in.rsa.dec.c = dma_map_single(dev, sg_virt(req->src), req->dst_len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->in.dec.c))) + if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.dec.c))) return ret; } else { int shift = ctx->key_sz - req->src_len; qat_req->src_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->in.dec.c, + &qat_req->in.rsa.dec.c, GFP_KERNEL); if (unlikely(!qat_req->src_align)) return ret; @@ -432,31 +894,34 @@ static int qat_rsa_dec(struct akcipher_request *req) } if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) { qat_req->dst_align = NULL; - qat_req->out.dec.m = dma_map_single(dev, sg_virt(req->dst), + qat_req->out.rsa.dec.m = dma_map_single(dev, sg_virt(req->dst), req->dst_len, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->out.dec.m))) + if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.dec.m))) goto unmap_src; } else { qat_req->dst_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->out.dec.m, + &qat_req->out.rsa.dec.m, GFP_KERNEL); if (unlikely(!qat_req->dst_align)) goto unmap_src; } - qat_req->in.in_tab[3] = 0; - qat_req->out.out_tab[1] = 0; - qat_req->phy_in = dma_map_single(dev, &qat_req->in.dec.c, + if (ctx->crt_mode) + qat_req->in.rsa.in_tab[6] = 0; + else + qat_req->in.rsa.in_tab[3] = 0; + qat_req->out.rsa.out_tab[1] = 0; + qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.dec.c, sizeof(struct qat_rsa_input_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) goto unmap_dst; - qat_req->phy_out = dma_map_single(dev, &qat_req->out.dec.m, + qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.dec.m, sizeof(struct qat_rsa_output_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) @@ -464,8 +929,12 @@ static int qat_rsa_dec(struct akcipher_request *req) msg->pke_mid.src_data_addr = qat_req->phy_in; msg->pke_mid.dest_data_addr = qat_req->phy_out; - msg->pke_mid.opaque = (uint64_t)(__force long)req; - msg->input_param_count = 3; + msg->pke_mid.opaque = (uint64_t)(__force long)qat_req; + if (ctx->crt_mode) + msg->input_param_count = 6; + else + msg->input_param_count = 3; + msg->output_param_count = 1; do { ret = adf_send_message(ctx->inst->pke_tx, (uint32_t *)msg); @@ -486,26 +955,24 @@ unmap_in_params: unmap_dst: if (qat_req->dst_align) dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align, - qat_req->out.dec.m); + qat_req->out.rsa.dec.m); else - if (!dma_mapping_error(dev, qat_req->out.dec.m)) - dma_unmap_single(dev, qat_req->out.dec.m, ctx->key_sz, - DMA_FROM_DEVICE); + if (!dma_mapping_error(dev, qat_req->out.rsa.dec.m)) + dma_unmap_single(dev, qat_req->out.rsa.dec.m, + ctx->key_sz, DMA_FROM_DEVICE); unmap_src: if (qat_req->src_align) dma_free_coherent(dev, ctx->key_sz, qat_req->src_align, - qat_req->in.dec.c); + qat_req->in.rsa.dec.c); else - if (!dma_mapping_error(dev, qat_req->in.dec.c)) - dma_unmap_single(dev, qat_req->in.dec.c, ctx->key_sz, - DMA_TO_DEVICE); + if (!dma_mapping_error(dev, qat_req->in.rsa.dec.c)) + dma_unmap_single(dev, qat_req->in.rsa.dec.c, + ctx->key_sz, DMA_TO_DEVICE); return ret; } -int qat_rsa_get_n(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) +int qat_rsa_set_n(struct qat_rsa_ctx *ctx, const char *value, size_t vlen) { - struct qat_rsa_ctx *ctx = context; struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); const char *ptr = value; @@ -518,11 +985,6 @@ int qat_rsa_get_n(void *context, size_t hdrlen, unsigned char tag, ctx->key_sz = vlen; ret = -EINVAL; - /* In FIPS mode only allow key size 2K & 3K */ - if (fips_enabled && (ctx->key_sz != 256 && ctx->key_sz != 384)) { - pr_err("QAT: RSA: key size not allowed in FIPS mode\n"); - goto err; - } /* invalid key size provided */ if (!qat_rsa_enc_fn_id(ctx->key_sz)) goto err; @@ -540,10 +1002,8 @@ err: return ret; } -int qat_rsa_get_e(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) +int qat_rsa_set_e(struct qat_rsa_ctx *ctx, const char *value, size_t vlen) { - struct qat_rsa_ctx *ctx = context; struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); const char *ptr = value; @@ -559,18 +1019,15 @@ int qat_rsa_get_e(void *context, size_t hdrlen, unsigned char tag, } ctx->e = dma_zalloc_coherent(dev, ctx->key_sz, &ctx->dma_e, GFP_KERNEL); - if (!ctx->e) { - ctx->e = NULL; + if (!ctx->e) return -ENOMEM; - } + memcpy(ctx->e + (ctx->key_sz - vlen), ptr, vlen); return 0; } -int qat_rsa_get_d(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) +int qat_rsa_set_d(struct qat_rsa_ctx *ctx, const char *value, size_t vlen) { - struct qat_rsa_ctx *ctx = context; struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); const char *ptr = value; @@ -585,12 +1042,6 @@ int qat_rsa_get_d(void *context, size_t hdrlen, unsigned char tag, if (!ctx->key_sz || !vlen || vlen > ctx->key_sz) goto err; - /* In FIPS mode only allow key size 2K & 3K */ - if (fips_enabled && (vlen != 256 && vlen != 384)) { - pr_err("QAT: RSA: key size not allowed in FIPS mode\n"); - goto err; - } - ret = -ENOMEM; ctx->d = dma_zalloc_coherent(dev, ctx->key_sz, &ctx->dma_d, GFP_KERNEL); if (!ctx->d) @@ -603,12 +1054,106 @@ err: return ret; } -static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, - unsigned int keylen, bool private) +static void qat_rsa_drop_leading_zeros(const char **ptr, unsigned int *len) { - struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); - struct device *dev = &GET_DEV(ctx->inst->accel_dev); - int ret; + while (!**ptr && *len) { + (*ptr)++; + (*len)--; + } +} + +static void qat_rsa_setkey_crt(struct qat_rsa_ctx *ctx, struct rsa_key *rsa_key) +{ + struct qat_crypto_instance *inst = ctx->inst; + struct device *dev = &GET_DEV(inst->accel_dev); + const char *ptr; + unsigned int len; + unsigned int half_key_sz = ctx->key_sz / 2; + + /* p */ + ptr = rsa_key->p; + len = rsa_key->p_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto err; + ctx->p = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_p, GFP_KERNEL); + if (!ctx->p) + goto err; + memcpy(ctx->p + (half_key_sz - len), ptr, len); + + /* q */ + ptr = rsa_key->q; + len = rsa_key->q_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_p; + ctx->q = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_q, GFP_KERNEL); + if (!ctx->q) + goto free_p; + memcpy(ctx->q + (half_key_sz - len), ptr, len); + + /* dp */ + ptr = rsa_key->dp; + len = rsa_key->dp_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_q; + ctx->dp = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_dp, + GFP_KERNEL); + if (!ctx->dp) + goto free_q; + memcpy(ctx->dp + (half_key_sz - len), ptr, len); + + /* dq */ + ptr = rsa_key->dq; + len = rsa_key->dq_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_dp; + ctx->dq = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_dq, + GFP_KERNEL); + if (!ctx->dq) + goto free_dp; + memcpy(ctx->dq + (half_key_sz - len), ptr, len); + + /* qinv */ + ptr = rsa_key->qinv; + len = rsa_key->qinv_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_dq; + ctx->qinv = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_qinv, + GFP_KERNEL); + if (!ctx->qinv) + goto free_dq; + memcpy(ctx->qinv + (half_key_sz - len), ptr, len); + + ctx->crt_mode = true; + return; + +free_dq: + memset(ctx->dq, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dq, ctx->dma_dq); + ctx->dq = NULL; +free_dp: + memset(ctx->dp, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dp, ctx->dma_dp); + ctx->dp = NULL; +free_q: + memset(ctx->q, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->q, ctx->dma_q); + ctx->q = NULL; +free_p: + memset(ctx->p, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->p, ctx->dma_p); + ctx->p = NULL; +err: + ctx->crt_mode = false; +} + +static void qat_rsa_clear_ctx(struct device *dev, struct qat_rsa_ctx *ctx) +{ + unsigned int half_key_sz = ctx->key_sz / 2; /* Free the old key if any */ if (ctx->n) @@ -619,19 +1164,68 @@ static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, memset(ctx->d, '\0', ctx->key_sz); dma_free_coherent(dev, ctx->key_sz, ctx->d, ctx->dma_d); } + if (ctx->p) { + memset(ctx->p, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->p, ctx->dma_p); + } + if (ctx->q) { + memset(ctx->q, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->q, ctx->dma_q); + } + if (ctx->dp) { + memset(ctx->dp, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dp, ctx->dma_dp); + } + if (ctx->dq) { + memset(ctx->dq, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dq, ctx->dma_dq); + } + if (ctx->qinv) { + memset(ctx->qinv, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->qinv, ctx->dma_qinv); + } ctx->n = NULL; ctx->e = NULL; ctx->d = NULL; + ctx->p = NULL; + ctx->q = NULL; + ctx->dp = NULL; + ctx->dq = NULL; + ctx->qinv = NULL; + ctx->crt_mode = false; + ctx->key_sz = 0; +} + +static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen, bool private) +{ + struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct device *dev = &GET_DEV(ctx->inst->accel_dev); + struct rsa_key rsa_key; + int ret; + + qat_rsa_clear_ctx(dev, ctx); if (private) - ret = asn1_ber_decoder(&qat_rsaprivkey_decoder, ctx, key, - keylen); + ret = rsa_parse_priv_key(&rsa_key, key, keylen); else - ret = asn1_ber_decoder(&qat_rsapubkey_decoder, ctx, key, - keylen); + ret = rsa_parse_pub_key(&rsa_key, key, keylen); + if (ret < 0) + goto free; + + ret = qat_rsa_set_n(ctx, rsa_key.n, rsa_key.n_sz); if (ret < 0) goto free; + ret = qat_rsa_set_e(ctx, rsa_key.e, rsa_key.e_sz); + if (ret < 0) + goto free; + if (private) { + ret = qat_rsa_set_d(ctx, rsa_key.d, rsa_key.d_sz); + if (ret < 0) + goto free; + qat_rsa_setkey_crt(ctx, &rsa_key); + } if (!ctx->n || !ctx->e) { /* invalid key provided */ @@ -646,20 +1240,7 @@ static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, return 0; free: - if (ctx->d) { - memset(ctx->d, '\0', ctx->key_sz); - dma_free_coherent(dev, ctx->key_sz, ctx->d, ctx->dma_d); - ctx->d = NULL; - } - if (ctx->e) { - dma_free_coherent(dev, ctx->key_sz, ctx->e, ctx->dma_e); - ctx->e = NULL; - } - if (ctx->n) { - dma_free_coherent(dev, ctx->key_sz, ctx->n, ctx->dma_n); - ctx->n = NULL; - ctx->key_sz = 0; - } + qat_rsa_clear_ctx(dev, ctx); return ret; } @@ -725,7 +1306,7 @@ static struct akcipher_alg rsa = { .max_size = qat_rsa_max_size, .init = qat_rsa_init_tfm, .exit = qat_rsa_exit_tfm, - .reqsize = sizeof(struct qat_rsa_request) + 64, + .reqsize = sizeof(struct qat_asym_request) + 64, .base = { .cra_name = "rsa", .cra_driver_name = "qat-rsa", @@ -735,6 +1316,23 @@ static struct akcipher_alg rsa = { }, }; +static struct kpp_alg dh = { + .set_secret = qat_dh_set_secret, + .generate_public_key = qat_dh_compute_value, + .compute_shared_secret = qat_dh_compute_value, + .max_size = qat_dh_max_size, + .init = qat_dh_init_tfm, + .exit = qat_dh_exit_tfm, + .reqsize = sizeof(struct qat_asym_request) + 64, + .base = { + .cra_name = "dh", + .cra_driver_name = "qat-dh", + .cra_priority = 1000, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct qat_dh_ctx), + }, +}; + int qat_asym_algs_register(void) { int ret = 0; @@ -743,7 +1341,11 @@ int qat_asym_algs_register(void) if (++active_devs == 1) { rsa.base.cra_flags = 0; ret = crypto_register_akcipher(&rsa); + if (ret) + goto unlock; + ret = crypto_register_kpp(&dh); } +unlock: mutex_unlock(&algs_lock); return ret; } @@ -751,7 +1353,9 @@ int qat_asym_algs_register(void) void qat_asym_algs_unregister(void) { mutex_lock(&algs_lock); - if (--active_devs == 0) + if (--active_devs == 0) { crypto_unregister_akcipher(&rsa); + crypto_unregister_kpp(&dh); + } mutex_unlock(&algs_lock); } diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index 6e1d5e185..1dfcab317 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -252,6 +252,7 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->reset_device = adf_reset_sbr; hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; } diff --git a/drivers/crypto/qce/ablkcipher.c b/drivers/crypto/qce/ablkcipher.c index dbcbbe242..b04b42f48 100644 --- a/drivers/crypto/qce/ablkcipher.c +++ b/drivers/crypto/qce/ablkcipher.c @@ -15,8 +15,8 @@ #include <linux/interrupt.h> #include <linux/types.h> #include <crypto/aes.h> -#include <crypto/algapi.h> #include <crypto/des.h> +#include <crypto/internal/skcipher.h> #include "cipher.h" @@ -189,7 +189,7 @@ static int qce_ablkcipher_setkey(struct crypto_ablkcipher *ablk, const u8 *key, memcpy(ctx->enc_key, key, keylen); return 0; fallback: - ret = crypto_ablkcipher_setkey(ctx->fallback, key, keylen); + ret = crypto_skcipher_setkey(ctx->fallback, key, keylen); if (!ret) ctx->enc_keylen = keylen; return ret; @@ -212,10 +212,16 @@ static int qce_ablkcipher_crypt(struct ablkcipher_request *req, int encrypt) if (IS_AES(rctx->flags) && ctx->enc_keylen != AES_KEYSIZE_128 && ctx->enc_keylen != AES_KEYSIZE_256) { - ablkcipher_request_set_tfm(req, ctx->fallback); - ret = encrypt ? crypto_ablkcipher_encrypt(req) : - crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + ret = encrypt ? crypto_skcipher_encrypt(subreq) : + crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return ret; } @@ -239,10 +245,9 @@ static int qce_ablkcipher_init(struct crypto_tfm *tfm) memset(ctx, 0, sizeof(*ctx)); tfm->crt_ablkcipher.reqsize = sizeof(struct qce_cipher_reqctx); - ctx->fallback = crypto_alloc_ablkcipher(crypto_tfm_alg_name(tfm), - CRYPTO_ALG_TYPE_ABLKCIPHER, - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK); + ctx->fallback = crypto_alloc_skcipher(crypto_tfm_alg_name(tfm), 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->fallback)) return PTR_ERR(ctx->fallback); @@ -253,7 +258,7 @@ static void qce_ablkcipher_exit(struct crypto_tfm *tfm) { struct qce_cipher_ctx *ctx = crypto_tfm_ctx(tfm); - crypto_free_ablkcipher(ctx->fallback); + crypto_free_skcipher(ctx->fallback); } struct qce_ablkcipher_def { diff --git a/drivers/crypto/qce/cipher.h b/drivers/crypto/qce/cipher.h index 5c6a5f863..2b0278bb6 100644 --- a/drivers/crypto/qce/cipher.h +++ b/drivers/crypto/qce/cipher.h @@ -22,7 +22,7 @@ struct qce_cipher_ctx { u8 enc_key[QCE_MAX_KEY_SIZE]; unsigned int enc_keylen; - struct crypto_ablkcipher *fallback; + struct crypto_skcipher *fallback; }; /** diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index 2b3a0cfe3..dce1af0ce 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -155,43 +155,43 @@ * expansion of its usage. */ struct samsung_aes_variant { - unsigned int aes_offset; + unsigned int aes_offset; }; struct s5p_aes_reqctx { - unsigned long mode; + unsigned long mode; }; struct s5p_aes_ctx { - struct s5p_aes_dev *dev; + struct s5p_aes_dev *dev; - uint8_t aes_key[AES_MAX_KEY_SIZE]; - uint8_t nonce[CTR_RFC3686_NONCE_SIZE]; - int keylen; + uint8_t aes_key[AES_MAX_KEY_SIZE]; + uint8_t nonce[CTR_RFC3686_NONCE_SIZE]; + int keylen; }; struct s5p_aes_dev { - struct device *dev; - struct clk *clk; - void __iomem *ioaddr; - void __iomem *aes_ioaddr; - int irq_fc; + struct device *dev; + struct clk *clk; + void __iomem *ioaddr; + void __iomem *aes_ioaddr; + int irq_fc; - struct ablkcipher_request *req; - struct s5p_aes_ctx *ctx; - struct scatterlist *sg_src; - struct scatterlist *sg_dst; + struct ablkcipher_request *req; + struct s5p_aes_ctx *ctx; + struct scatterlist *sg_src; + struct scatterlist *sg_dst; /* In case of unaligned access: */ - struct scatterlist *sg_src_cpy; - struct scatterlist *sg_dst_cpy; + struct scatterlist *sg_src_cpy; + struct scatterlist *sg_dst_cpy; - struct tasklet_struct tasklet; - struct crypto_queue queue; - bool busy; - spinlock_t lock; + struct tasklet_struct tasklet; + struct crypto_queue queue; + bool busy; + spinlock_t lock; - struct samsung_aes_variant *variant; + struct samsung_aes_variant *variant; }; static struct s5p_aes_dev *s5p_dev; @@ -421,11 +421,11 @@ static bool s5p_aes_rx(struct s5p_aes_dev *dev) static irqreturn_t s5p_aes_interrupt(int irq, void *dev_id) { struct platform_device *pdev = dev_id; - struct s5p_aes_dev *dev = platform_get_drvdata(pdev); - uint32_t status; - unsigned long flags; - bool set_dma_tx = false; - bool set_dma_rx = false; + struct s5p_aes_dev *dev = platform_get_drvdata(pdev); + bool set_dma_tx = false; + bool set_dma_rx = false; + unsigned long flags; + uint32_t status; spin_lock_irqsave(&dev->lock, flags); @@ -538,10 +538,10 @@ static int s5p_set_outdata_start(struct s5p_aes_dev *dev, static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode) { - struct ablkcipher_request *req = dev->req; - uint32_t aes_control; - int err; - unsigned long flags; + struct ablkcipher_request *req = dev->req; + uint32_t aes_control; + unsigned long flags; + int err; aes_control = SSS_AES_KEY_CHANGE_MODE; if (mode & FLAGS_AES_DECRYPT) @@ -653,10 +653,10 @@ exit: static int s5p_aes_crypt(struct ablkcipher_request *req, unsigned long mode) { - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct s5p_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct s5p_aes_reqctx *reqctx = ablkcipher_request_ctx(req); - struct s5p_aes_dev *dev = ctx->dev; + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct s5p_aes_reqctx *reqctx = ablkcipher_request_ctx(req); + struct s5p_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct s5p_aes_dev *dev = ctx->dev; if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) { dev_err(dev->dev, "request size is not exact amount of AES blocks\n"); @@ -671,7 +671,7 @@ static int s5p_aes_crypt(struct ablkcipher_request *req, unsigned long mode) static int s5p_aes_setkey(struct crypto_ablkcipher *cipher, const uint8_t *key, unsigned int keylen) { - struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); struct s5p_aes_ctx *ctx = crypto_tfm_ctx(tfm); if (keylen != AES_KEYSIZE_128 && @@ -763,11 +763,11 @@ static struct crypto_alg algs[] = { static int s5p_aes_probe(struct platform_device *pdev) { - int i, j, err = -ENODEV; - struct s5p_aes_dev *pdata; - struct device *dev = &pdev->dev; - struct resource *res; + struct device *dev = &pdev->dev; + int i, j, err = -ENODEV; struct samsung_aes_variant *variant; + struct s5p_aes_dev *pdata; + struct resource *res; if (s5p_dev) return -EEXIST; diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index c3f3d89e4..0c49956ee 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -14,10 +14,9 @@ * Based on omap-aes.c and tegra-aes.c */ -#include <crypto/algapi.h> #include <crypto/aes.h> -#include <crypto/hash.h> #include <crypto/internal/hash.h> +#include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <crypto/sha.h> @@ -150,10 +149,7 @@ struct sahara_ctx { /* AES-specific context */ int keylen; u8 key[AES_KEYSIZE_128]; - struct crypto_ablkcipher *fallback; - - /* SHA-specific context */ - struct crypto_shash *shash_fallback; + struct crypto_skcipher *fallback; }; struct sahara_aes_reqctx { @@ -620,25 +616,21 @@ static int sahara_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, return 0; } - if (keylen != AES_KEYSIZE_128 && - keylen != AES_KEYSIZE_192 && keylen != AES_KEYSIZE_256) + if (keylen != AES_KEYSIZE_192 && keylen != AES_KEYSIZE_256) return -EINVAL; /* * The requested key size is not supported by HW, do a fallback. */ - ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - ctx->fallback->base.crt_flags |= - (tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); + crypto_skcipher_clear_flags(ctx->fallback, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctx->fallback, tfm->base.crt_flags & + CRYPTO_TFM_REQ_MASK); - ret = crypto_ablkcipher_setkey(ctx->fallback, key, keylen); - if (ret) { - struct crypto_tfm *tfm_aux = crypto_ablkcipher_tfm(tfm); + ret = crypto_skcipher_setkey(ctx->fallback, key, keylen); - tfm_aux->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm_aux->crt_flags |= - (ctx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK); - } + tfm->base.crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->base.crt_flags |= crypto_skcipher_get_flags(ctx->fallback) & + CRYPTO_TFM_RES_MASK; return ret; } @@ -670,16 +662,20 @@ static int sahara_aes_crypt(struct ablkcipher_request *req, unsigned long mode) static int sahara_aes_ecb_encrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_encrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_encrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -688,16 +684,20 @@ static int sahara_aes_ecb_encrypt(struct ablkcipher_request *req) static int sahara_aes_ecb_decrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -706,16 +706,20 @@ static int sahara_aes_ecb_decrypt(struct ablkcipher_request *req) static int sahara_aes_cbc_encrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_encrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_encrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -724,16 +728,20 @@ static int sahara_aes_cbc_encrypt(struct ablkcipher_request *req) static int sahara_aes_cbc_decrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -745,8 +753,9 @@ static int sahara_aes_cra_init(struct crypto_tfm *tfm) const char *name = crypto_tfm_alg_name(tfm); struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - ctx->fallback = crypto_alloc_ablkcipher(name, 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + ctx->fallback = crypto_alloc_skcipher(name, 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->fallback)) { pr_err("Error allocating fallback algo %s\n", name); return PTR_ERR(ctx->fallback); @@ -761,9 +770,7 @@ static void sahara_aes_cra_exit(struct crypto_tfm *tfm) { struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->fallback) - crypto_free_ablkcipher(ctx->fallback); - ctx->fallback = NULL; + crypto_free_skcipher(ctx->fallback); } static u32 sahara_sha_init_hdr(struct sahara_dev *dev, @@ -1180,15 +1187,6 @@ static int sahara_sha_import(struct ahash_request *req, const void *in) static int sahara_sha_cra_init(struct crypto_tfm *tfm) { - const char *name = crypto_tfm_alg_name(tfm); - struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - - ctx->shash_fallback = crypto_alloc_shash(name, 0, - CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(ctx->shash_fallback)) { - pr_err("Error allocating fallback algo %s\n", name); - return PTR_ERR(ctx->shash_fallback); - } crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct sahara_sha_reqctx) + SHA_BUFFER_LEN + SHA256_BLOCK_SIZE); @@ -1196,14 +1194,6 @@ static int sahara_sha_cra_init(struct crypto_tfm *tfm) return 0; } -static void sahara_sha_cra_exit(struct crypto_tfm *tfm) -{ - struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - - crypto_free_shash(ctx->shash_fallback); - ctx->shash_fallback = NULL; -} - static struct crypto_alg aes_algs[] = { { .cra_name = "ecb(aes)", @@ -1272,7 +1262,6 @@ static struct ahash_alg sha_v3_algs[] = { .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_init = sahara_sha_cra_init, - .cra_exit = sahara_sha_cra_exit, } }, }; @@ -1300,7 +1289,6 @@ static struct ahash_alg sha_v4_algs[] = { .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_init = sahara_sha_cra_init, - .cra_exit = sahara_sha_cra_exit, } }, }; diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index b7ee8d301..0418a2f41 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -91,10 +91,17 @@ static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr, return be16_to_cpu(ptr->len); } -static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr, bool is_sec1) +static void to_talitos_ptr_ext_set(struct talitos_ptr *ptr, u8 val, + bool is_sec1) { if (!is_sec1) - ptr->j_extent = 0; + ptr->j_extent = val; +} + +static void to_talitos_ptr_ext_or(struct talitos_ptr *ptr, u8 val, bool is_sec1) +{ + if (!is_sec1) + ptr->j_extent |= val; } /* @@ -111,7 +118,7 @@ static void map_single_talitos_ptr(struct device *dev, to_talitos_ptr_len(ptr, len, is_sec1); to_talitos_ptr(ptr, dma_addr, is_sec1); - to_talitos_ptr_extent_clear(ptr, is_sec1); + to_talitos_ptr_ext_set(ptr, 0, is_sec1); } /* @@ -804,6 +811,11 @@ static void talitos_unregister_rng(struct device *dev) * crypto alg */ #define TALITOS_CRA_PRIORITY 3000 +/* + * Defines a priority for doing AEAD with descriptors type + * HMAC_SNOOP_NO_AFEA (HSNA) instead of type IPSEC_ESP + */ +#define TALITOS_CRA_PRIORITY_AEAD_HSNA (TALITOS_CRA_PRIORITY - 1) #define TALITOS_MAX_KEY_SIZE 96 #define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ @@ -904,35 +916,59 @@ struct talitos_edesc { static void talitos_sg_unmap(struct device *dev, struct talitos_edesc *edesc, struct scatterlist *src, - struct scatterlist *dst) + struct scatterlist *dst, + unsigned int len, unsigned int offset) { + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); unsigned int src_nents = edesc->src_nents ? : 1; unsigned int dst_nents = edesc->dst_nents ? : 1; + if (is_sec1 && dst && dst_nents > 1) { + dma_sync_single_for_device(dev, edesc->dma_link_tbl + offset, + len, DMA_FROM_DEVICE); + sg_pcopy_from_buffer(dst, dst_nents, edesc->buf + offset, len, + offset); + } if (src != dst) { - dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE); + if (src_nents == 1 || !is_sec1) + dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE); - if (dst) { + if (dst && (dst_nents == 1 || !is_sec1)) dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE); - } - } else + } else if (src_nents == 1 || !is_sec1) { dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL); + } } static void ipsec_esp_unmap(struct device *dev, struct talitos_edesc *edesc, struct aead_request *areq) { - unmap_single_talitos_ptr(dev, &edesc->desc.ptr[6], DMA_FROM_DEVICE); + struct crypto_aead *aead = crypto_aead_reqtfm(areq); + struct talitos_ctx *ctx = crypto_aead_ctx(aead); + unsigned int ivsize = crypto_aead_ivsize(aead); + + if (edesc->desc.hdr & DESC_HDR_TYPE_IPSEC_ESP) + unmap_single_talitos_ptr(dev, &edesc->desc.ptr[6], + DMA_FROM_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[3], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[0], DMA_TO_DEVICE); - talitos_sg_unmap(dev, edesc, areq->src, areq->dst); + talitos_sg_unmap(dev, edesc, areq->src, areq->dst, areq->cryptlen, + areq->assoclen); if (edesc->dma_len) dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); + + if (!(edesc->desc.hdr & DESC_HDR_TYPE_IPSEC_ESP)) { + unsigned int dst_nents = edesc->dst_nents ? : 1; + + sg_pcopy_to_buffer(areq->dst, dst_nents, ctx->iv, ivsize, + areq->assoclen + areq->cryptlen - ivsize); + } } /* @@ -942,6 +978,8 @@ static void ipsec_esp_encrypt_done(struct device *dev, struct talitos_desc *desc, void *context, int err) { + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); struct aead_request *areq = context; struct crypto_aead *authenc = crypto_aead_reqtfm(areq); unsigned int authsize = crypto_aead_authsize(authenc); @@ -955,8 +993,11 @@ static void ipsec_esp_encrypt_done(struct device *dev, /* copy the generated ICV to dst */ if (edesc->icv_ool) { - icvdata = &edesc->link_tbl[edesc->src_nents + - edesc->dst_nents + 2]; + if (is_sec1) + icvdata = edesc->buf + areq->assoclen + areq->cryptlen; + else + icvdata = &edesc->link_tbl[edesc->src_nents + + edesc->dst_nents + 2]; sg = sg_last(areq->dst, edesc->dst_nents); memcpy((char *)sg_virt(sg) + sg->length - authsize, icvdata, authsize); @@ -977,6 +1018,8 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev, struct talitos_edesc *edesc; struct scatterlist *sg; char *oicv, *icv; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); edesc = container_of(desc, struct talitos_edesc, desc); @@ -988,7 +1031,12 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev, icv = (char *)sg_virt(sg) + sg->length - authsize; if (edesc->dma_len) { - oicv = (char *)&edesc->link_tbl[edesc->src_nents + + if (is_sec1) + oicv = (char *)&edesc->dma_link_tbl + + req->assoclen + req->cryptlen; + else + oicv = (char *) + &edesc->link_tbl[edesc->src_nents + edesc->dst_nents + 2]; if (edesc->icv_ool) icv = oicv + authsize; @@ -1050,8 +1098,8 @@ static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count, to_talitos_ptr(link_tbl_ptr + count, sg_dma_address(sg) + offset, 0); - link_tbl_ptr[count].len = cpu_to_be16(len); - link_tbl_ptr[count].j_extent = 0; + to_talitos_ptr_len(link_tbl_ptr + count, len, 0); + to_talitos_ptr_ext_set(link_tbl_ptr + count, 0, 0); count++; cryptlen -= len; offset = 0; @@ -1062,17 +1110,43 @@ next: /* tag end of link table */ if (count > 0) - link_tbl_ptr[count - 1].j_extent = DESC_PTR_LNKTBL_RETURN; + to_talitos_ptr_ext_set(link_tbl_ptr + count - 1, + DESC_PTR_LNKTBL_RETURN, 0); return count; } -static inline int sg_to_link_tbl(struct scatterlist *sg, int sg_count, - int cryptlen, - struct talitos_ptr *link_tbl_ptr) +int talitos_sg_map(struct device *dev, struct scatterlist *src, + unsigned int len, struct talitos_edesc *edesc, + struct talitos_ptr *ptr, + int sg_count, unsigned int offset, int tbl_off) { - return sg_to_link_tbl_offset(sg, sg_count, 0, cryptlen, - link_tbl_ptr); + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + to_talitos_ptr_len(ptr, len, is_sec1); + to_talitos_ptr_ext_set(ptr, 0, is_sec1); + + if (sg_count == 1) { + to_talitos_ptr(ptr, sg_dma_address(src) + offset, is_sec1); + return sg_count; + } + if (is_sec1) { + to_talitos_ptr(ptr, edesc->dma_link_tbl + offset, is_sec1); + return sg_count; + } + sg_count = sg_to_link_tbl_offset(src, sg_count, offset, len, + &edesc->link_tbl[tbl_off]); + if (sg_count == 1) { + /* Only one segment now, so no link tbl needed*/ + copy_talitos_ptr(ptr, &edesc->link_tbl[tbl_off], is_sec1); + return sg_count; + } + to_talitos_ptr(ptr, edesc->dma_link_tbl + + tbl_off * sizeof(struct talitos_ptr), is_sec1); + to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, is_sec1); + + return sg_count; } /* @@ -1093,42 +1167,52 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, int tbl_off = 0; int sg_count, ret; int sg_link_tbl_len; + bool sync_needed = false; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); /* hmac key */ map_single_talitos_ptr(dev, &desc->ptr[0], ctx->authkeylen, &ctx->key, DMA_TO_DEVICE); - sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ?: 1, - (areq->src == areq->dst) ? DMA_BIDIRECTIONAL - : DMA_TO_DEVICE); - /* hmac data */ - desc->ptr[1].len = cpu_to_be16(areq->assoclen); - if (sg_count > 1 && - (ret = sg_to_link_tbl_offset(areq->src, sg_count, 0, - areq->assoclen, - &edesc->link_tbl[tbl_off])) > 1) { - to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off * - sizeof(struct talitos_ptr), 0); - desc->ptr[1].j_extent = DESC_PTR_LNKTBL_JUMP; + sg_count = edesc->src_nents ?: 1; + if (is_sec1 && sg_count > 1) + sg_copy_to_buffer(areq->src, sg_count, edesc->buf, + areq->assoclen + cryptlen); + else + sg_count = dma_map_sg(dev, areq->src, sg_count, + (areq->src == areq->dst) ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, DMA_BIDIRECTIONAL); + /* hmac data */ + ret = talitos_sg_map(dev, areq->src, areq->assoclen, edesc, + &desc->ptr[1], sg_count, 0, tbl_off); + if (ret > 1) { tbl_off += ret; - } else { - to_talitos_ptr(&desc->ptr[1], sg_dma_address(areq->src), 0); - desc->ptr[1].j_extent = 0; + sync_needed = true; } /* cipher iv */ - to_talitos_ptr(&desc->ptr[2], edesc->iv_dma, 0); - desc->ptr[2].len = cpu_to_be16(ivsize); - desc->ptr[2].j_extent = 0; + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) { + to_talitos_ptr(&desc->ptr[2], edesc->iv_dma, is_sec1); + to_talitos_ptr_len(&desc->ptr[2], ivsize, is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[2], 0, is_sec1); + } else { + to_talitos_ptr(&desc->ptr[3], edesc->iv_dma, is_sec1); + to_talitos_ptr_len(&desc->ptr[3], ivsize, is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[3], 0, is_sec1); + } /* cipher key */ - map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen, - (char *)&ctx->key + ctx->authkeylen, - DMA_TO_DEVICE); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) + map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen, + (char *)&ctx->key + ctx->authkeylen, + DMA_TO_DEVICE); + else + map_single_talitos_ptr(dev, &desc->ptr[2], ctx->enckeylen, + (char *)&ctx->key + ctx->authkeylen, + DMA_TO_DEVICE); /* * cipher in @@ -1136,78 +1220,82 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, * extent is bytes of HMAC postpended to ciphertext, * typically 12 for ipsec */ - desc->ptr[4].len = cpu_to_be16(cryptlen); - desc->ptr[4].j_extent = authsize; + to_talitos_ptr_len(&desc->ptr[4], cryptlen, is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[4], 0, is_sec1); sg_link_tbl_len = cryptlen; - if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) - sg_link_tbl_len += authsize; - if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src) + - areq->assoclen, 0); - } else if ((ret = sg_to_link_tbl_offset(areq->src, sg_count, - areq->assoclen, sg_link_tbl_len, - &edesc->link_tbl[tbl_off])) > - 1) { - desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP; - to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl + - tbl_off * - sizeof(struct talitos_ptr), 0); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - tbl_off += ret; - } else { - copy_talitos_ptr(&desc->ptr[4], &edesc->link_tbl[tbl_off], 0); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) { + to_talitos_ptr_ext_set(&desc->ptr[4], authsize, is_sec1); + + if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) + sg_link_tbl_len += authsize; } - /* cipher out */ - desc->ptr[5].len = cpu_to_be16(cryptlen); - desc->ptr[5].j_extent = authsize; + sg_count = talitos_sg_map(dev, areq->src, cryptlen, edesc, + &desc->ptr[4], sg_count, areq->assoclen, + tbl_off); - if (areq->src != areq->dst) - sg_count = dma_map_sg(dev, areq->dst, edesc->dst_nents ? : 1, - DMA_FROM_DEVICE); + if (sg_count > 1) { + tbl_off += sg_count; + sync_needed = true; + } - edesc->icv_ool = false; + /* cipher out */ + if (areq->src != areq->dst) { + sg_count = edesc->dst_nents ? : 1; + if (!is_sec1 || sg_count == 1) + dma_map_sg(dev, areq->dst, sg_count, DMA_FROM_DEVICE); + } - if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst) + - areq->assoclen, 0); - } else if ((sg_count = - sg_to_link_tbl_offset(areq->dst, sg_count, - areq->assoclen, cryptlen, - &edesc->link_tbl[tbl_off])) > 1) { - struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; - - to_talitos_ptr(&desc->ptr[5], edesc->dma_link_tbl + - tbl_off * sizeof(struct talitos_ptr), 0); - - /* Add an entry to the link table for ICV data */ - tbl_ptr += sg_count - 1; - tbl_ptr->j_extent = 0; - tbl_ptr++; - tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; - tbl_ptr->len = cpu_to_be16(authsize); - - /* icv data follows link tables */ - to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + - (edesc->src_nents + edesc->dst_nents + - 2) * sizeof(struct talitos_ptr) + - authsize, 0); - desc->ptr[5].j_extent |= DESC_PTR_LNKTBL_JUMP; - dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, - edesc->dma_len, DMA_BIDIRECTIONAL); + sg_count = talitos_sg_map(dev, areq->dst, cryptlen, edesc, + &desc->ptr[5], sg_count, areq->assoclen, + tbl_off); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) + to_talitos_ptr_ext_or(&desc->ptr[5], authsize, is_sec1); + + if (sg_count > 1) { edesc->icv_ool = true; + sync_needed = true; + + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) { + struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; + int offset = (edesc->src_nents + edesc->dst_nents + 2) * + sizeof(struct talitos_ptr) + authsize; + + /* Add an entry to the link table for ICV data */ + tbl_ptr += sg_count - 1; + to_talitos_ptr_ext_set(tbl_ptr, 0, is_sec1); + tbl_ptr++; + to_talitos_ptr_ext_set(tbl_ptr, DESC_PTR_LNKTBL_RETURN, + is_sec1); + to_talitos_ptr_len(tbl_ptr, authsize, is_sec1); + + /* icv data follows link tables */ + to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + offset, + is_sec1); + } } else { - copy_talitos_ptr(&desc->ptr[5], &edesc->link_tbl[tbl_off], 0); + edesc->icv_ool = false; + } + + /* ICV data */ + if (!(desc->hdr & DESC_HDR_TYPE_IPSEC_ESP)) { + to_talitos_ptr_len(&desc->ptr[6], authsize, is_sec1); + to_talitos_ptr(&desc->ptr[6], edesc->dma_link_tbl + + areq->assoclen + cryptlen, is_sec1); } /* iv out */ - map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, - DMA_FROM_DEVICE); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) + map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, + DMA_FROM_DEVICE); + + if (sync_needed) + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, + DMA_BIDIRECTIONAL); ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { @@ -1233,7 +1321,7 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, bool encrypt) { struct talitos_edesc *edesc; - int src_nents, dst_nents, alloc_len, dma_len; + int src_nents, dst_nents, alloc_len, dma_len, src_len, dst_len; dma_addr_t iv_dma = 0; gfp_t flags = cryptoflags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; @@ -1251,8 +1339,8 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, iv_dma = dma_map_single(dev, iv, ivsize, DMA_TO_DEVICE); if (!dst || dst == src) { - src_nents = sg_nents_for_len(src, - assoclen + cryptlen + authsize); + src_len = assoclen + cryptlen + authsize; + src_nents = sg_nents_for_len(src, src_len); if (src_nents < 0) { dev_err(dev, "Invalid number of src SG.\n"); err = ERR_PTR(-EINVAL); @@ -1260,17 +1348,18 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, } src_nents = (src_nents == 1) ? 0 : src_nents; dst_nents = dst ? src_nents : 0; + dst_len = 0; } else { /* dst && dst != src*/ - src_nents = sg_nents_for_len(src, assoclen + cryptlen + - (encrypt ? 0 : authsize)); + src_len = assoclen + cryptlen + (encrypt ? 0 : authsize); + src_nents = sg_nents_for_len(src, src_len); if (src_nents < 0) { dev_err(dev, "Invalid number of src SG.\n"); err = ERR_PTR(-EINVAL); goto error_sg; } src_nents = (src_nents == 1) ? 0 : src_nents; - dst_nents = sg_nents_for_len(dst, assoclen + cryptlen + - (encrypt ? authsize : 0)); + dst_len = assoclen + cryptlen + (encrypt ? authsize : 0); + dst_nents = sg_nents_for_len(dst, dst_len); if (dst_nents < 0) { dev_err(dev, "Invalid number of dst SG.\n"); err = ERR_PTR(-EINVAL); @@ -1287,8 +1376,8 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, alloc_len = sizeof(struct talitos_edesc); if (src_nents || dst_nents) { if (is_sec1) - dma_len = (src_nents ? cryptlen : 0) + - (dst_nents ? cryptlen : 0); + dma_len = (src_nents ? src_len : 0) + + (dst_nents ? dst_len : 0); else dma_len = (src_nents + dst_nents + 2) * sizeof(struct talitos_ptr) + authsize * 2; @@ -1412,40 +1501,13 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, return 0; } -static void unmap_sg_talitos_ptr(struct device *dev, struct scatterlist *src, - struct scatterlist *dst, unsigned int len, - struct talitos_edesc *edesc) -{ - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - if (is_sec1) { - if (!edesc->src_nents) { - dma_unmap_sg(dev, src, 1, - dst != src ? DMA_TO_DEVICE - : DMA_BIDIRECTIONAL); - } - if (dst && edesc->dst_nents) { - dma_sync_single_for_device(dev, - edesc->dma_link_tbl + len, - len, DMA_FROM_DEVICE); - sg_copy_from_buffer(dst, edesc->dst_nents ? : 1, - edesc->buf + len, len); - } else if (dst && dst != src) { - dma_unmap_sg(dev, dst, 1, DMA_FROM_DEVICE); - } - } else { - talitos_sg_unmap(dev, edesc, src, dst); - } -} - static void common_nonsnoop_unmap(struct device *dev, struct talitos_edesc *edesc, struct ablkcipher_request *areq) { unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); - unmap_sg_talitos_ptr(dev, areq->src, areq->dst, areq->nbytes, edesc); + talitos_sg_unmap(dev, edesc, areq->src, areq->dst, areq->nbytes, 0); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE); @@ -1470,100 +1532,6 @@ static void ablkcipher_done(struct device *dev, areq->base.complete(&areq->base, err); } -int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, - unsigned int len, struct talitos_edesc *edesc, - enum dma_data_direction dir, struct talitos_ptr *ptr) -{ - int sg_count; - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - to_talitos_ptr_len(ptr, len, is_sec1); - - if (is_sec1) { - sg_count = edesc->src_nents ? : 1; - - if (sg_count == 1) { - dma_map_sg(dev, src, 1, dir); - to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); - } else { - sg_copy_to_buffer(src, sg_count, edesc->buf, len); - to_talitos_ptr(ptr, edesc->dma_link_tbl, is_sec1); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - len, DMA_TO_DEVICE); - } - } else { - to_talitos_ptr_extent_clear(ptr, is_sec1); - - sg_count = dma_map_sg(dev, src, edesc->src_nents ? : 1, dir); - - if (sg_count == 1) { - to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); - } else { - sg_count = sg_to_link_tbl(src, sg_count, len, - &edesc->link_tbl[0]); - if (sg_count > 1) { - to_talitos_ptr(ptr, edesc->dma_link_tbl, 0); - ptr->j_extent |= DESC_PTR_LNKTBL_JUMP; - dma_sync_single_for_device(dev, - edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } else { - /* Only one segment now, so no link tbl needed*/ - to_talitos_ptr(ptr, sg_dma_address(src), - is_sec1); - } - } - } - return sg_count; -} - -void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, - unsigned int len, struct talitos_edesc *edesc, - enum dma_data_direction dir, - struct talitos_ptr *ptr, int sg_count) -{ - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - if (dir != DMA_NONE) - sg_count = dma_map_sg(dev, dst, edesc->dst_nents ? : 1, dir); - - to_talitos_ptr_len(ptr, len, is_sec1); - - if (is_sec1) { - if (sg_count == 1) { - if (dir != DMA_NONE) - dma_map_sg(dev, dst, 1, dir); - to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); - } else { - to_talitos_ptr(ptr, edesc->dma_link_tbl + len, is_sec1); - dma_sync_single_for_device(dev, - edesc->dma_link_tbl + len, - len, DMA_FROM_DEVICE); - } - } else { - to_talitos_ptr_extent_clear(ptr, is_sec1); - - if (sg_count == 1) { - to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); - } else { - struct talitos_ptr *link_tbl_ptr = - &edesc->link_tbl[edesc->src_nents + 1]; - - to_talitos_ptr(ptr, edesc->dma_link_tbl + - (edesc->src_nents + 1) * - sizeof(struct talitos_ptr), 0); - ptr->j_extent |= DESC_PTR_LNKTBL_JUMP; - sg_to_link_tbl(dst, sg_count, len, link_tbl_ptr); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } - } -} - static int common_nonsnoop(struct talitos_edesc *edesc, struct ablkcipher_request *areq, void (*callback) (struct device *dev, @@ -1577,6 +1545,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, unsigned int cryptlen = areq->nbytes; unsigned int ivsize = crypto_ablkcipher_ivsize(cipher); int sg_count, ret; + bool sync_needed = false; struct talitos_private *priv = dev_get_drvdata(dev); bool is_sec1 = has_ftr_sec1(priv); @@ -1586,25 +1555,39 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* cipher iv */ to_talitos_ptr(&desc->ptr[1], edesc->iv_dma, is_sec1); to_talitos_ptr_len(&desc->ptr[1], ivsize, is_sec1); - to_talitos_ptr_extent_clear(&desc->ptr[1], is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[1], 0, is_sec1); /* cipher key */ map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, (char *)&ctx->key, DMA_TO_DEVICE); + sg_count = edesc->src_nents ?: 1; + if (is_sec1 && sg_count > 1) + sg_copy_to_buffer(areq->src, sg_count, edesc->buf, + cryptlen); + else + sg_count = dma_map_sg(dev, areq->src, sg_count, + (areq->src == areq->dst) ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE); /* * cipher in */ - sg_count = map_sg_in_talitos_ptr(dev, areq->src, cryptlen, edesc, - (areq->src == areq->dst) ? - DMA_BIDIRECTIONAL : DMA_TO_DEVICE, - &desc->ptr[3]); + sg_count = talitos_sg_map(dev, areq->src, cryptlen, edesc, + &desc->ptr[3], sg_count, 0, 0); + if (sg_count > 1) + sync_needed = true; /* cipher out */ - map_sg_out_talitos_ptr(dev, areq->dst, cryptlen, edesc, - (areq->src == areq->dst) ? DMA_NONE - : DMA_FROM_DEVICE, - &desc->ptr[4], sg_count); + if (areq->src != areq->dst) { + sg_count = edesc->dst_nents ? : 1; + if (!is_sec1 || sg_count == 1) + dma_map_sg(dev, areq->dst, sg_count, DMA_FROM_DEVICE); + } + + ret = talitos_sg_map(dev, areq->dst, cryptlen, edesc, &desc->ptr[4], + sg_count, 0, (edesc->src_nents + 1)); + if (ret > 1) + sync_needed = true; /* iv out */ map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, @@ -1613,6 +1596,10 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* last DWORD empty */ desc->ptr[6] = zero_entry; + if (sync_needed) + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, DMA_BIDIRECTIONAL); + ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { common_nonsnoop_unmap(dev, edesc, areq); @@ -1676,7 +1663,7 @@ static void common_nonsnoop_hash_unmap(struct device *dev, unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); - unmap_sg_talitos_ptr(dev, req_ctx->psrc, NULL, 0, edesc); + talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL, 0, 0); /* When using hashctx-in, must unmap it. */ if (from_talitos_ptr_len(&edesc->desc.ptr[1], is_sec1)) @@ -1747,8 +1734,10 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, struct device *dev = ctx->dev; struct talitos_desc *desc = &edesc->desc; int ret; + bool sync_needed = false; struct talitos_private *priv = dev_get_drvdata(dev); bool is_sec1 = has_ftr_sec1(priv); + int sg_count; /* first DWORD empty */ desc->ptr[0] = zero_entry; @@ -1773,11 +1762,19 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, else desc->ptr[2] = zero_entry; + sg_count = edesc->src_nents ?: 1; + if (is_sec1 && sg_count > 1) + sg_copy_to_buffer(areq->src, sg_count, edesc->buf, length); + else + sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count, + DMA_TO_DEVICE); /* * data in */ - map_sg_in_talitos_ptr(dev, req_ctx->psrc, length, edesc, - DMA_TO_DEVICE, &desc->ptr[3]); + sg_count = talitos_sg_map(dev, req_ctx->psrc, length, edesc, + &desc->ptr[3], sg_count, 0, 0); + if (sg_count > 1) + sync_needed = true; /* fifth DWORD empty */ desc->ptr[4] = zero_entry; @@ -1798,6 +1795,10 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, if (is_sec1 && from_talitos_ptr_len(&desc->ptr[3], true) == 0) talitos_handle_buggy_hash(ctx, edesc, &desc->ptr[3]); + if (sync_needed) + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, DMA_BIDIRECTIONAL); + ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { common_nonsnoop_hash_unmap(dev, edesc, areq); @@ -2124,6 +2125,7 @@ static int ahash_setkey(struct crypto_ahash *tfm, const u8 *key, struct talitos_alg_template { u32 type; + u32 priority; union { struct crypto_alg crypto; struct ahash_alg hash; @@ -2155,6 +2157,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha1-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA1_HMAC, + }, + { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { .cra_name = "authenc(hmac(sha1)," @@ -2176,6 +2199,29 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha1)," + "cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha1-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA1_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2196,6 +2242,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA224_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha224),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha224-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA224_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2219,6 +2286,29 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_SHA224_HMAC, }, { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha224)," + "cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha224-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA224_HMAC, + }, + { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { .cra_name = "authenc(hmac(sha256),cbc(aes))", @@ -2239,6 +2329,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_SHA256_HMAC, }, { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha256-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA256_HMAC, + }, + { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { .cra_name = "authenc(hmac(sha256)," @@ -2261,6 +2372,29 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_SHA256_HMAC, }, { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha256)," + "cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha256-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA256_HMAC, + }, + { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { .cra_name = "authenc(hmac(sha384),cbc(aes))", @@ -2365,6 +2499,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_MD5_HMAC, }, { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(md5),cbc(aes))", + .cra_driver_name = "authenc-hmac-md5-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_MD5_HMAC, + }, + { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { .cra_name = "authenc(hmac(md5),cbc(des3_ede))", @@ -2385,6 +2540,28 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_MD5_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(md5),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-md5-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_MD5_HMAC, + }, /* ABLKCIPHER algorithms. */ { .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .alg.crypto = { @@ -2901,7 +3078,10 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, } alg->cra_module = THIS_MODULE; - alg->cra_priority = TALITOS_CRA_PRIORITY; + if (t_alg->algt.priority) + alg->cra_priority = t_alg->algt.priority; + else + alg->cra_priority = TALITOS_CRA_PRIORITY; alg->cra_alignmask = 0; alg->cra_ctxsize = sizeof(struct talitos_ctx); alg->cra_flags |= CRYPTO_ALG_KERN_DRIVER_ONLY; diff --git a/drivers/crypto/ux500/cryp/Makefile b/drivers/crypto/ux500/cryp/Makefile index e5d362a6f..b497ae3dd 100644 --- a/drivers/crypto/ux500/cryp/Makefile +++ b/drivers/crypto/ux500/cryp/Makefile @@ -4,9 +4,9 @@ # * License terms: GNU General Public License (GPL) version 2 */ ifdef CONFIG_CRYPTO_DEV_UX500_DEBUG -CFLAGS_cryp_core.o := -DDEBUG -O0 -CFLAGS_cryp.o := -DDEBUG -O0 -CFLAGS_cryp_irq.o := -DDEBUG -O0 +CFLAGS_cryp_core.o := -DDEBUG +CFLAGS_cryp.o := -DDEBUG +CFLAGS_cryp_irq.o := -DDEBUG endif obj-$(CONFIG_CRYPTO_DEV_UX500_CRYP) += ux500_cryp.o diff --git a/drivers/crypto/ux500/hash/Makefile b/drivers/crypto/ux500/hash/Makefile index b2f90d9ba..784d9c0a8 100644 --- a/drivers/crypto/ux500/hash/Makefile +++ b/drivers/crypto/ux500/hash/Makefile @@ -4,7 +4,7 @@ # License terms: GNU General Public License (GPL) version 2 # ifdef CONFIG_CRYPTO_DEV_UX500_DEBUG -CFLAGS_hash_core.o := -DDEBUG -O0 +CFLAGS_hash_core.o := -DDEBUG endif obj-$(CONFIG_CRYPTO_DEV_UX500_HASH) += ux500_hash.o diff --git a/drivers/crypto/vmx/.gitignore b/drivers/crypto/vmx/.gitignore new file mode 100644 index 000000000..af4a7ce47 --- /dev/null +++ b/drivers/crypto/vmx/.gitignore @@ -0,0 +1,2 @@ +aesp8-ppc.S +ghashp8-ppc.S diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig index 89d8208d9..a83ead109 100644 --- a/drivers/crypto/vmx/Kconfig +++ b/drivers/crypto/vmx/Kconfig @@ -1,7 +1,7 @@ config CRYPTO_DEV_VMX_ENCRYPT tristate "Encryption acceleration support on P8 CPU" depends on CRYPTO_DEV_VMX - default y + default m help Support for VMX cryptographic acceleration instructions on Power8 CPU. This module supports acceleration for AES and GHASH in hardware. If you diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile index d28ab96a2..de6e241b0 100644 --- a/drivers/crypto/vmx/Makefile +++ b/drivers/crypto/vmx/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o -vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o ghash.o +vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) TARGET := linux-ppc64le diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c new file mode 100644 index 000000000..24353ec33 --- /dev/null +++ b/drivers/crypto/vmx/aes_xts.c @@ -0,0 +1,190 @@ +/** + * AES XTS routines supporting VMX In-core instructions on Power 8 + * + * Copyright (C) 2015 International Business Machines Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundations; version 2 only. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY of FITNESS FOR A PARTICUPAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com> + */ + +#include <linux/types.h> +#include <linux/err.h> +#include <linux/crypto.h> +#include <linux/delay.h> +#include <linux/hardirq.h> +#include <asm/switch_to.h> +#include <crypto/aes.h> +#include <crypto/scatterwalk.h> +#include <crypto/xts.h> + +#include "aesp8-ppc.h" + +struct p8_aes_xts_ctx { + struct crypto_blkcipher *fallback; + struct aes_key enc_key; + struct aes_key dec_key; + struct aes_key tweak_key; +}; + +static int p8_aes_xts_init(struct crypto_tfm *tfm) +{ + const char *alg; + struct crypto_blkcipher *fallback; + struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!(alg = crypto_tfm_alg_name(tfm))) { + printk(KERN_ERR "Failed to get algorithm name.\n"); + return -ENOENT; + } + + fallback = + crypto_alloc_blkcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback)) { + printk(KERN_ERR + "Failed to allocate transformation for '%s': %ld\n", + alg, PTR_ERR(fallback)); + return PTR_ERR(fallback); + } + printk(KERN_INFO "Using '%s' as fallback implementation.\n", + crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); + + crypto_blkcipher_set_flags( + fallback, + crypto_blkcipher_get_flags((struct crypto_blkcipher *)tfm)); + ctx->fallback = fallback; + + return 0; +} + +static void p8_aes_xts_exit(struct crypto_tfm *tfm) +{ + struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->fallback) { + crypto_free_blkcipher(ctx->fallback); + ctx->fallback = NULL; + } +} + +static int p8_aes_xts_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + int ret; + struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + + ret = xts_check_key(tfm, key, keylen); + if (ret) + return ret; + + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); + ret = aes_p8_set_encrypt_key(key + keylen/2, (keylen/2) * 8, &ctx->tweak_key); + ret += aes_p8_set_encrypt_key(key, (keylen/2) * 8, &ctx->enc_key); + ret += aes_p8_set_decrypt_key(key, (keylen/2) * 8, &ctx->dec_key); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + + ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); + return ret; +} + +static int p8_aes_xts_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes, int enc) +{ + int ret; + u8 tweak[AES_BLOCK_SIZE]; + u8 *iv; + struct blkcipher_walk walk; + struct p8_aes_xts_ctx *ctx = + crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm)); + struct blkcipher_desc fallback_desc = { + .tfm = ctx->fallback, + .info = desc->info, + .flags = desc->flags + }; + + if (in_interrupt()) { + ret = enc ? crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes) : + crypto_blkcipher_decrypt(&fallback_desc, dst, src, nbytes); + } else { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); + + blkcipher_walk_init(&walk, dst, src, nbytes); + + ret = blkcipher_walk_virt(desc, &walk); + iv = walk.iv; + memset(tweak, 0, AES_BLOCK_SIZE); + aes_p8_encrypt(iv, tweak, &ctx->tweak_key); + + while ((nbytes = walk.nbytes)) { + if (enc) + aes_p8_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, + nbytes & AES_BLOCK_MASK, &ctx->enc_key, NULL, tweak); + else + aes_p8_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, + nbytes & AES_BLOCK_MASK, &ctx->dec_key, NULL, tweak); + + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, &walk, nbytes); + } + + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + } + return ret; +} + +static int p8_aes_xts_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return p8_aes_xts_crypt(desc, dst, src, nbytes, 1); +} + +static int p8_aes_xts_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return p8_aes_xts_crypt(desc, dst, src, nbytes, 0); +} + +struct crypto_alg p8_aes_xts_alg = { + .cra_name = "xts(aes)", + .cra_driver_name = "p8_aes_xts", + .cra_module = THIS_MODULE, + .cra_priority = 2000, + .cra_type = &crypto_blkcipher_type, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, + .cra_alignmask = 0, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct p8_aes_xts_ctx), + .cra_init = p8_aes_xts_init, + .cra_exit = p8_aes_xts_exit, + .cra_blkcipher = { + .ivsize = AES_BLOCK_SIZE, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .setkey = p8_aes_xts_setkey, + .encrypt = p8_aes_xts_encrypt, + .decrypt = p8_aes_xts_decrypt, + } +}; diff --git a/drivers/crypto/vmx/aesp8-ppc.h b/drivers/crypto/vmx/aesp8-ppc.h index 4cd34ee54..01972e16a 100644 --- a/drivers/crypto/vmx/aesp8-ppc.h +++ b/drivers/crypto/vmx/aesp8-ppc.h @@ -19,3 +19,7 @@ void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len, void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out, size_t len, const struct aes_key *key, const u8 *iv); +void aes_p8_xts_encrypt(const u8 *in, u8 *out, size_t len, + const struct aes_key *key1, const struct aes_key *key2, u8 *iv); +void aes_p8_xts_decrypt(const u8 *in, u8 *out, size_t len, + const struct aes_key *key1, const struct aes_key *key2, u8 *iv); diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl index 228053921..0b4a293b8 100644 --- a/drivers/crypto/vmx/aesp8-ppc.pl +++ b/drivers/crypto/vmx/aesp8-ppc.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -20,6 +27,19 @@ # instructions are interleaved. It's reckoned that eventual # misalignment penalties at page boundaries are in average lower # than additional overhead in pure AltiVec approach. +# +# May 2016 +# +# Add XTS subroutine, 9x on little- and 12x improvement on big-endian +# systems were measured. +# +###################################################################### +# Current large-block performance in cycles per byte processed with +# 128-bit key (less is better). +# +# CBC en-/decrypt CTR XTS +# POWER8[le] 3.96/0.72 0.74 1.1 +# POWER8[be] 3.75/0.65 0.66 1.0 $flavour = shift; @@ -1875,6 +1895,1845 @@ Lctr32_enc8x_done: ___ }} }}} +######################################################################### +{{{ # XTS procedures # +# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # +# const AES_KEY *key1, const AES_KEY *key2, # +# [const] unsigned char iv[16]); # +# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # +# input tweak value is assumed to be encrypted already, and last tweak # +# value, one suitable for consecutive call on same chunk of data, is # +# written back to original buffer. In addition, in "tweak chaining" # +# mode only complete input blocks are processed. # + +my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); +my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); +my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); +my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); +my $taillen = $key2; + + ($inp,$idx) = ($idx,$inp); # reassign + +$code.=<<___; +.globl .${prefix}_xts_encrypt + mr $inp,r3 # reassign + li r3,-1 + ${UCMP}i $len,16 + bltlr- + + lis r0,0xfff0 + mfspr r12,256 # save vrsave + li r11,0 + mtspr 256,r0 + + vspltisb $seven,0x07 # 0x070707..07 + le?lvsl $leperm,r11,r11 + le?vspltisb $tmp,0x0f + le?vxor $leperm,$leperm,$seven + + li $idx,15 + lvx $tweak,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $tweak,$tweak,$inptail,$inpperm + + neg r11,$inp + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inout,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + ${UCMP}i $key2,0 # key2==NULL? + beq Lxts_enc_no_key2 + + ?lvsl $keyperm,0,$key2 # prepare for unaligned key + lwz $rounds,240($key2) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + lvx $rndkey0,0,$key2 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + mtctr $rounds + +Ltweak_xts_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + bdnz Ltweak_xts_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $tweak,$tweak,$rndkey0 + + li $ivp,0 # don't chain the tweak + b Lxts_enc + +Lxts_enc_no_key2: + li $idx,-16 + and $len,$len,$idx # in "tweak chaining" + # mode only complete + # blocks are processed +Lxts_enc: + lvx $inptail,0,$inp + addi $inp,$inp,16 + + ?lvsl $keyperm,0,$key1 # prepare for unaligned key + lwz $rounds,240($key1) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + vslb $eighty7,$seven,$seven # 0x808080..80 + vor $eighty7,$eighty7,$seven # 0x878787..87 + vspltisb $tmp,1 # 0x010101..01 + vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 + + ${UCMP}i $len,96 + bge _aesp8_xts_encrypt6x + + andi. $taillen,$len,15 + subic r0,$len,32 + subi $taillen,$taillen,16 + subfe r0,r0,r0 + and r0,r0,$taillen + add $inp,$inp,r0 + + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + mtctr $rounds + b Loop_xts_enc + +.align 5 +Loop_xts_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak + vcipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + addi $out,$out,16 + + subic. $len,$len,16 + beq Lxts_enc_done + + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + + subic r0,$len,32 + subfe r0,r0,r0 + and r0,r0,$taillen + add $inp,$inp,r0 + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $output,$output,$rndkey0 # just in case $len<16 + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + mtctr $rounds + ${UCMP}i $len,16 + bge Loop_xts_enc + + vxor $output,$output,$tweak + lvsr $inpperm,0,$len # $inpperm is no longer needed + vxor $inptail,$inptail,$inptail # $inptail is no longer needed + vspltisb $tmp,-1 + vperm $inptail,$inptail,$tmp,$inpperm + vsel $inout,$inout,$output,$inptail + + subi r11,$out,17 + subi $out,$out,16 + mtctr $len + li $len,16 +Loop_xts_enc_steal: + lbzu r0,1(r11) + stb r0,16(r11) + bdnz Loop_xts_enc_steal + + mtctr $rounds + b Loop_xts_enc # one more time... + +Lxts_enc_done: + ${UCMP}i $ivp,0 + beq Lxts_enc_ret + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_enc_ret: + mtspr 256,r12 # restore vrsave + li r3,0 + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt + +.globl .${prefix}_xts_decrypt + mr $inp,r3 # reassign + li r3,-1 + ${UCMP}i $len,16 + bltlr- + + lis r0,0xfff8 + mfspr r12,256 # save vrsave + li r11,0 + mtspr 256,r0 + + andi. r0,$len,15 + neg r0,r0 + andi. r0,r0,16 + sub $len,$len,r0 + + vspltisb $seven,0x07 # 0x070707..07 + le?lvsl $leperm,r11,r11 + le?vspltisb $tmp,0x0f + le?vxor $leperm,$leperm,$seven + + li $idx,15 + lvx $tweak,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $tweak,$tweak,$inptail,$inpperm + + neg r11,$inp + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inout,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + ${UCMP}i $key2,0 # key2==NULL? + beq Lxts_dec_no_key2 + + ?lvsl $keyperm,0,$key2 # prepare for unaligned key + lwz $rounds,240($key2) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + lvx $rndkey0,0,$key2 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + mtctr $rounds + +Ltweak_xts_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + bdnz Ltweak_xts_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $tweak,$tweak,$rndkey0 + + li $ivp,0 # don't chain the tweak + b Lxts_dec + +Lxts_dec_no_key2: + neg $idx,$len + andi. $idx,$idx,15 + add $len,$len,$idx # in "tweak chaining" + # mode only complete + # blocks are processed +Lxts_dec: + lvx $inptail,0,$inp + addi $inp,$inp,16 + + ?lvsl $keyperm,0,$key1 # prepare for unaligned key + lwz $rounds,240($key1) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + vslb $eighty7,$seven,$seven # 0x808080..80 + vor $eighty7,$eighty7,$seven # 0x878787..87 + vspltisb $tmp,1 # 0x010101..01 + vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 + + ${UCMP}i $len,96 + bge _aesp8_xts_decrypt6x + + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + mtctr $rounds + + ${UCMP}i $len,16 + blt Ltail_xts_dec + be?b Loop_xts_dec + +.align 5 +Loop_xts_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak + vncipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + addi $out,$out,16 + + subic. $len,$len,16 + beq Lxts_dec_done + + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + mtctr $rounds + ${UCMP}i $len,16 + bge Loop_xts_dec + +Ltail_xts_dec: + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak1,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak1,$tweak1,$tmp + + subi $inp,$inp,16 + add $inp,$inp,$len + + vxor $inout,$inout,$tweak # :-( + vxor $inout,$inout,$tweak1 # :-) + +Loop_xts_dec_short: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_dec_short + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak1 + vncipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + + vmr $inout,$inptail + lvx $inptail,0,$inp + #addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + + lvsr $inpperm,0,$len # $inpperm is no longer needed + vxor $inptail,$inptail,$inptail # $inptail is no longer needed + vspltisb $tmp,-1 + vperm $inptail,$inptail,$tmp,$inpperm + vsel $inout,$inout,$output,$inptail + + vxor $rndkey0,$rndkey0,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + subi r11,$out,1 + mtctr $len + li $len,16 +Loop_xts_dec_steal: + lbzu r0,1(r11) + stb r0,16(r11) + bdnz Loop_xts_dec_steal + + mtctr $rounds + b Loop_xts_dec # one more time... + +Lxts_dec_done: + ${UCMP}i $ivp,0 + beq Lxts_dec_ret + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_dec_ret: + mtspr 256,r12 # restore vrsave + li r3,0 + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt +___ +######################################################################### +{{ # Optimized XTS procedures # +my $key_=$key2; +my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); + $x00=0 if ($flavour =~ /osx/); +my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); +my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); +my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); +my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys + # v26-v31 last 6 round keys +my ($keyperm)=($out0); # aliases with "caller", redundant assignment +my $taillen=$x70; + +$code.=<<___; +.align 5 +_aesp8_xts_encrypt6x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + mflr r11 + li r7,`$FRAME+8*16+15` + li r3,`$FRAME+8*16+31` + $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) + stvx v20,r7,$sp # ABI says so + addi r7,r7,32 + stvx v21,r3,$sp + addi r3,r3,32 + stvx v22,r7,$sp + addi r7,r7,32 + stvx v23,r3,$sp + addi r3,r3,32 + stvx v24,r7,$sp + addi r7,r7,32 + stvx v25,r3,$sp + addi r3,r3,32 + stvx v26,r7,$sp + addi r7,r7,32 + stvx v27,r3,$sp + addi r3,r3,32 + stvx v28,r7,$sp + addi r7,r7,32 + stvx v29,r3,$sp + addi r3,r3,32 + stvx v30,r7,$sp + stvx v31,r3,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key1 # load key schedule + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + lvx v31,$x00,$key1 + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_xts_enc_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key1 + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_xts_enc_key + + lvx v26,$x10,$key1 + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key1 + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key1 + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key1 + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key1 + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key1 + ?vperm v29,v29,v30,$keyperm + lvx $twk5,$x70,$key1 # borrow $twk5 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$twk5,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vperm $in0,$inout,$inptail,$inpperm + subi $inp,$inp,31 # undo "caller" + vxor $twk0,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $out0,$in0,$twk0 + vxor $tweak,$tweak,$tmp + + lvx_u $in1,$x10,$inp + vxor $twk1,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in1,$in1,$in1,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out1,$in1,$twk1 + vxor $tweak,$tweak,$tmp + + lvx_u $in2,$x20,$inp + andi. $taillen,$len,15 + vxor $twk2,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in2,$in2,$in2,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out2,$in2,$twk2 + vxor $tweak,$tweak,$tmp + + lvx_u $in3,$x30,$inp + sub $len,$len,$taillen + vxor $twk3,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in3,$in3,$in3,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out3,$in3,$twk3 + vxor $tweak,$tweak,$tmp + + lvx_u $in4,$x40,$inp + subi $len,$len,0x60 + vxor $twk4,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in4,$in4,$in4,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out4,$in4,$twk4 + vxor $tweak,$tweak,$tmp + + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + vxor $twk5,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in5,$in5,$in5,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out5,$in5,$twk5 + vxor $tweak,$tweak,$tmp + + vxor v31,v31,$rndkey0 + mtctr $rounds + b Loop_xts_enc6x + +.align 5 +Loop_xts_enc6x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_enc6x + + subic $len,$len,96 # $len-=96 + vxor $in0,$twk0,v31 # xor with last round key + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk0,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + + subfe. r0,r0,r0 # borrow?-1:0 + vand $tmp,$tmp,$eighty7 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vxor $tweak,$tweak,$tmp + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vxor $in1,$twk1,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk1,$tweak,$rndkey0 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + + and r0,r0,$len + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out0,$out0,v26 + vcipher $out1,$out1,v26 + vand $tmp,$tmp,$eighty7 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vxor $tweak,$tweak,$tmp + vcipher $out4,$out4,v26 + vcipher $out5,$out5,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in5 are loaded + # with last "words" + vxor $in2,$twk2,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk2,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vcipher $out0,$out0,v27 + vcipher $out1,$out1,v27 + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vand $tmp,$tmp,$eighty7 + vcipher $out4,$out4,v27 + vcipher $out5,$out5,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vxor $tweak,$tweak,$tmp + vcipher $out0,$out0,v28 + vcipher $out1,$out1,v28 + vxor $in3,$twk3,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk3,$tweak,$rndkey0 + vcipher $out2,$out2,v28 + vcipher $out3,$out3,v28 + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out4,$out4,v28 + vcipher $out5,$out5,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vand $tmp,$tmp,$eighty7 + + vcipher $out0,$out0,v29 + vcipher $out1,$out1,v29 + vxor $tweak,$tweak,$tmp + vcipher $out2,$out2,v29 + vcipher $out3,$out3,v29 + vxor $in4,$twk4,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk4,$tweak,$rndkey0 + vcipher $out4,$out4,v29 + vcipher $out5,$out5,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + + vcipher $out0,$out0,v30 + vcipher $out1,$out1,v30 + vand $tmp,$tmp,$eighty7 + vcipher $out2,$out2,v30 + vcipher $out3,$out3,v30 + vxor $tweak,$tweak,$tmp + vcipher $out4,$out4,v30 + vcipher $out5,$out5,v30 + vxor $in5,$twk5,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk5,$tweak,$rndkey0 + + vcipherlast $out0,$out0,$in0 + lvx_u $in0,$x00,$inp # load next input block + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipherlast $out1,$out1,$in1 + lvx_u $in1,$x10,$inp + vcipherlast $out2,$out2,$in2 + le?vperm $in0,$in0,$in0,$leperm + lvx_u $in2,$x20,$inp + vand $tmp,$tmp,$eighty7 + vcipherlast $out3,$out3,$in3 + le?vperm $in1,$in1,$in1,$leperm + lvx_u $in3,$x30,$inp + vcipherlast $out4,$out4,$in4 + le?vperm $in2,$in2,$in2,$leperm + lvx_u $in4,$x40,$inp + vxor $tweak,$tweak,$tmp + vcipherlast $tmp,$out5,$in5 # last block might be needed + # in stealing mode + le?vperm $in3,$in3,$in3,$leperm + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + le?vperm $in4,$in4,$in4,$leperm + le?vperm $in5,$in5,$in5,$leperm + + le?vperm $out0,$out0,$out0,$leperm + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk0 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $out1,$in1,$twk1 + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$twk2 + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$twk3 + le?vperm $out5,$tmp,$tmp,$leperm + stvx_u $out4,$x40,$out + vxor $out4,$in4,$twk4 + le?stvx_u $out5,$x50,$out + be?stvx_u $tmp, $x50,$out + vxor $out5,$in5,$twk5 + addi $out,$out,0x60 + + mtctr $rounds + beq Loop_xts_enc6x # did $len-=96 borrow? + + addic. $len,$len,0x60 + beq Lxts_enc6x_zero + cmpwi $len,0x20 + blt Lxts_enc6x_one + nop + beq Lxts_enc6x_two + cmpwi $len,0x40 + blt Lxts_enc6x_three + nop + beq Lxts_enc6x_four + +Lxts_enc6x_five: + vxor $out0,$in1,$twk0 + vxor $out1,$in2,$twk1 + vxor $out2,$in3,$twk2 + vxor $out3,$in4,$twk3 + vxor $out4,$in5,$twk4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk5 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $tmp,$out4,$twk5 # last block prep for stealing + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_four: + vxor $out0,$in2,$twk0 + vxor $out1,$in3,$twk1 + vxor $out2,$in4,$twk2 + vxor $out3,$in5,$twk3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk4 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $tmp,$out3,$twk4 # last block prep for stealing + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_three: + vxor $out0,$in3,$twk0 + vxor $out1,$in4,$twk1 + vxor $out2,$in5,$twk2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk3 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $tmp,$out2,$twk3 # last block prep for stealing + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_two: + vxor $out0,$in4,$twk0 + vxor $out1,$in5,$twk1 + vxor $out2,$out2,$out2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk2 # unused tweak + vxor $tmp,$out1,$twk2 # last block prep for stealing + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_one: + vxor $out0,$in5,$twk0 + nop +Loop_xts_enc1x: + vcipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_enc1x + + add $inp,$inp,$taillen + cmpwi $taillen,0 + vcipher $out0,$out0,v24 + + subi $inp,$inp,16 + vcipher $out0,$out0,v25 + + lvsr $inpperm,0,$taillen + vcipher $out0,$out0,v26 + + lvx_u $in0,0,$inp + vcipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vcipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk0,$twk0,v31 + + le?vperm $in0,$in0,$in0,$leperm + vcipher $out0,$out0,v30 + + vperm $in0,$in0,$in0,$inpperm + vcipherlast $out0,$out0,$twk0 + + vmr $twk0,$twk1 # unused tweak + vxor $tmp,$out0,$twk1 # last block prep for stealing + le?vperm $out0,$out0,$out0,$leperm + stvx_u $out0,$x00,$out # store output + addi $out,$out,0x10 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_zero: + cmpwi $taillen,0 + beq Lxts_enc6x_done + + add $inp,$inp,$taillen + subi $inp,$inp,16 + lvx_u $in0,0,$inp + lvsr $inpperm,0,$taillen # $in5 is no more + le?vperm $in0,$in0,$in0,$leperm + vperm $in0,$in0,$in0,$inpperm + vxor $tmp,$tmp,$twk0 +Lxts_enc6x_steal: + vxor $in0,$in0,$twk0 + vxor $out0,$out0,$out0 + vspltisb $out1,-1 + vperm $out0,$out0,$out1,$inpperm + vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? + + subi r30,$out,17 + subi $out,$out,16 + mtctr $taillen +Loop_xts_enc6x_steal: + lbzu r0,1(r30) + stb r0,16(r30) + bdnz Loop_xts_enc6x_steal + + li $taillen,0 + mtctr $rounds + b Loop_xts_enc1x # one more time... + +.align 4 +Lxts_enc6x_done: + ${UCMP}i $ivp,0 + beq Lxts_enc6x_ret + + vxor $tweak,$twk0,$rndkey0 + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_enc6x_ret: + mtlr r11 + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $seven,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x04,1,0x80,6,6,0 + .long 0 + +.align 5 +_aesp8_xts_enc5x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + lvx v25,$x10,$key_ # round[4] + bdnz _aesp8_xts_enc5x + + add $inp,$inp,$taillen + cmpwi $taillen,0 + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + + subi $inp,$inp,16 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vxor $twk0,$twk0,v31 + + vcipher $out0,$out0,v26 + lvsr $inpperm,r0,$taillen # $in5 is no more + vcipher $out1,$out1,v26 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vcipher $out4,$out4,v26 + vxor $in1,$twk1,v31 + + vcipher $out0,$out0,v27 + lvx_u $in0,0,$inp + vcipher $out1,$out1,v27 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vcipher $out4,$out4,v27 + vxor $in2,$twk2,v31 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v28 + vcipher $out1,$out1,v28 + vcipher $out2,$out2,v28 + vcipher $out3,$out3,v28 + vcipher $out4,$out4,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vxor $in3,$twk3,v31 + + vcipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$leperm + vcipher $out1,$out1,v29 + vcipher $out2,$out2,v29 + vcipher $out3,$out3,v29 + vcipher $out4,$out4,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $in4,$twk4,v31 + + vcipher $out0,$out0,v30 + vperm $in0,$in0,$in0,$inpperm + vcipher $out1,$out1,v30 + vcipher $out2,$out2,v30 + vcipher $out3,$out3,v30 + vcipher $out4,$out4,v30 + + vcipherlast $out0,$out0,$twk0 + vcipherlast $out1,$out1,$in1 + vcipherlast $out2,$out2,$in2 + vcipherlast $out3,$out3,$in3 + vcipherlast $out4,$out4,$in4 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + +.align 5 +_aesp8_xts_decrypt6x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + mflr r11 + li r7,`$FRAME+8*16+15` + li r3,`$FRAME+8*16+31` + $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) + stvx v20,r7,$sp # ABI says so + addi r7,r7,32 + stvx v21,r3,$sp + addi r3,r3,32 + stvx v22,r7,$sp + addi r7,r7,32 + stvx v23,r3,$sp + addi r3,r3,32 + stvx v24,r7,$sp + addi r7,r7,32 + stvx v25,r3,$sp + addi r3,r3,32 + stvx v26,r7,$sp + addi r7,r7,32 + stvx v27,r3,$sp + addi r3,r3,32 + stvx v28,r7,$sp + addi r7,r7,32 + stvx v29,r3,$sp + addi r3,r3,32 + stvx v30,r7,$sp + stvx v31,r3,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key1 # load key schedule + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + lvx v31,$x00,$key1 + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_xts_dec_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key1 + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_xts_dec_key + + lvx v26,$x10,$key1 + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key1 + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key1 + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key1 + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key1 + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key1 + ?vperm v29,v29,v30,$keyperm + lvx $twk5,$x70,$key1 # borrow $twk5 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$twk5,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vperm $in0,$inout,$inptail,$inpperm + subi $inp,$inp,31 # undo "caller" + vxor $twk0,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $out0,$in0,$twk0 + vxor $tweak,$tweak,$tmp + + lvx_u $in1,$x10,$inp + vxor $twk1,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in1,$in1,$in1,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out1,$in1,$twk1 + vxor $tweak,$tweak,$tmp + + lvx_u $in2,$x20,$inp + andi. $taillen,$len,15 + vxor $twk2,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in2,$in2,$in2,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out2,$in2,$twk2 + vxor $tweak,$tweak,$tmp + + lvx_u $in3,$x30,$inp + sub $len,$len,$taillen + vxor $twk3,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in3,$in3,$in3,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out3,$in3,$twk3 + vxor $tweak,$tweak,$tmp + + lvx_u $in4,$x40,$inp + subi $len,$len,0x60 + vxor $twk4,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in4,$in4,$in4,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out4,$in4,$twk4 + vxor $tweak,$tweak,$tmp + + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + vxor $twk5,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in5,$in5,$in5,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out5,$in5,$twk5 + vxor $tweak,$tweak,$tmp + + vxor v31,v31,$rndkey0 + mtctr $rounds + b Loop_xts_dec6x + +.align 5 +Loop_xts_dec6x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_dec6x + + subic $len,$len,96 # $len-=96 + vxor $in0,$twk0,v31 # xor with last round key + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk0,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + + subfe. r0,r0,r0 # borrow?-1:0 + vand $tmp,$tmp,$eighty7 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vxor $tweak,$tweak,$tmp + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vxor $in1,$twk1,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk1,$tweak,$rndkey0 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + + and r0,r0,$len + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vand $tmp,$tmp,$eighty7 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vxor $tweak,$tweak,$tmp + vncipher $out4,$out4,v26 + vncipher $out5,$out5,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in5 are loaded + # with last "words" + vxor $in2,$twk2,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk2,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vncipher $out0,$out0,v27 + vncipher $out1,$out1,v27 + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vand $tmp,$tmp,$eighty7 + vncipher $out4,$out4,v27 + vncipher $out5,$out5,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vxor $tweak,$tweak,$tmp + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vxor $in3,$twk3,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk3,$tweak,$rndkey0 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out4,$out4,v28 + vncipher $out5,$out5,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vand $tmp,$tmp,$eighty7 + + vncipher $out0,$out0,v29 + vncipher $out1,$out1,v29 + vxor $tweak,$tweak,$tmp + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vxor $in4,$twk4,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk4,$tweak,$rndkey0 + vncipher $out4,$out4,v29 + vncipher $out5,$out5,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + + vncipher $out0,$out0,v30 + vncipher $out1,$out1,v30 + vand $tmp,$tmp,$eighty7 + vncipher $out2,$out2,v30 + vncipher $out3,$out3,v30 + vxor $tweak,$tweak,$tmp + vncipher $out4,$out4,v30 + vncipher $out5,$out5,v30 + vxor $in5,$twk5,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk5,$tweak,$rndkey0 + + vncipherlast $out0,$out0,$in0 + lvx_u $in0,$x00,$inp # load next input block + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipherlast $out1,$out1,$in1 + lvx_u $in1,$x10,$inp + vncipherlast $out2,$out2,$in2 + le?vperm $in0,$in0,$in0,$leperm + lvx_u $in2,$x20,$inp + vand $tmp,$tmp,$eighty7 + vncipherlast $out3,$out3,$in3 + le?vperm $in1,$in1,$in1,$leperm + lvx_u $in3,$x30,$inp + vncipherlast $out4,$out4,$in4 + le?vperm $in2,$in2,$in2,$leperm + lvx_u $in4,$x40,$inp + vxor $tweak,$tweak,$tmp + vncipherlast $out5,$out5,$in5 + le?vperm $in3,$in3,$in3,$leperm + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + le?vperm $in4,$in4,$in4,$leperm + le?vperm $in5,$in5,$in5,$leperm + + le?vperm $out0,$out0,$out0,$leperm + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk0 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $out1,$in1,$twk1 + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$twk2 + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$twk3 + le?vperm $out5,$out5,$out5,$leperm + stvx_u $out4,$x40,$out + vxor $out4,$in4,$twk4 + stvx_u $out5,$x50,$out + vxor $out5,$in5,$twk5 + addi $out,$out,0x60 + + mtctr $rounds + beq Loop_xts_dec6x # did $len-=96 borrow? + + addic. $len,$len,0x60 + beq Lxts_dec6x_zero + cmpwi $len,0x20 + blt Lxts_dec6x_one + nop + beq Lxts_dec6x_two + cmpwi $len,0x40 + blt Lxts_dec6x_three + nop + beq Lxts_dec6x_four + +Lxts_dec6x_five: + vxor $out0,$in1,$twk0 + vxor $out1,$in2,$twk1 + vxor $out2,$in3,$twk2 + vxor $out3,$in4,$twk3 + vxor $out4,$in5,$twk4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk5 # unused tweak + vxor $twk1,$tweak,$rndkey0 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk1 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_four: + vxor $out0,$in2,$twk0 + vxor $out1,$in3,$twk1 + vxor $out2,$in4,$twk2 + vxor $out3,$in5,$twk3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk4 # unused tweak + vmr $twk1,$twk5 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk5 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_three: + vxor $out0,$in3,$twk0 + vxor $out1,$in4,$twk1 + vxor $out2,$in5,$twk2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk3 # unused tweak + vmr $twk1,$twk4 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk4 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_two: + vxor $out0,$in4,$twk0 + vxor $out1,$in5,$twk1 + vxor $out2,$out2,$out2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk2 # unused tweak + vmr $twk1,$twk3 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk3 + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_one: + vxor $out0,$in5,$twk0 + nop +Loop_xts_dec1x: + vncipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_dec1x + + subi r0,$taillen,1 + vncipher $out0,$out0,v24 + + andi. r0,r0,16 + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + + sub $inp,$inp,r0 + vncipher $out0,$out0,v26 + + lvx_u $in0,0,$inp + vncipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk0,$twk0,v31 + + le?vperm $in0,$in0,$in0,$leperm + vncipher $out0,$out0,v30 + + mtctr $rounds + vncipherlast $out0,$out0,$twk0 + + vmr $twk0,$twk1 # unused tweak + vmr $twk1,$twk2 + le?vperm $out0,$out0,$out0,$leperm + stvx_u $out0,$x00,$out # store output + addi $out,$out,0x10 + vxor $out0,$in0,$twk2 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_zero: + cmpwi $taillen,0 + beq Lxts_dec6x_done + + lvx_u $in0,0,$inp + le?vperm $in0,$in0,$in0,$leperm + vxor $out0,$in0,$twk1 +Lxts_dec6x_steal: + vncipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Lxts_dec6x_steal + + add $inp,$inp,$taillen + vncipher $out0,$out0,v24 + + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + + lvx_u $in0,0,$inp + vncipher $out0,$out0,v26 + + lvsr $inpperm,0,$taillen # $in5 is no more + vncipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk1,$twk1,v31 + + le?vperm $in0,$in0,$in0,$leperm + vncipher $out0,$out0,v30 + + vperm $in0,$in0,$in0,$inpperm + vncipherlast $tmp,$out0,$twk1 + + le?vperm $out0,$tmp,$tmp,$leperm + le?stvx_u $out0,0,$out + be?stvx_u $tmp,0,$out + + vxor $out0,$out0,$out0 + vspltisb $out1,-1 + vperm $out0,$out0,$out1,$inpperm + vsel $out0,$in0,$tmp,$out0 + vxor $out0,$out0,$twk0 + + subi r30,$out,1 + mtctr $taillen +Loop_xts_dec6x_steal: + lbzu r0,1(r30) + stb r0,16(r30) + bdnz Loop_xts_dec6x_steal + + li $taillen,0 + mtctr $rounds + b Loop_xts_dec1x # one more time... + +.align 4 +Lxts_dec6x_done: + ${UCMP}i $ivp,0 + beq Lxts_dec6x_ret + + vxor $tweak,$twk0,$rndkey0 + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_dec6x_ret: + mtlr r11 + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $seven,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x04,1,0x80,6,6,0 + .long 0 + +.align 5 +_aesp8_xts_dec5x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + lvx v25,$x10,$key_ # round[4] + bdnz _aesp8_xts_dec5x + + subi r0,$taillen,1 + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + + andi. r0,r0,16 + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vxor $twk0,$twk0,v31 + + sub $inp,$inp,r0 + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vncipher $out4,$out4,v26 + vxor $in1,$twk1,v31 + + vncipher $out0,$out0,v27 + lvx_u $in0,0,$inp + vncipher $out1,$out1,v27 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vncipher $out4,$out4,v27 + vxor $in2,$twk2,v31 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vncipher $out4,$out4,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vxor $in3,$twk3,v31 + + vncipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$leperm + vncipher $out1,$out1,v29 + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vncipher $out4,$out4,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $in4,$twk4,v31 + + vncipher $out0,$out0,v30 + vncipher $out1,$out1,v30 + vncipher $out2,$out2,v30 + vncipher $out3,$out3,v30 + vncipher $out4,$out4,v30 + + vncipherlast $out0,$out0,$twk0 + vncipherlast $out1,$out1,$in1 + vncipherlast $out2,$out2,$in2 + vncipherlast $out3,$out3,$in3 + vncipherlast $out4,$out4,$in4 + mtctr $rounds + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +___ +}} }}} + my $consts=1; foreach(split("\n",$code)) { s/\`([^\`]*)\`/eval($1)/geo; @@ -1898,7 +3757,7 @@ foreach(split("\n",$code)) { if ($flavour =~ /le$/o) { SWITCH: for($conv) { /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; - /\?rev/ && do { @bytes=reverse(@bytes); last; }; + /\?rev/ && do { @bytes=reverse(@bytes); last; }; } } diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c index e163d5770..31a98dc6f 100644 --- a/drivers/crypto/vmx/vmx.c +++ b/drivers/crypto/vmx/vmx.c @@ -23,6 +23,7 @@ #include <linux/moduleparam.h> #include <linux/types.h> #include <linux/err.h> +#include <linux/cpufeature.h> #include <linux/crypto.h> #include <asm/cputable.h> #include <crypto/internal/hash.h> @@ -31,10 +32,12 @@ extern struct shash_alg p8_ghash_alg; extern struct crypto_alg p8_aes_alg; extern struct crypto_alg p8_aes_cbc_alg; extern struct crypto_alg p8_aes_ctr_alg; +extern struct crypto_alg p8_aes_xts_alg; static struct crypto_alg *algs[] = { &p8_aes_alg, &p8_aes_cbc_alg, &p8_aes_ctr_alg, + &p8_aes_xts_alg, NULL, }; @@ -43,9 +46,6 @@ int __init p8_init(void) int ret = 0; struct crypto_alg **alg_it; - if (!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) - return -ENODEV; - for (alg_it = algs; *alg_it; alg_it++) { ret = crypto_register_alg(*alg_it); printk(KERN_INFO "crypto_register_alg '%s' = %d\n", @@ -78,7 +78,7 @@ void __exit p8_exit(void) crypto_unregister_shash(&p8_ghash_alg); } -module_init(p8_init); +module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, p8_init); module_exit(p8_exit); MODULE_AUTHOR("Marcelo Cerri<mhcerri@br.ibm.com>"); |