diff options
Diffstat (limited to 'drivers/misc/cxl')
-rw-r--r-- | drivers/misc/cxl/Kconfig | 7 | ||||
-rw-r--r-- | drivers/misc/cxl/Makefile | 2 | ||||
-rw-r--r-- | drivers/misc/cxl/api.c | 58 | ||||
-rw-r--r-- | drivers/misc/cxl/context.c | 25 | ||||
-rw-r--r-- | drivers/misc/cxl/cxl.h | 95 | ||||
-rw-r--r-- | drivers/misc/cxl/debugfs.c | 2 | ||||
-rw-r--r-- | drivers/misc/cxl/file.c | 40 | ||||
-rw-r--r-- | drivers/misc/cxl/irq.c | 52 | ||||
-rw-r--r-- | drivers/misc/cxl/main.c | 1 | ||||
-rw-r--r-- | drivers/misc/cxl/native.c | 121 | ||||
-rw-r--r-- | drivers/misc/cxl/pci.c | 618 | ||||
-rw-r--r-- | drivers/misc/cxl/sysfs.c | 35 | ||||
-rw-r--r-- | drivers/misc/cxl/trace.h | 10 | ||||
-rw-r--r-- | drivers/misc/cxl/vphb.c | 40 |
14 files changed, 914 insertions, 192 deletions
diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig index b6db9ebd5..8756d06e2 100644 --- a/drivers/misc/cxl/Kconfig +++ b/drivers/misc/cxl/Kconfig @@ -11,11 +11,16 @@ config CXL_KERNEL_API bool default n +config CXL_EEH + bool + default n + config CXL tristate "Support for IBM Coherent Accelerators (CXL)" - depends on PPC_POWERNV && PCI_MSI + depends on PPC_POWERNV && PCI_MSI && EEH select CXL_BASE select CXL_KERNEL_API + select CXL_EEH default m help Select this option to enable driver support for IBM Coherent diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile index 14e3f8219..6982f603f 100644 --- a/drivers/misc/cxl/Makefile +++ b/drivers/misc/cxl/Makefile @@ -1,3 +1,5 @@ +ccflags-y := -Werror -Wno-unused-const-variable + cxl-y += main.o file.o irq.o fault.o native.o cxl-y += context.o sysfs.o debugfs.o pci.o trace.o cxl-y += vphb.o api.o diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 4224a6acf..103baf0e0 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -12,11 +12,13 @@ #include <linux/anon_inodes.h> #include <linux/file.h> #include <misc/cxl.h> +#include <linux/fs.h> #include "cxl.h" struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) { + struct address_space *mapping; struct cxl_afu *afu; struct cxl_context *ctx; int rc; @@ -25,19 +27,42 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) get_device(&afu->dev); ctx = cxl_context_alloc(); - if (IS_ERR(ctx)) - return ctx; + if (IS_ERR(ctx)) { + rc = PTR_ERR(ctx); + goto err_dev; + } - /* Make it a slave context. We can promote it later? */ - rc = cxl_context_init(ctx, afu, false, NULL); - if (rc) { - kfree(ctx); - put_device(&afu->dev); - return ERR_PTR(-ENOMEM); + ctx->kernelapi = true; + + /* + * Make our own address space since we won't have one from the + * filesystem like the user api has, and even if we do associate a file + * with this context we don't want to use the global anonymous inode's + * address space as that can invalidate unrelated users: + */ + mapping = kmalloc(sizeof(struct address_space), GFP_KERNEL); + if (!mapping) { + rc = -ENOMEM; + goto err_ctx; } + address_space_init_once(mapping); + + /* Make it a slave context. We can promote it later? */ + rc = cxl_context_init(ctx, afu, false, mapping); + if (rc) + goto err_mapping; + cxl_assign_psn_space(ctx); return ctx; + +err_mapping: + kfree(mapping); +err_ctx: + kfree(ctx); +err_dev: + put_device(&afu->dev); + return ERR_PTR(rc); } EXPORT_SYMBOL_GPL(cxl_dev_context_init); @@ -80,6 +105,7 @@ EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs); void cxl_free_afu_irqs(struct cxl_context *ctx) { + afu_irq_name_free(ctx); cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); } EXPORT_SYMBOL_GPL(cxl_free_afu_irqs); @@ -255,9 +281,16 @@ struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops, file = anon_inode_getfile("cxl", fops, ctx, flags); if (IS_ERR(file)) - put_unused_fd(fdtmp); + goto err_fd; + + file->f_mapping = ctx->mapping; + *fd = fdtmp; return file; + +err_fd: + put_unused_fd(fdtmp); + return NULL; } EXPORT_SYMBOL_GPL(cxl_get_fd); @@ -327,3 +360,10 @@ int cxl_afu_reset(struct cxl_context *ctx) return cxl_afu_check_and_enable(afu); } EXPORT_SYMBOL_GPL(cxl_afu_reset); + +void cxl_perst_reloads_same_image(struct cxl_afu *afu, + bool perst_reloads_same_image) +{ + afu->adapter->perst_same_image = perst_reloads_same_image; +} +EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image); diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 128714862..2faa1270d 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -126,6 +126,18 @@ static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (ctx->status != STARTED) { mutex_unlock(&ctx->status_mutex); pr_devel("%s: Context not started, failing problem state access\n", __func__); + if (ctx->mmio_err_ff) { + if (!ctx->ff_page) { + ctx->ff_page = alloc_page(GFP_USER); + if (!ctx->ff_page) + return VM_FAULT_OOM; + memset(page_address(ctx->ff_page), 0xff, PAGE_SIZE); + } + get_page(ctx->ff_page); + vmf->page = ctx->ff_page; + vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); + return 0; + } return VM_FAULT_SIGBUS; } @@ -193,7 +205,11 @@ int __detach_context(struct cxl_context *ctx) if (status != STARTED) return -EBUSY; - WARN_ON(cxl_detach_process(ctx)); + /* Only warn if we detached while the link was OK. + * If detach fails when hw is down, we don't care. + */ + WARN_ON(cxl_detach_process(ctx) && + cxl_adapter_link_ok(ctx->afu->adapter)); flush_work(&ctx->fault_work); /* Only needed for dedicated process */ put_pid(ctx->pid); cxl_ctx_put(); @@ -253,7 +269,14 @@ static void reclaim_ctx(struct rcu_head *rcu) struct cxl_context *ctx = container_of(rcu, struct cxl_context, rcu); free_page((u64)ctx->sstp); + if (ctx->ff_page) + __free_page(ctx->ff_page); ctx->sstp = NULL; + if (ctx->kernelapi) + kfree(ctx->mapping); + + if (ctx->irq_bitmap) + kfree(ctx->irq_bitmap); kfree(ctx); } diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 4fd66cabd..0cfb9c129 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -34,7 +34,7 @@ extern uint cxl_verbose; * Bump version each time a user API change is made, whether it is * backwards compatible ot not. */ -#define CXL_API_VERSION 1 +#define CXL_API_VERSION 2 #define CXL_API_VERSION_COMPATIBLE 1 /* @@ -83,8 +83,10 @@ static const cxl_p1_reg_t CXL_PSL_AFUSEL = {0x00B0}; /* 0x00C0:7EFF Implementation dependent area */ static const cxl_p1_reg_t CXL_PSL_FIR1 = {0x0100}; static const cxl_p1_reg_t CXL_PSL_FIR2 = {0x0108}; +static const cxl_p1_reg_t CXL_PSL_Timebase = {0x0110}; static const cxl_p1_reg_t CXL_PSL_VERSION = {0x0118}; static const cxl_p1_reg_t CXL_PSL_RESLCKTO = {0x0128}; +static const cxl_p1_reg_t CXL_PSL_TB_CTLSTAT = {0x0140}; static const cxl_p1_reg_t CXL_PSL_FIR_CNTL = {0x0148}; static const cxl_p1_reg_t CXL_PSL_DSNDCTL = {0x0150}; static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158}; @@ -152,6 +154,9 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0}; #define CXL_PSL_SPAP_Size_Shift 4 #define CXL_PSL_SPAP_V 0x0000000000000001ULL +/****** CXL_PSL_Control ****************************************************/ +#define CXL_PSL_Control_tb 0x0000000000000001ULL + /****** CXL_PSL_DLCNTL *****************************************************/ #define CXL_PSL_DLCNTL_D (0x1ull << (63-28)) #define CXL_PSL_DLCNTL_C (0x1ull << (63-29)) @@ -418,6 +423,9 @@ struct cxl_context { /* Used to unmap any mmaps when force detaching */ struct address_space *mapping; struct mutex mapping_lock; + struct page *ff_page; + bool mmio_err_ff; + bool kernelapi; spinlock_t sste_lock; /* Protects segment table entries */ struct cxl_sste *sstp; @@ -493,6 +501,7 @@ struct cxl { bool user_image_loaded; bool perst_loads_image; bool perst_select_user; + bool perst_same_image; }; int cxl_alloc_one_irq(struct cxl *adapter); @@ -531,16 +540,33 @@ struct cxl_process_element { __be32 software_state; } __packed; +static inline bool cxl_adapter_link_ok(struct cxl *cxl) +{ + struct pci_dev *pdev; + + pdev = to_pci_dev(cxl->dev.parent); + return !pci_channel_offline(pdev); +} + static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg) { WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE)); return cxl->p1_mmio + cxl_reg_off(reg); } -#define cxl_p1_write(cxl, reg, val) \ - out_be64(_cxl_p1_addr(cxl, reg), val) -#define cxl_p1_read(cxl, reg) \ - in_be64(_cxl_p1_addr(cxl, reg)) +static inline void cxl_p1_write(struct cxl *cxl, cxl_p1_reg_t reg, u64 val) +{ + if (likely(cxl_adapter_link_ok(cxl))) + out_be64(_cxl_p1_addr(cxl, reg), val); +} + +static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg) +{ + if (likely(cxl_adapter_link_ok(cxl))) + return in_be64(_cxl_p1_addr(cxl, reg)); + else + return ~0ULL; +} static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg) { @@ -548,26 +574,56 @@ static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg return afu->p1n_mmio + cxl_reg_off(reg); } -#define cxl_p1n_write(afu, reg, val) \ - out_be64(_cxl_p1n_addr(afu, reg), val) -#define cxl_p1n_read(afu, reg) \ - in_be64(_cxl_p1n_addr(afu, reg)) +static inline void cxl_p1n_write(struct cxl_afu *afu, cxl_p1n_reg_t reg, u64 val) +{ + if (likely(cxl_adapter_link_ok(afu->adapter))) + out_be64(_cxl_p1n_addr(afu, reg), val); +} + +static inline u64 cxl_p1n_read(struct cxl_afu *afu, cxl_p1n_reg_t reg) +{ + if (likely(cxl_adapter_link_ok(afu->adapter))) + return in_be64(_cxl_p1n_addr(afu, reg)); + else + return ~0ULL; +} static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg) { return afu->p2n_mmio + cxl_reg_off(reg); } -#define cxl_p2n_write(afu, reg, val) \ - out_be64(_cxl_p2n_addr(afu, reg), val) -#define cxl_p2n_read(afu, reg) \ - in_be64(_cxl_p2n_addr(afu, reg)) +static inline void cxl_p2n_write(struct cxl_afu *afu, cxl_p2n_reg_t reg, u64 val) +{ + if (likely(cxl_adapter_link_ok(afu->adapter))) + out_be64(_cxl_p2n_addr(afu, reg), val); +} + +static inline u64 cxl_p2n_read(struct cxl_afu *afu, cxl_p2n_reg_t reg) +{ + if (likely(cxl_adapter_link_ok(afu->adapter))) + return in_be64(_cxl_p2n_addr(afu, reg)); + else + return ~0ULL; +} +static inline u64 cxl_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off) +{ + if (likely(cxl_adapter_link_ok(afu->adapter))) + return in_le64((afu)->afu_desc_mmio + (afu)->crs_offset + + ((cr) * (afu)->crs_len) + (off)); + else + return ~0ULL; +} -#define cxl_afu_cr_read64(afu, cr, off) \ - in_le64((afu)->afu_desc_mmio + (afu)->crs_offset + ((cr) * (afu)->crs_len) + (off)) -#define cxl_afu_cr_read32(afu, cr, off) \ - in_le32((afu)->afu_desc_mmio + (afu)->crs_offset + ((cr) * (afu)->crs_len) + (off)) +static inline u32 cxl_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off) +{ + if (likely(cxl_adapter_link_ok(afu->adapter))) + return in_le32((afu)->afu_desc_mmio + (afu)->crs_offset + + ((cr) * (afu)->crs_len) + (off)); + else + return 0xffffffff; +} u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off); u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off); @@ -585,6 +641,9 @@ void unregister_cxl_calls(struct cxl_calls *calls); int cxl_alloc_adapter_nr(struct cxl *adapter); void cxl_remove_adapter_nr(struct cxl *adapter); +int cxl_alloc_spa(struct cxl_afu *afu); +void cxl_release_spa(struct cxl_afu *afu); + int cxl_file_init(void); void cxl_file_exit(void); int cxl_register_adapter(struct cxl *adapter); @@ -618,6 +677,7 @@ int cxl_register_serr_irq(struct cxl_afu *afu); void cxl_release_serr_irq(struct cxl_afu *afu); int afu_register_irqs(struct cxl_context *ctx, u32 count); void afu_release_irqs(struct cxl_context *ctx, void *cookie); +void afu_irq_name_free(struct cxl_context *ctx); irqreturn_t cxl_slice_irq_err(int irq, void *data); int cxl_debugfs_init(void); @@ -675,6 +735,7 @@ int cxl_psl_purge(struct cxl_afu *afu); void cxl_stop_trace(struct cxl *cxl); int cxl_pci_vphb_add(struct cxl_afu *afu); +void cxl_pci_vphb_reconfigure(struct cxl_afu *afu); void cxl_pci_vphb_remove(struct cxl_afu *afu); extern struct pci_driver cxl_pci_driver; diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c index 825c41258..18df6f44a 100644 --- a/drivers/misc/cxl/debugfs.c +++ b/drivers/misc/cxl/debugfs.c @@ -48,7 +48,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_io_x64, debugfs_io_u64_get, debugfs_io_u64_set, "0x static struct dentry *debugfs_create_io_x64(const char *name, umode_t mode, struct dentry *parent, u64 __iomem *value) { - return debugfs_create_file(name, mode, parent, (void *)value, &fops_io_x64); + return debugfs_create_file(name, mode, parent, (void __force *)value, &fops_io_x64); } int cxl_debugfs_adapter_add(struct cxl *adapter) diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index e3f4b6952..7ccd2998b 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -73,6 +73,11 @@ static int __afu_open(struct inode *inode, struct file *file, bool master) if (!afu->current_mode) goto err_put_afu; + if (!cxl_adapter_link_ok(adapter)) { + rc = -EIO; + goto err_put_afu; + } + if (!(ctx = cxl_context_alloc())) { rc = -ENOMEM; goto err_put_afu; @@ -115,9 +120,16 @@ int afu_release(struct inode *inode, struct file *file) __func__, ctx->pe); cxl_context_detach(ctx); - mutex_lock(&ctx->mapping_lock); - ctx->mapping = NULL; - mutex_unlock(&ctx->mapping_lock); + + /* + * Delete the context's mapping pointer, unless it's created by the + * kernel API, in which case leave it so it can be freed by reclaim_ctx() + */ + if (!ctx->kernelapi) { + mutex_lock(&ctx->mapping_lock); + ctx->mapping = NULL; + mutex_unlock(&ctx->mapping_lock); + } put_device(&ctx->afu->dev); @@ -179,6 +191,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, if (work.flags & CXL_START_WORK_AMR) amr = work.amr & mfspr(SPRN_UAMOR); + ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF); + /* * We grab the PID here and not in the file open to allow for the case * where a process (master, some daemon, etc) has opened the chardev on @@ -238,6 +252,9 @@ long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (ctx->status == CLOSED) return -EIO; + if (!cxl_adapter_link_ok(ctx->afu->adapter)) + return -EIO; + pr_devel("afu_ioctl\n"); switch (cmd) { case CXL_IOCTL_START_WORK: @@ -251,7 +268,7 @@ long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return -EINVAL; } -long afu_compat_ioctl(struct file *file, unsigned int cmd, +static long afu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return afu_ioctl(file, cmd, arg); @@ -265,6 +282,9 @@ int afu_mmap(struct file *file, struct vm_area_struct *vm) if (ctx->status != STARTED) return -EIO; + if (!cxl_adapter_link_ok(ctx->afu->adapter)) + return -EIO; + return cxl_context_iomap(ctx, vm); } @@ -309,6 +329,9 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, int rc; DEFINE_WAIT(wait); + if (!cxl_adapter_link_ok(ctx->afu->adapter)) + return -EIO; + if (count < CXL_READ_MIN_SIZE) return -EINVAL; @@ -319,6 +342,11 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, if (ctx_event_pending(ctx)) break; + if (!cxl_adapter_link_ok(ctx->afu->adapter)) { + rc = -EIO; + goto out; + } + if (file->f_flags & O_NONBLOCK) { rc = -EAGAIN; goto out; @@ -396,7 +424,7 @@ const struct file_operations afu_fops = { .mmap = afu_mmap, }; -const struct file_operations afu_master_fops = { +static const struct file_operations afu_master_fops = { .owner = THIS_MODULE, .open = afu_master_open, .poll = afu_poll, @@ -519,7 +547,7 @@ int __init cxl_file_init(void) * If these change we really need to update API. Either change some * flags or update API version number CXL_API_VERSION. */ - BUILD_BUG_ON(CXL_API_VERSION != 1); + BUILD_BUG_ON(CXL_API_VERSION != 2); BUILD_BUG_ON(sizeof(struct cxl_ioctl_start_work) != 64); BUILD_BUG_ON(sizeof(struct cxl_event_header) != 8); BUILD_BUG_ON(sizeof(struct cxl_event_afu_interrupt) != 8); diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index 680cd2634..09a406058 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -30,12 +30,12 @@ static irqreturn_t handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr, u6 serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An); - dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%.16llx\n", errstat); - dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%.16llx\n", fir1); - dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%.16llx\n", fir2); - dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%.16llx\n", serr); - dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%.16llx\n", fir_slice); - dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%.16llx\n", afu_debug); + dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat); + dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1); + dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2); + dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); + dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); + dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n"); cxl_stop_trace(ctx->afu->adapter); @@ -54,10 +54,10 @@ irqreturn_t cxl_slice_irq_err(int irq, void *data) fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An); errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); afu_debug = cxl_p1n_read(afu, CXL_AFU_DEBUG_An); - dev_crit(&afu->dev, "PSL_SERR_An: 0x%.16llx\n", serr); - dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%.16llx\n", fir_slice); - dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%.16llx\n", errstat); - dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%.16llx\n", afu_debug); + dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); + dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); + dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%016llx\n", errstat); + dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); @@ -72,7 +72,7 @@ static irqreturn_t cxl_irq_err(int irq, void *data) WARN(1, "CXL ERROR interrupt %i\n", irq); err_ivte = cxl_p1_read(adapter, CXL_PSL_ErrIVTE); - dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%.16llx\n", err_ivte); + dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%016llx\n", err_ivte); dev_crit(&adapter->dev, "STOPPING CXL TRACE\n"); cxl_stop_trace(adapter); @@ -80,7 +80,7 @@ static irqreturn_t cxl_irq_err(int irq, void *data) fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1); fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2); - dev_crit(&adapter->dev, "PSL_FIR1: 0x%.16llx\nPSL_FIR2: 0x%.16llx\n", fir1, fir2); + dev_crit(&adapter->dev, "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", fir1, fir2); return IRQ_HANDLED; } @@ -147,7 +147,7 @@ static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info) if (dsisr & CXL_PSL_DSISR_An_PE) return handle_psl_slice_error(ctx, dsisr, irq_info->errstat); if (dsisr & CXL_PSL_DSISR_An_AE) { - pr_devel("CXL interrupt: AFU Error %.llx\n", irq_info->afu_err); + pr_devel("CXL interrupt: AFU Error 0x%016llx\n", irq_info->afu_err); if (ctx->pending_afu_err) { /* @@ -158,7 +158,7 @@ static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info) * probably best that we log them somewhere: */ dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error " - "undelivered to pe %i: %.llx\n", + "undelivered to pe %i: 0x%016llx\n", ctx->pe, irq_info->afu_err); } else { spin_lock(&ctx->lock); @@ -211,8 +211,8 @@ static irqreturn_t cxl_irq_multiplexed(int irq, void *data) } rcu_read_unlock(); - WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %.16llx DAR" - " %.16llx\n(Possible AFU HW issue - was a term/remove acked" + WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %016llx DAR" + " %016llx\n(Possible AFU HW issue - was a term/remove acked" " with outstanding transactions?)\n", ph, irq_info.dsisr, irq_info.dar); return fail_psl_irq(afu, &irq_info); @@ -341,6 +341,9 @@ int cxl_register_psl_err_irq(struct cxl *adapter) void cxl_release_psl_err_irq(struct cxl *adapter) { + if (adapter->err_virq != irq_find_mapping(NULL, adapter->err_hwirq)) + return; + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000); cxl_unmap_irq(adapter->err_virq, adapter); cxl_release_one_irq(adapter, adapter->err_hwirq); @@ -374,6 +377,9 @@ int cxl_register_serr_irq(struct cxl_afu *afu) void cxl_release_serr_irq(struct cxl_afu *afu) { + if (afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq)) + return; + cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000); cxl_unmap_irq(afu->serr_virq, afu); cxl_release_one_irq(afu->adapter, afu->serr_hwirq); @@ -400,6 +406,9 @@ int cxl_register_psl_irq(struct cxl_afu *afu) void cxl_release_psl_irq(struct cxl_afu *afu) { + if (afu->psl_virq != irq_find_mapping(NULL, afu->psl_hwirq)) + return; + cxl_unmap_irq(afu->psl_virq, afu); cxl_release_one_irq(afu->adapter, afu->psl_hwirq); kfree(afu->psl_irq_name); @@ -421,6 +430,9 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32 count) int rc, r, i, j = 1; struct cxl_irq_name *irq_name; + /* Initialize the list head to hold irq names */ + INIT_LIST_HEAD(&ctx->irq_names); + if ((rc = cxl_alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, count))) return rc; @@ -432,13 +444,12 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32 count) ctx->irq_bitmap = kcalloc(BITS_TO_LONGS(count), sizeof(*ctx->irq_bitmap), GFP_KERNEL); if (!ctx->irq_bitmap) - return -ENOMEM; + goto out; /* * Allocate names first. If any fail, bail out before allocating * actual hardware IRQs. */ - INIT_LIST_HEAD(&ctx->irq_names); for (r = 1; r < CXL_IRQ_RANGES; r++) { for (i = 0; i < ctx->irqs.range[r]; i++) { irq_name = kmalloc(sizeof(struct cxl_irq_name), @@ -460,11 +471,12 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32 count) return 0; out: + cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); afu_irq_name_free(ctx); return -ENOMEM; } -void afu_register_hwirqs(struct cxl_context *ctx) +static void afu_register_hwirqs(struct cxl_context *ctx) { irq_hw_number_t hwirq; struct cxl_irq_name *irq_name; @@ -511,4 +523,6 @@ void afu_release_irqs(struct cxl_context *ctx, void *cookie) afu_irq_name_free(ctx); cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); + + ctx->irq_count = 0; } diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 4a164ab8b..9fde75ed4 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -222,6 +222,7 @@ static void exit_cxl(void) cxl_debugfs_exit(); cxl_file_exit(); unregister_cxl_calls(&cxl_calls); + idr_destroy(&cxl_adapter_idr); } module_init(init_cxl); diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 10567f245..d2e75c88f 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -41,7 +41,14 @@ static int afu_control(struct cxl_afu *afu, u64 command, rc = -EBUSY; goto out; } - pr_devel_ratelimited("AFU control... (0x%.16llx)\n", + + if (!cxl_adapter_link_ok(afu->adapter)) { + afu->enabled = enabled; + rc = -EIO; + goto out; + } + + pr_devel_ratelimited("AFU control... (0x%016llx)\n", AFU_Cntl | command); cpu_relax(); AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); @@ -85,6 +92,10 @@ int __cxl_afu_reset(struct cxl_afu *afu) int cxl_afu_check_and_enable(struct cxl_afu *afu) { + if (!cxl_adapter_link_ok(afu->adapter)) { + WARN(1, "Refusing to enable afu while link down!\n"); + return -EIO; + } if (afu->enabled) return 0; return afu_enable(afu); @@ -103,6 +114,12 @@ int cxl_psl_purge(struct cxl_afu *afu) pr_devel("PSL purge request\n"); + if (!cxl_adapter_link_ok(afu->adapter)) { + dev_warn(&afu->dev, "PSL Purge called with link down, ignoring\n"); + rc = -EIO; + goto out; + } + if ((AFU_Cntl & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { WARN(1, "psl_purge request while AFU not disabled!\n"); cxl_afu_disable(afu); @@ -119,14 +136,19 @@ int cxl_psl_purge(struct cxl_afu *afu) rc = -EBUSY; goto out; } + if (!cxl_adapter_link_ok(afu->adapter)) { + rc = -EIO; + goto out; + } + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); - pr_devel_ratelimited("PSL purging... PSL_CNTL: 0x%.16llx PSL_DSISR: 0x%.16llx\n", PSL_CNTL, dsisr); + pr_devel_ratelimited("PSL purging... PSL_CNTL: 0x%016llx PSL_DSISR: 0x%016llx\n", PSL_CNTL, dsisr); if (dsisr & CXL_PSL_DSISR_TRANS) { dar = cxl_p2n_read(afu, CXL_PSL_DAR_An); - dev_notice(&afu->dev, "PSL purge terminating pending translation, DSISR: 0x%.16llx, DAR: 0x%.16llx\n", dsisr, dar); + dev_notice(&afu->dev, "PSL purge terminating pending translation, DSISR: 0x%016llx, DAR: 0x%016llx\n", dsisr, dar); cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); } else if (dsisr) { - dev_notice(&afu->dev, "PSL purge acknowledging pending non-translation fault, DSISR: 0x%.16llx\n", dsisr); + dev_notice(&afu->dev, "PSL purge acknowledging pending non-translation fault, DSISR: 0x%016llx\n", dsisr); cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); } else { cpu_relax(); @@ -161,10 +183,8 @@ static int spa_max_procs(int spa_size) return ((spa_size / 8) - 96) / 17; } -static int alloc_spa(struct cxl_afu *afu) +int cxl_alloc_spa(struct cxl_afu *afu) { - u64 spap; - /* Work out how many pages to allocate */ afu->spa_order = 0; do { @@ -183,6 +203,13 @@ static int alloc_spa(struct cxl_afu *afu) pr_devel("spa pages: %i afu->spa_max_procs: %i afu->num_procs: %i\n", 1<<afu->spa_order, afu->spa_max_procs, afu->num_procs); + return 0; +} + +static void attach_spa(struct cxl_afu *afu) +{ + u64 spap; + afu->sw_command_status = (__be64 *)((char *)afu->spa + ((afu->spa_max_procs + 3) * 128)); @@ -191,14 +218,19 @@ static int alloc_spa(struct cxl_afu *afu) spap |= CXL_PSL_SPAP_V; pr_devel("cxl: SPA allocated at 0x%p. Max processes: %i, sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n", afu->spa, afu->spa_max_procs, afu->sw_command_status, spap); cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap); - - return 0; } -static void release_spa(struct cxl_afu *afu) +static inline void detach_spa(struct cxl_afu *afu) { cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0); - free_pages((unsigned long) afu->spa, afu->spa_order); +} + +void cxl_release_spa(struct cxl_afu *afu) +{ + if (afu->spa) { + free_pages((unsigned long) afu->spa, afu->spa_order); + afu->spa = NULL; + } } int cxl_tlb_slb_invalidate(struct cxl *adapter) @@ -215,6 +247,8 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter) dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n"); return -EBUSY; } + if (!cxl_adapter_link_ok(adapter)) + return -EIO; cpu_relax(); } @@ -224,6 +258,8 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter) dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n"); return -EBUSY; } + if (!cxl_adapter_link_ok(adapter)) + return -EIO; cpu_relax(); } return 0; @@ -240,6 +276,11 @@ int cxl_afu_slbia(struct cxl_afu *afu) dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n"); return -EBUSY; } + /* If the adapter has gone down, we can assume that we + * will PERST it and that will invalidate everything. + */ + if (!cxl_adapter_link_ok(afu->adapter)) + return -EIO; cpu_relax(); } return 0; @@ -279,6 +320,8 @@ static void slb_invalid(struct cxl_context *ctx) cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID); while (1) { + if (!cxl_adapter_link_ok(adapter)) + break; slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA); if (!(slbia & CXL_TLB_SLB_P)) break; @@ -308,6 +351,11 @@ static int do_process_element_cmd(struct cxl_context *ctx, rc = -EBUSY; goto out; } + if (!cxl_adapter_link_ok(ctx->afu->adapter)) { + dev_warn(&ctx->afu->dev, "WARNING: Device link down, aborting Process Element Command!\n"); + rc = -EIO; + goto out; + } state = be64_to_cpup(ctx->afu->sw_command_status); if (state == ~0ULL) { pr_err("cxl: Error adding process element to AFU\n"); @@ -355,8 +403,13 @@ static int terminate_process_element(struct cxl_context *ctx) mutex_lock(&ctx->afu->spa_mutex); pr_devel("%s Terminate pe: %i started\n", __func__, ctx->pe); - rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE, - CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T); + /* We could be asked to terminate when the hw is down. That + * should always succeed: it's not running if the hw has gone + * away and is being reset. + */ + if (cxl_adapter_link_ok(ctx->afu->adapter)) + rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE, + CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T); ctx->elem->software_state = 0; /* Remove Valid bit */ pr_devel("%s Terminate pe: %i finished\n", __func__, ctx->pe); mutex_unlock(&ctx->afu->spa_mutex); @@ -369,7 +422,14 @@ static int remove_process_element(struct cxl_context *ctx) mutex_lock(&ctx->afu->spa_mutex); pr_devel("%s Remove pe: %i started\n", __func__, ctx->pe); - if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0))) + + /* We could be asked to remove when the hw is down. Again, if + * the hw is down, the PE is gone, so we succeed. + */ + if (cxl_adapter_link_ok(ctx->afu->adapter)) + rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0); + + if (!rc) ctx->pe_inserted = false; slb_invalid(ctx); pr_devel("%s Remove pe: %i finished\n", __func__, ctx->pe); @@ -397,15 +457,18 @@ static int activate_afu_directed(struct cxl_afu *afu) dev_info(&afu->dev, "Activating AFU directed mode\n"); - if (alloc_spa(afu)) - return -ENOMEM; + afu->num_procs = afu->max_procs_virtualised; + if (afu->spa == NULL) { + if (cxl_alloc_spa(afu)) + return -ENOMEM; + } + attach_spa(afu); cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_AFU); cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL); cxl_p1n_write(afu, CXL_PSL_ID_An, CXL_PSL_ID_An_F | CXL_PSL_ID_An_L); afu->current_mode = CXL_MODE_DIRECTED; - afu->num_procs = afu->max_procs_virtualised; if ((rc = cxl_chardev_m_afu_add(afu))) return rc; @@ -492,9 +555,7 @@ static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr) if ((result = cxl_afu_check_and_enable(ctx->afu))) return result; - add_process_element(ctx); - - return 0; + return add_process_element(ctx); } static int deactivate_afu_directed(struct cxl_afu *afu) @@ -511,8 +572,6 @@ static int deactivate_afu_directed(struct cxl_afu *afu) cxl_afu_disable(afu); cxl_psl_purge(afu); - release_spa(afu); - return 0; } @@ -614,6 +673,11 @@ int cxl_afu_activate_mode(struct cxl_afu *afu, int mode) if (!(mode & afu->modes_supported)) return -EINVAL; + if (!cxl_adapter_link_ok(afu->adapter)) { + WARN(1, "Device link is down, refusing to activate!\n"); + return -EIO; + } + if (mode == CXL_MODE_DIRECTED) return activate_afu_directed(afu); if (mode == CXL_MODE_DEDICATED) @@ -624,6 +688,11 @@ int cxl_afu_activate_mode(struct cxl_afu *afu, int mode) int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr) { + if (!cxl_adapter_link_ok(ctx->afu->adapter)) { + WARN(1, "Device link is down, refusing to attach process!\n"); + return -EIO; + } + ctx->kernel = kernel; if (ctx->afu->current_mode == CXL_MODE_DIRECTED) return attach_afu_directed(ctx, wed, amr); @@ -668,6 +737,12 @@ int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info) { u64 pidtid; + /* If the adapter has gone away, we can't get any meaningful + * information. + */ + if (!cxl_adapter_link_ok(afu->adapter)) + return -EIO; + info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); info->dar = cxl_p2n_read(afu, CXL_PSL_DAR_An); info->dsr = cxl_p2n_read(afu, CXL_PSL_DSR_An); @@ -684,7 +759,7 @@ static void recover_psl_err(struct cxl_afu *afu, u64 errstat) { u64 dsisr; - pr_devel("RECOVERING FROM PSL ERROR... (0x%.16llx)\n", errstat); + pr_devel("RECOVERING FROM PSL ERROR... (0x%016llx)\n", errstat); /* Clear PSL_DSISR[PE] */ dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index dc836071c..85761d7eb 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -24,6 +24,7 @@ #include <asm/io.h> #include "cxl.h" +#include <misc/cxl.h> #define CXL_PCI_VSEC_ID 0x1280 @@ -133,7 +134,7 @@ u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off) return (val >> ((off & 0x3) * 8)) & 0xff; } -static DEFINE_PCI_DEVICE_TABLE(cxl_pci_tbl) = { +static const struct pci_device_id cxl_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0477), }, { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), }, { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x04cf), }, @@ -369,6 +370,55 @@ static int init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev return 0; } +#define TBSYNC_CNT(n) (((u64)n & 0x7) << (63-6)) +#define _2048_250MHZ_CYCLES 1 + +static int cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev) +{ + u64 psl_tb; + int delta; + unsigned int retry = 0; + struct device_node *np; + + if (!(np = pnv_pci_get_phb_node(dev))) + return -ENODEV; + + /* Do not fail when CAPP timebase sync is not supported by OPAL */ + of_node_get(np); + if (! of_get_property(np, "ibm,capp-timebase-sync", NULL)) { + of_node_put(np); + pr_err("PSL: Timebase sync: OPAL support missing\n"); + return 0; + } + of_node_put(np); + + /* + * Setup PSL Timebase Control and Status register + * with the recommended Timebase Sync Count value + */ + cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT, + TBSYNC_CNT(2 * _2048_250MHZ_CYCLES)); + + /* Enable PSL Timebase */ + cxl_p1_write(adapter, CXL_PSL_Control, 0x0000000000000000); + cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb); + + /* Wait until CORE TB and PSL TB difference <= 16usecs */ + do { + msleep(1); + if (retry++ > 5) { + pr_err("PSL: Timebase sync: giving up!\n"); + return -EIO; + } + psl_tb = cxl_p1_read(adapter, CXL_PSL_Timebase); + delta = mftb() - psl_tb; + if (delta < 0) + delta = -delta; + } while (cputime_to_usecs(delta) > 16); + + return 0; +} + static int init_implementation_afu_regs(struct cxl_afu *afu) { /* read/write masks for this slice */ @@ -539,10 +589,18 @@ err: static void cxl_unmap_slice_regs(struct cxl_afu *afu) { - if (afu->p2n_mmio) + if (afu->p2n_mmio) { iounmap(afu->p2n_mmio); - if (afu->p1n_mmio) + afu->p2n_mmio = NULL; + } + if (afu->p1n_mmio) { iounmap(afu->p1n_mmio); + afu->p1n_mmio = NULL; + } + if (afu->afu_desc_mmio) { + iounmap(afu->afu_desc_mmio); + afu->afu_desc_mmio = NULL; + } } static void cxl_release_afu(struct device *dev) @@ -551,6 +609,9 @@ static void cxl_release_afu(struct device *dev) pr_devel("cxl_release_afu\n"); + idr_destroy(&afu->contexts_idr); + cxl_release_spa(afu); + kfree(afu); } @@ -656,7 +717,7 @@ static int sanitise_afu_regs(struct cxl_afu *afu) */ reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An); if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { - dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#.16llx\n", reg); + dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#016llx\n", reg); if (__cxl_afu_reset(afu)) return -EIO; if (cxl_afu_disable(afu)) @@ -677,7 +738,7 @@ static int sanitise_afu_regs(struct cxl_afu *afu) cxl_p2n_write(afu, CXL_SSTP0_An, 0x0000000000000000); reg = cxl_p2n_read(afu, CXL_PSL_DSISR_An); if (reg) { - dev_warn(&afu->dev, "AFU had pending DSISR: %#.16llx\n", reg); + dev_warn(&afu->dev, "AFU had pending DSISR: %#016llx\n", reg); if (reg & CXL_PSL_DSISR_TRANS) cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); else @@ -686,12 +747,12 @@ static int sanitise_afu_regs(struct cxl_afu *afu) reg = cxl_p1n_read(afu, CXL_PSL_SERR_An); if (reg) { if (reg & ~0xffff) - dev_warn(&afu->dev, "AFU had pending SERR: %#.16llx\n", reg); + dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg); cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff); } reg = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); if (reg) { - dev_warn(&afu->dev, "AFU had pending error status: %#.16llx\n", reg); + dev_warn(&afu->dev, "AFU had pending error status: %#016llx\n", reg); cxl_p2n_write(afu, CXL_PSL_ErrStat_An, reg); } @@ -742,45 +803,70 @@ ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf, return count; } -static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) +static int cxl_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev) { - struct cxl_afu *afu; - bool free = true; int rc; - if (!(afu = cxl_alloc_afu(adapter, slice))) - return -ENOMEM; - - if ((rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice))) - goto err1; - if ((rc = cxl_map_slice_regs(afu, adapter, dev))) - goto err1; + return rc; if ((rc = sanitise_afu_regs(afu))) - goto err2; + goto err1; /* We need to reset the AFU before we can read the AFU descriptor */ if ((rc = __cxl_afu_reset(afu))) - goto err2; + goto err1; if (cxl_verbose) dump_afu_descriptor(afu); if ((rc = cxl_read_afu_descriptor(afu))) - goto err2; + goto err1; if ((rc = cxl_afu_descriptor_looks_ok(afu))) - goto err2; + goto err1; if ((rc = init_implementation_afu_regs(afu))) - goto err2; + goto err1; if ((rc = cxl_register_serr_irq(afu))) - goto err2; + goto err1; if ((rc = cxl_register_psl_irq(afu))) - goto err3; + goto err2; + + return 0; + +err2: + cxl_release_serr_irq(afu); +err1: + cxl_unmap_slice_regs(afu); + return rc; +} + +static void cxl_deconfigure_afu(struct cxl_afu *afu) +{ + cxl_release_psl_irq(afu); + cxl_release_serr_irq(afu); + cxl_unmap_slice_regs(afu); +} + +static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) +{ + struct cxl_afu *afu; + int rc; + + afu = cxl_alloc_afu(adapter, slice); + if (!afu) + return -ENOMEM; + + rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice); + if (rc) + goto err_free; + + rc = cxl_configure_afu(afu, adapter, dev); + if (rc) + goto err_free; /* Don't care if this fails */ cxl_debugfs_afu_add(afu); @@ -795,10 +881,6 @@ static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) if ((rc = cxl_sysfs_afu_add(afu))) goto err_put1; - - if ((rc = cxl_afu_select_best_mode(afu))) - goto err_put2; - adapter->afu[afu->slice] = afu; if ((rc = cxl_pci_vphb_add(afu))) @@ -806,21 +888,16 @@ static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) return 0; -err_put2: - cxl_sysfs_afu_remove(afu); err_put1: - device_unregister(&afu->dev); - free = false; + cxl_deconfigure_afu(afu); cxl_debugfs_afu_remove(afu); - cxl_release_psl_irq(afu); -err3: - cxl_release_serr_irq(afu); -err2: - cxl_unmap_slice_regs(afu); -err1: - if (free) - kfree(afu); + device_unregister(&afu->dev); return rc; + +err_free: + kfree(afu); + return rc; + } static void cxl_remove_afu(struct cxl_afu *afu) @@ -840,10 +917,7 @@ static void cxl_remove_afu(struct cxl_afu *afu) cxl_context_detach_all(afu); cxl_afu_deactivate_mode(afu); - cxl_release_psl_irq(afu); - cxl_release_serr_irq(afu); - cxl_unmap_slice_regs(afu); - + cxl_deconfigure_afu(afu); device_unregister(&afu->dev); } @@ -852,6 +926,12 @@ int cxl_reset(struct cxl *adapter) struct pci_dev *dev = to_pci_dev(adapter->dev.parent); int rc; + if (adapter->perst_same_image) { + dev_warn(&dev->dev, + "cxl: refusing to reset/reflash when perst_reloads_same_image is set.\n"); + return -EINVAL; + } + dev_info(&dev->dev, "CXL reset\n"); /* pcie_warm_reset requests a fundamental pci reset which includes a @@ -872,7 +952,7 @@ static int cxl_map_adapter_regs(struct cxl *adapter, struct pci_dev *dev) if (pci_request_region(dev, 0, "priv 1 regs")) goto err2; - pr_devel("cxl_map_adapter_regs: p1: %#.16llx %#llx, p2: %#.16llx %#llx", + pr_devel("cxl_map_adapter_regs: p1: %#016llx %#llx, p2: %#016llx %#llx", p1_base(dev), p1_size(dev), p2_base(dev), p2_size(dev)); if (!(adapter->p1_mmio = ioremap(p1_base(dev), p1_size(dev)))) @@ -896,10 +976,16 @@ err1: static void cxl_unmap_adapter_regs(struct cxl *adapter) { - if (adapter->p1_mmio) + if (adapter->p1_mmio) { iounmap(adapter->p1_mmio); - if (adapter->p2_mmio) + adapter->p1_mmio = NULL; + pci_release_region(to_pci_dev(adapter->dev.parent), 2); + } + if (adapter->p2_mmio) { iounmap(adapter->p2_mmio); + adapter->p2_mmio = NULL; + pci_release_region(to_pci_dev(adapter->dev.parent), 0); + } } static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev) @@ -928,7 +1014,6 @@ static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev) CXL_READ_VSEC_BASE_IMAGE(dev, vsec, &adapter->base_image); CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state); adapter->user_image_loaded = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED); - adapter->perst_loads_image = true; adapter->perst_select_user = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED); CXL_READ_VSEC_NAFUS(dev, vsec, &adapter->slices); @@ -950,6 +1035,32 @@ static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev) return 0; } +/* + * Workaround a PCIe Host Bridge defect on some cards, that can cause + * malformed Transaction Layer Packet (TLP) errors to be erroneously + * reported. Mask this error in the Uncorrectable Error Mask Register. + * + * The upper nibble of the PSL revision is used to distinguish between + * different cards. The affected ones have it set to 0. + */ +static void cxl_fixup_malformed_tlp(struct cxl *adapter, struct pci_dev *dev) +{ + int aer; + u32 data; + + if (adapter->psl_rev & 0xf000) + return; + if (!(aer = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR))) + return; + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &data); + if (data & PCI_ERR_UNC_MALF_TLP) + if (data & PCI_ERR_UNC_INTN) + return; + data |= PCI_ERR_UNC_MALF_TLP; + data |= PCI_ERR_UNC_INTN; + pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, data); +} + static int cxl_vsec_looks_ok(struct cxl *adapter, struct pci_dev *dev) { if (adapter->vsec_status & CXL_STATUS_SECOND_PORT) @@ -988,81 +1099,140 @@ static void cxl_release_adapter(struct device *dev) pr_devel("cxl_release_adapter\n"); + cxl_remove_adapter_nr(adapter); + kfree(adapter); } -static struct cxl *cxl_alloc_adapter(struct pci_dev *dev) +static struct cxl *cxl_alloc_adapter(void) { struct cxl *adapter; if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL))) return NULL; - adapter->dev.parent = &dev->dev; - adapter->dev.release = cxl_release_adapter; - pci_set_drvdata(dev, adapter); spin_lock_init(&adapter->afu_list_lock); + if (cxl_alloc_adapter_nr(adapter)) + goto err1; + + if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num)) + goto err2; + return adapter; + +err2: + cxl_remove_adapter_nr(adapter); +err1: + kfree(adapter); + return NULL; } +#define CXL_PSL_ErrIVTE_tberror (0x1ull << (63-31)) + static int sanitise_adapter_regs(struct cxl *adapter) { - cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000); + /* Clear PSL tberror bit by writing 1 to it */ + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, CXL_PSL_ErrIVTE_tberror); return cxl_tlb_slb_invalidate(adapter); } -static struct cxl *cxl_init_adapter(struct pci_dev *dev) +/* This should contain *only* operations that can safely be done in + * both creation and recovery. + */ +static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) { - struct cxl *adapter; - bool free = true; int rc; + adapter->dev.parent = &dev->dev; + adapter->dev.release = cxl_release_adapter; + pci_set_drvdata(dev, adapter); - if (!(adapter = cxl_alloc_adapter(dev))) - return ERR_PTR(-ENOMEM); + rc = pci_enable_device(dev); + if (rc) { + dev_err(&dev->dev, "pci_enable_device failed: %i\n", rc); + return rc; + } if ((rc = cxl_read_vsec(adapter, dev))) - goto err1; + return rc; if ((rc = cxl_vsec_looks_ok(adapter, dev))) - goto err1; + return rc; + + cxl_fixup_malformed_tlp(adapter, dev); if ((rc = setup_cxl_bars(dev))) - goto err1; + return rc; if ((rc = switch_card_to_cxl(dev))) - goto err1; - - if ((rc = cxl_alloc_adapter_nr(adapter))) - goto err1; - - if ((rc = dev_set_name(&adapter->dev, "card%i", adapter->adapter_num))) - goto err2; + return rc; if ((rc = cxl_update_image_control(adapter))) - goto err2; + return rc; if ((rc = cxl_map_adapter_regs(adapter, dev))) - goto err2; + return rc; if ((rc = sanitise_adapter_regs(adapter))) - goto err2; + goto err; if ((rc = init_implementation_adapter_regs(adapter, dev))) - goto err3; + goto err; if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_CAPI))) - goto err3; + goto err; /* If recovery happened, the last step is to turn on snooping. * In the non-recovery case this has no effect */ - if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON))) { - goto err3; - } + if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON))) + goto err; + + if ((rc = cxl_setup_psl_timebase(adapter, dev))) + goto err; if ((rc = cxl_register_psl_err_irq(adapter))) - goto err3; + goto err; + + return 0; + +err: + cxl_unmap_adapter_regs(adapter); + return rc; + +} + +static void cxl_deconfigure_adapter(struct cxl *adapter) +{ + struct pci_dev *pdev = to_pci_dev(adapter->dev.parent); + + cxl_release_psl_err_irq(adapter); + cxl_unmap_adapter_regs(adapter); + + pci_disable_device(pdev); +} + +static struct cxl *cxl_init_adapter(struct pci_dev *dev) +{ + struct cxl *adapter; + int rc; + + adapter = cxl_alloc_adapter(); + if (!adapter) + return ERR_PTR(-ENOMEM); + + /* Set defaults for parameters which need to persist over + * configure/reconfigure + */ + adapter->perst_loads_image = true; + adapter->perst_same_image = false; + + rc = cxl_configure_adapter(adapter, dev); + if (rc) { + pci_disable_device(dev); + cxl_release_adapter(&adapter->dev); + return ERR_PTR(rc); + } /* Don't care if this one fails: */ cxl_debugfs_adapter_add(adapter); @@ -1080,37 +1250,25 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev) return adapter; err_put1: - device_unregister(&adapter->dev); - free = false; + /* This should mirror cxl_remove_adapter, except without the + * sysfs parts + */ cxl_debugfs_adapter_remove(adapter); - cxl_release_psl_err_irq(adapter); -err3: - cxl_unmap_adapter_regs(adapter); -err2: - cxl_remove_adapter_nr(adapter); -err1: - if (free) - kfree(adapter); + cxl_deconfigure_adapter(adapter); + device_unregister(&adapter->dev); return ERR_PTR(rc); } static void cxl_remove_adapter(struct cxl *adapter) { - struct pci_dev *pdev = to_pci_dev(adapter->dev.parent); - - pr_devel("cxl_release_adapter\n"); + pr_devel("cxl_remove_adapter\n"); cxl_sysfs_adapter_remove(adapter); cxl_debugfs_adapter_remove(adapter); - cxl_release_psl_err_irq(adapter); - cxl_unmap_adapter_regs(adapter); - cxl_remove_adapter_nr(adapter); - device_unregister(&adapter->dev); + cxl_deconfigure_adapter(adapter); - pci_release_region(pdev, 0); - pci_release_region(pdev, 2); - pci_disable_device(pdev); + device_unregister(&adapter->dev); } static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) @@ -1122,21 +1280,21 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) if (cxl_verbose) dump_cxl_config_space(dev); - if ((rc = pci_enable_device(dev))) { - dev_err(&dev->dev, "pci_enable_device failed: %i\n", rc); - return rc; - } - adapter = cxl_init_adapter(dev); if (IS_ERR(adapter)) { dev_err(&dev->dev, "cxl_init_adapter failed: %li\n", PTR_ERR(adapter)); - pci_disable_device(dev); return PTR_ERR(adapter); } for (slice = 0; slice < adapter->slices; slice++) { - if ((rc = cxl_init_afu(adapter, slice, dev))) + if ((rc = cxl_init_afu(adapter, slice, dev))) { dev_err(&dev->dev, "AFU %i failed to initialise: %i\n", slice, rc); + continue; + } + + rc = cxl_afu_select_best_mode(adapter->afu[slice]); + if (rc) + dev_err(&dev->dev, "AFU %i failed to start: %i\n", slice, rc); } return 0; @@ -1160,10 +1318,262 @@ static void cxl_remove(struct pci_dev *dev) cxl_remove_adapter(adapter); } +static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu, + pci_channel_state_t state) +{ + struct pci_dev *afu_dev; + pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET; + pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET; + + /* There should only be one entry, but go through the list + * anyway + */ + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + if (!afu_dev->driver) + continue; + + afu_dev->error_state = state; + + if (afu_dev->driver->err_handler) + afu_result = afu_dev->driver->err_handler->error_detected(afu_dev, + state); + /* Disconnect trumps all, NONE trumps NEED_RESET */ + if (afu_result == PCI_ERS_RESULT_DISCONNECT) + result = PCI_ERS_RESULT_DISCONNECT; + else if ((afu_result == PCI_ERS_RESULT_NONE) && + (result == PCI_ERS_RESULT_NEED_RESET)) + result = PCI_ERS_RESULT_NONE; + } + return result; +} + +static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct cxl *adapter = pci_get_drvdata(pdev); + struct cxl_afu *afu; + pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET; + int i; + + /* At this point, we could still have an interrupt pending. + * Let's try to get them out of the way before they do + * anything we don't like. + */ + schedule(); + + /* If we're permanently dead, give up. */ + if (state == pci_channel_io_perm_failure) { + /* Tell the AFU drivers; but we don't care what they + * say, we're going away. + */ + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + cxl_vphb_error_detected(afu, state); + } + return PCI_ERS_RESULT_DISCONNECT; + } + + /* Are we reflashing? + * + * If we reflash, we could come back as something entirely + * different, including a non-CAPI card. As such, by default + * we don't participate in the process. We'll be unbound and + * the slot re-probed. (TODO: check EEH doesn't blindly rebind + * us!) + * + * However, this isn't the entire story: for reliablity + * reasons, we usually want to reflash the FPGA on PERST in + * order to get back to a more reliable known-good state. + * + * This causes us a bit of a problem: if we reflash we can't + * trust that we'll come back the same - we could have a new + * image and been PERSTed in order to load that + * image. However, most of the time we actually *will* come + * back the same - for example a regular EEH event. + * + * Therefore, we allow the user to assert that the image is + * indeed the same and that we should continue on into EEH + * anyway. + */ + if (adapter->perst_loads_image && !adapter->perst_same_image) { + /* TODO take the PHB out of CXL mode */ + dev_info(&pdev->dev, "reflashing, so opting out of EEH!\n"); + return PCI_ERS_RESULT_NONE; + } + + /* + * At this point, we want to try to recover. We'll always + * need a complete slot reset: we don't trust any other reset. + * + * Now, we go through each AFU: + * - We send the driver, if bound, an error_detected callback. + * We expect it to clean up, but it can also tell us to give + * up and permanently detach the card. To simplify things, if + * any bound AFU driver doesn't support EEH, we give up on EEH. + * + * - We detach all contexts associated with the AFU. This + * does not free them, but puts them into a CLOSED state + * which causes any the associated files to return useful + * errors to userland. It also unmaps, but does not free, + * any IRQs. + * + * - We clean up our side: releasing and unmapping resources we hold + * so we can wire them up again when the hardware comes back up. + * + * Driver authors should note: + * + * - Any contexts you create in your kernel driver (except + * those associated with anonymous file descriptors) are + * your responsibility to free and recreate. Likewise with + * any attached resources. + * + * - We will take responsibility for re-initialising the + * device context (the one set up for you in + * cxl_pci_enable_device_hook and accessed through + * cxl_get_context). If you've attached IRQs or other + * resources to it, they remains yours to free. + * + * You can call the same functions to release resources as you + * normally would: we make sure that these functions continue + * to work when the hardware is down. + * + * Two examples: + * + * 1) If you normally free all your resources at the end of + * each request, or if you use anonymous FDs, your + * error_detected callback can simply set a flag to tell + * your driver not to start any new calls. You can then + * clear the flag in the resume callback. + * + * 2) If you normally allocate your resources on startup: + * * Set a flag in error_detected as above. + * * Let CXL detach your contexts. + * * In slot_reset, free the old resources and allocate new ones. + * * In resume, clear the flag to allow things to start. + */ + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + + result = cxl_vphb_error_detected(afu, state); + + /* Only continue if everyone agrees on NEED_RESET */ + if (result != PCI_ERS_RESULT_NEED_RESET) + return result; + + cxl_context_detach_all(afu); + cxl_afu_deactivate_mode(afu); + cxl_deconfigure_afu(afu); + } + cxl_deconfigure_adapter(adapter); + + return result; +} + +static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) +{ + struct cxl *adapter = pci_get_drvdata(pdev); + struct cxl_afu *afu; + struct cxl_context *ctx; + struct pci_dev *afu_dev; + pci_ers_result_t afu_result = PCI_ERS_RESULT_RECOVERED; + pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED; + int i; + + if (cxl_configure_adapter(adapter, pdev)) + goto err; + + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + + if (cxl_configure_afu(afu, adapter, pdev)) + goto err; + + if (cxl_afu_select_best_mode(afu)) + goto err; + + cxl_pci_vphb_reconfigure(afu); + + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + /* Reset the device context. + * TODO: make this less disruptive + */ + ctx = cxl_get_context(afu_dev); + + if (ctx && cxl_release_context(ctx)) + goto err; + + ctx = cxl_dev_context_init(afu_dev); + if (!ctx) + goto err; + + afu_dev->dev.archdata.cxl_ctx = ctx; + + if (cxl_afu_check_and_enable(afu)) + goto err; + + afu_dev->error_state = pci_channel_io_normal; + + /* If there's a driver attached, allow it to + * chime in on recovery. Drivers should check + * if everything has come back OK, but + * shouldn't start new work until we call + * their resume function. + */ + if (!afu_dev->driver) + continue; + + if (afu_dev->driver->err_handler && + afu_dev->driver->err_handler->slot_reset) + afu_result = afu_dev->driver->err_handler->slot_reset(afu_dev); + + if (afu_result == PCI_ERS_RESULT_DISCONNECT) + result = PCI_ERS_RESULT_DISCONNECT; + } + } + return result; + +err: + /* All the bits that happen in both error_detected and cxl_remove + * should be idempotent, so we don't need to worry about leaving a mix + * of unconfigured and reconfigured resources. + */ + dev_err(&pdev->dev, "EEH recovery failed. Asking to be disconnected.\n"); + return PCI_ERS_RESULT_DISCONNECT; +} + +static void cxl_pci_resume(struct pci_dev *pdev) +{ + struct cxl *adapter = pci_get_drvdata(pdev); + struct cxl_afu *afu; + struct pci_dev *afu_dev; + int i; + + /* Everything is back now. Drivers should restart work now. + * This is not the place to be checking if everything came back up + * properly, because there's no return value: do that in slot_reset. + */ + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + if (afu_dev->driver && afu_dev->driver->err_handler && + afu_dev->driver->err_handler->resume) + afu_dev->driver->err_handler->resume(afu_dev); + } + } +} + +static const struct pci_error_handlers cxl_err_handler = { + .error_detected = cxl_pci_error_detected, + .slot_reset = cxl_pci_slot_reset, + .resume = cxl_pci_resume, +}; + struct pci_driver cxl_pci_driver = { .name = "cxl-pci", .id_table = cxl_pci_tbl, .probe = cxl_probe, .remove = cxl_remove, .shutdown = cxl_remove, + .err_handler = &cxl_err_handler, }; diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c index 31f38bc71..02006f710 100644 --- a/drivers/misc/cxl/sysfs.c +++ b/drivers/misc/cxl/sysfs.c @@ -112,12 +112,38 @@ static ssize_t load_image_on_perst_store(struct device *device, return count; } +static ssize_t perst_reloads_same_image_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->perst_same_image); +} + +static ssize_t perst_reloads_same_image_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl *adapter = to_cxl_adapter(device); + int rc; + int val; + + rc = sscanf(buf, "%i", &val); + if ((rc != 1) || !(val == 1 || val == 0)) + return -EINVAL; + + adapter->perst_same_image = (val == 1 ? true : false); + return count; +} + static struct device_attribute adapter_attrs[] = { __ATTR_RO(caia_version), __ATTR_RO(psl_revision), __ATTR_RO(base_image), __ATTR_RO(image_loaded), __ATTR_RW(load_image_on_perst), + __ATTR_RW(perst_reloads_same_image), __ATTR(reset, S_IWUSR, NULL, reset_adapter_store), }; @@ -443,12 +469,7 @@ static ssize_t afu_read_config(struct file *filp, struct kobject *kobj, struct afu_config_record *cr = to_cr(kobj); struct cxl_afu *afu = to_cxl_afu(container_of(kobj->parent, struct device, kobj)); - u64 i, j, val, size = afu->crs_len; - - if (off > size) - return 0; - if (off + count > size) - count = size - off; + u64 i, j, val; for (i = 0; i < count;) { val = cxl_afu_cr_read64(afu, cr->cr, off & ~0x7); @@ -571,6 +592,8 @@ int cxl_sysfs_afu_add(struct cxl_afu *afu) /* conditionally create the add the binary file for error info buffer */ if (afu->eb_len) { + sysfs_attr_init(&afu->attr_eb.attr); + afu->attr_eb.attr.name = "afu_err_buff"; afu->attr_eb.attr.mode = S_IRUGO; afu->attr_eb.size = afu->eb_len; diff --git a/drivers/misc/cxl/trace.h b/drivers/misc/cxl/trace.h index ae434d878..6e1e2adfb 100644 --- a/drivers/misc/cxl/trace.h +++ b/drivers/misc/cxl/trace.h @@ -105,7 +105,7 @@ TRACE_EVENT(cxl_attach, __entry->num_interrupts = num_interrupts; ), - TP_printk("afu%i.%i pid=%i pe=%i wed=0x%.16llx irqs=%i amr=0x%llx", + TP_printk("afu%i.%i pid=%i pe=%i wed=0x%016llx irqs=%i amr=0x%llx", __entry->card, __entry->afu, __entry->pid, @@ -177,7 +177,7 @@ TRACE_EVENT(cxl_psl_irq, __entry->dar = dar; ), - TP_printk("afu%i.%i pe=%i irq=%i dsisr=%s dar=0x%.16llx", + TP_printk("afu%i.%i pe=%i irq=%i dsisr=%s dar=0x%016llx", __entry->card, __entry->afu, __entry->pe, @@ -233,7 +233,7 @@ TRACE_EVENT(cxl_ste_miss, __entry->dar = dar; ), - TP_printk("afu%i.%i pe=%i dar=0x%.16llx", + TP_printk("afu%i.%i pe=%i dar=0x%016llx", __entry->card, __entry->afu, __entry->pe, @@ -264,7 +264,7 @@ TRACE_EVENT(cxl_ste_write, __entry->v = v; ), - TP_printk("afu%i.%i pe=%i SSTE[%i] E=0x%.16llx V=0x%.16llx", + TP_printk("afu%i.%i pe=%i SSTE[%i] E=0x%016llx V=0x%016llx", __entry->card, __entry->afu, __entry->pe, @@ -295,7 +295,7 @@ TRACE_EVENT(cxl_pte_miss, __entry->dar = dar; ), - TP_printk("afu%i.%i pe=%i dsisr=%s dar=0x%.16llx", + TP_printk("afu%i.%i pe=%i dsisr=%s dar=0x%016llx", __entry->card, __entry->afu, __entry->pe, diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 2eba002b5..94b520896 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -48,6 +48,12 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) phb = pci_bus_to_host(dev->bus); afu = (struct cxl_afu *)phb->private_data; + + if (!cxl_adapter_link_ok(afu->adapter)) { + dev_warn(&dev->dev, "%s: Device link is down, refusing to enable AFU\n", __func__); + return false; + } + set_dma_ops(&dev->dev, &dma_direct_ops); set_dma_offset(&dev->dev, PAGE_OFFSET); @@ -138,6 +144,26 @@ static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, return 0; } + +static inline bool cxl_config_link_ok(struct pci_bus *bus) +{ + struct pci_controller *phb; + struct cxl_afu *afu; + + /* Config space IO is based on phb->cfg_addr, which is based on + * afu_desc_mmio. This isn't safe to read/write when the link + * goes down, as EEH tears down MMIO space. + * + * Check if the link is OK before proceeding. + */ + + phb = pci_bus_to_host(bus); + if (phb == NULL) + return false; + afu = (struct cxl_afu *)phb->private_data; + return cxl_adapter_link_ok(afu->adapter); +} + static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, int offset, int len, u32 *val) { @@ -150,6 +176,9 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, if (rc) return rc; + if (!cxl_config_link_ok(bus)) + return PCIBIOS_DEVICE_NOT_FOUND; + /* Can only read 32 bits */ *val = (in_le32(ioaddr) >> shift) & mask; return PCIBIOS_SUCCESSFUL; @@ -167,6 +196,9 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, if (rc) return rc; + if (!cxl_config_link_ok(bus)) + return PCIBIOS_DEVICE_NOT_FOUND; + /* Can only write 32 bits so do read-modify-write */ mask <<= shift; val <<= shift; @@ -240,6 +272,14 @@ int cxl_pci_vphb_add(struct cxl_afu *afu) return 0; } +void cxl_pci_vphb_reconfigure(struct cxl_afu *afu) +{ + /* When we are reconfigured, the AFU's MMIO space is unmapped + * and remapped. We need to reflect this in the PHB's view of + * the world. + */ + afu->phb->cfg_addr = afu->afu_desc_mmio + afu->crs_offset; +} void cxl_pci_vphb_remove(struct cxl_afu *afu) { |