Diffstat (limited to 'drivers/nvdimm/pmem.c')
-rw-r--r-- | drivers/nvdimm/pmem.c | 163 |
1 file changed, 114 insertions, 49 deletions
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 8ee79893d..8d0b54670 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -21,9 +21,11 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/module.h>
-#include <linux/memory_hotplug.h>
 #include <linux/moduleparam.h>
+#include <linux/badblocks.h>
+#include <linux/memremap.h>
 #include <linux/vmalloc.h>
+#include <linux/pfn_t.h>
 #include <linux/slab.h>
 #include <linux/pmem.h>
 #include <linux/nd.h>
@@ -39,13 +41,28 @@ struct pmem_device {
 	phys_addr_t		phys_addr;
 	/* when non-zero this device is hosting a 'pfn' instance */
 	phys_addr_t		data_offset;
+	u64			pfn_flags;
 	void __pmem		*virt_addr;
 	size_t			size;
+	struct badblocks	bb;
 };
 
 static int pmem_major;
 
-static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
+static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len)
+{
+	if (bb->count) {
+		sector_t first_bad;
+		int num_bad;
+
+		return !!badblocks_check(bb, sector, len / 512, &first_bad,
+				&num_bad);
+	}
+
+	return false;
+}
+
+static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 			unsigned int len, unsigned int off, int rw,
 			sector_t sector)
 {
@@ -54,6 +71,8 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 	void __pmem *pmem_addr = pmem->virt_addr + pmem_off;
 
 	if (rw == READ) {
+		if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
+			return -EIO;
 		memcpy_from_pmem(mem + off, pmem_addr, len);
 		flush_dcache_page(page);
 	} else {
@@ -62,10 +81,12 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 	}
 
 	kunmap_atomic(mem);
+	return 0;
 }
 
 static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 {
+	int rc = 0;
 	bool do_acct;
 	unsigned long start;
 	struct bio_vec bvec;
@@ -74,9 +95,15 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 	struct pmem_device *pmem = bdev->bd_disk->private_data;
 
 	do_acct = nd_iostat_start(bio, &start);
-	bio_for_each_segment(bvec, bio, iter)
-		pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, bvec.bv_offset,
-				bio_data_dir(bio), iter.bi_sector);
+	bio_for_each_segment(bvec, bio, iter) {
+		rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
+				bvec.bv_offset, bio_data_dir(bio),
+				iter.bi_sector);
+		if (rc) {
+			bio->bi_error = rc;
+			break;
+		}
+	}
 	if (do_acct)
 		nd_iostat_end(bio, start);
 
@@ -91,23 +118,32 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 		       struct page *page, int rw)
 {
 	struct pmem_device *pmem = bdev->bd_disk->private_data;
+	int rc;
 
-	pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector);
+	rc = pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector);
 	if (rw & WRITE)
 		wmb_pmem();
-	page_endio(page, rw & WRITE, 0);
 
-	return 0;
+	/*
+	 * The ->rw_page interface is subtle and tricky.  The core
+	 * retries on any error, so we can only invoke page_endio() in
+	 * the successful completion case.  Otherwise, we'll see crashes
+	 * caused by double completion.
+	 */
+	if (rc == 0)
+		page_endio(page, rw & WRITE, 0);
+
+	return rc;
 }
 
 static long pmem_direct_access(struct block_device *bdev, sector_t sector,
-		      void __pmem **kaddr, unsigned long *pfn)
+		      void __pmem **kaddr, pfn_t *pfn)
 {
 	struct pmem_device *pmem = bdev->bd_disk->private_data;
 	resource_size_t offset = sector * 512 + pmem->data_offset;
 
 	*kaddr = pmem->virt_addr + offset;
-	*pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;
+	*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
 
 	return pmem->size - offset;
 }
@@ -123,6 +159,7 @@ static struct pmem_device *pmem_alloc(struct device *dev,
 		struct resource *res, int id)
 {
 	struct pmem_device *pmem;
+	struct request_queue *q;
 
 	pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
 	if (!pmem)
@@ -140,16 +177,26 @@ static struct pmem_device *pmem_alloc(struct device *dev,
 		return ERR_PTR(-EBUSY);
 	}
 
-	if (pmem_should_map_pages(dev))
-		pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res);
-	else
+	q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
+	if (!q)
+		return ERR_PTR(-ENOMEM);
+
+	pmem->pfn_flags = PFN_DEV;
+	if (pmem_should_map_pages(dev)) {
+		pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res,
+				&q->q_usage_counter, NULL);
+		pmem->pfn_flags |= PFN_MAP;
+	} else
 		pmem->virt_addr = (void __pmem *) devm_memremap(dev,
 				pmem->phys_addr, pmem->size,
 				ARCH_MEMREMAP_PMEM);
 
-	if (IS_ERR(pmem->virt_addr))
+	if (IS_ERR(pmem->virt_addr)) {
+		blk_cleanup_queue(q);
 		return (void __force *) pmem->virt_addr;
+	}
 
+	pmem->pmem_queue = q;
 	return pmem;
 }
 
@@ -169,10 +216,6 @@ static int pmem_attach_disk(struct device *dev,
 	int nid = dev_to_node(dev);
 	struct gendisk *disk;
 
-	pmem->pmem_queue = blk_alloc_queue_node(GFP_KERNEL, nid);
-	if (!pmem->pmem_queue)
-		return -ENOMEM;
-
 	blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
 	blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE);
 	blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX);
@@ -195,7 +238,12 @@ static int pmem_attach_disk(struct device *dev,
 	disk->driverfs_dev = dev;
 	set_capacity(disk, (pmem->size - pmem->data_offset) / 512);
 	pmem->pmem_disk = disk;
+	devm_exit_badblocks(dev, &pmem->bb);
+	if (devm_init_badblocks(dev, &pmem->bb))
+		return -ENOMEM;
+	nvdimm_namespace_add_poison(ndns, &pmem->bb, pmem->data_offset);
 
+	disk->bb = &pmem->bb;
 	add_disk(disk);
 	revalidate_disk(disk);
 
@@ -212,9 +260,13 @@ static int pmem_rw_bytes(struct nd_namespace_common *ndns,
 		return -EFAULT;
 	}
 
-	if (rw == READ)
+	if (rw == READ) {
+		unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
+
+		if (unlikely(is_bad_pmem(&pmem->bb, offset / 512, sz_align)))
+			return -EIO;
 		memcpy_from_pmem(buf, pmem->virt_addr + offset, size);
-	else {
+	} else {
 		memcpy_to_pmem(pmem->virt_addr + offset, buf, size);
 		wmb_pmem();
 	}
@@ -238,14 +290,11 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
 
 	nd_pfn->pfn_sb = pfn_sb;
 	rc = nd_pfn_validate(nd_pfn);
-	if (rc == 0 || rc == -EBUSY)
+	if (rc == -ENODEV)
+		/* no info block, do init */;
+	else
 		return rc;
 
-	/* section alignment for simple hotplug */
-	if (nvdimm_namespace_capacity(ndns) < ND_PFN_ALIGN
-			|| pmem->phys_addr & ND_PFN_MASK)
-		return -ENODEV;
-
 	nd_region = to_nd_region(nd_pfn->dev.parent);
 	if (nd_region->ro) {
 		dev_info(&nd_pfn->dev,
@@ -263,9 +312,9 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
 	 * ->direct_access() to those that are included in the memmap.
 	 */
 	if (nd_pfn->mode == PFN_MODE_PMEM)
-		offset = ALIGN(SZ_8K + 64 * npfns, PMD_SIZE);
+		offset = ALIGN(SZ_8K + 64 * npfns, nd_pfn->align);
 	else if (nd_pfn->mode == PFN_MODE_RAM)
-		offset = SZ_8K;
+		offset = ALIGN(SZ_8K, nd_pfn->align);
 	else
 		goto err;
 
@@ -275,6 +324,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
 	pfn_sb->npfns = cpu_to_le64(npfns);
 	memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN);
 	memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
+	memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
 	pfn_sb->version_major = cpu_to_le16(1);
 	checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
 	pfn_sb->checksum = cpu_to_le64(checksum);
@@ -311,12 +361,17 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
 	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
 	struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
 	struct device *dev = &nd_pfn->dev;
-	struct vmem_altmap *altmap;
 	struct nd_region *nd_region;
+	struct vmem_altmap *altmap;
 	struct nd_pfn_sb *pfn_sb;
 	struct pmem_device *pmem;
+	struct request_queue *q;
 	phys_addr_t offset;
 	int rc;
+	struct vmem_altmap __altmap = {
+		.base_pfn = __phys_to_pfn(nsio->res.start),
+		.reserve = __phys_to_pfn(SZ_8K),
+	};
 
 	if (!nd_pfn->uuid || !nd_pfn->ndns)
 		return -ENODEV;
@@ -326,24 +381,25 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
 	if (rc)
 		return rc;
 
-	if (PAGE_SIZE != SZ_4K) {
-		dev_err(dev, "only supported on systems with 4K PAGE_SIZE\n");
-		return -ENXIO;
-	}
-	if (nsio->res.start & ND_PFN_MASK) {
-		dev_err(dev, "%s not memory hotplug section aligned\n",
-				dev_name(&ndns->dev));
-		return -ENXIO;
-	}
-
 	pfn_sb = nd_pfn->pfn_sb;
 	offset = le64_to_cpu(pfn_sb->dataoff);
 	nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
 	if (nd_pfn->mode == PFN_MODE_RAM) {
-		if (offset != SZ_8K)
+		if (offset < SZ_8K)
 			return -EINVAL;
 		nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
 		altmap = NULL;
+	} else if (nd_pfn->mode == PFN_MODE_PMEM) {
+		nd_pfn->npfns = (resource_size(&nsio->res) - offset)
+			/ PAGE_SIZE;
+		if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
+			dev_info(&nd_pfn->dev,
+					"number of pfns truncated from %lld to %ld\n",
+					le64_to_cpu(nd_pfn->pfn_sb->npfns),
+					nd_pfn->npfns);
+		altmap = & __altmap;
+		altmap->free = __phys_to_pfn(offset - SZ_8K);
+		altmap->alloc = 0;
 	} else {
 		rc = -ENXIO;
 		goto err;
@@ -351,8 +407,11 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
 
 	/* establish pfn range for lookup, and switch to direct map */
 	pmem = dev_get_drvdata(dev);
+	q = pmem->pmem_queue;
 	devm_memunmap(dev, (void __force *) pmem->virt_addr);
-	pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res);
+	pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res,
+			&q->q_usage_counter, altmap);
+	pmem->pfn_flags |= PFN_MAP;
 	if (IS_ERR(pmem->virt_addr)) {
 		rc = PTR_ERR(pmem->virt_addr);
 		goto err;
@@ -389,20 +448,26 @@ static int nd_pmem_probe(struct device *dev)
 	pmem->ndns = ndns;
 	dev_set_drvdata(dev, pmem);
 	ndns->rw_bytes = pmem_rw_bytes;
+	if (devm_init_badblocks(dev, &pmem->bb))
+		return -ENOMEM;
+	nvdimm_namespace_add_poison(ndns, &pmem->bb, 0);
 
-	if (is_nd_btt(dev))
+	if (is_nd_btt(dev)) {
+		/* btt allocates its own request_queue */
+		blk_cleanup_queue(pmem->pmem_queue);
+		pmem->pmem_queue = NULL;
 		return nvdimm_namespace_attach_btt(ndns);
+	}
 
 	if (is_nd_pfn(dev))
 		return nvdimm_namespace_attach_pfn(ndns);
 
-	if (nd_btt_probe(ndns, pmem) == 0) {
-		/* we'll come back as btt-pmem */
-		return -ENXIO;
-	}
-
-	if (nd_pfn_probe(ndns, pmem) == 0) {
-		/* we'll come back as pfn-pmem */
+	if (nd_btt_probe(ndns, pmem) == 0 || nd_pfn_probe(ndns, pmem) == 0) {
+		/*
+		 * We'll come back as either btt-pmem, or pfn-pmem, so
+		 * drop the queue allocation for now.
+		 */
+		blk_cleanup_queue(pmem->pmem_queue);
 		return -ENXIO;
 	}
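
A note on the sector arithmetic in the patch above: badblocks_check() (and therefore is_bad_pmem()) reasons in whole 512-byte sectors, while pmem_rw_bytes() receives an arbitrary byte offset and length, so the read path first widens the request with ALIGN(size + (offset & (512 - 1)), 512). Below is a minimal standalone sketch of just that calculation, written as ordinary userspace C with a local ALIGN macro and made-up sample values (not the kernel helpers), to show which sector range ends up being checked:

/*
 * Standalone sketch (userspace C, not kernel code): how a byte range
 * (offset, size) is widened to whole 512-byte sectors before a
 * bad-block lookup, mirroring the sz_align calculation the patch adds
 * to pmem_rw_bytes().  ALIGN and the sample values are assumptions
 * for illustration only.
 */
#include <stdio.h>
#include <stdint.h>

#define SECTOR_SIZE 512ULL
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	uint64_t offset = 1300;	/* byte offset of the request */
	uint64_t size = 700;	/* length of the request in bytes */

	/* widen to cover every sector the byte range touches */
	uint64_t sz_align = ALIGN(size + (offset & (SECTOR_SIZE - 1)), SECTOR_SIZE);
	uint64_t first_sector = offset / SECTOR_SIZE;
	uint64_t nr_sectors = sz_align / SECTOR_SIZE;

	/* a real caller would hand (first_sector, nr_sectors) to a bad-block lookup */
	printf("check sectors %llu..%llu (%llu bytes)\n",
			(unsigned long long)first_sector,
			(unsigned long long)(first_sector + nr_sectors - 1),
			(unsigned long long)sz_align);
	return 0;
}

With the sample values (offset 1300, size 700) this prints "check sectors 2..3 (1024 bytes)": bytes 1300-1999 touch sectors 2 and 3, and that is the span the read path must find free of poison before memcpy_from_pmem() is allowed to proceed.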