diff options
author | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2015-08-05 17:04:01 -0300 |
---|---|---|
committer | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2015-08-05 17:04:01 -0300 |
commit | 57f0f512b273f60d52568b8c6b77e17f5636edc0 (patch) | |
tree | 5e910f0e82173f4ef4f51111366a3f1299037a7b /drivers/mtd/ubi |
Initial import
Diffstat (limited to 'drivers/mtd/ubi')
-rw-r--r-- | drivers/mtd/ubi/Kconfig | 106 | ||||
-rw-r--r-- | drivers/mtd/ubi/Makefile | 8 | ||||
-rw-r--r-- | drivers/mtd/ubi/attach.c | 1768 | ||||
-rw-r--r-- | drivers/mtd/ubi/block.c | 669 | ||||
-rw-r--r-- | drivers/mtd/ubi/build.c | 1515 | ||||
-rw-r--r-- | drivers/mtd/ubi/cdev.c | 1110 | ||||
-rw-r--r-- | drivers/mtd/ubi/debug.c | 545 | ||||
-rw-r--r-- | drivers/mtd/ubi/debug.h | 142 | ||||
-rw-r--r-- | drivers/mtd/ubi/eba.c | 1477 | ||||
-rw-r--r-- | drivers/mtd/ubi/fastmap-wl.c | 362 | ||||
-rw-r--r-- | drivers/mtd/ubi/fastmap.c | 1631 | ||||
-rw-r--r-- | drivers/mtd/ubi/gluebi.c | 521 | ||||
-rw-r--r-- | drivers/mtd/ubi/io.c | 1435 | ||||
-rw-r--r-- | drivers/mtd/ubi/kapi.c | 864 | ||||
-rw-r--r-- | drivers/mtd/ubi/misc.c | 155 | ||||
-rw-r--r-- | drivers/mtd/ubi/ubi-media.h | 513 | ||||
-rw-r--r-- | drivers/mtd/ubi/ubi.h | 1102 | ||||
-rw-r--r-- | drivers/mtd/ubi/upd.c | 433 | ||||
-rw-r--r-- | drivers/mtd/ubi/vmt.c | 863 | ||||
-rw-r--r-- | drivers/mtd/ubi/vtbl.c | 864 | ||||
-rw-r--r-- | drivers/mtd/ubi/wl.c | 1844 | ||||
-rw-r--r-- | drivers/mtd/ubi/wl.h | 28 |
22 files changed, 17955 insertions, 0 deletions
diff --git a/drivers/mtd/ubi/Kconfig b/drivers/mtd/ubi/Kconfig new file mode 100644 index 000000000..f0855ce08 --- /dev/null +++ b/drivers/mtd/ubi/Kconfig @@ -0,0 +1,106 @@ +menuconfig MTD_UBI + tristate "Enable UBI - Unsorted block images" + select CRC32 + help + UBI is a software layer above MTD layer which admits of LVM-like + logical volumes on top of MTD devices, hides some complexities of + flash chips like wear and bad blocks and provides some other useful + capabilities. Please, consult the MTD web site for more details + (www.linux-mtd.infradead.org). + +if MTD_UBI + +config MTD_UBI_WL_THRESHOLD + int "UBI wear-leveling threshold" + default 4096 + range 2 65536 + help + This parameter defines the maximum difference between the highest + erase counter value and the lowest erase counter value of eraseblocks + of UBI devices. When this threshold is exceeded, UBI starts performing + wear leveling by means of moving data from eraseblock with low erase + counter to eraseblocks with high erase counter. + + The default value should be OK for SLC NAND flashes, NOR flashes and + other flashes which have eraseblock life-cycle 100000 or more. + However, in case of MLC NAND flashes which typically have eraseblock + life-cycle less than 10000, the threshold should be lessened (e.g., + to 128 or 256, although it does not have to be power of 2). + +config MTD_UBI_BEB_LIMIT + int "Maximum expected bad eraseblock count per 1024 eraseblocks" + default 20 + range 0 768 + help + This option specifies the maximum bad physical eraseblocks UBI + expects on the MTD device (per 1024 eraseblocks). If the underlying + flash does not admit of bad eraseblocks (e.g. NOR flash), this value + is ignored. + + NAND datasheets often specify the minimum and maximum NVM (Number of + Valid Blocks) for the flashes' endurance lifetime. The maximum + expected bad eraseblocks per 1024 eraseblocks then can be calculated + as "1024 * (1 - MinNVB / MaxNVB)", which gives 20 for most NANDs + (MaxNVB is basically the total count of eraseblocks on the chip). + + To put it differently, if this value is 20, UBI will try to reserve + about 1.9% of physical eraseblocks for bad blocks handling. And that + will be 1.9% of eraseblocks on the entire NAND chip, not just the MTD + partition UBI attaches. This means that if you have, say, a NAND + flash chip admits maximum 40 bad eraseblocks, and it is split on two + MTD partitions of the same size, UBI will reserve 40 eraseblocks when + attaching a partition. + + This option can be overridden by the "mtd=" UBI module parameter or + by the "attach" ioctl. + + Leave the default value if unsure. + +config MTD_UBI_FASTMAP + bool "UBI Fastmap (Experimental feature)" + default n + help + Important: this feature is experimental so far and the on-flash + format for fastmap may change in the next kernel versions + + Fastmap is a mechanism which allows attaching an UBI device + in nearly constant time. Instead of scanning the whole MTD device it + only has to locate a checkpoint (called fastmap) on the device. + The on-flash fastmap contains all information needed to attach + the device. Using fastmap makes only sense on large devices where + attaching by scanning takes long. UBI will not automatically install + a fastmap on old images, but you can set the UBI module parameter + fm_autoconvert to 1 if you want so. Please note that fastmap-enabled + images are still usable with UBI implementations without + fastmap support. On typical flash devices the whole fastmap fits + into one PEB. UBI will reserve PEBs to hold two fastmaps. + + If in doubt, say "N". + +config MTD_UBI_GLUEBI + tristate "MTD devices emulation driver (gluebi)" + help + This option enables gluebi - an additional driver which emulates MTD + devices on top of UBI volumes: for each UBI volumes an MTD device is + created, and all I/O to this MTD device is redirected to the UBI + volume. This is handy to make MTD-oriented software (like JFFS2) + work on top of UBI. Do not enable this unless you use legacy + software. + +config MTD_UBI_BLOCK + bool "Read-only block devices on top of UBI volumes" + default n + depends on BLOCK + help + This option enables read-only UBI block devices support. UBI block + devices will be layered on top of UBI volumes, which means that the + UBI driver will transparently handle things like bad eraseblocks and + bit-flips. You can put any block-oriented file system on top of UBI + volumes in read-only mode (e.g., ext4), but it is probably most + practical for read-only file systems, like squashfs. + + When selected, this feature will be built in the UBI driver. + + If in doubt, say "N". + +endif # MTD_UBI diff --git a/drivers/mtd/ubi/Makefile b/drivers/mtd/ubi/Makefile new file mode 100644 index 000000000..4e3c3d70d --- /dev/null +++ b/drivers/mtd/ubi/Makefile @@ -0,0 +1,8 @@ +obj-$(CONFIG_MTD_UBI) += ubi.o + +ubi-y += vtbl.o vmt.o upd.o build.o cdev.o kapi.o eba.o io.o wl.o attach.o +ubi-y += misc.o debug.o +ubi-$(CONFIG_MTD_UBI_FASTMAP) += fastmap.o +ubi-$(CONFIG_MTD_UBI_BLOCK) += block.o + +obj-$(CONFIG_MTD_UBI_GLUEBI) += gluebi.o diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c new file mode 100644 index 000000000..68eea5bef --- /dev/null +++ b/drivers/mtd/ubi/attach.c @@ -0,0 +1,1768 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * UBI attaching sub-system. + * + * This sub-system is responsible for attaching MTD devices and it also + * implements flash media scanning. + * + * The attaching information is represented by a &struct ubi_attach_info' + * object. Information about volumes is represented by &struct ubi_ainf_volume + * objects which are kept in volume RB-tree with root at the @volumes field. + * The RB-tree is indexed by the volume ID. + * + * Logical eraseblocks are represented by &struct ubi_ainf_peb objects. These + * objects are kept in per-volume RB-trees with the root at the corresponding + * &struct ubi_ainf_volume object. To put it differently, we keep an RB-tree of + * per-volume objects and each of these objects is the root of RB-tree of + * per-LEB objects. + * + * Corrupted physical eraseblocks are put to the @corr list, free physical + * eraseblocks are put to the @free list and the physical eraseblock to be + * erased are put to the @erase list. + * + * About corruptions + * ~~~~~~~~~~~~~~~~~ + * + * UBI protects EC and VID headers with CRC-32 checksums, so it can detect + * whether the headers are corrupted or not. Sometimes UBI also protects the + * data with CRC-32, e.g., when it executes the atomic LEB change operation, or + * when it moves the contents of a PEB for wear-leveling purposes. + * + * UBI tries to distinguish between 2 types of corruptions. + * + * 1. Corruptions caused by power cuts. These are expected corruptions and UBI + * tries to handle them gracefully, without printing too many warnings and + * error messages. The idea is that we do not lose important data in these + * cases - we may lose only the data which were being written to the media just + * before the power cut happened, and the upper layers (e.g., UBIFS) are + * supposed to handle such data losses (e.g., by using the FS journal). + * + * When UBI detects a corruption (CRC-32 mismatch) in a PEB, and it looks like + * the reason is a power cut, UBI puts this PEB to the @erase list, and all + * PEBs in the @erase list are scheduled for erasure later. + * + * 2. Unexpected corruptions which are not caused by power cuts. During + * attaching, such PEBs are put to the @corr list and UBI preserves them. + * Obviously, this lessens the amount of available PEBs, and if at some point + * UBI runs out of free PEBs, it switches to R/O mode. UBI also loudly informs + * about such PEBs every time the MTD device is attached. + * + * However, it is difficult to reliably distinguish between these types of + * corruptions and UBI's strategy is as follows (in case of attaching by + * scanning). UBI assumes corruption type 2 if the VID header is corrupted and + * the data area does not contain all 0xFFs, and there were no bit-flips or + * integrity errors (e.g., ECC errors in case of NAND) while reading the data + * area. Otherwise UBI assumes corruption type 1. So the decision criteria + * are as follows. + * o If the data area contains only 0xFFs, there are no data, and it is safe + * to just erase this PEB - this is corruption type 1. + * o If the data area has bit-flips or data integrity errors (ECC errors on + * NAND), it is probably a PEB which was being erased when power cut + * happened, so this is corruption type 1. However, this is just a guess, + * which might be wrong. + * o Otherwise this is corruption type 2. + */ + +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/crc32.h> +#include <linux/math64.h> +#include <linux/random.h> +#include "ubi.h" + +static int self_check_ai(struct ubi_device *ubi, struct ubi_attach_info *ai); + +/* Temporary variables used during scanning */ +static struct ubi_ec_hdr *ech; +static struct ubi_vid_hdr *vidh; + +/** + * add_to_list - add physical eraseblock to a list. + * @ai: attaching information + * @pnum: physical eraseblock number to add + * @vol_id: the last used volume id for the PEB + * @lnum: the last used LEB number for the PEB + * @ec: erase counter of the physical eraseblock + * @to_head: if not zero, add to the head of the list + * @list: the list to add to + * + * This function allocates a 'struct ubi_ainf_peb' object for physical + * eraseblock @pnum and adds it to the "free", "erase", or "alien" lists. + * It stores the @lnum and @vol_id alongside, which can both be + * %UBI_UNKNOWN if they are not available, not readable, or not assigned. + * If @to_head is not zero, PEB will be added to the head of the list, which + * basically means it will be processed first later. E.g., we add corrupted + * PEBs (corrupted due to power cuts) to the head of the erase list to make + * sure we erase them first and get rid of corruptions ASAP. This function + * returns zero in case of success and a negative error code in case of + * failure. + */ +static int add_to_list(struct ubi_attach_info *ai, int pnum, int vol_id, + int lnum, int ec, int to_head, struct list_head *list) +{ + struct ubi_ainf_peb *aeb; + + if (list == &ai->free) { + dbg_bld("add to free: PEB %d, EC %d", pnum, ec); + } else if (list == &ai->erase) { + dbg_bld("add to erase: PEB %d, EC %d", pnum, ec); + } else if (list == &ai->alien) { + dbg_bld("add to alien: PEB %d, EC %d", pnum, ec); + ai->alien_peb_count += 1; + } else + BUG(); + + aeb = kmem_cache_alloc(ai->aeb_slab_cache, GFP_KERNEL); + if (!aeb) + return -ENOMEM; + + aeb->pnum = pnum; + aeb->vol_id = vol_id; + aeb->lnum = lnum; + aeb->ec = ec; + if (to_head) + list_add(&aeb->u.list, list); + else + list_add_tail(&aeb->u.list, list); + return 0; +} + +/** + * add_corrupted - add a corrupted physical eraseblock. + * @ai: attaching information + * @pnum: physical eraseblock number to add + * @ec: erase counter of the physical eraseblock + * + * This function allocates a 'struct ubi_ainf_peb' object for a corrupted + * physical eraseblock @pnum and adds it to the 'corr' list. The corruption + * was presumably not caused by a power cut. Returns zero in case of success + * and a negative error code in case of failure. + */ +static int add_corrupted(struct ubi_attach_info *ai, int pnum, int ec) +{ + struct ubi_ainf_peb *aeb; + + dbg_bld("add to corrupted: PEB %d, EC %d", pnum, ec); + + aeb = kmem_cache_alloc(ai->aeb_slab_cache, GFP_KERNEL); + if (!aeb) + return -ENOMEM; + + ai->corr_peb_count += 1; + aeb->pnum = pnum; + aeb->ec = ec; + list_add(&aeb->u.list, &ai->corr); + return 0; +} + +/** + * validate_vid_hdr - check volume identifier header. + * @ubi: UBI device description object + * @vid_hdr: the volume identifier header to check + * @av: information about the volume this logical eraseblock belongs to + * @pnum: physical eraseblock number the VID header came from + * + * This function checks that data stored in @vid_hdr is consistent. Returns + * non-zero if an inconsistency was found and zero if not. + * + * Note, UBI does sanity check of everything it reads from the flash media. + * Most of the checks are done in the I/O sub-system. Here we check that the + * information in the VID header is consistent to the information in other VID + * headers of the same volume. + */ +static int validate_vid_hdr(const struct ubi_device *ubi, + const struct ubi_vid_hdr *vid_hdr, + const struct ubi_ainf_volume *av, int pnum) +{ + int vol_type = vid_hdr->vol_type; + int vol_id = be32_to_cpu(vid_hdr->vol_id); + int used_ebs = be32_to_cpu(vid_hdr->used_ebs); + int data_pad = be32_to_cpu(vid_hdr->data_pad); + + if (av->leb_count != 0) { + int av_vol_type; + + /* + * This is not the first logical eraseblock belonging to this + * volume. Ensure that the data in its VID header is consistent + * to the data in previous logical eraseblock headers. + */ + + if (vol_id != av->vol_id) { + ubi_err(ubi, "inconsistent vol_id"); + goto bad; + } + + if (av->vol_type == UBI_STATIC_VOLUME) + av_vol_type = UBI_VID_STATIC; + else + av_vol_type = UBI_VID_DYNAMIC; + + if (vol_type != av_vol_type) { + ubi_err(ubi, "inconsistent vol_type"); + goto bad; + } + + if (used_ebs != av->used_ebs) { + ubi_err(ubi, "inconsistent used_ebs"); + goto bad; + } + + if (data_pad != av->data_pad) { + ubi_err(ubi, "inconsistent data_pad"); + goto bad; + } + } + + return 0; + +bad: + ubi_err(ubi, "inconsistent VID header at PEB %d", pnum); + ubi_dump_vid_hdr(vid_hdr); + ubi_dump_av(av); + return -EINVAL; +} + +/** + * add_volume - add volume to the attaching information. + * @ai: attaching information + * @vol_id: ID of the volume to add + * @pnum: physical eraseblock number + * @vid_hdr: volume identifier header + * + * If the volume corresponding to the @vid_hdr logical eraseblock is already + * present in the attaching information, this function does nothing. Otherwise + * it adds corresponding volume to the attaching information. Returns a pointer + * to the allocated "av" object in case of success and a negative error code in + * case of failure. + */ +static struct ubi_ainf_volume *add_volume(struct ubi_attach_info *ai, + int vol_id, int pnum, + const struct ubi_vid_hdr *vid_hdr) +{ + struct ubi_ainf_volume *av; + struct rb_node **p = &ai->volumes.rb_node, *parent = NULL; + + ubi_assert(vol_id == be32_to_cpu(vid_hdr->vol_id)); + + /* Walk the volume RB-tree to look if this volume is already present */ + while (*p) { + parent = *p; + av = rb_entry(parent, struct ubi_ainf_volume, rb); + + if (vol_id == av->vol_id) + return av; + + if (vol_id > av->vol_id) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + /* The volume is absent - add it */ + av = kmalloc(sizeof(struct ubi_ainf_volume), GFP_KERNEL); + if (!av) + return ERR_PTR(-ENOMEM); + + av->highest_lnum = av->leb_count = 0; + av->vol_id = vol_id; + av->root = RB_ROOT; + av->used_ebs = be32_to_cpu(vid_hdr->used_ebs); + av->data_pad = be32_to_cpu(vid_hdr->data_pad); + av->compat = vid_hdr->compat; + av->vol_type = vid_hdr->vol_type == UBI_VID_DYNAMIC ? UBI_DYNAMIC_VOLUME + : UBI_STATIC_VOLUME; + if (vol_id > ai->highest_vol_id) + ai->highest_vol_id = vol_id; + + rb_link_node(&av->rb, parent, p); + rb_insert_color(&av->rb, &ai->volumes); + ai->vols_found += 1; + dbg_bld("added volume %d", vol_id); + return av; +} + +/** + * ubi_compare_lebs - find out which logical eraseblock is newer. + * @ubi: UBI device description object + * @aeb: first logical eraseblock to compare + * @pnum: physical eraseblock number of the second logical eraseblock to + * compare + * @vid_hdr: volume identifier header of the second logical eraseblock + * + * This function compares 2 copies of a LEB and informs which one is newer. In + * case of success this function returns a positive value, in case of failure, a + * negative error code is returned. The success return codes use the following + * bits: + * o bit 0 is cleared: the first PEB (described by @aeb) is newer than the + * second PEB (described by @pnum and @vid_hdr); + * o bit 0 is set: the second PEB is newer; + * o bit 1 is cleared: no bit-flips were detected in the newer LEB; + * o bit 1 is set: bit-flips were detected in the newer LEB; + * o bit 2 is cleared: the older LEB is not corrupted; + * o bit 2 is set: the older LEB is corrupted. + */ +int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb, + int pnum, const struct ubi_vid_hdr *vid_hdr) +{ + int len, err, second_is_newer, bitflips = 0, corrupted = 0; + uint32_t data_crc, crc; + struct ubi_vid_hdr *vh = NULL; + unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum); + + if (sqnum2 == aeb->sqnum) { + /* + * This must be a really ancient UBI image which has been + * created before sequence numbers support has been added. At + * that times we used 32-bit LEB versions stored in logical + * eraseblocks. That was before UBI got into mainline. We do not + * support these images anymore. Well, those images still work, + * but only if no unclean reboots happened. + */ + ubi_err(ubi, "unsupported on-flash UBI format"); + return -EINVAL; + } + + /* Obviously the LEB with lower sequence counter is older */ + second_is_newer = (sqnum2 > aeb->sqnum); + + /* + * Now we know which copy is newer. If the copy flag of the PEB with + * newer version is not set, then we just return, otherwise we have to + * check data CRC. For the second PEB we already have the VID header, + * for the first one - we'll need to re-read it from flash. + * + * Note: this may be optimized so that we wouldn't read twice. + */ + + if (second_is_newer) { + if (!vid_hdr->copy_flag) { + /* It is not a copy, so it is newer */ + dbg_bld("second PEB %d is newer, copy_flag is unset", + pnum); + return 1; + } + } else { + if (!aeb->copy_flag) { + /* It is not a copy, so it is newer */ + dbg_bld("first PEB %d is newer, copy_flag is unset", + pnum); + return bitflips << 1; + } + + vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); + if (!vh) + return -ENOMEM; + + pnum = aeb->pnum; + err = ubi_io_read_vid_hdr(ubi, pnum, vh, 0); + if (err) { + if (err == UBI_IO_BITFLIPS) + bitflips = 1; + else { + ubi_err(ubi, "VID of PEB %d header is bad, but it was OK earlier, err %d", + pnum, err); + if (err > 0) + err = -EIO; + + goto out_free_vidh; + } + } + + vid_hdr = vh; + } + + /* Read the data of the copy and check the CRC */ + + len = be32_to_cpu(vid_hdr->data_size); + + mutex_lock(&ubi->buf_mutex); + err = ubi_io_read_data(ubi, ubi->peb_buf, pnum, 0, len); + if (err && err != UBI_IO_BITFLIPS && !mtd_is_eccerr(err)) + goto out_unlock; + + data_crc = be32_to_cpu(vid_hdr->data_crc); + crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, len); + if (crc != data_crc) { + dbg_bld("PEB %d CRC error: calculated %#08x, must be %#08x", + pnum, crc, data_crc); + corrupted = 1; + bitflips = 0; + second_is_newer = !second_is_newer; + } else { + dbg_bld("PEB %d CRC is OK", pnum); + bitflips |= !!err; + } + mutex_unlock(&ubi->buf_mutex); + + ubi_free_vid_hdr(ubi, vh); + + if (second_is_newer) + dbg_bld("second PEB %d is newer, copy_flag is set", pnum); + else + dbg_bld("first PEB %d is newer, copy_flag is set", pnum); + + return second_is_newer | (bitflips << 1) | (corrupted << 2); + +out_unlock: + mutex_unlock(&ubi->buf_mutex); +out_free_vidh: + ubi_free_vid_hdr(ubi, vh); + return err; +} + +/** + * ubi_add_to_av - add used physical eraseblock to the attaching information. + * @ubi: UBI device description object + * @ai: attaching information + * @pnum: the physical eraseblock number + * @ec: erase counter + * @vid_hdr: the volume identifier header + * @bitflips: if bit-flips were detected when this physical eraseblock was read + * + * This function adds information about a used physical eraseblock to the + * 'used' tree of the corresponding volume. The function is rather complex + * because it has to handle cases when this is not the first physical + * eraseblock belonging to the same logical eraseblock, and the newer one has + * to be picked, while the older one has to be dropped. This function returns + * zero in case of success and a negative error code in case of failure. + */ +int ubi_add_to_av(struct ubi_device *ubi, struct ubi_attach_info *ai, int pnum, + int ec, const struct ubi_vid_hdr *vid_hdr, int bitflips) +{ + int err, vol_id, lnum; + unsigned long long sqnum; + struct ubi_ainf_volume *av; + struct ubi_ainf_peb *aeb; + struct rb_node **p, *parent = NULL; + + vol_id = be32_to_cpu(vid_hdr->vol_id); + lnum = be32_to_cpu(vid_hdr->lnum); + sqnum = be64_to_cpu(vid_hdr->sqnum); + + dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, bitflips %d", + pnum, vol_id, lnum, ec, sqnum, bitflips); + + av = add_volume(ai, vol_id, pnum, vid_hdr); + if (IS_ERR(av)) + return PTR_ERR(av); + + if (ai->max_sqnum < sqnum) + ai->max_sqnum = sqnum; + + /* + * Walk the RB-tree of logical eraseblocks of volume @vol_id to look + * if this is the first instance of this logical eraseblock or not. + */ + p = &av->root.rb_node; + while (*p) { + int cmp_res; + + parent = *p; + aeb = rb_entry(parent, struct ubi_ainf_peb, u.rb); + if (lnum != aeb->lnum) { + if (lnum < aeb->lnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + continue; + } + + /* + * There is already a physical eraseblock describing the same + * logical eraseblock present. + */ + + dbg_bld("this LEB already exists: PEB %d, sqnum %llu, EC %d", + aeb->pnum, aeb->sqnum, aeb->ec); + + /* + * Make sure that the logical eraseblocks have different + * sequence numbers. Otherwise the image is bad. + * + * However, if the sequence number is zero, we assume it must + * be an ancient UBI image from the era when UBI did not have + * sequence numbers. We still can attach these images, unless + * there is a need to distinguish between old and new + * eraseblocks, in which case we'll refuse the image in + * 'ubi_compare_lebs()'. In other words, we attach old clean + * images, but refuse attaching old images with duplicated + * logical eraseblocks because there was an unclean reboot. + */ + if (aeb->sqnum == sqnum && sqnum != 0) { + ubi_err(ubi, "two LEBs with same sequence number %llu", + sqnum); + ubi_dump_aeb(aeb, 0); + ubi_dump_vid_hdr(vid_hdr); + return -EINVAL; + } + + /* + * Now we have to drop the older one and preserve the newer + * one. + */ + cmp_res = ubi_compare_lebs(ubi, aeb, pnum, vid_hdr); + if (cmp_res < 0) + return cmp_res; + + if (cmp_res & 1) { + /* + * This logical eraseblock is newer than the one + * found earlier. + */ + err = validate_vid_hdr(ubi, vid_hdr, av, pnum); + if (err) + return err; + + err = add_to_list(ai, aeb->pnum, aeb->vol_id, + aeb->lnum, aeb->ec, cmp_res & 4, + &ai->erase); + if (err) + return err; + + aeb->ec = ec; + aeb->pnum = pnum; + aeb->vol_id = vol_id; + aeb->lnum = lnum; + aeb->scrub = ((cmp_res & 2) || bitflips); + aeb->copy_flag = vid_hdr->copy_flag; + aeb->sqnum = sqnum; + + if (av->highest_lnum == lnum) + av->last_data_size = + be32_to_cpu(vid_hdr->data_size); + + return 0; + } else { + /* + * This logical eraseblock is older than the one found + * previously. + */ + return add_to_list(ai, pnum, vol_id, lnum, ec, + cmp_res & 4, &ai->erase); + } + } + + /* + * We've met this logical eraseblock for the first time, add it to the + * attaching information. + */ + + err = validate_vid_hdr(ubi, vid_hdr, av, pnum); + if (err) + return err; + + aeb = kmem_cache_alloc(ai->aeb_slab_cache, GFP_KERNEL); + if (!aeb) + return -ENOMEM; + + aeb->ec = ec; + aeb->pnum = pnum; + aeb->vol_id = vol_id; + aeb->lnum = lnum; + aeb->scrub = bitflips; + aeb->copy_flag = vid_hdr->copy_flag; + aeb->sqnum = sqnum; + + if (av->highest_lnum <= lnum) { + av->highest_lnum = lnum; + av->last_data_size = be32_to_cpu(vid_hdr->data_size); + } + + av->leb_count += 1; + rb_link_node(&aeb->u.rb, parent, p); + rb_insert_color(&aeb->u.rb, &av->root); + return 0; +} + +/** + * ubi_find_av - find volume in the attaching information. + * @ai: attaching information + * @vol_id: the requested volume ID + * + * This function returns a pointer to the volume description or %NULL if there + * are no data about this volume in the attaching information. + */ +struct ubi_ainf_volume *ubi_find_av(const struct ubi_attach_info *ai, + int vol_id) +{ + struct ubi_ainf_volume *av; + struct rb_node *p = ai->volumes.rb_node; + + while (p) { + av = rb_entry(p, struct ubi_ainf_volume, rb); + + if (vol_id == av->vol_id) + return av; + + if (vol_id > av->vol_id) + p = p->rb_left; + else + p = p->rb_right; + } + + return NULL; +} + +/** + * ubi_remove_av - delete attaching information about a volume. + * @ai: attaching information + * @av: the volume attaching information to delete + */ +void ubi_remove_av(struct ubi_attach_info *ai, struct ubi_ainf_volume *av) +{ + struct rb_node *rb; + struct ubi_ainf_peb *aeb; + + dbg_bld("remove attaching information about volume %d", av->vol_id); + + while ((rb = rb_first(&av->root))) { + aeb = rb_entry(rb, struct ubi_ainf_peb, u.rb); + rb_erase(&aeb->u.rb, &av->root); + list_add_tail(&aeb->u.list, &ai->erase); + } + + rb_erase(&av->rb, &ai->volumes); + kfree(av); + ai->vols_found -= 1; +} + +/** + * early_erase_peb - erase a physical eraseblock. + * @ubi: UBI device description object + * @ai: attaching information + * @pnum: physical eraseblock number to erase; + * @ec: erase counter value to write (%UBI_UNKNOWN if it is unknown) + * + * This function erases physical eraseblock 'pnum', and writes the erase + * counter header to it. This function should only be used on UBI device + * initialization stages, when the EBA sub-system had not been yet initialized. + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int early_erase_peb(struct ubi_device *ubi, + const struct ubi_attach_info *ai, int pnum, int ec) +{ + int err; + struct ubi_ec_hdr *ec_hdr; + + if ((long long)ec >= UBI_MAX_ERASECOUNTER) { + /* + * Erase counter overflow. Upgrade UBI and use 64-bit + * erase counters internally. + */ + ubi_err(ubi, "erase counter overflow at PEB %d, EC %d", + pnum, ec); + return -EINVAL; + } + + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ec_hdr) + return -ENOMEM; + + ec_hdr->ec = cpu_to_be64(ec); + + err = ubi_io_sync_erase(ubi, pnum, 0); + if (err < 0) + goto out_free; + + err = ubi_io_write_ec_hdr(ubi, pnum, ec_hdr); + +out_free: + kfree(ec_hdr); + return err; +} + +/** + * ubi_early_get_peb - get a free physical eraseblock. + * @ubi: UBI device description object + * @ai: attaching information + * + * This function returns a free physical eraseblock. It is supposed to be + * called on the UBI initialization stages when the wear-leveling sub-system is + * not initialized yet. This function picks a physical eraseblocks from one of + * the lists, writes the EC header if it is needed, and removes it from the + * list. + * + * This function returns a pointer to the "aeb" of the found free PEB in case + * of success and an error code in case of failure. + */ +struct ubi_ainf_peb *ubi_early_get_peb(struct ubi_device *ubi, + struct ubi_attach_info *ai) +{ + int err = 0; + struct ubi_ainf_peb *aeb, *tmp_aeb; + + if (!list_empty(&ai->free)) { + aeb = list_entry(ai->free.next, struct ubi_ainf_peb, u.list); + list_del(&aeb->u.list); + dbg_bld("return free PEB %d, EC %d", aeb->pnum, aeb->ec); + return aeb; + } + + /* + * We try to erase the first physical eraseblock from the erase list + * and pick it if we succeed, or try to erase the next one if not. And + * so forth. We don't want to take care about bad eraseblocks here - + * they'll be handled later. + */ + list_for_each_entry_safe(aeb, tmp_aeb, &ai->erase, u.list) { + if (aeb->ec == UBI_UNKNOWN) + aeb->ec = ai->mean_ec; + + err = early_erase_peb(ubi, ai, aeb->pnum, aeb->ec+1); + if (err) + continue; + + aeb->ec += 1; + list_del(&aeb->u.list); + dbg_bld("return PEB %d, EC %d", aeb->pnum, aeb->ec); + return aeb; + } + + ubi_err(ubi, "no free eraseblocks"); + return ERR_PTR(-ENOSPC); +} + +/** + * check_corruption - check the data area of PEB. + * @ubi: UBI device description object + * @vid_hdr: the (corrupted) VID header of this PEB + * @pnum: the physical eraseblock number to check + * + * This is a helper function which is used to distinguish between VID header + * corruptions caused by power cuts and other reasons. If the PEB contains only + * 0xFF bytes in the data area, the VID header is most probably corrupted + * because of a power cut (%0 is returned in this case). Otherwise, it was + * probably corrupted for some other reasons (%1 is returned in this case). A + * negative error code is returned if a read error occurred. + * + * If the corruption reason was a power cut, UBI can safely erase this PEB. + * Otherwise, it should preserve it to avoid possibly destroying important + * information. + */ +static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, + int pnum) +{ + int err; + + mutex_lock(&ubi->buf_mutex); + memset(ubi->peb_buf, 0x00, ubi->leb_size); + + err = ubi_io_read(ubi, ubi->peb_buf, pnum, ubi->leb_start, + ubi->leb_size); + if (err == UBI_IO_BITFLIPS || mtd_is_eccerr(err)) { + /* + * Bit-flips or integrity errors while reading the data area. + * It is difficult to say for sure what type of corruption is + * this, but presumably a power cut happened while this PEB was + * erased, so it became unstable and corrupted, and should be + * erased. + */ + err = 0; + goto out_unlock; + } + + if (err) + goto out_unlock; + + if (ubi_check_pattern(ubi->peb_buf, 0xFF, ubi->leb_size)) + goto out_unlock; + + ubi_err(ubi, "PEB %d contains corrupted VID header, and the data does not contain all 0xFF", + pnum); + ubi_err(ubi, "this may be a non-UBI PEB or a severe VID header corruption which requires manual inspection"); + ubi_dump_vid_hdr(vid_hdr); + pr_err("hexdump of PEB %d offset %d, length %d", + pnum, ubi->leb_start, ubi->leb_size); + ubi_dbg_print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, + ubi->peb_buf, ubi->leb_size, 1); + err = 1; + +out_unlock: + mutex_unlock(&ubi->buf_mutex); + return err; +} + +/** + * scan_peb - scan and process UBI headers of a PEB. + * @ubi: UBI device description object + * @ai: attaching information + * @pnum: the physical eraseblock number + * @vid: The volume ID of the found volume will be stored in this pointer + * @sqnum: The sqnum of the found volume will be stored in this pointer + * + * This function reads UBI headers of PEB @pnum, checks them, and adds + * information about this PEB to the corresponding list or RB-tree in the + * "attaching info" structure. Returns zero if the physical eraseblock was + * successfully handled and a negative error code in case of failure. + */ +static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, + int pnum, int *vid, unsigned long long *sqnum) +{ + long long uninitialized_var(ec); + int err, bitflips = 0, vol_id = -1, ec_err = 0; + + dbg_bld("scan PEB %d", pnum); + + /* Skip bad physical eraseblocks */ + err = ubi_io_is_bad(ubi, pnum); + if (err < 0) + return err; + else if (err) { + ai->bad_peb_count += 1; + return 0; + } + + err = ubi_io_read_ec_hdr(ubi, pnum, ech, 0); + if (err < 0) + return err; + switch (err) { + case 0: + break; + case UBI_IO_BITFLIPS: + bitflips = 1; + break; + case UBI_IO_FF: + ai->empty_peb_count += 1; + return add_to_list(ai, pnum, UBI_UNKNOWN, UBI_UNKNOWN, + UBI_UNKNOWN, 0, &ai->erase); + case UBI_IO_FF_BITFLIPS: + ai->empty_peb_count += 1; + return add_to_list(ai, pnum, UBI_UNKNOWN, UBI_UNKNOWN, + UBI_UNKNOWN, 1, &ai->erase); + case UBI_IO_BAD_HDR_EBADMSG: + case UBI_IO_BAD_HDR: + /* + * We have to also look at the VID header, possibly it is not + * corrupted. Set %bitflips flag in order to make this PEB be + * moved and EC be re-created. + */ + ec_err = err; + ec = UBI_UNKNOWN; + bitflips = 1; + break; + default: + ubi_err(ubi, "'ubi_io_read_ec_hdr()' returned unknown code %d", + err); + return -EINVAL; + } + + if (!ec_err) { + int image_seq; + + /* Make sure UBI version is OK */ + if (ech->version != UBI_VERSION) { + ubi_err(ubi, "this UBI version is %d, image version is %d", + UBI_VERSION, (int)ech->version); + return -EINVAL; + } + + ec = be64_to_cpu(ech->ec); + if (ec > UBI_MAX_ERASECOUNTER) { + /* + * Erase counter overflow. The EC headers have 64 bits + * reserved, but we anyway make use of only 31 bit + * values, as this seems to be enough for any existing + * flash. Upgrade UBI and use 64-bit erase counters + * internally. + */ + ubi_err(ubi, "erase counter overflow, max is %d", + UBI_MAX_ERASECOUNTER); + ubi_dump_ec_hdr(ech); + return -EINVAL; + } + + /* + * Make sure that all PEBs have the same image sequence number. + * This allows us to detect situations when users flash UBI + * images incorrectly, so that the flash has the new UBI image + * and leftovers from the old one. This feature was added + * relatively recently, and the sequence number was always + * zero, because old UBI implementations always set it to zero. + * For this reasons, we do not panic if some PEBs have zero + * sequence number, while other PEBs have non-zero sequence + * number. + */ + image_seq = be32_to_cpu(ech->image_seq); + if (!ubi->image_seq) + ubi->image_seq = image_seq; + if (image_seq && ubi->image_seq != image_seq) { + ubi_err(ubi, "bad image sequence number %d in PEB %d, expected %d", + image_seq, pnum, ubi->image_seq); + ubi_dump_ec_hdr(ech); + return -EINVAL; + } + } + + /* OK, we've done with the EC header, let's look at the VID header */ + + err = ubi_io_read_vid_hdr(ubi, pnum, vidh, 0); + if (err < 0) + return err; + switch (err) { + case 0: + break; + case UBI_IO_BITFLIPS: + bitflips = 1; + break; + case UBI_IO_BAD_HDR_EBADMSG: + if (ec_err == UBI_IO_BAD_HDR_EBADMSG) + /* + * Both EC and VID headers are corrupted and were read + * with data integrity error, probably this is a bad + * PEB, bit it is not marked as bad yet. This may also + * be a result of power cut during erasure. + */ + ai->maybe_bad_peb_count += 1; + case UBI_IO_BAD_HDR: + if (ec_err) + /* + * Both headers are corrupted. There is a possibility + * that this a valid UBI PEB which has corresponding + * LEB, but the headers are corrupted. However, it is + * impossible to distinguish it from a PEB which just + * contains garbage because of a power cut during erase + * operation. So we just schedule this PEB for erasure. + * + * Besides, in case of NOR flash, we deliberately + * corrupt both headers because NOR flash erasure is + * slow and can start from the end. + */ + err = 0; + else + /* + * The EC was OK, but the VID header is corrupted. We + * have to check what is in the data area. + */ + err = check_corruption(ubi, vidh, pnum); + + if (err < 0) + return err; + else if (!err) + /* This corruption is caused by a power cut */ + err = add_to_list(ai, pnum, UBI_UNKNOWN, + UBI_UNKNOWN, ec, 1, &ai->erase); + else + /* This is an unexpected corruption */ + err = add_corrupted(ai, pnum, ec); + if (err) + return err; + goto adjust_mean_ec; + case UBI_IO_FF_BITFLIPS: + err = add_to_list(ai, pnum, UBI_UNKNOWN, UBI_UNKNOWN, + ec, 1, &ai->erase); + if (err) + return err; + goto adjust_mean_ec; + case UBI_IO_FF: + if (ec_err || bitflips) + err = add_to_list(ai, pnum, UBI_UNKNOWN, + UBI_UNKNOWN, ec, 1, &ai->erase); + else + err = add_to_list(ai, pnum, UBI_UNKNOWN, + UBI_UNKNOWN, ec, 0, &ai->free); + if (err) + return err; + goto adjust_mean_ec; + default: + ubi_err(ubi, "'ubi_io_read_vid_hdr()' returned unknown code %d", + err); + return -EINVAL; + } + + vol_id = be32_to_cpu(vidh->vol_id); + if (vid) + *vid = vol_id; + if (sqnum) + *sqnum = be64_to_cpu(vidh->sqnum); + if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOLUME_ID) { + int lnum = be32_to_cpu(vidh->lnum); + + /* Unsupported internal volume */ + switch (vidh->compat) { + case UBI_COMPAT_DELETE: + if (vol_id != UBI_FM_SB_VOLUME_ID + && vol_id != UBI_FM_DATA_VOLUME_ID) { + ubi_msg(ubi, "\"delete\" compatible internal volume %d:%d found, will remove it", + vol_id, lnum); + } + err = add_to_list(ai, pnum, vol_id, lnum, + ec, 1, &ai->erase); + if (err) + return err; + return 0; + + case UBI_COMPAT_RO: + ubi_msg(ubi, "read-only compatible internal volume %d:%d found, switch to read-only mode", + vol_id, lnum); + ubi->ro_mode = 1; + break; + + case UBI_COMPAT_PRESERVE: + ubi_msg(ubi, "\"preserve\" compatible internal volume %d:%d found", + vol_id, lnum); + err = add_to_list(ai, pnum, vol_id, lnum, + ec, 0, &ai->alien); + if (err) + return err; + return 0; + + case UBI_COMPAT_REJECT: + ubi_err(ubi, "incompatible internal volume %d:%d found", + vol_id, lnum); + return -EINVAL; + } + } + + if (ec_err) + ubi_warn(ubi, "valid VID header but corrupted EC header at PEB %d", + pnum); + err = ubi_add_to_av(ubi, ai, pnum, ec, vidh, bitflips); + if (err) + return err; + +adjust_mean_ec: + if (!ec_err) { + ai->ec_sum += ec; + ai->ec_count += 1; + if (ec > ai->max_ec) + ai->max_ec = ec; + if (ec < ai->min_ec) + ai->min_ec = ec; + } + + return 0; +} + +/** + * late_analysis - analyze the overall situation with PEB. + * @ubi: UBI device description object + * @ai: attaching information + * + * This is a helper function which takes a look what PEBs we have after we + * gather information about all of them ("ai" is compete). It decides whether + * the flash is empty and should be formatted of whether there are too many + * corrupted PEBs and we should not attach this MTD device. Returns zero if we + * should proceed with attaching the MTD device, and %-EINVAL if we should not. + */ +static int late_analysis(struct ubi_device *ubi, struct ubi_attach_info *ai) +{ + struct ubi_ainf_peb *aeb; + int max_corr, peb_count; + + peb_count = ubi->peb_count - ai->bad_peb_count - ai->alien_peb_count; + max_corr = peb_count / 20 ?: 8; + + /* + * Few corrupted PEBs is not a problem and may be just a result of + * unclean reboots. However, many of them may indicate some problems + * with the flash HW or driver. + */ + if (ai->corr_peb_count) { + ubi_err(ubi, "%d PEBs are corrupted and preserved", + ai->corr_peb_count); + pr_err("Corrupted PEBs are:"); + list_for_each_entry(aeb, &ai->corr, u.list) + pr_cont(" %d", aeb->pnum); + pr_cont("\n"); + + /* + * If too many PEBs are corrupted, we refuse attaching, + * otherwise, only print a warning. + */ + if (ai->corr_peb_count >= max_corr) { + ubi_err(ubi, "too many corrupted PEBs, refusing"); + return -EINVAL; + } + } + + if (ai->empty_peb_count + ai->maybe_bad_peb_count == peb_count) { + /* + * All PEBs are empty, or almost all - a couple PEBs look like + * they may be bad PEBs which were not marked as bad yet. + * + * This piece of code basically tries to distinguish between + * the following situations: + * + * 1. Flash is empty, but there are few bad PEBs, which are not + * marked as bad so far, and which were read with error. We + * want to go ahead and format this flash. While formatting, + * the faulty PEBs will probably be marked as bad. + * + * 2. Flash contains non-UBI data and we do not want to format + * it and destroy possibly important information. + */ + if (ai->maybe_bad_peb_count <= 2) { + ai->is_empty = 1; + ubi_msg(ubi, "empty MTD device detected"); + get_random_bytes(&ubi->image_seq, + sizeof(ubi->image_seq)); + } else { + ubi_err(ubi, "MTD device is not UBI-formatted and possibly contains non-UBI data - refusing it"); + return -EINVAL; + } + + } + + return 0; +} + +/** + * destroy_av - free volume attaching information. + * @av: volume attaching information + * @ai: attaching information + * + * This function destroys the volume attaching information. + */ +static void destroy_av(struct ubi_attach_info *ai, struct ubi_ainf_volume *av) +{ + struct ubi_ainf_peb *aeb; + struct rb_node *this = av->root.rb_node; + + while (this) { + if (this->rb_left) + this = this->rb_left; + else if (this->rb_right) + this = this->rb_right; + else { + aeb = rb_entry(this, struct ubi_ainf_peb, u.rb); + this = rb_parent(this); + if (this) { + if (this->rb_left == &aeb->u.rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + + kmem_cache_free(ai->aeb_slab_cache, aeb); + } + } + kfree(av); +} + +/** + * destroy_ai - destroy attaching information. + * @ai: attaching information + */ +static void destroy_ai(struct ubi_attach_info *ai) +{ + struct ubi_ainf_peb *aeb, *aeb_tmp; + struct ubi_ainf_volume *av; + struct rb_node *rb; + + list_for_each_entry_safe(aeb, aeb_tmp, &ai->alien, u.list) { + list_del(&aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, aeb); + } + list_for_each_entry_safe(aeb, aeb_tmp, &ai->erase, u.list) { + list_del(&aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, aeb); + } + list_for_each_entry_safe(aeb, aeb_tmp, &ai->corr, u.list) { + list_del(&aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, aeb); + } + list_for_each_entry_safe(aeb, aeb_tmp, &ai->free, u.list) { + list_del(&aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, aeb); + } + + /* Destroy the volume RB-tree */ + rb = ai->volumes.rb_node; + while (rb) { + if (rb->rb_left) + rb = rb->rb_left; + else if (rb->rb_right) + rb = rb->rb_right; + else { + av = rb_entry(rb, struct ubi_ainf_volume, rb); + + rb = rb_parent(rb); + if (rb) { + if (rb->rb_left == &av->rb) + rb->rb_left = NULL; + else + rb->rb_right = NULL; + } + + destroy_av(ai, av); + } + } + + if (ai->aeb_slab_cache) + kmem_cache_destroy(ai->aeb_slab_cache); + + kfree(ai); +} + +/** + * scan_all - scan entire MTD device. + * @ubi: UBI device description object + * @ai: attach info object + * @start: start scanning at this PEB + * + * This function does full scanning of an MTD device and returns complete + * information about it in form of a "struct ubi_attach_info" object. In case + * of failure, an error code is returned. + */ +static int scan_all(struct ubi_device *ubi, struct ubi_attach_info *ai, + int start) +{ + int err, pnum; + struct rb_node *rb1, *rb2; + struct ubi_ainf_volume *av; + struct ubi_ainf_peb *aeb; + + err = -ENOMEM; + + ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ech) + return err; + + vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); + if (!vidh) + goto out_ech; + + for (pnum = start; pnum < ubi->peb_count; pnum++) { + cond_resched(); + + dbg_gen("process PEB %d", pnum); + err = scan_peb(ubi, ai, pnum, NULL, NULL); + if (err < 0) + goto out_vidh; + } + + ubi_msg(ubi, "scanning is finished"); + + /* Calculate mean erase counter */ + if (ai->ec_count) + ai->mean_ec = div_u64(ai->ec_sum, ai->ec_count); + + err = late_analysis(ubi, ai); + if (err) + goto out_vidh; + + /* + * In case of unknown erase counter we use the mean erase counter + * value. + */ + ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) { + ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) + if (aeb->ec == UBI_UNKNOWN) + aeb->ec = ai->mean_ec; + } + + list_for_each_entry(aeb, &ai->free, u.list) { + if (aeb->ec == UBI_UNKNOWN) + aeb->ec = ai->mean_ec; + } + + list_for_each_entry(aeb, &ai->corr, u.list) + if (aeb->ec == UBI_UNKNOWN) + aeb->ec = ai->mean_ec; + + list_for_each_entry(aeb, &ai->erase, u.list) + if (aeb->ec == UBI_UNKNOWN) + aeb->ec = ai->mean_ec; + + err = self_check_ai(ubi, ai); + if (err) + goto out_vidh; + + ubi_free_vid_hdr(ubi, vidh); + kfree(ech); + + return 0; + +out_vidh: + ubi_free_vid_hdr(ubi, vidh); +out_ech: + kfree(ech); + return err; +} + +static struct ubi_attach_info *alloc_ai(void) +{ + struct ubi_attach_info *ai; + + ai = kzalloc(sizeof(struct ubi_attach_info), GFP_KERNEL); + if (!ai) + return ai; + + INIT_LIST_HEAD(&ai->corr); + INIT_LIST_HEAD(&ai->free); + INIT_LIST_HEAD(&ai->erase); + INIT_LIST_HEAD(&ai->alien); + ai->volumes = RB_ROOT; + ai->aeb_slab_cache = kmem_cache_create("ubi_aeb_slab_cache", + sizeof(struct ubi_ainf_peb), + 0, 0, NULL); + if (!ai->aeb_slab_cache) { + kfree(ai); + ai = NULL; + } + + return ai; +} + +#ifdef CONFIG_MTD_UBI_FASTMAP + +/** + * scan_fastmap - try to find a fastmap and attach from it. + * @ubi: UBI device description object + * @ai: attach info object + * + * Returns 0 on success, negative return values indicate an internal + * error. + * UBI_NO_FASTMAP denotes that no fastmap was found. + * UBI_BAD_FASTMAP denotes that the found fastmap was invalid. + */ +static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info **ai) +{ + int err, pnum, fm_anchor = -1; + unsigned long long max_sqnum = 0; + + err = -ENOMEM; + + ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ech) + goto out; + + vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); + if (!vidh) + goto out_ech; + + for (pnum = 0; pnum < UBI_FM_MAX_START; pnum++) { + int vol_id = -1; + unsigned long long sqnum = -1; + cond_resched(); + + dbg_gen("process PEB %d", pnum); + err = scan_peb(ubi, *ai, pnum, &vol_id, &sqnum); + if (err < 0) + goto out_vidh; + + if (vol_id == UBI_FM_SB_VOLUME_ID && sqnum > max_sqnum) { + max_sqnum = sqnum; + fm_anchor = pnum; + } + } + + ubi_free_vid_hdr(ubi, vidh); + kfree(ech); + + if (fm_anchor < 0) + return UBI_NO_FASTMAP; + + destroy_ai(*ai); + *ai = alloc_ai(); + if (!*ai) + return -ENOMEM; + + return ubi_scan_fastmap(ubi, *ai, fm_anchor); + +out_vidh: + ubi_free_vid_hdr(ubi, vidh); +out_ech: + kfree(ech); +out: + return err; +} + +#endif + +/** + * ubi_attach - attach an MTD device. + * @ubi: UBI device descriptor + * @force_scan: if set to non-zero attach by scanning + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +int ubi_attach(struct ubi_device *ubi, int force_scan) +{ + int err; + struct ubi_attach_info *ai; + + ai = alloc_ai(); + if (!ai) + return -ENOMEM; + +#ifdef CONFIG_MTD_UBI_FASTMAP + /* On small flash devices we disable fastmap in any case. */ + if ((int)mtd_div_by_eb(ubi->mtd->size, ubi->mtd) <= UBI_FM_MAX_START) { + ubi->fm_disabled = 1; + force_scan = 1; + } + + if (force_scan) + err = scan_all(ubi, ai, 0); + else { + err = scan_fast(ubi, &ai); + if (err > 0 || mtd_is_eccerr(err)) { + if (err != UBI_NO_FASTMAP) { + destroy_ai(ai); + ai = alloc_ai(); + if (!ai) + return -ENOMEM; + + err = scan_all(ubi, ai, 0); + } else { + err = scan_all(ubi, ai, UBI_FM_MAX_START); + } + } + } +#else + err = scan_all(ubi, ai, 0); +#endif + if (err) + goto out_ai; + + ubi->bad_peb_count = ai->bad_peb_count; + ubi->good_peb_count = ubi->peb_count - ubi->bad_peb_count; + ubi->corr_peb_count = ai->corr_peb_count; + ubi->max_ec = ai->max_ec; + ubi->mean_ec = ai->mean_ec; + dbg_gen("max. sequence number: %llu", ai->max_sqnum); + + err = ubi_read_volume_table(ubi, ai); + if (err) + goto out_ai; + + err = ubi_wl_init(ubi, ai); + if (err) + goto out_vtbl; + + err = ubi_eba_init(ubi, ai); + if (err) + goto out_wl; + +#ifdef CONFIG_MTD_UBI_FASTMAP + if (ubi->fm && ubi_dbg_chk_fastmap(ubi)) { + struct ubi_attach_info *scan_ai; + + scan_ai = alloc_ai(); + if (!scan_ai) { + err = -ENOMEM; + goto out_wl; + } + + err = scan_all(ubi, scan_ai, 0); + if (err) { + destroy_ai(scan_ai); + goto out_wl; + } + + err = self_check_eba(ubi, ai, scan_ai); + destroy_ai(scan_ai); + + if (err) + goto out_wl; + } +#endif + + destroy_ai(ai); + return 0; + +out_wl: + ubi_wl_close(ubi); +out_vtbl: + ubi_free_internal_volumes(ubi); + vfree(ubi->vtbl); +out_ai: + destroy_ai(ai); + return err; +} + +/** + * self_check_ai - check the attaching information. + * @ubi: UBI device description object + * @ai: attaching information + * + * This function returns zero if the attaching information is all right, and a + * negative error code if not or if an error occurred. + */ +static int self_check_ai(struct ubi_device *ubi, struct ubi_attach_info *ai) +{ + int pnum, err, vols_found = 0; + struct rb_node *rb1, *rb2; + struct ubi_ainf_volume *av; + struct ubi_ainf_peb *aeb, *last_aeb; + uint8_t *buf; + + if (!ubi_dbg_chk_gen(ubi)) + return 0; + + /* + * At first, check that attaching information is OK. + */ + ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) { + int leb_count = 0; + + cond_resched(); + + vols_found += 1; + + if (ai->is_empty) { + ubi_err(ubi, "bad is_empty flag"); + goto bad_av; + } + + if (av->vol_id < 0 || av->highest_lnum < 0 || + av->leb_count < 0 || av->vol_type < 0 || av->used_ebs < 0 || + av->data_pad < 0 || av->last_data_size < 0) { + ubi_err(ubi, "negative values"); + goto bad_av; + } + + if (av->vol_id >= UBI_MAX_VOLUMES && + av->vol_id < UBI_INTERNAL_VOL_START) { + ubi_err(ubi, "bad vol_id"); + goto bad_av; + } + + if (av->vol_id > ai->highest_vol_id) { + ubi_err(ubi, "highest_vol_id is %d, but vol_id %d is there", + ai->highest_vol_id, av->vol_id); + goto out; + } + + if (av->vol_type != UBI_DYNAMIC_VOLUME && + av->vol_type != UBI_STATIC_VOLUME) { + ubi_err(ubi, "bad vol_type"); + goto bad_av; + } + + if (av->data_pad > ubi->leb_size / 2) { + ubi_err(ubi, "bad data_pad"); + goto bad_av; + } + + last_aeb = NULL; + ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) { + cond_resched(); + + last_aeb = aeb; + leb_count += 1; + + if (aeb->pnum < 0 || aeb->ec < 0) { + ubi_err(ubi, "negative values"); + goto bad_aeb; + } + + if (aeb->ec < ai->min_ec) { + ubi_err(ubi, "bad ai->min_ec (%d), %d found", + ai->min_ec, aeb->ec); + goto bad_aeb; + } + + if (aeb->ec > ai->max_ec) { + ubi_err(ubi, "bad ai->max_ec (%d), %d found", + ai->max_ec, aeb->ec); + goto bad_aeb; + } + + if (aeb->pnum >= ubi->peb_count) { + ubi_err(ubi, "too high PEB number %d, total PEBs %d", + aeb->pnum, ubi->peb_count); + goto bad_aeb; + } + + if (av->vol_type == UBI_STATIC_VOLUME) { + if (aeb->lnum >= av->used_ebs) { + ubi_err(ubi, "bad lnum or used_ebs"); + goto bad_aeb; + } + } else { + if (av->used_ebs != 0) { + ubi_err(ubi, "non-zero used_ebs"); + goto bad_aeb; + } + } + + if (aeb->lnum > av->highest_lnum) { + ubi_err(ubi, "incorrect highest_lnum or lnum"); + goto bad_aeb; + } + } + + if (av->leb_count != leb_count) { + ubi_err(ubi, "bad leb_count, %d objects in the tree", + leb_count); + goto bad_av; + } + + if (!last_aeb) + continue; + + aeb = last_aeb; + + if (aeb->lnum != av->highest_lnum) { + ubi_err(ubi, "bad highest_lnum"); + goto bad_aeb; + } + } + + if (vols_found != ai->vols_found) { + ubi_err(ubi, "bad ai->vols_found %d, should be %d", + ai->vols_found, vols_found); + goto out; + } + + /* Check that attaching information is correct */ + ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) { + last_aeb = NULL; + ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) { + int vol_type; + + cond_resched(); + + last_aeb = aeb; + + err = ubi_io_read_vid_hdr(ubi, aeb->pnum, vidh, 1); + if (err && err != UBI_IO_BITFLIPS) { + ubi_err(ubi, "VID header is not OK (%d)", + err); + if (err > 0) + err = -EIO; + return err; + } + + vol_type = vidh->vol_type == UBI_VID_DYNAMIC ? + UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME; + if (av->vol_type != vol_type) { + ubi_err(ubi, "bad vol_type"); + goto bad_vid_hdr; + } + + if (aeb->sqnum != be64_to_cpu(vidh->sqnum)) { + ubi_err(ubi, "bad sqnum %llu", aeb->sqnum); + goto bad_vid_hdr; + } + + if (av->vol_id != be32_to_cpu(vidh->vol_id)) { + ubi_err(ubi, "bad vol_id %d", av->vol_id); + goto bad_vid_hdr; + } + + if (av->compat != vidh->compat) { + ubi_err(ubi, "bad compat %d", vidh->compat); + goto bad_vid_hdr; + } + + if (aeb->lnum != be32_to_cpu(vidh->lnum)) { + ubi_err(ubi, "bad lnum %d", aeb->lnum); + goto bad_vid_hdr; + } + + if (av->used_ebs != be32_to_cpu(vidh->used_ebs)) { + ubi_err(ubi, "bad used_ebs %d", av->used_ebs); + goto bad_vid_hdr; + } + + if (av->data_pad != be32_to_cpu(vidh->data_pad)) { + ubi_err(ubi, "bad data_pad %d", av->data_pad); + goto bad_vid_hdr; + } + } + + if (!last_aeb) + continue; + + if (av->highest_lnum != be32_to_cpu(vidh->lnum)) { + ubi_err(ubi, "bad highest_lnum %d", av->highest_lnum); + goto bad_vid_hdr; + } + + if (av->last_data_size != be32_to_cpu(vidh->data_size)) { + ubi_err(ubi, "bad last_data_size %d", + av->last_data_size); + goto bad_vid_hdr; + } + } + + /* + * Make sure that all the physical eraseblocks are in one of the lists + * or trees. + */ + buf = kzalloc(ubi->peb_count, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + for (pnum = 0; pnum < ubi->peb_count; pnum++) { + err = ubi_io_is_bad(ubi, pnum); + if (err < 0) { + kfree(buf); + return err; + } else if (err) + buf[pnum] = 1; + } + + ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) + ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) + buf[aeb->pnum] = 1; + + list_for_each_entry(aeb, &ai->free, u.list) + buf[aeb->pnum] = 1; + + list_for_each_entry(aeb, &ai->corr, u.list) + buf[aeb->pnum] = 1; + + list_for_each_entry(aeb, &ai->erase, u.list) + buf[aeb->pnum] = 1; + + list_for_each_entry(aeb, &ai->alien, u.list) + buf[aeb->pnum] = 1; + + err = 0; + for (pnum = 0; pnum < ubi->peb_count; pnum++) + if (!buf[pnum]) { + ubi_err(ubi, "PEB %d is not referred", pnum); + err = 1; + } + + kfree(buf); + if (err) + goto out; + return 0; + +bad_aeb: + ubi_err(ubi, "bad attaching information about LEB %d", aeb->lnum); + ubi_dump_aeb(aeb, 0); + ubi_dump_av(av); + goto out; + +bad_av: + ubi_err(ubi, "bad attaching information about volume %d", av->vol_id); + ubi_dump_av(av); + goto out; + +bad_vid_hdr: + ubi_err(ubi, "bad attaching information about volume %d", av->vol_id); + ubi_dump_av(av); + ubi_dump_vid_hdr(vidh); + +out: + dump_stack(); + return -EINVAL; +} diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c new file mode 100644 index 000000000..c9eb78f10 --- /dev/null +++ b/drivers/mtd/ubi/block.c @@ -0,0 +1,669 @@ +/* + * Copyright (c) 2014 Ezequiel Garcia + * Copyright (c) 2011 Free Electrons + * + * Driver parameter handling strongly based on drivers/mtd/ubi/build.c + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2007 + * Authors: Artem Bityutskiy, Frank Haverkamp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + */ + +/* + * Read-only block devices on top of UBI volumes + * + * A simple implementation to allow a block device to be layered on top of a + * UBI volume. The implementation is provided by creating a static 1-to-1 + * mapping between the block device and the UBI volume. + * + * The addressed byte is obtained from the addressed block sector, which is + * mapped linearly into the corresponding LEB: + * + * LEB number = addressed byte / LEB size + * + * This feature is compiled in the UBI core, and adds a 'block' parameter + * to allow early creation of block devices on top of UBI volumes. Runtime + * block creation/removal for UBI volumes is provided through two UBI ioctls: + * UBI_IOCVOLCRBLK and UBI_IOCVOLRMBLK. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/mtd/ubi.h> +#include <linux/workqueue.h> +#include <linux/blkdev.h> +#include <linux/blk-mq.h> +#include <linux/hdreg.h> +#include <linux/scatterlist.h> +#include <asm/div64.h> + +#include "ubi-media.h" +#include "ubi.h" + +/* Maximum number of supported devices */ +#define UBIBLOCK_MAX_DEVICES 32 + +/* Maximum length of the 'block=' parameter */ +#define UBIBLOCK_PARAM_LEN 63 + +/* Maximum number of comma-separated items in the 'block=' parameter */ +#define UBIBLOCK_PARAM_COUNT 2 + +struct ubiblock_param { + int ubi_num; + int vol_id; + char name[UBIBLOCK_PARAM_LEN+1]; +}; + +struct ubiblock_pdu { + struct work_struct work; + struct ubi_sgl usgl; +}; + +/* Numbers of elements set in the @ubiblock_param array */ +static int ubiblock_devs __initdata; + +/* MTD devices specification parameters */ +static struct ubiblock_param ubiblock_param[UBIBLOCK_MAX_DEVICES] __initdata; + +struct ubiblock { + struct ubi_volume_desc *desc; + int ubi_num; + int vol_id; + int refcnt; + int leb_size; + + struct gendisk *gd; + struct request_queue *rq; + + struct workqueue_struct *wq; + + struct mutex dev_mutex; + struct list_head list; + struct blk_mq_tag_set tag_set; +}; + +/* Linked list of all ubiblock instances */ +static LIST_HEAD(ubiblock_devices); +static DEFINE_MUTEX(devices_mutex); +static int ubiblock_major; + +static int __init ubiblock_set_param(const char *val, + const struct kernel_param *kp) +{ + int i, ret; + size_t len; + struct ubiblock_param *param; + char buf[UBIBLOCK_PARAM_LEN]; + char *pbuf = &buf[0]; + char *tokens[UBIBLOCK_PARAM_COUNT]; + + if (!val) + return -EINVAL; + + len = strnlen(val, UBIBLOCK_PARAM_LEN); + if (len == 0) { + pr_warn("UBI: block: empty 'block=' parameter - ignored\n"); + return 0; + } + + if (len == UBIBLOCK_PARAM_LEN) { + pr_err("UBI: block: parameter \"%s\" is too long, max. is %d\n", + val, UBIBLOCK_PARAM_LEN); + return -EINVAL; + } + + strcpy(buf, val); + + /* Get rid of the final newline */ + if (buf[len - 1] == '\n') + buf[len - 1] = '\0'; + + for (i = 0; i < UBIBLOCK_PARAM_COUNT; i++) + tokens[i] = strsep(&pbuf, ","); + + param = &ubiblock_param[ubiblock_devs]; + if (tokens[1]) { + /* Two parameters: can be 'ubi, vol_id' or 'ubi, vol_name' */ + ret = kstrtoint(tokens[0], 10, ¶m->ubi_num); + if (ret < 0) + return -EINVAL; + + /* Second param can be a number or a name */ + ret = kstrtoint(tokens[1], 10, ¶m->vol_id); + if (ret < 0) { + param->vol_id = -1; + strcpy(param->name, tokens[1]); + } + + } else { + /* One parameter: must be device path */ + strcpy(param->name, tokens[0]); + param->ubi_num = -1; + param->vol_id = -1; + } + + ubiblock_devs++; + + return 0; +} + +static struct kernel_param_ops ubiblock_param_ops = { + .set = ubiblock_set_param, +}; +module_param_cb(block, &ubiblock_param_ops, NULL, 0); +MODULE_PARM_DESC(block, "Attach block devices to UBI volumes. Parameter format: block=<path|dev,num|dev,name>.\n" + "Multiple \"block\" parameters may be specified.\n" + "UBI volumes may be specified by their number, name, or path to the device node.\n" + "Examples\n" + "Using the UBI volume path:\n" + "ubi.block=/dev/ubi0_0\n" + "Using the UBI device, and the volume name:\n" + "ubi.block=0,rootfs\n" + "Using both UBI device number and UBI volume number:\n" + "ubi.block=0,0\n"); + +static struct ubiblock *find_dev_nolock(int ubi_num, int vol_id) +{ + struct ubiblock *dev; + + list_for_each_entry(dev, &ubiblock_devices, list) + if (dev->ubi_num == ubi_num && dev->vol_id == vol_id) + return dev; + return NULL; +} + +static int ubiblock_read(struct ubiblock_pdu *pdu) +{ + int ret, leb, offset, bytes_left, to_read; + u64 pos; + struct request *req = blk_mq_rq_from_pdu(pdu); + struct ubiblock *dev = req->q->queuedata; + + to_read = blk_rq_bytes(req); + pos = blk_rq_pos(req) << 9; + + /* Get LEB:offset address to read from */ + offset = do_div(pos, dev->leb_size); + leb = pos; + bytes_left = to_read; + + while (bytes_left) { + /* + * We can only read one LEB at a time. Therefore if the read + * length is larger than one LEB size, we split the operation. + */ + if (offset + to_read > dev->leb_size) + to_read = dev->leb_size - offset; + + ret = ubi_read_sg(dev->desc, leb, &pdu->usgl, offset, to_read); + if (ret < 0) + return ret; + + bytes_left -= to_read; + to_read = bytes_left; + leb += 1; + offset = 0; + } + return 0; +} + +static int ubiblock_open(struct block_device *bdev, fmode_t mode) +{ + struct ubiblock *dev = bdev->bd_disk->private_data; + int ret; + + mutex_lock(&dev->dev_mutex); + if (dev->refcnt > 0) { + /* + * The volume is already open, just increase the reference + * counter. + */ + goto out_done; + } + + /* + * We want users to be aware they should only mount us as read-only. + * It's just a paranoid check, as write requests will get rejected + * in any case. + */ + if (mode & FMODE_WRITE) { + ret = -EPERM; + goto out_unlock; + } + + dev->desc = ubi_open_volume(dev->ubi_num, dev->vol_id, UBI_READONLY); + if (IS_ERR(dev->desc)) { + dev_err(disk_to_dev(dev->gd), "failed to open ubi volume %d_%d", + dev->ubi_num, dev->vol_id); + ret = PTR_ERR(dev->desc); + dev->desc = NULL; + goto out_unlock; + } + +out_done: + dev->refcnt++; + mutex_unlock(&dev->dev_mutex); + return 0; + +out_unlock: + mutex_unlock(&dev->dev_mutex); + return ret; +} + +static void ubiblock_release(struct gendisk *gd, fmode_t mode) +{ + struct ubiblock *dev = gd->private_data; + + mutex_lock(&dev->dev_mutex); + dev->refcnt--; + if (dev->refcnt == 0) { + ubi_close_volume(dev->desc); + dev->desc = NULL; + } + mutex_unlock(&dev->dev_mutex); +} + +static int ubiblock_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + /* Some tools might require this information */ + geo->heads = 1; + geo->cylinders = 1; + geo->sectors = get_capacity(bdev->bd_disk); + geo->start = 0; + return 0; +} + +static const struct block_device_operations ubiblock_ops = { + .owner = THIS_MODULE, + .open = ubiblock_open, + .release = ubiblock_release, + .getgeo = ubiblock_getgeo, +}; + +static void ubiblock_do_work(struct work_struct *work) +{ + int ret; + struct ubiblock_pdu *pdu = container_of(work, struct ubiblock_pdu, work); + struct request *req = blk_mq_rq_from_pdu(pdu); + + blk_mq_start_request(req); + + /* + * It is safe to ignore the return value of blk_rq_map_sg() because + * the number of sg entries is limited to UBI_MAX_SG_COUNT + * and ubi_read_sg() will check that limit. + */ + blk_rq_map_sg(req->q, req, pdu->usgl.sg); + + ret = ubiblock_read(pdu); + rq_flush_dcache_pages(req); + + blk_mq_end_request(req, ret); +} + +static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct request *req = bd->rq; + struct ubiblock *dev = hctx->queue->queuedata; + struct ubiblock_pdu *pdu = blk_mq_rq_to_pdu(req); + + if (req->cmd_type != REQ_TYPE_FS) + return BLK_MQ_RQ_QUEUE_ERROR; + + if (rq_data_dir(req) != READ) + return BLK_MQ_RQ_QUEUE_ERROR; /* Write not implemented */ + + ubi_sgl_init(&pdu->usgl); + queue_work(dev->wq, &pdu->work); + + return BLK_MQ_RQ_QUEUE_OK; +} + +static int ubiblock_init_request(void *data, struct request *req, + unsigned int hctx_idx, + unsigned int request_idx, + unsigned int numa_node) +{ + struct ubiblock_pdu *pdu = blk_mq_rq_to_pdu(req); + + sg_init_table(pdu->usgl.sg, UBI_MAX_SG_COUNT); + INIT_WORK(&pdu->work, ubiblock_do_work); + + return 0; +} + +static struct blk_mq_ops ubiblock_mq_ops = { + .queue_rq = ubiblock_queue_rq, + .init_request = ubiblock_init_request, + .map_queue = blk_mq_map_queue, +}; + +int ubiblock_create(struct ubi_volume_info *vi) +{ + struct ubiblock *dev; + struct gendisk *gd; + u64 disk_capacity = vi->used_bytes >> 9; + int ret; + + if ((sector_t)disk_capacity != disk_capacity) + return -EFBIG; + /* Check that the volume isn't already handled */ + mutex_lock(&devices_mutex); + if (find_dev_nolock(vi->ubi_num, vi->vol_id)) { + mutex_unlock(&devices_mutex); + return -EEXIST; + } + mutex_unlock(&devices_mutex); + + dev = kzalloc(sizeof(struct ubiblock), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + mutex_init(&dev->dev_mutex); + + dev->ubi_num = vi->ubi_num; + dev->vol_id = vi->vol_id; + dev->leb_size = vi->usable_leb_size; + + /* Initialize the gendisk of this ubiblock device */ + gd = alloc_disk(1); + if (!gd) { + pr_err("UBI: block: alloc_disk failed"); + ret = -ENODEV; + goto out_free_dev; + } + + gd->fops = &ubiblock_ops; + gd->major = ubiblock_major; + gd->first_minor = dev->ubi_num * UBI_MAX_VOLUMES + dev->vol_id; + gd->private_data = dev; + sprintf(gd->disk_name, "ubiblock%d_%d", dev->ubi_num, dev->vol_id); + set_capacity(gd, disk_capacity); + dev->gd = gd; + + dev->tag_set.ops = &ubiblock_mq_ops; + dev->tag_set.queue_depth = 64; + dev->tag_set.numa_node = NUMA_NO_NODE; + dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + dev->tag_set.cmd_size = sizeof(struct ubiblock_pdu); + dev->tag_set.driver_data = dev; + dev->tag_set.nr_hw_queues = 1; + + ret = blk_mq_alloc_tag_set(&dev->tag_set); + if (ret) { + dev_err(disk_to_dev(dev->gd), "blk_mq_alloc_tag_set failed"); + goto out_put_disk; + } + + dev->rq = blk_mq_init_queue(&dev->tag_set); + if (IS_ERR(dev->rq)) { + dev_err(disk_to_dev(gd), "blk_mq_init_queue failed"); + ret = PTR_ERR(dev->rq); + goto out_free_tags; + } + blk_queue_max_segments(dev->rq, UBI_MAX_SG_COUNT); + + dev->rq->queuedata = dev; + dev->gd->queue = dev->rq; + + /* + * Create one workqueue per volume (per registered block device). + * Rembember workqueues are cheap, they're not threads. + */ + dev->wq = alloc_workqueue("%s", 0, 0, gd->disk_name); + if (!dev->wq) { + ret = -ENOMEM; + goto out_free_queue; + } + + mutex_lock(&devices_mutex); + list_add_tail(&dev->list, &ubiblock_devices); + mutex_unlock(&devices_mutex); + + /* Must be the last step: anyone can call file ops from now on */ + add_disk(dev->gd); + dev_info(disk_to_dev(dev->gd), "created from ubi%d:%d(%s)", + dev->ubi_num, dev->vol_id, vi->name); + return 0; + +out_free_queue: + blk_cleanup_queue(dev->rq); +out_free_tags: + blk_mq_free_tag_set(&dev->tag_set); +out_put_disk: + put_disk(dev->gd); +out_free_dev: + kfree(dev); + + return ret; +} + +static void ubiblock_cleanup(struct ubiblock *dev) +{ + /* Stop new requests to arrive */ + del_gendisk(dev->gd); + /* Flush pending work */ + destroy_workqueue(dev->wq); + /* Finally destroy the blk queue */ + blk_cleanup_queue(dev->rq); + blk_mq_free_tag_set(&dev->tag_set); + dev_info(disk_to_dev(dev->gd), "released"); + put_disk(dev->gd); +} + +int ubiblock_remove(struct ubi_volume_info *vi) +{ + struct ubiblock *dev; + + mutex_lock(&devices_mutex); + dev = find_dev_nolock(vi->ubi_num, vi->vol_id); + if (!dev) { + mutex_unlock(&devices_mutex); + return -ENODEV; + } + + /* Found a device, let's lock it so we can check if it's busy */ + mutex_lock(&dev->dev_mutex); + if (dev->refcnt > 0) { + mutex_unlock(&dev->dev_mutex); + mutex_unlock(&devices_mutex); + return -EBUSY; + } + + /* Remove from device list */ + list_del(&dev->list); + mutex_unlock(&devices_mutex); + + ubiblock_cleanup(dev); + mutex_unlock(&dev->dev_mutex); + kfree(dev); + return 0; +} + +static int ubiblock_resize(struct ubi_volume_info *vi) +{ + struct ubiblock *dev; + u64 disk_capacity = vi->used_bytes >> 9; + + /* + * Need to lock the device list until we stop using the device, + * otherwise the device struct might get released in + * 'ubiblock_remove()'. + */ + mutex_lock(&devices_mutex); + dev = find_dev_nolock(vi->ubi_num, vi->vol_id); + if (!dev) { + mutex_unlock(&devices_mutex); + return -ENODEV; + } + if ((sector_t)disk_capacity != disk_capacity) { + mutex_unlock(&devices_mutex); + dev_warn(disk_to_dev(dev->gd), "the volume is too big (%d LEBs), cannot resize", + vi->size); + return -EFBIG; + } + + mutex_lock(&dev->dev_mutex); + + if (get_capacity(dev->gd) != disk_capacity) { + set_capacity(dev->gd, disk_capacity); + dev_info(disk_to_dev(dev->gd), "resized to %lld bytes", + vi->used_bytes); + } + mutex_unlock(&dev->dev_mutex); + mutex_unlock(&devices_mutex); + return 0; +} + +static int ubiblock_notify(struct notifier_block *nb, + unsigned long notification_type, void *ns_ptr) +{ + struct ubi_notification *nt = ns_ptr; + + switch (notification_type) { + case UBI_VOLUME_ADDED: + /* + * We want to enforce explicit block device creation for + * volumes, so when a volume is added we do nothing. + */ + break; + case UBI_VOLUME_REMOVED: + ubiblock_remove(&nt->vi); + break; + case UBI_VOLUME_RESIZED: + ubiblock_resize(&nt->vi); + break; + case UBI_VOLUME_UPDATED: + /* + * If the volume is static, a content update might mean the + * size (i.e. used_bytes) was also changed. + */ + if (nt->vi.vol_type == UBI_STATIC_VOLUME) + ubiblock_resize(&nt->vi); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block ubiblock_notifier = { + .notifier_call = ubiblock_notify, +}; + +static struct ubi_volume_desc * __init +open_volume_desc(const char *name, int ubi_num, int vol_id) +{ + if (ubi_num == -1) + /* No ubi num, name must be a vol device path */ + return ubi_open_volume_path(name, UBI_READONLY); + else if (vol_id == -1) + /* No vol_id, must be vol_name */ + return ubi_open_volume_nm(ubi_num, name, UBI_READONLY); + else + return ubi_open_volume(ubi_num, vol_id, UBI_READONLY); +} + +static void __init ubiblock_create_from_param(void) +{ + int i, ret = 0; + struct ubiblock_param *p; + struct ubi_volume_desc *desc; + struct ubi_volume_info vi; + + /* + * If there is an error creating one of the ubiblocks, continue on to + * create the following ubiblocks. This helps in a circumstance where + * the kernel command-line specifies multiple block devices and some + * may be broken, but we still want the working ones to come up. + */ + for (i = 0; i < ubiblock_devs; i++) { + p = &ubiblock_param[i]; + + desc = open_volume_desc(p->name, p->ubi_num, p->vol_id); + if (IS_ERR(desc)) { + pr_err( + "UBI: block: can't open volume on ubi%d_%d, err=%ld", + p->ubi_num, p->vol_id, PTR_ERR(desc)); + continue; + } + + ubi_get_volume_info(desc, &vi); + ubi_close_volume(desc); + + ret = ubiblock_create(&vi); + if (ret) { + pr_err( + "UBI: block: can't add '%s' volume on ubi%d_%d, err=%d", + vi.name, p->ubi_num, p->vol_id, ret); + continue; + } + } +} + +static void ubiblock_remove_all(void) +{ + struct ubiblock *next; + struct ubiblock *dev; + + list_for_each_entry_safe(dev, next, &ubiblock_devices, list) { + /* The module is being forcefully removed */ + WARN_ON(dev->desc); + /* Remove from device list */ + list_del(&dev->list); + ubiblock_cleanup(dev); + kfree(dev); + } +} + +int __init ubiblock_init(void) +{ + int ret; + + ubiblock_major = register_blkdev(0, "ubiblock"); + if (ubiblock_major < 0) + return ubiblock_major; + + /* + * Attach block devices from 'block=' module param. + * Even if one block device in the param list fails to come up, + * still allow the module to load and leave any others up. + */ + ubiblock_create_from_param(); + + /* + * Block devices are only created upon user requests, so we ignore + * existing volumes. + */ + ret = ubi_register_volume_notifier(&ubiblock_notifier, 1); + if (ret) + goto err_unreg; + return 0; + +err_unreg: + unregister_blkdev(ubiblock_major, "ubiblock"); + ubiblock_remove_all(); + return ret; +} + +void __exit ubiblock_exit(void) +{ + ubi_unregister_volume_notifier(&ubiblock_notifier); + ubiblock_remove_all(); + unregister_blkdev(ubiblock_major, "ubiblock"); +} diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c new file mode 100644 index 000000000..b7f824d5e --- /dev/null +++ b/drivers/mtd/ubi/build.c @@ -0,0 +1,1515 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём), + * Frank Haverkamp + */ + +/* + * This file includes UBI initialization and building of UBI devices. + * + * When UBI is initialized, it attaches all the MTD devices specified as the + * module load parameters or the kernel boot parameters. If MTD devices were + * specified, UBI does not attach any MTD device, but it is possible to do + * later using the "UBI control device". + */ + +#include <linux/err.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/stringify.h> +#include <linux/namei.h> +#include <linux/stat.h> +#include <linux/miscdevice.h> +#include <linux/mtd/partitions.h> +#include <linux/log2.h> +#include <linux/kthread.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/major.h> +#include "ubi.h" + +/* Maximum length of the 'mtd=' parameter */ +#define MTD_PARAM_LEN_MAX 64 + +/* Maximum number of comma-separated items in the 'mtd=' parameter */ +#define MTD_PARAM_MAX_COUNT 4 + +/* Maximum value for the number of bad PEBs per 1024 PEBs */ +#define MAX_MTD_UBI_BEB_LIMIT 768 + +#ifdef CONFIG_MTD_UBI_MODULE +#define ubi_is_module() 1 +#else +#define ubi_is_module() 0 +#endif + +/** + * struct mtd_dev_param - MTD device parameter description data structure. + * @name: MTD character device node path, MTD device name, or MTD device number + * string + * @vid_hdr_offs: VID header offset + * @max_beb_per1024: maximum expected number of bad PEBs per 1024 PEBs + */ +struct mtd_dev_param { + char name[MTD_PARAM_LEN_MAX]; + int ubi_num; + int vid_hdr_offs; + int max_beb_per1024; +}; + +/* Numbers of elements set in the @mtd_dev_param array */ +static int __initdata mtd_devs; + +/* MTD devices specification parameters */ +static struct mtd_dev_param __initdata mtd_dev_param[UBI_MAX_DEVICES]; +#ifdef CONFIG_MTD_UBI_FASTMAP +/* UBI module parameter to enable fastmap automatically on non-fastmap images */ +static bool fm_autoconvert; +static bool fm_debug; +#endif +/* Root UBI "class" object (corresponds to '/<sysfs>/class/ubi/') */ +struct class *ubi_class; + +/* Slab cache for wear-leveling entries */ +struct kmem_cache *ubi_wl_entry_slab; + +/* UBI control character device */ +static struct miscdevice ubi_ctrl_cdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "ubi_ctrl", + .fops = &ubi_ctrl_cdev_operations, +}; + +/* All UBI devices in system */ +static struct ubi_device *ubi_devices[UBI_MAX_DEVICES]; + +/* Serializes UBI devices creations and removals */ +DEFINE_MUTEX(ubi_devices_mutex); + +/* Protects @ubi_devices and @ubi->ref_count */ +static DEFINE_SPINLOCK(ubi_devices_lock); + +/* "Show" method for files in '/<sysfs>/class/ubi/' */ +static ssize_t ubi_version_show(struct class *class, + struct class_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", UBI_VERSION); +} + +/* UBI version attribute ('/<sysfs>/class/ubi/version') */ +static struct class_attribute ubi_version = + __ATTR(version, S_IRUGO, ubi_version_show, NULL); + +static ssize_t dev_attribute_show(struct device *dev, + struct device_attribute *attr, char *buf); + +/* UBI device attributes (correspond to files in '/<sysfs>/class/ubi/ubiX') */ +static struct device_attribute dev_eraseblock_size = + __ATTR(eraseblock_size, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_avail_eraseblocks = + __ATTR(avail_eraseblocks, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_total_eraseblocks = + __ATTR(total_eraseblocks, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_volumes_count = + __ATTR(volumes_count, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_max_ec = + __ATTR(max_ec, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_reserved_for_bad = + __ATTR(reserved_for_bad, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_bad_peb_count = + __ATTR(bad_peb_count, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_max_vol_count = + __ATTR(max_vol_count, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_min_io_size = + __ATTR(min_io_size, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_bgt_enabled = + __ATTR(bgt_enabled, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_mtd_num = + __ATTR(mtd_num, S_IRUGO, dev_attribute_show, NULL); + +/** + * ubi_volume_notify - send a volume change notification. + * @ubi: UBI device description object + * @vol: volume description object of the changed volume + * @ntype: notification type to send (%UBI_VOLUME_ADDED, etc) + * + * This is a helper function which notifies all subscribers about a volume + * change event (creation, removal, re-sizing, re-naming, updating). Returns + * zero in case of success and a negative error code in case of failure. + */ +int ubi_volume_notify(struct ubi_device *ubi, struct ubi_volume *vol, int ntype) +{ + int ret; + struct ubi_notification nt; + + ubi_do_get_device_info(ubi, &nt.di); + ubi_do_get_volume_info(ubi, vol, &nt.vi); + + switch (ntype) { + case UBI_VOLUME_ADDED: + case UBI_VOLUME_REMOVED: + case UBI_VOLUME_RESIZED: + case UBI_VOLUME_RENAMED: + ret = ubi_update_fastmap(ubi); + if (ret) + ubi_msg(ubi, "Unable to write a new fastmap: %i", ret); + } + + return blocking_notifier_call_chain(&ubi_notifiers, ntype, &nt); +} + +/** + * ubi_notify_all - send a notification to all volumes. + * @ubi: UBI device description object + * @ntype: notification type to send (%UBI_VOLUME_ADDED, etc) + * @nb: the notifier to call + * + * This function walks all volumes of UBI device @ubi and sends the @ntype + * notification for each volume. If @nb is %NULL, then all registered notifiers + * are called, otherwise only the @nb notifier is called. Returns the number of + * sent notifications. + */ +int ubi_notify_all(struct ubi_device *ubi, int ntype, struct notifier_block *nb) +{ + struct ubi_notification nt; + int i, count = 0; + + ubi_do_get_device_info(ubi, &nt.di); + + mutex_lock(&ubi->device_mutex); + for (i = 0; i < ubi->vtbl_slots; i++) { + /* + * Since the @ubi->device is locked, and we are not going to + * change @ubi->volumes, we do not have to lock + * @ubi->volumes_lock. + */ + if (!ubi->volumes[i]) + continue; + + ubi_do_get_volume_info(ubi, ubi->volumes[i], &nt.vi); + if (nb) + nb->notifier_call(nb, ntype, &nt); + else + blocking_notifier_call_chain(&ubi_notifiers, ntype, + &nt); + count += 1; + } + mutex_unlock(&ubi->device_mutex); + + return count; +} + +/** + * ubi_enumerate_volumes - send "add" notification for all existing volumes. + * @nb: the notifier to call + * + * This function walks all UBI devices and volumes and sends the + * %UBI_VOLUME_ADDED notification for each volume. If @nb is %NULL, then all + * registered notifiers are called, otherwise only the @nb notifier is called. + * Returns the number of sent notifications. + */ +int ubi_enumerate_volumes(struct notifier_block *nb) +{ + int i, count = 0; + + /* + * Since the @ubi_devices_mutex is locked, and we are not going to + * change @ubi_devices, we do not have to lock @ubi_devices_lock. + */ + for (i = 0; i < UBI_MAX_DEVICES; i++) { + struct ubi_device *ubi = ubi_devices[i]; + + if (!ubi) + continue; + count += ubi_notify_all(ubi, UBI_VOLUME_ADDED, nb); + } + + return count; +} + +/** + * ubi_get_device - get UBI device. + * @ubi_num: UBI device number + * + * This function returns UBI device description object for UBI device number + * @ubi_num, or %NULL if the device does not exist. This function increases the + * device reference count to prevent removal of the device. In other words, the + * device cannot be removed if its reference count is not zero. + */ +struct ubi_device *ubi_get_device(int ubi_num) +{ + struct ubi_device *ubi; + + spin_lock(&ubi_devices_lock); + ubi = ubi_devices[ubi_num]; + if (ubi) { + ubi_assert(ubi->ref_count >= 0); + ubi->ref_count += 1; + get_device(&ubi->dev); + } + spin_unlock(&ubi_devices_lock); + + return ubi; +} + +/** + * ubi_put_device - drop an UBI device reference. + * @ubi: UBI device description object + */ +void ubi_put_device(struct ubi_device *ubi) +{ + spin_lock(&ubi_devices_lock); + ubi->ref_count -= 1; + put_device(&ubi->dev); + spin_unlock(&ubi_devices_lock); +} + +/** + * ubi_get_by_major - get UBI device by character device major number. + * @major: major number + * + * This function is similar to 'ubi_get_device()', but it searches the device + * by its major number. + */ +struct ubi_device *ubi_get_by_major(int major) +{ + int i; + struct ubi_device *ubi; + + spin_lock(&ubi_devices_lock); + for (i = 0; i < UBI_MAX_DEVICES; i++) { + ubi = ubi_devices[i]; + if (ubi && MAJOR(ubi->cdev.dev) == major) { + ubi_assert(ubi->ref_count >= 0); + ubi->ref_count += 1; + get_device(&ubi->dev); + spin_unlock(&ubi_devices_lock); + return ubi; + } + } + spin_unlock(&ubi_devices_lock); + + return NULL; +} + +/** + * ubi_major2num - get UBI device number by character device major number. + * @major: major number + * + * This function searches UBI device number object by its major number. If UBI + * device was not found, this function returns -ENODEV, otherwise the UBI device + * number is returned. + */ +int ubi_major2num(int major) +{ + int i, ubi_num = -ENODEV; + + spin_lock(&ubi_devices_lock); + for (i = 0; i < UBI_MAX_DEVICES; i++) { + struct ubi_device *ubi = ubi_devices[i]; + + if (ubi && MAJOR(ubi->cdev.dev) == major) { + ubi_num = ubi->ubi_num; + break; + } + } + spin_unlock(&ubi_devices_lock); + + return ubi_num; +} + +/* "Show" method for files in '/<sysfs>/class/ubi/ubiX/' */ +static ssize_t dev_attribute_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t ret; + struct ubi_device *ubi; + + /* + * The below code looks weird, but it actually makes sense. We get the + * UBI device reference from the contained 'struct ubi_device'. But it + * is unclear if the device was removed or not yet. Indeed, if the + * device was removed before we increased its reference count, + * 'ubi_get_device()' will return -ENODEV and we fail. + * + * Remember, 'struct ubi_device' is freed in the release function, so + * we still can use 'ubi->ubi_num'. + */ + ubi = container_of(dev, struct ubi_device, dev); + ubi = ubi_get_device(ubi->ubi_num); + if (!ubi) + return -ENODEV; + + if (attr == &dev_eraseblock_size) + ret = sprintf(buf, "%d\n", ubi->leb_size); + else if (attr == &dev_avail_eraseblocks) + ret = sprintf(buf, "%d\n", ubi->avail_pebs); + else if (attr == &dev_total_eraseblocks) + ret = sprintf(buf, "%d\n", ubi->good_peb_count); + else if (attr == &dev_volumes_count) + ret = sprintf(buf, "%d\n", ubi->vol_count - UBI_INT_VOL_COUNT); + else if (attr == &dev_max_ec) + ret = sprintf(buf, "%d\n", ubi->max_ec); + else if (attr == &dev_reserved_for_bad) + ret = sprintf(buf, "%d\n", ubi->beb_rsvd_pebs); + else if (attr == &dev_bad_peb_count) + ret = sprintf(buf, "%d\n", ubi->bad_peb_count); + else if (attr == &dev_max_vol_count) + ret = sprintf(buf, "%d\n", ubi->vtbl_slots); + else if (attr == &dev_min_io_size) + ret = sprintf(buf, "%d\n", ubi->min_io_size); + else if (attr == &dev_bgt_enabled) + ret = sprintf(buf, "%d\n", ubi->thread_enabled); + else if (attr == &dev_mtd_num) + ret = sprintf(buf, "%d\n", ubi->mtd->index); + else + ret = -EINVAL; + + ubi_put_device(ubi); + return ret; +} + +static void dev_release(struct device *dev) +{ + struct ubi_device *ubi = container_of(dev, struct ubi_device, dev); + + kfree(ubi); +} + +/** + * ubi_sysfs_init - initialize sysfs for an UBI device. + * @ubi: UBI device description object + * @ref: set to %1 on exit in case of failure if a reference to @ubi->dev was + * taken + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int ubi_sysfs_init(struct ubi_device *ubi, int *ref) +{ + int err; + + ubi->dev.release = dev_release; + ubi->dev.devt = ubi->cdev.dev; + ubi->dev.class = ubi_class; + dev_set_name(&ubi->dev, UBI_NAME_STR"%d", ubi->ubi_num); + err = device_register(&ubi->dev); + if (err) + return err; + + *ref = 1; + err = device_create_file(&ubi->dev, &dev_eraseblock_size); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_avail_eraseblocks); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_total_eraseblocks); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_volumes_count); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_max_ec); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_reserved_for_bad); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_bad_peb_count); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_max_vol_count); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_min_io_size); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_bgt_enabled); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_mtd_num); + return err; +} + +/** + * ubi_sysfs_close - close sysfs for an UBI device. + * @ubi: UBI device description object + */ +static void ubi_sysfs_close(struct ubi_device *ubi) +{ + device_remove_file(&ubi->dev, &dev_mtd_num); + device_remove_file(&ubi->dev, &dev_bgt_enabled); + device_remove_file(&ubi->dev, &dev_min_io_size); + device_remove_file(&ubi->dev, &dev_max_vol_count); + device_remove_file(&ubi->dev, &dev_bad_peb_count); + device_remove_file(&ubi->dev, &dev_reserved_for_bad); + device_remove_file(&ubi->dev, &dev_max_ec); + device_remove_file(&ubi->dev, &dev_volumes_count); + device_remove_file(&ubi->dev, &dev_total_eraseblocks); + device_remove_file(&ubi->dev, &dev_avail_eraseblocks); + device_remove_file(&ubi->dev, &dev_eraseblock_size); + device_unregister(&ubi->dev); +} + +/** + * kill_volumes - destroy all user volumes. + * @ubi: UBI device description object + */ +static void kill_volumes(struct ubi_device *ubi) +{ + int i; + + for (i = 0; i < ubi->vtbl_slots; i++) + if (ubi->volumes[i]) + ubi_free_volume(ubi, ubi->volumes[i]); +} + +/** + * uif_init - initialize user interfaces for an UBI device. + * @ubi: UBI device description object + * @ref: set to %1 on exit in case of failure if a reference to @ubi->dev was + * taken, otherwise set to %0 + * + * This function initializes various user interfaces for an UBI device. If the + * initialization fails at an early stage, this function frees all the + * resources it allocated, returns an error, and @ref is set to %0. However, + * if the initialization fails after the UBI device was registered in the + * driver core subsystem, this function takes a reference to @ubi->dev, because + * otherwise the release function ('dev_release()') would free whole @ubi + * object. The @ref argument is set to %1 in this case. The caller has to put + * this reference. + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int uif_init(struct ubi_device *ubi, int *ref) +{ + int i, err; + dev_t dev; + + *ref = 0; + sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num); + + /* + * Major numbers for the UBI character devices are allocated + * dynamically. Major numbers of volume character devices are + * equivalent to ones of the corresponding UBI character device. Minor + * numbers of UBI character devices are 0, while minor numbers of + * volume character devices start from 1. Thus, we allocate one major + * number and ubi->vtbl_slots + 1 minor numbers. + */ + err = alloc_chrdev_region(&dev, 0, ubi->vtbl_slots + 1, ubi->ubi_name); + if (err) { + ubi_err(ubi, "cannot register UBI character devices"); + return err; + } + + ubi_assert(MINOR(dev) == 0); + cdev_init(&ubi->cdev, &ubi_cdev_operations); + dbg_gen("%s major is %u", ubi->ubi_name, MAJOR(dev)); + ubi->cdev.owner = THIS_MODULE; + + err = cdev_add(&ubi->cdev, dev, 1); + if (err) { + ubi_err(ubi, "cannot add character device"); + goto out_unreg; + } + + err = ubi_sysfs_init(ubi, ref); + if (err) + goto out_sysfs; + + for (i = 0; i < ubi->vtbl_slots; i++) + if (ubi->volumes[i]) { + err = ubi_add_volume(ubi, ubi->volumes[i]); + if (err) { + ubi_err(ubi, "cannot add volume %d", i); + goto out_volumes; + } + } + + return 0; + +out_volumes: + kill_volumes(ubi); +out_sysfs: + if (*ref) + get_device(&ubi->dev); + ubi_sysfs_close(ubi); + cdev_del(&ubi->cdev); +out_unreg: + unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1); + ubi_err(ubi, "cannot initialize UBI %s, error %d", + ubi->ubi_name, err); + return err; +} + +/** + * uif_close - close user interfaces for an UBI device. + * @ubi: UBI device description object + * + * Note, since this function un-registers UBI volume device objects (@vol->dev), + * the memory allocated voe the volumes is freed as well (in the release + * function). + */ +static void uif_close(struct ubi_device *ubi) +{ + kill_volumes(ubi); + ubi_sysfs_close(ubi); + cdev_del(&ubi->cdev); + unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1); +} + +/** + * ubi_free_internal_volumes - free internal volumes. + * @ubi: UBI device description object + */ +void ubi_free_internal_volumes(struct ubi_device *ubi) +{ + int i; + + for (i = ubi->vtbl_slots; + i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { + kfree(ubi->volumes[i]->eba_tbl); + kfree(ubi->volumes[i]); + } +} + +static int get_bad_peb_limit(const struct ubi_device *ubi, int max_beb_per1024) +{ + int limit, device_pebs; + uint64_t device_size; + + if (!max_beb_per1024) + return 0; + + /* + * Here we are using size of the entire flash chip and + * not just the MTD partition size because the maximum + * number of bad eraseblocks is a percentage of the + * whole device and bad eraseblocks are not fairly + * distributed over the flash chip. So the worst case + * is that all the bad eraseblocks of the chip are in + * the MTD partition we are attaching (ubi->mtd). + */ + device_size = mtd_get_device_size(ubi->mtd); + device_pebs = mtd_div_by_eb(device_size, ubi->mtd); + limit = mult_frac(device_pebs, max_beb_per1024, 1024); + + /* Round it up */ + if (mult_frac(limit, 1024, max_beb_per1024) < device_pebs) + limit += 1; + + return limit; +} + +/** + * io_init - initialize I/O sub-system for a given UBI device. + * @ubi: UBI device description object + * @max_beb_per1024: maximum expected number of bad PEB per 1024 PEBs + * + * If @ubi->vid_hdr_offset or @ubi->leb_start is zero, default offsets are + * assumed: + * o EC header is always at offset zero - this cannot be changed; + * o VID header starts just after the EC header at the closest address + * aligned to @io->hdrs_min_io_size; + * o data starts just after the VID header at the closest address aligned to + * @io->min_io_size + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int io_init(struct ubi_device *ubi, int max_beb_per1024) +{ + dbg_gen("sizeof(struct ubi_ainf_peb) %zu", sizeof(struct ubi_ainf_peb)); + dbg_gen("sizeof(struct ubi_wl_entry) %zu", sizeof(struct ubi_wl_entry)); + + if (ubi->mtd->numeraseregions != 0) { + /* + * Some flashes have several erase regions. Different regions + * may have different eraseblock size and other + * characteristics. It looks like mostly multi-region flashes + * have one "main" region and one or more small regions to + * store boot loader code or boot parameters or whatever. I + * guess we should just pick the largest region. But this is + * not implemented. + */ + ubi_err(ubi, "multiple regions, not implemented"); + return -EINVAL; + } + + if (ubi->vid_hdr_offset < 0) + return -EINVAL; + + /* + * Note, in this implementation we support MTD devices with 0x7FFFFFFF + * physical eraseblocks maximum. + */ + + ubi->peb_size = ubi->mtd->erasesize; + ubi->peb_count = mtd_div_by_eb(ubi->mtd->size, ubi->mtd); + ubi->flash_size = ubi->mtd->size; + + if (mtd_can_have_bb(ubi->mtd)) { + ubi->bad_allowed = 1; + ubi->bad_peb_limit = get_bad_peb_limit(ubi, max_beb_per1024); + } + + if (ubi->mtd->type == MTD_NORFLASH) { + ubi_assert(ubi->mtd->writesize == 1); + ubi->nor_flash = 1; + } + + ubi->min_io_size = ubi->mtd->writesize; + ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft; + + /* + * Make sure minimal I/O unit is power of 2. Note, there is no + * fundamental reason for this assumption. It is just an optimization + * which allows us to avoid costly division operations. + */ + if (!is_power_of_2(ubi->min_io_size)) { + ubi_err(ubi, "min. I/O unit (%d) is not power of 2", + ubi->min_io_size); + return -EINVAL; + } + + ubi_assert(ubi->hdrs_min_io_size > 0); + ubi_assert(ubi->hdrs_min_io_size <= ubi->min_io_size); + ubi_assert(ubi->min_io_size % ubi->hdrs_min_io_size == 0); + + ubi->max_write_size = ubi->mtd->writebufsize; + /* + * Maximum write size has to be greater or equivalent to min. I/O + * size, and be multiple of min. I/O size. + */ + if (ubi->max_write_size < ubi->min_io_size || + ubi->max_write_size % ubi->min_io_size || + !is_power_of_2(ubi->max_write_size)) { + ubi_err(ubi, "bad write buffer size %d for %d min. I/O unit", + ubi->max_write_size, ubi->min_io_size); + return -EINVAL; + } + + /* Calculate default aligned sizes of EC and VID headers */ + ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size); + ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size); + + dbg_gen("min_io_size %d", ubi->min_io_size); + dbg_gen("max_write_size %d", ubi->max_write_size); + dbg_gen("hdrs_min_io_size %d", ubi->hdrs_min_io_size); + dbg_gen("ec_hdr_alsize %d", ubi->ec_hdr_alsize); + dbg_gen("vid_hdr_alsize %d", ubi->vid_hdr_alsize); + + if (ubi->vid_hdr_offset == 0) + /* Default offset */ + ubi->vid_hdr_offset = ubi->vid_hdr_aloffset = + ubi->ec_hdr_alsize; + else { + ubi->vid_hdr_aloffset = ubi->vid_hdr_offset & + ~(ubi->hdrs_min_io_size - 1); + ubi->vid_hdr_shift = ubi->vid_hdr_offset - + ubi->vid_hdr_aloffset; + } + + /* Similar for the data offset */ + ubi->leb_start = ubi->vid_hdr_offset + UBI_VID_HDR_SIZE; + ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size); + + dbg_gen("vid_hdr_offset %d", ubi->vid_hdr_offset); + dbg_gen("vid_hdr_aloffset %d", ubi->vid_hdr_aloffset); + dbg_gen("vid_hdr_shift %d", ubi->vid_hdr_shift); + dbg_gen("leb_start %d", ubi->leb_start); + + /* The shift must be aligned to 32-bit boundary */ + if (ubi->vid_hdr_shift % 4) { + ubi_err(ubi, "unaligned VID header shift %d", + ubi->vid_hdr_shift); + return -EINVAL; + } + + /* Check sanity */ + if (ubi->vid_hdr_offset < UBI_EC_HDR_SIZE || + ubi->leb_start < ubi->vid_hdr_offset + UBI_VID_HDR_SIZE || + ubi->leb_start > ubi->peb_size - UBI_VID_HDR_SIZE || + ubi->leb_start & (ubi->min_io_size - 1)) { + ubi_err(ubi, "bad VID header (%d) or data offsets (%d)", + ubi->vid_hdr_offset, ubi->leb_start); + return -EINVAL; + } + + /* + * Set maximum amount of physical erroneous eraseblocks to be 10%. + * Erroneous PEB are those which have read errors. + */ + ubi->max_erroneous = ubi->peb_count / 10; + if (ubi->max_erroneous < 16) + ubi->max_erroneous = 16; + dbg_gen("max_erroneous %d", ubi->max_erroneous); + + /* + * It may happen that EC and VID headers are situated in one minimal + * I/O unit. In this case we can only accept this UBI image in + * read-only mode. + */ + if (ubi->vid_hdr_offset + UBI_VID_HDR_SIZE <= ubi->hdrs_min_io_size) { + ubi_warn(ubi, "EC and VID headers are in the same minimal I/O unit, switch to read-only mode"); + ubi->ro_mode = 1; + } + + ubi->leb_size = ubi->peb_size - ubi->leb_start; + + if (!(ubi->mtd->flags & MTD_WRITEABLE)) { + ubi_msg(ubi, "MTD device %d is write-protected, attach in read-only mode", + ubi->mtd->index); + ubi->ro_mode = 1; + } + + /* + * Note, ideally, we have to initialize @ubi->bad_peb_count here. But + * unfortunately, MTD does not provide this information. We should loop + * over all physical eraseblocks and invoke mtd->block_is_bad() for + * each physical eraseblock. So, we leave @ubi->bad_peb_count + * uninitialized so far. + */ + + return 0; +} + +/** + * autoresize - re-size the volume which has the "auto-resize" flag set. + * @ubi: UBI device description object + * @vol_id: ID of the volume to re-size + * + * This function re-sizes the volume marked by the %UBI_VTBL_AUTORESIZE_FLG in + * the volume table to the largest possible size. See comments in ubi-header.h + * for more description of the flag. Returns zero in case of success and a + * negative error code in case of failure. + */ +static int autoresize(struct ubi_device *ubi, int vol_id) +{ + struct ubi_volume_desc desc; + struct ubi_volume *vol = ubi->volumes[vol_id]; + int err, old_reserved_pebs = vol->reserved_pebs; + + if (ubi->ro_mode) { + ubi_warn(ubi, "skip auto-resize because of R/O mode"); + return 0; + } + + /* + * Clear the auto-resize flag in the volume in-memory copy of the + * volume table, and 'ubi_resize_volume()' will propagate this change + * to the flash. + */ + ubi->vtbl[vol_id].flags &= ~UBI_VTBL_AUTORESIZE_FLG; + + if (ubi->avail_pebs == 0) { + struct ubi_vtbl_record vtbl_rec; + + /* + * No available PEBs to re-size the volume, clear the flag on + * flash and exit. + */ + vtbl_rec = ubi->vtbl[vol_id]; + err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); + if (err) + ubi_err(ubi, "cannot clean auto-resize flag for volume %d", + vol_id); + } else { + desc.vol = vol; + err = ubi_resize_volume(&desc, + old_reserved_pebs + ubi->avail_pebs); + if (err) + ubi_err(ubi, "cannot auto-resize volume %d", + vol_id); + } + + if (err) + return err; + + ubi_msg(ubi, "volume %d (\"%s\") re-sized from %d to %d LEBs", + vol_id, vol->name, old_reserved_pebs, vol->reserved_pebs); + return 0; +} + +/** + * ubi_attach_mtd_dev - attach an MTD device. + * @mtd: MTD device description object + * @ubi_num: number to assign to the new UBI device + * @vid_hdr_offset: VID header offset + * @max_beb_per1024: maximum expected number of bad PEB per 1024 PEBs + * + * This function attaches MTD device @mtd_dev to UBI and assign @ubi_num number + * to the newly created UBI device, unless @ubi_num is %UBI_DEV_NUM_AUTO, in + * which case this function finds a vacant device number and assigns it + * automatically. Returns the new UBI device number in case of success and a + * negative error code in case of failure. + * + * Note, the invocations of this function has to be serialized by the + * @ubi_devices_mutex. + */ +int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, + int vid_hdr_offset, int max_beb_per1024) +{ + struct ubi_device *ubi; + int i, err, ref = 0; + + if (max_beb_per1024 < 0 || max_beb_per1024 > MAX_MTD_UBI_BEB_LIMIT) + return -EINVAL; + + if (!max_beb_per1024) + max_beb_per1024 = CONFIG_MTD_UBI_BEB_LIMIT; + + /* + * Check if we already have the same MTD device attached. + * + * Note, this function assumes that UBI devices creations and deletions + * are serialized, so it does not take the &ubi_devices_lock. + */ + for (i = 0; i < UBI_MAX_DEVICES; i++) { + ubi = ubi_devices[i]; + if (ubi && mtd->index == ubi->mtd->index) { + ubi_err(ubi, "mtd%d is already attached to ubi%d", + mtd->index, i); + return -EEXIST; + } + } + + /* + * Make sure this MTD device is not emulated on top of an UBI volume + * already. Well, generally this recursion works fine, but there are + * different problems like the UBI module takes a reference to itself + * by attaching (and thus, opening) the emulated MTD device. This + * results in inability to unload the module. And in general it makes + * no sense to attach emulated MTD devices, so we prohibit this. + */ + if (mtd->type == MTD_UBIVOLUME) { + ubi_err(ubi, "refuse attaching mtd%d - it is already emulated on top of UBI", + mtd->index); + return -EINVAL; + } + + if (ubi_num == UBI_DEV_NUM_AUTO) { + /* Search for an empty slot in the @ubi_devices array */ + for (ubi_num = 0; ubi_num < UBI_MAX_DEVICES; ubi_num++) + if (!ubi_devices[ubi_num]) + break; + if (ubi_num == UBI_MAX_DEVICES) { + ubi_err(ubi, "only %d UBI devices may be created", + UBI_MAX_DEVICES); + return -ENFILE; + } + } else { + if (ubi_num >= UBI_MAX_DEVICES) + return -EINVAL; + + /* Make sure ubi_num is not busy */ + if (ubi_devices[ubi_num]) { + ubi_err(ubi, "already exists"); + return -EEXIST; + } + } + + ubi = kzalloc(sizeof(struct ubi_device), GFP_KERNEL); + if (!ubi) + return -ENOMEM; + + ubi->mtd = mtd; + ubi->ubi_num = ubi_num; + ubi->vid_hdr_offset = vid_hdr_offset; + ubi->autoresize_vol_id = -1; + +#ifdef CONFIG_MTD_UBI_FASTMAP + ubi->fm_pool.used = ubi->fm_pool.size = 0; + ubi->fm_wl_pool.used = ubi->fm_wl_pool.size = 0; + + /* + * fm_pool.max_size is 5% of the total number of PEBs but it's also + * between UBI_FM_MAX_POOL_SIZE and UBI_FM_MIN_POOL_SIZE. + */ + ubi->fm_pool.max_size = min(((int)mtd_div_by_eb(ubi->mtd->size, + ubi->mtd) / 100) * 5, UBI_FM_MAX_POOL_SIZE); + if (ubi->fm_pool.max_size < UBI_FM_MIN_POOL_SIZE) + ubi->fm_pool.max_size = UBI_FM_MIN_POOL_SIZE; + + ubi->fm_wl_pool.max_size = ubi->fm_pool.max_size / 2; + ubi->fm_disabled = !fm_autoconvert; + if (fm_debug) + ubi_enable_dbg_chk_fastmap(ubi); + + if (!ubi->fm_disabled && (int)mtd_div_by_eb(ubi->mtd->size, ubi->mtd) + <= UBI_FM_MAX_START) { + ubi_err(ubi, "More than %i PEBs are needed for fastmap, sorry.", + UBI_FM_MAX_START); + ubi->fm_disabled = 1; + } + + ubi_msg(ubi, "default fastmap pool size: %d", ubi->fm_pool.max_size); + ubi_msg(ubi, "default fastmap WL pool size: %d", + ubi->fm_wl_pool.max_size); +#else + ubi->fm_disabled = 1; +#endif + mutex_init(&ubi->buf_mutex); + mutex_init(&ubi->ckvol_mutex); + mutex_init(&ubi->device_mutex); + spin_lock_init(&ubi->volumes_lock); + init_rwsem(&ubi->fm_protect); + init_rwsem(&ubi->fm_eba_sem); + + ubi_msg(ubi, "attaching mtd%d", mtd->index); + + err = io_init(ubi, max_beb_per1024); + if (err) + goto out_free; + + err = -ENOMEM; + ubi->peb_buf = vmalloc(ubi->peb_size); + if (!ubi->peb_buf) + goto out_free; + +#ifdef CONFIG_MTD_UBI_FASTMAP + ubi->fm_size = ubi_calc_fm_size(ubi); + ubi->fm_buf = vzalloc(ubi->fm_size); + if (!ubi->fm_buf) + goto out_free; +#endif + err = ubi_attach(ubi, 0); + if (err) { + ubi_err(ubi, "failed to attach mtd%d, error %d", + mtd->index, err); + goto out_free; + } + + if (ubi->autoresize_vol_id != -1) { + err = autoresize(ubi, ubi->autoresize_vol_id); + if (err) + goto out_detach; + } + + err = uif_init(ubi, &ref); + if (err) + goto out_detach; + + err = ubi_debugfs_init_dev(ubi); + if (err) + goto out_uif; + + ubi->bgt_thread = kthread_create(ubi_thread, ubi, "%s", ubi->bgt_name); + if (IS_ERR(ubi->bgt_thread)) { + err = PTR_ERR(ubi->bgt_thread); + ubi_err(ubi, "cannot spawn \"%s\", error %d", + ubi->bgt_name, err); + goto out_debugfs; + } + + ubi_msg(ubi, "attached mtd%d (name \"%s\", size %llu MiB)", + mtd->index, mtd->name, ubi->flash_size >> 20); + ubi_msg(ubi, "PEB size: %d bytes (%d KiB), LEB size: %d bytes", + ubi->peb_size, ubi->peb_size >> 10, ubi->leb_size); + ubi_msg(ubi, "min./max. I/O unit sizes: %d/%d, sub-page size %d", + ubi->min_io_size, ubi->max_write_size, ubi->hdrs_min_io_size); + ubi_msg(ubi, "VID header offset: %d (aligned %d), data offset: %d", + ubi->vid_hdr_offset, ubi->vid_hdr_aloffset, ubi->leb_start); + ubi_msg(ubi, "good PEBs: %d, bad PEBs: %d, corrupted PEBs: %d", + ubi->good_peb_count, ubi->bad_peb_count, ubi->corr_peb_count); + ubi_msg(ubi, "user volume: %d, internal volumes: %d, max. volumes count: %d", + ubi->vol_count - UBI_INT_VOL_COUNT, UBI_INT_VOL_COUNT, + ubi->vtbl_slots); + ubi_msg(ubi, "max/mean erase counter: %d/%d, WL threshold: %d, image sequence number: %u", + ubi->max_ec, ubi->mean_ec, CONFIG_MTD_UBI_WL_THRESHOLD, + ubi->image_seq); + ubi_msg(ubi, "available PEBs: %d, total reserved PEBs: %d, PEBs reserved for bad PEB handling: %d", + ubi->avail_pebs, ubi->rsvd_pebs, ubi->beb_rsvd_pebs); + + /* + * The below lock makes sure we do not race with 'ubi_thread()' which + * checks @ubi->thread_enabled. Otherwise we may fail to wake it up. + */ + spin_lock(&ubi->wl_lock); + ubi->thread_enabled = 1; + wake_up_process(ubi->bgt_thread); + spin_unlock(&ubi->wl_lock); + + ubi_devices[ubi_num] = ubi; + ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL); + return ubi_num; + +out_debugfs: + ubi_debugfs_exit_dev(ubi); +out_uif: + get_device(&ubi->dev); + ubi_assert(ref); + uif_close(ubi); +out_detach: + ubi_wl_close(ubi); + ubi_free_internal_volumes(ubi); + vfree(ubi->vtbl); +out_free: + vfree(ubi->peb_buf); + vfree(ubi->fm_buf); + if (ref) + put_device(&ubi->dev); + else + kfree(ubi); + return err; +} + +/** + * ubi_detach_mtd_dev - detach an MTD device. + * @ubi_num: UBI device number to detach from + * @anyway: detach MTD even if device reference count is not zero + * + * This function destroys an UBI device number @ubi_num and detaches the + * underlying MTD device. Returns zero in case of success and %-EBUSY if the + * UBI device is busy and cannot be destroyed, and %-EINVAL if it does not + * exist. + * + * Note, the invocations of this function has to be serialized by the + * @ubi_devices_mutex. + */ +int ubi_detach_mtd_dev(int ubi_num, int anyway) +{ + struct ubi_device *ubi; + + if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) + return -EINVAL; + + ubi = ubi_get_device(ubi_num); + if (!ubi) + return -EINVAL; + + spin_lock(&ubi_devices_lock); + put_device(&ubi->dev); + ubi->ref_count -= 1; + if (ubi->ref_count) { + if (!anyway) { + spin_unlock(&ubi_devices_lock); + return -EBUSY; + } + /* This may only happen if there is a bug */ + ubi_err(ubi, "%s reference count %d, destroy anyway", + ubi->ubi_name, ubi->ref_count); + } + ubi_devices[ubi_num] = NULL; + spin_unlock(&ubi_devices_lock); + + ubi_assert(ubi_num == ubi->ubi_num); + ubi_notify_all(ubi, UBI_VOLUME_REMOVED, NULL); + ubi_msg(ubi, "detaching mtd%d", ubi->mtd->index); +#ifdef CONFIG_MTD_UBI_FASTMAP + /* If we don't write a new fastmap at detach time we lose all + * EC updates that have been made since the last written fastmap. + * In case of fastmap debugging we omit the update to simulate an + * unclean shutdown. */ + if (!ubi_dbg_chk_fastmap(ubi)) + ubi_update_fastmap(ubi); +#endif + /* + * Before freeing anything, we have to stop the background thread to + * prevent it from doing anything on this device while we are freeing. + */ + if (ubi->bgt_thread) + kthread_stop(ubi->bgt_thread); + + /* + * Get a reference to the device in order to prevent 'dev_release()' + * from freeing the @ubi object. + */ + get_device(&ubi->dev); + + ubi_debugfs_exit_dev(ubi); + uif_close(ubi); + + ubi_wl_close(ubi); + ubi_free_internal_volumes(ubi); + vfree(ubi->vtbl); + put_mtd_device(ubi->mtd); + vfree(ubi->peb_buf); + vfree(ubi->fm_buf); + ubi_msg(ubi, "mtd%d is detached", ubi->mtd->index); + put_device(&ubi->dev); + return 0; +} + +/** + * open_mtd_by_chdev - open an MTD device by its character device node path. + * @mtd_dev: MTD character device node path + * + * This helper function opens an MTD device by its character node device path. + * Returns MTD device description object in case of success and a negative + * error code in case of failure. + */ +static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev) +{ + int err, major, minor, mode; + struct path path; + + /* Probably this is an MTD character device node path */ + err = kern_path(mtd_dev, LOOKUP_FOLLOW, &path); + if (err) + return ERR_PTR(err); + + /* MTD device number is defined by the major / minor numbers */ + major = imajor(d_backing_inode(path.dentry)); + minor = iminor(d_backing_inode(path.dentry)); + mode = d_backing_inode(path.dentry)->i_mode; + path_put(&path); + if (major != MTD_CHAR_MAJOR || !S_ISCHR(mode)) + return ERR_PTR(-EINVAL); + + if (minor & 1) + /* + * Just do not think the "/dev/mtdrX" devices support is need, + * so do not support them to avoid doing extra work. + */ + return ERR_PTR(-EINVAL); + + return get_mtd_device(NULL, minor / 2); +} + +/** + * open_mtd_device - open MTD device by name, character device path, or number. + * @mtd_dev: name, character device node path, or MTD device device number + * + * This function tries to open and MTD device described by @mtd_dev string, + * which is first treated as ASCII MTD device number, and if it is not true, it + * is treated as MTD device name, and if that is also not true, it is treated + * as MTD character device node path. Returns MTD device description object in + * case of success and a negative error code in case of failure. + */ +static struct mtd_info * __init open_mtd_device(const char *mtd_dev) +{ + struct mtd_info *mtd; + int mtd_num; + char *endp; + + mtd_num = simple_strtoul(mtd_dev, &endp, 0); + if (*endp != '\0' || mtd_dev == endp) { + /* + * This does not look like an ASCII integer, probably this is + * MTD device name. + */ + mtd = get_mtd_device_nm(mtd_dev); + if (IS_ERR(mtd) && PTR_ERR(mtd) == -ENODEV) + /* Probably this is an MTD character device node path */ + mtd = open_mtd_by_chdev(mtd_dev); + } else + mtd = get_mtd_device(NULL, mtd_num); + + return mtd; +} + +static int __init ubi_init(void) +{ + int err, i, k; + + /* Ensure that EC and VID headers have correct size */ + BUILD_BUG_ON(sizeof(struct ubi_ec_hdr) != 64); + BUILD_BUG_ON(sizeof(struct ubi_vid_hdr) != 64); + + if (mtd_devs > UBI_MAX_DEVICES) { + pr_err("UBI error: too many MTD devices, maximum is %d", + UBI_MAX_DEVICES); + return -EINVAL; + } + + /* Create base sysfs directory and sysfs files */ + ubi_class = class_create(THIS_MODULE, UBI_NAME_STR); + if (IS_ERR(ubi_class)) { + err = PTR_ERR(ubi_class); + pr_err("UBI error: cannot create UBI class"); + goto out; + } + + err = class_create_file(ubi_class, &ubi_version); + if (err) { + pr_err("UBI error: cannot create sysfs file"); + goto out_class; + } + + err = misc_register(&ubi_ctrl_cdev); + if (err) { + pr_err("UBI error: cannot register device"); + goto out_version; + } + + ubi_wl_entry_slab = kmem_cache_create("ubi_wl_entry_slab", + sizeof(struct ubi_wl_entry), + 0, 0, NULL); + if (!ubi_wl_entry_slab) { + err = -ENOMEM; + goto out_dev_unreg; + } + + err = ubi_debugfs_init(); + if (err) + goto out_slab; + + + /* Attach MTD devices */ + for (i = 0; i < mtd_devs; i++) { + struct mtd_dev_param *p = &mtd_dev_param[i]; + struct mtd_info *mtd; + + cond_resched(); + + mtd = open_mtd_device(p->name); + if (IS_ERR(mtd)) { + err = PTR_ERR(mtd); + pr_err("UBI error: cannot open mtd %s, error %d", + p->name, err); + /* See comment below re-ubi_is_module(). */ + if (ubi_is_module()) + goto out_detach; + continue; + } + + mutex_lock(&ubi_devices_mutex); + err = ubi_attach_mtd_dev(mtd, p->ubi_num, + p->vid_hdr_offs, p->max_beb_per1024); + mutex_unlock(&ubi_devices_mutex); + if (err < 0) { + pr_err("UBI error: cannot attach mtd%d", + mtd->index); + put_mtd_device(mtd); + + /* + * Originally UBI stopped initializing on any error. + * However, later on it was found out that this + * behavior is not very good when UBI is compiled into + * the kernel and the MTD devices to attach are passed + * through the command line. Indeed, UBI failure + * stopped whole boot sequence. + * + * To fix this, we changed the behavior for the + * non-module case, but preserved the old behavior for + * the module case, just for compatibility. This is a + * little inconsistent, though. + */ + if (ubi_is_module()) + goto out_detach; + } + } + + err = ubiblock_init(); + if (err) { + pr_err("UBI error: block: cannot initialize, error %d", err); + + /* See comment above re-ubi_is_module(). */ + if (ubi_is_module()) + goto out_detach; + } + + return 0; + +out_detach: + for (k = 0; k < i; k++) + if (ubi_devices[k]) { + mutex_lock(&ubi_devices_mutex); + ubi_detach_mtd_dev(ubi_devices[k]->ubi_num, 1); + mutex_unlock(&ubi_devices_mutex); + } + ubi_debugfs_exit(); +out_slab: + kmem_cache_destroy(ubi_wl_entry_slab); +out_dev_unreg: + misc_deregister(&ubi_ctrl_cdev); +out_version: + class_remove_file(ubi_class, &ubi_version); +out_class: + class_destroy(ubi_class); +out: + pr_err("UBI error: cannot initialize UBI, error %d", err); + return err; +} +late_initcall(ubi_init); + +static void __exit ubi_exit(void) +{ + int i; + + ubiblock_exit(); + + for (i = 0; i < UBI_MAX_DEVICES; i++) + if (ubi_devices[i]) { + mutex_lock(&ubi_devices_mutex); + ubi_detach_mtd_dev(ubi_devices[i]->ubi_num, 1); + mutex_unlock(&ubi_devices_mutex); + } + ubi_debugfs_exit(); + kmem_cache_destroy(ubi_wl_entry_slab); + misc_deregister(&ubi_ctrl_cdev); + class_remove_file(ubi_class, &ubi_version); + class_destroy(ubi_class); +} +module_exit(ubi_exit); + +/** + * bytes_str_to_int - convert a number of bytes string into an integer. + * @str: the string to convert + * + * This function returns positive resulting integer in case of success and a + * negative error code in case of failure. + */ +static int __init bytes_str_to_int(const char *str) +{ + char *endp; + unsigned long result; + + result = simple_strtoul(str, &endp, 0); + if (str == endp || result >= INT_MAX) { + pr_err("UBI error: incorrect bytes count: \"%s\"\n", str); + return -EINVAL; + } + + switch (*endp) { + case 'G': + result *= 1024; + case 'M': + result *= 1024; + case 'K': + result *= 1024; + if (endp[1] == 'i' && endp[2] == 'B') + endp += 2; + case '\0': + break; + default: + pr_err("UBI error: incorrect bytes count: \"%s\"\n", str); + return -EINVAL; + } + + return result; +} + +/** + * ubi_mtd_param_parse - parse the 'mtd=' UBI parameter. + * @val: the parameter value to parse + * @kp: not used + * + * This function returns zero in case of success and a negative error code in + * case of error. + */ +static int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp) +{ + int i, len; + struct mtd_dev_param *p; + char buf[MTD_PARAM_LEN_MAX]; + char *pbuf = &buf[0]; + char *tokens[MTD_PARAM_MAX_COUNT], *token; + + if (!val) + return -EINVAL; + + if (mtd_devs == UBI_MAX_DEVICES) { + pr_err("UBI error: too many parameters, max. is %d\n", + UBI_MAX_DEVICES); + return -EINVAL; + } + + len = strnlen(val, MTD_PARAM_LEN_MAX); + if (len == MTD_PARAM_LEN_MAX) { + pr_err("UBI error: parameter \"%s\" is too long, max. is %d\n", + val, MTD_PARAM_LEN_MAX); + return -EINVAL; + } + + if (len == 0) { + pr_warn("UBI warning: empty 'mtd=' parameter - ignored\n"); + return 0; + } + + strcpy(buf, val); + + /* Get rid of the final newline */ + if (buf[len - 1] == '\n') + buf[len - 1] = '\0'; + + for (i = 0; i < MTD_PARAM_MAX_COUNT; i++) + tokens[i] = strsep(&pbuf, ","); + + if (pbuf) { + pr_err("UBI error: too many arguments at \"%s\"\n", val); + return -EINVAL; + } + + p = &mtd_dev_param[mtd_devs]; + strcpy(&p->name[0], tokens[0]); + + token = tokens[1]; + if (token) { + p->vid_hdr_offs = bytes_str_to_int(token); + + if (p->vid_hdr_offs < 0) + return p->vid_hdr_offs; + } + + token = tokens[2]; + if (token) { + int err = kstrtoint(token, 10, &p->max_beb_per1024); + + if (err) { + pr_err("UBI error: bad value for max_beb_per1024 parameter: %s", + token); + return -EINVAL; + } + } + + token = tokens[3]; + if (token) { + int err = kstrtoint(token, 10, &p->ubi_num); + + if (err) { + pr_err("UBI error: bad value for ubi_num parameter: %s", + token); + return -EINVAL; + } + } else + p->ubi_num = UBI_DEV_NUM_AUTO; + + mtd_devs += 1; + return 0; +} + +module_param_call(mtd, ubi_mtd_param_parse, NULL, NULL, 000); +MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: mtd=<name|num|path>[,<vid_hdr_offs>[,max_beb_per1024[,ubi_num]]].\n" + "Multiple \"mtd\" parameters may be specified.\n" + "MTD devices may be specified by their number, name, or path to the MTD character device node.\n" + "Optional \"vid_hdr_offs\" parameter specifies UBI VID header position to be used by UBI. (default value if 0)\n" + "Optional \"max_beb_per1024\" parameter specifies the maximum expected bad eraseblock per 1024 eraseblocks. (default value (" + __stringify(CONFIG_MTD_UBI_BEB_LIMIT) ") if 0)\n" + "Optional \"ubi_num\" parameter specifies UBI device number which have to be assigned to the newly created UBI device (assigned automatically by default)\n" + "\n" + "Example 1: mtd=/dev/mtd0 - attach MTD device /dev/mtd0.\n" + "Example 2: mtd=content,1984 mtd=4 - attach MTD device with name \"content\" using VID header offset 1984, and MTD device number 4 with default VID header offset.\n" + "Example 3: mtd=/dev/mtd1,0,25 - attach MTD device /dev/mtd1 using default VID header offset and reserve 25*nand_size_in_blocks/1024 erase blocks for bad block handling.\n" + "Example 4: mtd=/dev/mtd1,0,0,5 - attach MTD device /dev/mtd1 to UBI 5 and using default values for the other fields.\n" + "\t(e.g. if the NAND *chipset* has 4096 PEB, 100 will be reserved for this UBI device)."); +#ifdef CONFIG_MTD_UBI_FASTMAP +module_param(fm_autoconvert, bool, 0644); +MODULE_PARM_DESC(fm_autoconvert, "Set this parameter to enable fastmap automatically on images without a fastmap."); +module_param(fm_debug, bool, 0); +MODULE_PARM_DESC(fm_debug, "Set this parameter to enable fastmap debugging by default. Warning, this will make fastmap slow!"); +#endif +MODULE_VERSION(__stringify(UBI_VERSION)); +MODULE_DESCRIPTION("UBI - Unsorted Block Images"); +MODULE_AUTHOR("Artem Bityutskiy"); +MODULE_LICENSE("GPL"); diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c new file mode 100644 index 000000000..d16fccf79 --- /dev/null +++ b/drivers/mtd/ubi/cdev.c @@ -0,0 +1,1110 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file includes implementation of UBI character device operations. + * + * There are two kinds of character devices in UBI: UBI character devices and + * UBI volume character devices. UBI character devices allow users to + * manipulate whole volumes: create, remove, and re-size them. Volume character + * devices provide volume I/O capabilities. + * + * Major and minor numbers are assigned dynamically to both UBI and volume + * character devices. + * + * Well, there is the third kind of character devices - the UBI control + * character device, which allows to manipulate by UBI devices - create and + * delete them. In other words, it is used for attaching and detaching MTD + * devices. + */ + +#include <linux/module.h> +#include <linux/stat.h> +#include <linux/slab.h> +#include <linux/ioctl.h> +#include <linux/capability.h> +#include <linux/uaccess.h> +#include <linux/compat.h> +#include <linux/math64.h> +#include <mtd/ubi-user.h> +#include "ubi.h" + +/** + * get_exclusive - get exclusive access to an UBI volume. + * @desc: volume descriptor + * + * This function changes UBI volume open mode to "exclusive". Returns previous + * mode value (positive integer) in case of success and a negative error code + * in case of failure. + */ +static int get_exclusive(struct ubi_volume_desc *desc) +{ + int users, err; + struct ubi_volume *vol = desc->vol; + + spin_lock(&vol->ubi->volumes_lock); + users = vol->readers + vol->writers + vol->exclusive + vol->metaonly; + ubi_assert(users > 0); + if (users > 1) { + ubi_err(vol->ubi, "%d users for volume %d", users, vol->vol_id); + err = -EBUSY; + } else { + vol->readers = vol->writers = vol->metaonly = 0; + vol->exclusive = 1; + err = desc->mode; + desc->mode = UBI_EXCLUSIVE; + } + spin_unlock(&vol->ubi->volumes_lock); + + return err; +} + +/** + * revoke_exclusive - revoke exclusive mode. + * @desc: volume descriptor + * @mode: new mode to switch to + */ +static void revoke_exclusive(struct ubi_volume_desc *desc, int mode) +{ + struct ubi_volume *vol = desc->vol; + + spin_lock(&vol->ubi->volumes_lock); + ubi_assert(vol->readers == 0 && vol->writers == 0 && vol->metaonly == 0); + ubi_assert(vol->exclusive == 1 && desc->mode == UBI_EXCLUSIVE); + vol->exclusive = 0; + if (mode == UBI_READONLY) + vol->readers = 1; + else if (mode == UBI_READWRITE) + vol->writers = 1; + else if (mode == UBI_METAONLY) + vol->metaonly = 1; + else + vol->exclusive = 1; + spin_unlock(&vol->ubi->volumes_lock); + + desc->mode = mode; +} + +static int vol_cdev_open(struct inode *inode, struct file *file) +{ + struct ubi_volume_desc *desc; + int vol_id = iminor(inode) - 1, mode, ubi_num; + + ubi_num = ubi_major2num(imajor(inode)); + if (ubi_num < 0) + return ubi_num; + + if (file->f_mode & FMODE_WRITE) + mode = UBI_READWRITE; + else + mode = UBI_READONLY; + + dbg_gen("open device %d, volume %d, mode %d", + ubi_num, vol_id, mode); + + desc = ubi_open_volume(ubi_num, vol_id, mode); + if (IS_ERR(desc)) + return PTR_ERR(desc); + + file->private_data = desc; + return 0; +} + +static int vol_cdev_release(struct inode *inode, struct file *file) +{ + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + + dbg_gen("release device %d, volume %d, mode %d", + vol->ubi->ubi_num, vol->vol_id, desc->mode); + + if (vol->updating) { + ubi_warn(vol->ubi, "update of volume %d not finished, volume is damaged", + vol->vol_id); + ubi_assert(!vol->changing_leb); + vol->updating = 0; + vfree(vol->upd_buf); + } else if (vol->changing_leb) { + dbg_gen("only %lld of %lld bytes received for atomic LEB change for volume %d:%d, cancel", + vol->upd_received, vol->upd_bytes, vol->ubi->ubi_num, + vol->vol_id); + vol->changing_leb = 0; + vfree(vol->upd_buf); + } + + ubi_close_volume(desc); + return 0; +} + +static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin) +{ + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + + if (vol->updating) { + /* Update is in progress, seeking is prohibited */ + ubi_err(vol->ubi, "updating"); + return -EBUSY; + } + + return fixed_size_llseek(file, offset, origin, vol->used_bytes); +} + +static int vol_cdev_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + struct ubi_volume_desc *desc = file->private_data; + struct ubi_device *ubi = desc->vol->ubi; + struct inode *inode = file_inode(file); + int err; + mutex_lock(&inode->i_mutex); + err = ubi_sync(ubi->ubi_num); + mutex_unlock(&inode->i_mutex); + return err; +} + + +static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count, + loff_t *offp) +{ + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int err, lnum, off, len, tbuf_size; + size_t count_save = count; + void *tbuf; + + dbg_gen("read %zd bytes from offset %lld of volume %d", + count, *offp, vol->vol_id); + + if (vol->updating) { + ubi_err(vol->ubi, "updating"); + return -EBUSY; + } + if (vol->upd_marker) { + ubi_err(vol->ubi, "damaged volume, update marker is set"); + return -EBADF; + } + if (*offp == vol->used_bytes || count == 0) + return 0; + + if (vol->corrupted) + dbg_gen("read from corrupted volume %d", vol->vol_id); + + if (*offp + count > vol->used_bytes) + count_save = count = vol->used_bytes - *offp; + + tbuf_size = vol->usable_leb_size; + if (count < tbuf_size) + tbuf_size = ALIGN(count, ubi->min_io_size); + tbuf = vmalloc(tbuf_size); + if (!tbuf) + return -ENOMEM; + + len = count > tbuf_size ? tbuf_size : count; + lnum = div_u64_rem(*offp, vol->usable_leb_size, &off); + + do { + cond_resched(); + + if (off + len >= vol->usable_leb_size) + len = vol->usable_leb_size - off; + + err = ubi_eba_read_leb(ubi, vol, lnum, tbuf, off, len, 0); + if (err) + break; + + off += len; + if (off == vol->usable_leb_size) { + lnum += 1; + off -= vol->usable_leb_size; + } + + count -= len; + *offp += len; + + err = copy_to_user(buf, tbuf, len); + if (err) { + err = -EFAULT; + break; + } + + buf += len; + len = count > tbuf_size ? tbuf_size : count; + } while (count); + + vfree(tbuf); + return err ? err : count_save - count; +} + +/* + * This function allows to directly write to dynamic UBI volumes, without + * issuing the volume update operation. + */ +static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf, + size_t count, loff_t *offp) +{ + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int lnum, off, len, tbuf_size, err = 0; + size_t count_save = count; + char *tbuf; + + if (!vol->direct_writes) + return -EPERM; + + dbg_gen("requested: write %zd bytes to offset %lld of volume %u", + count, *offp, vol->vol_id); + + if (vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; + + lnum = div_u64_rem(*offp, vol->usable_leb_size, &off); + if (off & (ubi->min_io_size - 1)) { + ubi_err(ubi, "unaligned position"); + return -EINVAL; + } + + if (*offp + count > vol->used_bytes) + count_save = count = vol->used_bytes - *offp; + + /* We can write only in fractions of the minimum I/O unit */ + if (count & (ubi->min_io_size - 1)) { + ubi_err(ubi, "unaligned write length"); + return -EINVAL; + } + + tbuf_size = vol->usable_leb_size; + if (count < tbuf_size) + tbuf_size = ALIGN(count, ubi->min_io_size); + tbuf = vmalloc(tbuf_size); + if (!tbuf) + return -ENOMEM; + + len = count > tbuf_size ? tbuf_size : count; + + while (count) { + cond_resched(); + + if (off + len >= vol->usable_leb_size) + len = vol->usable_leb_size - off; + + err = copy_from_user(tbuf, buf, len); + if (err) { + err = -EFAULT; + break; + } + + err = ubi_eba_write_leb(ubi, vol, lnum, tbuf, off, len); + if (err) + break; + + off += len; + if (off == vol->usable_leb_size) { + lnum += 1; + off -= vol->usable_leb_size; + } + + count -= len; + *offp += len; + buf += len; + len = count > tbuf_size ? tbuf_size : count; + } + + vfree(tbuf); + return err ? err : count_save - count; +} + +static ssize_t vol_cdev_write(struct file *file, const char __user *buf, + size_t count, loff_t *offp) +{ + int err = 0; + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + + if (!vol->updating && !vol->changing_leb) + return vol_cdev_direct_write(file, buf, count, offp); + + if (vol->updating) + err = ubi_more_update_data(ubi, vol, buf, count); + else + err = ubi_more_leb_change_data(ubi, vol, buf, count); + + if (err < 0) { + ubi_err(ubi, "cannot accept more %zd bytes of data, error %d", + count, err); + return err; + } + + if (err) { + /* + * The operation is finished, @err contains number of actually + * written bytes. + */ + count = err; + + if (vol->changing_leb) { + revoke_exclusive(desc, UBI_READWRITE); + return count; + } + + err = ubi_check_volume(ubi, vol->vol_id); + if (err < 0) + return err; + + if (err) { + ubi_warn(ubi, "volume %d on UBI device %d is corrupted", + vol->vol_id, ubi->ubi_num); + vol->corrupted = 1; + } + vol->checked = 1; + ubi_volume_notify(ubi, vol, UBI_VOLUME_UPDATED); + revoke_exclusive(desc, UBI_READWRITE); + } + + return count; +} + +static long vol_cdev_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int err = 0; + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + void __user *argp = (void __user *)arg; + + switch (cmd) { + /* Volume update command */ + case UBI_IOCVOLUP: + { + int64_t bytes, rsvd_bytes; + + if (!capable(CAP_SYS_RESOURCE)) { + err = -EPERM; + break; + } + + err = copy_from_user(&bytes, argp, sizeof(int64_t)); + if (err) { + err = -EFAULT; + break; + } + + if (desc->mode == UBI_READONLY) { + err = -EROFS; + break; + } + + rsvd_bytes = (long long)vol->reserved_pebs * + ubi->leb_size-vol->data_pad; + if (bytes < 0 || bytes > rsvd_bytes) { + err = -EINVAL; + break; + } + + err = get_exclusive(desc); + if (err < 0) + break; + + err = ubi_start_update(ubi, vol, bytes); + if (bytes == 0) { + ubi_volume_notify(ubi, vol, UBI_VOLUME_UPDATED); + revoke_exclusive(desc, UBI_READWRITE); + } + break; + } + + /* Atomic logical eraseblock change command */ + case UBI_IOCEBCH: + { + struct ubi_leb_change_req req; + + err = copy_from_user(&req, argp, + sizeof(struct ubi_leb_change_req)); + if (err) { + err = -EFAULT; + break; + } + + if (desc->mode == UBI_READONLY || + vol->vol_type == UBI_STATIC_VOLUME) { + err = -EROFS; + break; + } + + /* Validate the request */ + err = -EINVAL; + if (req.lnum < 0 || req.lnum >= vol->reserved_pebs || + req.bytes < 0 || req.bytes > vol->usable_leb_size) + break; + + err = get_exclusive(desc); + if (err < 0) + break; + + err = ubi_start_leb_change(ubi, vol, &req); + if (req.bytes == 0) + revoke_exclusive(desc, UBI_READWRITE); + break; + } + + /* Logical eraseblock erasure command */ + case UBI_IOCEBER: + { + int32_t lnum; + + err = get_user(lnum, (__user int32_t *)argp); + if (err) { + err = -EFAULT; + break; + } + + if (desc->mode == UBI_READONLY || + vol->vol_type == UBI_STATIC_VOLUME) { + err = -EROFS; + break; + } + + if (lnum < 0 || lnum >= vol->reserved_pebs) { + err = -EINVAL; + break; + } + + dbg_gen("erase LEB %d:%d", vol->vol_id, lnum); + err = ubi_eba_unmap_leb(ubi, vol, lnum); + if (err) + break; + + err = ubi_wl_flush(ubi, UBI_ALL, UBI_ALL); + break; + } + + /* Logical eraseblock map command */ + case UBI_IOCEBMAP: + { + struct ubi_map_req req; + + err = copy_from_user(&req, argp, sizeof(struct ubi_map_req)); + if (err) { + err = -EFAULT; + break; + } + err = ubi_leb_map(desc, req.lnum); + break; + } + + /* Logical eraseblock un-map command */ + case UBI_IOCEBUNMAP: + { + int32_t lnum; + + err = get_user(lnum, (__user int32_t *)argp); + if (err) { + err = -EFAULT; + break; + } + err = ubi_leb_unmap(desc, lnum); + break; + } + + /* Check if logical eraseblock is mapped command */ + case UBI_IOCEBISMAP: + { + int32_t lnum; + + err = get_user(lnum, (__user int32_t *)argp); + if (err) { + err = -EFAULT; + break; + } + err = ubi_is_mapped(desc, lnum); + break; + } + + /* Set volume property command */ + case UBI_IOCSETVOLPROP: + { + struct ubi_set_vol_prop_req req; + + err = copy_from_user(&req, argp, + sizeof(struct ubi_set_vol_prop_req)); + if (err) { + err = -EFAULT; + break; + } + switch (req.property) { + case UBI_VOL_PROP_DIRECT_WRITE: + mutex_lock(&ubi->device_mutex); + desc->vol->direct_writes = !!req.value; + mutex_unlock(&ubi->device_mutex); + break; + default: + err = -EINVAL; + break; + } + break; + } + + /* Create a R/O block device on top of the UBI volume */ + case UBI_IOCVOLCRBLK: + { + struct ubi_volume_info vi; + + ubi_get_volume_info(desc, &vi); + err = ubiblock_create(&vi); + break; + } + + /* Remove the R/O block device */ + case UBI_IOCVOLRMBLK: + { + struct ubi_volume_info vi; + + ubi_get_volume_info(desc, &vi); + err = ubiblock_remove(&vi); + break; + } + + default: + err = -ENOTTY; + break; + } + return err; +} + +/** + * verify_mkvol_req - verify volume creation request. + * @ubi: UBI device description object + * @req: the request to check + * + * This function zero if the request is correct, and %-EINVAL if not. + */ +static int verify_mkvol_req(const struct ubi_device *ubi, + const struct ubi_mkvol_req *req) +{ + int n, err = -EINVAL; + + if (req->bytes < 0 || req->alignment < 0 || req->vol_type < 0 || + req->name_len < 0) + goto bad; + + if ((req->vol_id < 0 || req->vol_id >= ubi->vtbl_slots) && + req->vol_id != UBI_VOL_NUM_AUTO) + goto bad; + + if (req->alignment == 0) + goto bad; + + if (req->bytes == 0) + goto bad; + + if (req->vol_type != UBI_DYNAMIC_VOLUME && + req->vol_type != UBI_STATIC_VOLUME) + goto bad; + + if (req->alignment > ubi->leb_size) + goto bad; + + n = req->alignment & (ubi->min_io_size - 1); + if (req->alignment != 1 && n) + goto bad; + + if (!req->name[0] || !req->name_len) + goto bad; + + if (req->name_len > UBI_VOL_NAME_MAX) { + err = -ENAMETOOLONG; + goto bad; + } + + n = strnlen(req->name, req->name_len + 1); + if (n != req->name_len) + goto bad; + + return 0; + +bad: + ubi_err(ubi, "bad volume creation request"); + ubi_dump_mkvol_req(req); + return err; +} + +/** + * verify_rsvol_req - verify volume re-size request. + * @ubi: UBI device description object + * @req: the request to check + * + * This function returns zero if the request is correct, and %-EINVAL if not. + */ +static int verify_rsvol_req(const struct ubi_device *ubi, + const struct ubi_rsvol_req *req) +{ + if (req->bytes <= 0) + return -EINVAL; + + if (req->vol_id < 0 || req->vol_id >= ubi->vtbl_slots) + return -EINVAL; + + return 0; +} + +/** + * rename_volumes - rename UBI volumes. + * @ubi: UBI device description object + * @req: volumes re-name request + * + * This is a helper function for the volume re-name IOCTL which validates the + * the request, opens the volume and calls corresponding volumes management + * function. Returns zero in case of success and a negative error code in case + * of failure. + */ +static int rename_volumes(struct ubi_device *ubi, + struct ubi_rnvol_req *req) +{ + int i, n, err; + struct list_head rename_list; + struct ubi_rename_entry *re, *re1; + + if (req->count < 0 || req->count > UBI_MAX_RNVOL) + return -EINVAL; + + if (req->count == 0) + return 0; + + /* Validate volume IDs and names in the request */ + for (i = 0; i < req->count; i++) { + if (req->ents[i].vol_id < 0 || + req->ents[i].vol_id >= ubi->vtbl_slots) + return -EINVAL; + if (req->ents[i].name_len < 0) + return -EINVAL; + if (req->ents[i].name_len > UBI_VOL_NAME_MAX) + return -ENAMETOOLONG; + req->ents[i].name[req->ents[i].name_len] = '\0'; + n = strlen(req->ents[i].name); + if (n != req->ents[i].name_len) + return -EINVAL; + } + + /* Make sure volume IDs and names are unique */ + for (i = 0; i < req->count - 1; i++) { + for (n = i + 1; n < req->count; n++) { + if (req->ents[i].vol_id == req->ents[n].vol_id) { + ubi_err(ubi, "duplicated volume id %d", + req->ents[i].vol_id); + return -EINVAL; + } + if (!strcmp(req->ents[i].name, req->ents[n].name)) { + ubi_err(ubi, "duplicated volume name \"%s\"", + req->ents[i].name); + return -EINVAL; + } + } + } + + /* Create the re-name list */ + INIT_LIST_HEAD(&rename_list); + for (i = 0; i < req->count; i++) { + int vol_id = req->ents[i].vol_id; + int name_len = req->ents[i].name_len; + const char *name = req->ents[i].name; + + re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL); + if (!re) { + err = -ENOMEM; + goto out_free; + } + + re->desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_METAONLY); + if (IS_ERR(re->desc)) { + err = PTR_ERR(re->desc); + ubi_err(ubi, "cannot open volume %d, error %d", + vol_id, err); + kfree(re); + goto out_free; + } + + /* Skip this re-naming if the name does not really change */ + if (re->desc->vol->name_len == name_len && + !memcmp(re->desc->vol->name, name, name_len)) { + ubi_close_volume(re->desc); + kfree(re); + continue; + } + + re->new_name_len = name_len; + memcpy(re->new_name, name, name_len); + list_add_tail(&re->list, &rename_list); + dbg_gen("will rename volume %d from \"%s\" to \"%s\"", + vol_id, re->desc->vol->name, name); + } + + if (list_empty(&rename_list)) + return 0; + + /* Find out the volumes which have to be removed */ + list_for_each_entry(re, &rename_list, list) { + struct ubi_volume_desc *desc; + int no_remove_needed = 0; + + /* + * Volume @re->vol_id is going to be re-named to + * @re->new_name, while its current name is @name. If a volume + * with name @re->new_name currently exists, it has to be + * removed, unless it is also re-named in the request (@req). + */ + list_for_each_entry(re1, &rename_list, list) { + if (re->new_name_len == re1->desc->vol->name_len && + !memcmp(re->new_name, re1->desc->vol->name, + re1->desc->vol->name_len)) { + no_remove_needed = 1; + break; + } + } + + if (no_remove_needed) + continue; + + /* + * It seems we need to remove volume with name @re->new_name, + * if it exists. + */ + desc = ubi_open_volume_nm(ubi->ubi_num, re->new_name, + UBI_EXCLUSIVE); + if (IS_ERR(desc)) { + err = PTR_ERR(desc); + if (err == -ENODEV) + /* Re-naming into a non-existing volume name */ + continue; + + /* The volume exists but busy, or an error occurred */ + ubi_err(ubi, "cannot open volume \"%s\", error %d", + re->new_name, err); + goto out_free; + } + + re1 = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL); + if (!re1) { + err = -ENOMEM; + ubi_close_volume(desc); + goto out_free; + } + + re1->remove = 1; + re1->desc = desc; + list_add(&re1->list, &rename_list); + dbg_gen("will remove volume %d, name \"%s\"", + re1->desc->vol->vol_id, re1->desc->vol->name); + } + + mutex_lock(&ubi->device_mutex); + err = ubi_rename_volumes(ubi, &rename_list); + mutex_unlock(&ubi->device_mutex); + +out_free: + list_for_each_entry_safe(re, re1, &rename_list, list) { + ubi_close_volume(re->desc); + list_del(&re->list); + kfree(re); + } + return err; +} + +static long ubi_cdev_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int err = 0; + struct ubi_device *ubi; + struct ubi_volume_desc *desc; + void __user *argp = (void __user *)arg; + + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + + ubi = ubi_get_by_major(imajor(file->f_mapping->host)); + if (!ubi) + return -ENODEV; + + switch (cmd) { + /* Create volume command */ + case UBI_IOCMKVOL: + { + struct ubi_mkvol_req req; + + dbg_gen("create volume"); + err = copy_from_user(&req, argp, sizeof(struct ubi_mkvol_req)); + if (err) { + err = -EFAULT; + break; + } + + err = verify_mkvol_req(ubi, &req); + if (err) + break; + + mutex_lock(&ubi->device_mutex); + err = ubi_create_volume(ubi, &req); + mutex_unlock(&ubi->device_mutex); + if (err) + break; + + err = put_user(req.vol_id, (__user int32_t *)argp); + if (err) + err = -EFAULT; + + break; + } + + /* Remove volume command */ + case UBI_IOCRMVOL: + { + int vol_id; + + dbg_gen("remove volume"); + err = get_user(vol_id, (__user int32_t *)argp); + if (err) { + err = -EFAULT; + break; + } + + desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_EXCLUSIVE); + if (IS_ERR(desc)) { + err = PTR_ERR(desc); + break; + } + + mutex_lock(&ubi->device_mutex); + err = ubi_remove_volume(desc, 0); + mutex_unlock(&ubi->device_mutex); + + /* + * The volume is deleted (unless an error occurred), and the + * 'struct ubi_volume' object will be freed when + * 'ubi_close_volume()' will call 'put_device()'. + */ + ubi_close_volume(desc); + break; + } + + /* Re-size volume command */ + case UBI_IOCRSVOL: + { + int pebs; + struct ubi_rsvol_req req; + + dbg_gen("re-size volume"); + err = copy_from_user(&req, argp, sizeof(struct ubi_rsvol_req)); + if (err) { + err = -EFAULT; + break; + } + + err = verify_rsvol_req(ubi, &req); + if (err) + break; + + desc = ubi_open_volume(ubi->ubi_num, req.vol_id, UBI_EXCLUSIVE); + if (IS_ERR(desc)) { + err = PTR_ERR(desc); + break; + } + + pebs = div_u64(req.bytes + desc->vol->usable_leb_size - 1, + desc->vol->usable_leb_size); + + mutex_lock(&ubi->device_mutex); + err = ubi_resize_volume(desc, pebs); + mutex_unlock(&ubi->device_mutex); + ubi_close_volume(desc); + break; + } + + /* Re-name volumes command */ + case UBI_IOCRNVOL: + { + struct ubi_rnvol_req *req; + + dbg_gen("re-name volumes"); + req = kmalloc(sizeof(struct ubi_rnvol_req), GFP_KERNEL); + if (!req) { + err = -ENOMEM; + break; + }; + + err = copy_from_user(req, argp, sizeof(struct ubi_rnvol_req)); + if (err) { + err = -EFAULT; + kfree(req); + break; + } + + err = rename_volumes(ubi, req); + kfree(req); + break; + } + + default: + err = -ENOTTY; + break; + } + + ubi_put_device(ubi); + return err; +} + +static long ctrl_cdev_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int err = 0; + void __user *argp = (void __user *)arg; + + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + + switch (cmd) { + /* Attach an MTD device command */ + case UBI_IOCATT: + { + struct ubi_attach_req req; + struct mtd_info *mtd; + + dbg_gen("attach MTD device"); + err = copy_from_user(&req, argp, sizeof(struct ubi_attach_req)); + if (err) { + err = -EFAULT; + break; + } + + if (req.mtd_num < 0 || + (req.ubi_num < 0 && req.ubi_num != UBI_DEV_NUM_AUTO)) { + err = -EINVAL; + break; + } + + mtd = get_mtd_device(NULL, req.mtd_num); + if (IS_ERR(mtd)) { + err = PTR_ERR(mtd); + break; + } + + /* + * Note, further request verification is done by + * 'ubi_attach_mtd_dev()'. + */ + mutex_lock(&ubi_devices_mutex); + err = ubi_attach_mtd_dev(mtd, req.ubi_num, req.vid_hdr_offset, + req.max_beb_per1024); + mutex_unlock(&ubi_devices_mutex); + if (err < 0) + put_mtd_device(mtd); + else + /* @err contains UBI device number */ + err = put_user(err, (__user int32_t *)argp); + + break; + } + + /* Detach an MTD device command */ + case UBI_IOCDET: + { + int ubi_num; + + dbg_gen("detach MTD device"); + err = get_user(ubi_num, (__user int32_t *)argp); + if (err) { + err = -EFAULT; + break; + } + + mutex_lock(&ubi_devices_mutex); + err = ubi_detach_mtd_dev(ubi_num, 0); + mutex_unlock(&ubi_devices_mutex); + break; + } + + default: + err = -ENOTTY; + break; + } + + return err; +} + +#ifdef CONFIG_COMPAT +static long vol_cdev_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + unsigned long translated_arg = (unsigned long)compat_ptr(arg); + + return vol_cdev_ioctl(file, cmd, translated_arg); +} + +static long ubi_cdev_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + unsigned long translated_arg = (unsigned long)compat_ptr(arg); + + return ubi_cdev_ioctl(file, cmd, translated_arg); +} + +static long ctrl_cdev_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + unsigned long translated_arg = (unsigned long)compat_ptr(arg); + + return ctrl_cdev_ioctl(file, cmd, translated_arg); +} +#else +#define vol_cdev_compat_ioctl NULL +#define ubi_cdev_compat_ioctl NULL +#define ctrl_cdev_compat_ioctl NULL +#endif + +/* UBI volume character device operations */ +const struct file_operations ubi_vol_cdev_operations = { + .owner = THIS_MODULE, + .open = vol_cdev_open, + .release = vol_cdev_release, + .llseek = vol_cdev_llseek, + .read = vol_cdev_read, + .write = vol_cdev_write, + .fsync = vol_cdev_fsync, + .unlocked_ioctl = vol_cdev_ioctl, + .compat_ioctl = vol_cdev_compat_ioctl, +}; + +/* UBI character device operations */ +const struct file_operations ubi_cdev_operations = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .unlocked_ioctl = ubi_cdev_ioctl, + .compat_ioctl = ubi_cdev_compat_ioctl, +}; + +/* UBI control character device operations */ +const struct file_operations ubi_ctrl_cdev_operations = { + .owner = THIS_MODULE, + .unlocked_ioctl = ctrl_cdev_ioctl, + .compat_ioctl = ctrl_cdev_compat_ioctl, + .llseek = no_llseek, +}; diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c new file mode 100644 index 000000000..b077e43b5 --- /dev/null +++ b/drivers/mtd/ubi/debug.c @@ -0,0 +1,545 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +#include "ubi.h" +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/module.h> + + +/** + * ubi_dump_flash - dump a region of flash. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to dump + * @offset: the starting offset within the physical eraseblock to dump + * @len: the length of the region to dump + */ +void ubi_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len) +{ + int err; + size_t read; + void *buf; + loff_t addr = (loff_t)pnum * ubi->peb_size + offset; + + buf = vmalloc(len); + if (!buf) + return; + err = mtd_read(ubi->mtd, addr, len, &read, buf); + if (err && err != -EUCLEAN) { + ubi_err(ubi, "err %d while reading %d bytes from PEB %d:%d, read %zd bytes", + err, len, pnum, offset, read); + goto out; + } + + ubi_msg(ubi, "dumping %d bytes of data from PEB %d, offset %d", + len, pnum, offset); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf, len, 1); +out: + vfree(buf); + return; +} + +/** + * ubi_dump_ec_hdr - dump an erase counter header. + * @ec_hdr: the erase counter header to dump + */ +void ubi_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) +{ + pr_err("Erase counter header dump:\n"); + pr_err("\tmagic %#08x\n", be32_to_cpu(ec_hdr->magic)); + pr_err("\tversion %d\n", (int)ec_hdr->version); + pr_err("\tec %llu\n", (long long)be64_to_cpu(ec_hdr->ec)); + pr_err("\tvid_hdr_offset %d\n", be32_to_cpu(ec_hdr->vid_hdr_offset)); + pr_err("\tdata_offset %d\n", be32_to_cpu(ec_hdr->data_offset)); + pr_err("\timage_seq %d\n", be32_to_cpu(ec_hdr->image_seq)); + pr_err("\thdr_crc %#08x\n", be32_to_cpu(ec_hdr->hdr_crc)); + pr_err("erase counter header hexdump:\n"); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, + ec_hdr, UBI_EC_HDR_SIZE, 1); +} + +/** + * ubi_dump_vid_hdr - dump a volume identifier header. + * @vid_hdr: the volume identifier header to dump + */ +void ubi_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) +{ + pr_err("Volume identifier header dump:\n"); + pr_err("\tmagic %08x\n", be32_to_cpu(vid_hdr->magic)); + pr_err("\tversion %d\n", (int)vid_hdr->version); + pr_err("\tvol_type %d\n", (int)vid_hdr->vol_type); + pr_err("\tcopy_flag %d\n", (int)vid_hdr->copy_flag); + pr_err("\tcompat %d\n", (int)vid_hdr->compat); + pr_err("\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id)); + pr_err("\tlnum %d\n", be32_to_cpu(vid_hdr->lnum)); + pr_err("\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size)); + pr_err("\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs)); + pr_err("\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad)); + pr_err("\tsqnum %llu\n", + (unsigned long long)be64_to_cpu(vid_hdr->sqnum)); + pr_err("\thdr_crc %08x\n", be32_to_cpu(vid_hdr->hdr_crc)); + pr_err("Volume identifier header hexdump:\n"); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, + vid_hdr, UBI_VID_HDR_SIZE, 1); +} + +/** + * ubi_dump_vol_info - dump volume information. + * @vol: UBI volume description object + */ +void ubi_dump_vol_info(const struct ubi_volume *vol) +{ + pr_err("Volume information dump:\n"); + pr_err("\tvol_id %d\n", vol->vol_id); + pr_err("\treserved_pebs %d\n", vol->reserved_pebs); + pr_err("\talignment %d\n", vol->alignment); + pr_err("\tdata_pad %d\n", vol->data_pad); + pr_err("\tvol_type %d\n", vol->vol_type); + pr_err("\tname_len %d\n", vol->name_len); + pr_err("\tusable_leb_size %d\n", vol->usable_leb_size); + pr_err("\tused_ebs %d\n", vol->used_ebs); + pr_err("\tused_bytes %lld\n", vol->used_bytes); + pr_err("\tlast_eb_bytes %d\n", vol->last_eb_bytes); + pr_err("\tcorrupted %d\n", vol->corrupted); + pr_err("\tupd_marker %d\n", vol->upd_marker); + + if (vol->name_len <= UBI_VOL_NAME_MAX && + strnlen(vol->name, vol->name_len + 1) == vol->name_len) { + pr_err("\tname %s\n", vol->name); + } else { + pr_err("\t1st 5 characters of name: %c%c%c%c%c\n", + vol->name[0], vol->name[1], vol->name[2], + vol->name[3], vol->name[4]); + } +} + +/** + * ubi_dump_vtbl_record - dump a &struct ubi_vtbl_record object. + * @r: the object to dump + * @idx: volume table index + */ +void ubi_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx) +{ + int name_len = be16_to_cpu(r->name_len); + + pr_err("Volume table record %d dump:\n", idx); + pr_err("\treserved_pebs %d\n", be32_to_cpu(r->reserved_pebs)); + pr_err("\talignment %d\n", be32_to_cpu(r->alignment)); + pr_err("\tdata_pad %d\n", be32_to_cpu(r->data_pad)); + pr_err("\tvol_type %d\n", (int)r->vol_type); + pr_err("\tupd_marker %d\n", (int)r->upd_marker); + pr_err("\tname_len %d\n", name_len); + + if (r->name[0] == '\0') { + pr_err("\tname NULL\n"); + return; + } + + if (name_len <= UBI_VOL_NAME_MAX && + strnlen(&r->name[0], name_len + 1) == name_len) { + pr_err("\tname %s\n", &r->name[0]); + } else { + pr_err("\t1st 5 characters of name: %c%c%c%c%c\n", + r->name[0], r->name[1], r->name[2], r->name[3], + r->name[4]); + } + pr_err("\tcrc %#08x\n", be32_to_cpu(r->crc)); +} + +/** + * ubi_dump_av - dump a &struct ubi_ainf_volume object. + * @av: the object to dump + */ +void ubi_dump_av(const struct ubi_ainf_volume *av) +{ + pr_err("Volume attaching information dump:\n"); + pr_err("\tvol_id %d\n", av->vol_id); + pr_err("\thighest_lnum %d\n", av->highest_lnum); + pr_err("\tleb_count %d\n", av->leb_count); + pr_err("\tcompat %d\n", av->compat); + pr_err("\tvol_type %d\n", av->vol_type); + pr_err("\tused_ebs %d\n", av->used_ebs); + pr_err("\tlast_data_size %d\n", av->last_data_size); + pr_err("\tdata_pad %d\n", av->data_pad); +} + +/** + * ubi_dump_aeb - dump a &struct ubi_ainf_peb object. + * @aeb: the object to dump + * @type: object type: 0 - not corrupted, 1 - corrupted + */ +void ubi_dump_aeb(const struct ubi_ainf_peb *aeb, int type) +{ + pr_err("eraseblock attaching information dump:\n"); + pr_err("\tec %d\n", aeb->ec); + pr_err("\tpnum %d\n", aeb->pnum); + if (type == 0) { + pr_err("\tlnum %d\n", aeb->lnum); + pr_err("\tscrub %d\n", aeb->scrub); + pr_err("\tsqnum %llu\n", aeb->sqnum); + } +} + +/** + * ubi_dump_mkvol_req - dump a &struct ubi_mkvol_req object. + * @req: the object to dump + */ +void ubi_dump_mkvol_req(const struct ubi_mkvol_req *req) +{ + char nm[17]; + + pr_err("Volume creation request dump:\n"); + pr_err("\tvol_id %d\n", req->vol_id); + pr_err("\talignment %d\n", req->alignment); + pr_err("\tbytes %lld\n", (long long)req->bytes); + pr_err("\tvol_type %d\n", req->vol_type); + pr_err("\tname_len %d\n", req->name_len); + + memcpy(nm, req->name, 16); + nm[16] = 0; + pr_err("\t1st 16 characters of name: %s\n", nm); +} + +/* + * Root directory for UBI stuff in debugfs. Contains sub-directories which + * contain the stuff specific to particular UBI devices. + */ +static struct dentry *dfs_rootdir; + +/** + * ubi_debugfs_init - create UBI debugfs directory. + * + * Create UBI debugfs directory. Returns zero in case of success and a negative + * error code in case of failure. + */ +int ubi_debugfs_init(void) +{ + if (!IS_ENABLED(CONFIG_DEBUG_FS)) + return 0; + + dfs_rootdir = debugfs_create_dir("ubi", NULL); + if (IS_ERR_OR_NULL(dfs_rootdir)) { + int err = dfs_rootdir ? -ENODEV : PTR_ERR(dfs_rootdir); + + pr_err("UBI error: cannot create \"ubi\" debugfs directory, error %d\n", + err); + return err; + } + + return 0; +} + +/** + * ubi_debugfs_exit - remove UBI debugfs directory. + */ +void ubi_debugfs_exit(void) +{ + if (IS_ENABLED(CONFIG_DEBUG_FS)) + debugfs_remove(dfs_rootdir); +} + +/* Read an UBI debugfs file */ +static ssize_t dfs_file_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + unsigned long ubi_num = (unsigned long)file->private_data; + struct dentry *dent = file->f_path.dentry; + struct ubi_device *ubi; + struct ubi_debug_info *d; + char buf[8]; + int val; + + ubi = ubi_get_device(ubi_num); + if (!ubi) + return -ENODEV; + d = &ubi->dbg; + + if (dent == d->dfs_chk_gen) + val = d->chk_gen; + else if (dent == d->dfs_chk_io) + val = d->chk_io; + else if (dent == d->dfs_chk_fastmap) + val = d->chk_fastmap; + else if (dent == d->dfs_disable_bgt) + val = d->disable_bgt; + else if (dent == d->dfs_emulate_bitflips) + val = d->emulate_bitflips; + else if (dent == d->dfs_emulate_io_failures) + val = d->emulate_io_failures; + else if (dent == d->dfs_emulate_power_cut) { + snprintf(buf, sizeof(buf), "%u\n", d->emulate_power_cut); + count = simple_read_from_buffer(user_buf, count, ppos, + buf, strlen(buf)); + goto out; + } else if (dent == d->dfs_power_cut_min) { + snprintf(buf, sizeof(buf), "%u\n", d->power_cut_min); + count = simple_read_from_buffer(user_buf, count, ppos, + buf, strlen(buf)); + goto out; + } else if (dent == d->dfs_power_cut_max) { + snprintf(buf, sizeof(buf), "%u\n", d->power_cut_max); + count = simple_read_from_buffer(user_buf, count, ppos, + buf, strlen(buf)); + goto out; + } + else { + count = -EINVAL; + goto out; + } + + if (val) + buf[0] = '1'; + else + buf[0] = '0'; + buf[1] = '\n'; + buf[2] = 0x00; + + count = simple_read_from_buffer(user_buf, count, ppos, buf, 2); + +out: + ubi_put_device(ubi); + return count; +} + +/* Write an UBI debugfs file */ +static ssize_t dfs_file_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + unsigned long ubi_num = (unsigned long)file->private_data; + struct dentry *dent = file->f_path.dentry; + struct ubi_device *ubi; + struct ubi_debug_info *d; + size_t buf_size; + char buf[8] = {0}; + int val; + + ubi = ubi_get_device(ubi_num); + if (!ubi) + return -ENODEV; + d = &ubi->dbg; + + buf_size = min_t(size_t, count, (sizeof(buf) - 1)); + if (copy_from_user(buf, user_buf, buf_size)) { + count = -EFAULT; + goto out; + } + + if (dent == d->dfs_power_cut_min) { + if (kstrtouint(buf, 0, &d->power_cut_min) != 0) + count = -EINVAL; + goto out; + } else if (dent == d->dfs_power_cut_max) { + if (kstrtouint(buf, 0, &d->power_cut_max) != 0) + count = -EINVAL; + goto out; + } else if (dent == d->dfs_emulate_power_cut) { + if (kstrtoint(buf, 0, &val) != 0) + count = -EINVAL; + d->emulate_power_cut = val; + goto out; + } + + if (buf[0] == '1') + val = 1; + else if (buf[0] == '0') + val = 0; + else { + count = -EINVAL; + goto out; + } + + if (dent == d->dfs_chk_gen) + d->chk_gen = val; + else if (dent == d->dfs_chk_io) + d->chk_io = val; + else if (dent == d->dfs_chk_fastmap) + d->chk_fastmap = val; + else if (dent == d->dfs_disable_bgt) + d->disable_bgt = val; + else if (dent == d->dfs_emulate_bitflips) + d->emulate_bitflips = val; + else if (dent == d->dfs_emulate_io_failures) + d->emulate_io_failures = val; + else + count = -EINVAL; + +out: + ubi_put_device(ubi); + return count; +} + +/* File operations for all UBI debugfs files */ +static const struct file_operations dfs_fops = { + .read = dfs_file_read, + .write = dfs_file_write, + .open = simple_open, + .llseek = no_llseek, + .owner = THIS_MODULE, +}; + +/** + * ubi_debugfs_init_dev - initialize debugfs for an UBI device. + * @ubi: UBI device description object + * + * This function creates all debugfs files for UBI device @ubi. Returns zero in + * case of success and a negative error code in case of failure. + */ +int ubi_debugfs_init_dev(struct ubi_device *ubi) +{ + int err, n; + unsigned long ubi_num = ubi->ubi_num; + const char *fname; + struct dentry *dent; + struct ubi_debug_info *d = &ubi->dbg; + + if (!IS_ENABLED(CONFIG_DEBUG_FS)) + return 0; + + n = snprintf(d->dfs_dir_name, UBI_DFS_DIR_LEN + 1, UBI_DFS_DIR_NAME, + ubi->ubi_num); + if (n == UBI_DFS_DIR_LEN) { + /* The array size is too small */ + fname = UBI_DFS_DIR_NAME; + dent = ERR_PTR(-EINVAL); + goto out; + } + + fname = d->dfs_dir_name; + dent = debugfs_create_dir(fname, dfs_rootdir); + if (IS_ERR_OR_NULL(dent)) + goto out; + d->dfs_dir = dent; + + fname = "chk_gen"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_gen = dent; + + fname = "chk_io"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_io = dent; + + fname = "chk_fastmap"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_fastmap = dent; + + fname = "tst_disable_bgt"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_disable_bgt = dent; + + fname = "tst_emulate_bitflips"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_emulate_bitflips = dent; + + fname = "tst_emulate_io_failures"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_emulate_io_failures = dent; + + fname = "tst_emulate_power_cut"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_emulate_power_cut = dent; + + fname = "tst_emulate_power_cut_min"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_power_cut_min = dent; + + fname = "tst_emulate_power_cut_max"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_power_cut_max = dent; + + return 0; + +out_remove: + debugfs_remove_recursive(d->dfs_dir); +out: + err = dent ? PTR_ERR(dent) : -ENODEV; + ubi_err(ubi, "cannot create \"%s\" debugfs file or directory, error %d\n", + fname, err); + return err; +} + +/** + * dbg_debug_exit_dev - free all debugfs files corresponding to device @ubi + * @ubi: UBI device description object + */ +void ubi_debugfs_exit_dev(struct ubi_device *ubi) +{ + if (IS_ENABLED(CONFIG_DEBUG_FS)) + debugfs_remove_recursive(ubi->dbg.dfs_dir); +} + +/** + * ubi_dbg_power_cut - emulate a power cut if it is time to do so + * @ubi: UBI device description object + * @caller: Flags set to indicate from where the function is being called + * + * Returns non-zero if a power cut was emulated, zero if not. + */ +int ubi_dbg_power_cut(struct ubi_device *ubi, int caller) +{ + unsigned int range; + + if ((ubi->dbg.emulate_power_cut & caller) == 0) + return 0; + + if (ubi->dbg.power_cut_counter == 0) { + ubi->dbg.power_cut_counter = ubi->dbg.power_cut_min; + + if (ubi->dbg.power_cut_max > ubi->dbg.power_cut_min) { + range = ubi->dbg.power_cut_max - ubi->dbg.power_cut_min; + ubi->dbg.power_cut_counter += prandom_u32() % range; + } + return 0; + } + + ubi->dbg.power_cut_counter--; + if (ubi->dbg.power_cut_counter) + return 0; + + ubi_msg(ubi, "XXXXXXXXXXXXXXX emulating a power cut XXXXXXXXXXXXXXXX"); + ubi_ro_mode(ubi); + return 1; +} diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h new file mode 100644 index 000000000..eb8985e5c --- /dev/null +++ b/drivers/mtd/ubi/debug.h @@ -0,0 +1,142 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +#ifndef __UBI_DEBUG_H__ +#define __UBI_DEBUG_H__ + +void ubi_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len); +void ubi_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr); +void ubi_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr); + +#include <linux/random.h> + +#define ubi_assert(expr) do { \ + if (unlikely(!(expr))) { \ + pr_crit("UBI assert failed in %s at %u (pid %d)\n", \ + __func__, __LINE__, current->pid); \ + dump_stack(); \ + } \ +} while (0) + +#define ubi_dbg_print_hex_dump(l, ps, pt, r, g, b, len, a) \ + print_hex_dump(l, ps, pt, r, g, b, len, a) + +#define ubi_dbg_msg(type, fmt, ...) \ + pr_debug("UBI DBG " type " (pid %d): " fmt "\n", current->pid, \ + ##__VA_ARGS__) + +/* General debugging messages */ +#define dbg_gen(fmt, ...) ubi_dbg_msg("gen", fmt, ##__VA_ARGS__) +/* Messages from the eraseblock association sub-system */ +#define dbg_eba(fmt, ...) ubi_dbg_msg("eba", fmt, ##__VA_ARGS__) +/* Messages from the wear-leveling sub-system */ +#define dbg_wl(fmt, ...) ubi_dbg_msg("wl", fmt, ##__VA_ARGS__) +/* Messages from the input/output sub-system */ +#define dbg_io(fmt, ...) ubi_dbg_msg("io", fmt, ##__VA_ARGS__) +/* Initialization and build messages */ +#define dbg_bld(fmt, ...) ubi_dbg_msg("bld", fmt, ##__VA_ARGS__) + +void ubi_dump_vol_info(const struct ubi_volume *vol); +void ubi_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx); +void ubi_dump_av(const struct ubi_ainf_volume *av); +void ubi_dump_aeb(const struct ubi_ainf_peb *aeb, int type); +void ubi_dump_mkvol_req(const struct ubi_mkvol_req *req); +int ubi_self_check_all_ff(struct ubi_device *ubi, int pnum, int offset, + int len); +int ubi_debugfs_init(void); +void ubi_debugfs_exit(void); +int ubi_debugfs_init_dev(struct ubi_device *ubi); +void ubi_debugfs_exit_dev(struct ubi_device *ubi); + +/** + * ubi_dbg_is_bgt_disabled - if the background thread is disabled. + * @ubi: UBI device description object + * + * Returns non-zero if the UBI background thread is disabled for testing + * purposes. + */ +static inline int ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi) +{ + return ubi->dbg.disable_bgt; +} + +/** + * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip. + * @ubi: UBI device description object + * + * Returns non-zero if a bit-flip should be emulated, otherwise returns zero. + */ +static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) +{ + if (ubi->dbg.emulate_bitflips) + return !(prandom_u32() % 200); + return 0; +} + +/** + * ubi_dbg_is_write_failure - if it is time to emulate a write failure. + * @ubi: UBI device description object + * + * Returns non-zero if a write failure should be emulated, otherwise returns + * zero. + */ +static inline int ubi_dbg_is_write_failure(const struct ubi_device *ubi) +{ + if (ubi->dbg.emulate_io_failures) + return !(prandom_u32() % 500); + return 0; +} + +/** + * ubi_dbg_is_erase_failure - if its time to emulate an erase failure. + * @ubi: UBI device description object + * + * Returns non-zero if an erase failure should be emulated, otherwise returns + * zero. + */ +static inline int ubi_dbg_is_erase_failure(const struct ubi_device *ubi) +{ + if (ubi->dbg.emulate_io_failures) + return !(prandom_u32() % 400); + return 0; +} + +static inline int ubi_dbg_chk_io(const struct ubi_device *ubi) +{ + return ubi->dbg.chk_io; +} + +static inline int ubi_dbg_chk_gen(const struct ubi_device *ubi) +{ + return ubi->dbg.chk_gen; +} + +static inline int ubi_dbg_chk_fastmap(const struct ubi_device *ubi) +{ + return ubi->dbg.chk_fastmap; +} + +static inline void ubi_enable_dbg_chk_fastmap(struct ubi_device *ubi) +{ + ubi->dbg.chk_fastmap = 1; +} + +int ubi_dbg_power_cut(struct ubi_device *ubi, int caller); +#endif /* !__UBI_DEBUG_H__ */ diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c new file mode 100644 index 000000000..51bca035c --- /dev/null +++ b/drivers/mtd/ubi/eba.c @@ -0,0 +1,1477 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * The UBI Eraseblock Association (EBA) sub-system. + * + * This sub-system is responsible for I/O to/from logical eraseblock. + * + * Although in this implementation the EBA table is fully kept and managed in + * RAM, which assumes poor scalability, it might be (partially) maintained on + * flash in future implementations. + * + * The EBA sub-system implements per-logical eraseblock locking. Before + * accessing a logical eraseblock it is locked for reading or writing. The + * per-logical eraseblock locking is implemented by means of the lock tree. The + * lock tree is an RB-tree which refers all the currently locked logical + * eraseblocks. The lock tree elements are &struct ubi_ltree_entry objects. + * They are indexed by (@vol_id, @lnum) pairs. + * + * EBA also maintains the global sequence counter which is incremented each + * time a logical eraseblock is mapped to a physical eraseblock and it is + * stored in the volume identifier header. This means that each VID header has + * a unique sequence number. The sequence number is only increased an we assume + * 64 bits is enough to never overflow. + */ + +#include <linux/slab.h> +#include <linux/crc32.h> +#include <linux/err.h> +#include "ubi.h" + +/* Number of physical eraseblocks reserved for atomic LEB change operation */ +#define EBA_RESERVED_PEBS 1 + +/** + * next_sqnum - get next sequence number. + * @ubi: UBI device description object + * + * This function returns next sequence number to use, which is just the current + * global sequence counter value. It also increases the global sequence + * counter. + */ +unsigned long long ubi_next_sqnum(struct ubi_device *ubi) +{ + unsigned long long sqnum; + + spin_lock(&ubi->ltree_lock); + sqnum = ubi->global_sqnum++; + spin_unlock(&ubi->ltree_lock); + + return sqnum; +} + +/** + * ubi_get_compat - get compatibility flags of a volume. + * @ubi: UBI device description object + * @vol_id: volume ID + * + * This function returns compatibility flags for an internal volume. User + * volumes have no compatibility flags, so %0 is returned. + */ +static int ubi_get_compat(const struct ubi_device *ubi, int vol_id) +{ + if (vol_id == UBI_LAYOUT_VOLUME_ID) + return UBI_LAYOUT_VOLUME_COMPAT; + return 0; +} + +/** + * ltree_lookup - look up the lock tree. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function returns a pointer to the corresponding &struct ubi_ltree_entry + * object if the logical eraseblock is locked and %NULL if it is not. + * @ubi->ltree_lock has to be locked. + */ +static struct ubi_ltree_entry *ltree_lookup(struct ubi_device *ubi, int vol_id, + int lnum) +{ + struct rb_node *p; + + p = ubi->ltree.rb_node; + while (p) { + struct ubi_ltree_entry *le; + + le = rb_entry(p, struct ubi_ltree_entry, rb); + + if (vol_id < le->vol_id) + p = p->rb_left; + else if (vol_id > le->vol_id) + p = p->rb_right; + else { + if (lnum < le->lnum) + p = p->rb_left; + else if (lnum > le->lnum) + p = p->rb_right; + else + return le; + } + } + + return NULL; +} + +/** + * ltree_add_entry - add new entry to the lock tree. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function adds new entry for logical eraseblock (@vol_id, @lnum) to the + * lock tree. If such entry is already there, its usage counter is increased. + * Returns pointer to the lock tree entry or %-ENOMEM if memory allocation + * failed. + */ +static struct ubi_ltree_entry *ltree_add_entry(struct ubi_device *ubi, + int vol_id, int lnum) +{ + struct ubi_ltree_entry *le, *le1, *le_free; + + le = kmalloc(sizeof(struct ubi_ltree_entry), GFP_NOFS); + if (!le) + return ERR_PTR(-ENOMEM); + + le->users = 0; + init_rwsem(&le->mutex); + le->vol_id = vol_id; + le->lnum = lnum; + + spin_lock(&ubi->ltree_lock); + le1 = ltree_lookup(ubi, vol_id, lnum); + + if (le1) { + /* + * This logical eraseblock is already locked. The newly + * allocated lock entry is not needed. + */ + le_free = le; + le = le1; + } else { + struct rb_node **p, *parent = NULL; + + /* + * No lock entry, add the newly allocated one to the + * @ubi->ltree RB-tree. + */ + le_free = NULL; + + p = &ubi->ltree.rb_node; + while (*p) { + parent = *p; + le1 = rb_entry(parent, struct ubi_ltree_entry, rb); + + if (vol_id < le1->vol_id) + p = &(*p)->rb_left; + else if (vol_id > le1->vol_id) + p = &(*p)->rb_right; + else { + ubi_assert(lnum != le1->lnum); + if (lnum < le1->lnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + } + + rb_link_node(&le->rb, parent, p); + rb_insert_color(&le->rb, &ubi->ltree); + } + le->users += 1; + spin_unlock(&ubi->ltree_lock); + + kfree(le_free); + return le; +} + +/** + * leb_read_lock - lock logical eraseblock for reading. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function locks a logical eraseblock for reading. Returns zero in case + * of success and a negative error code in case of failure. + */ +static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum) +{ + struct ubi_ltree_entry *le; + + le = ltree_add_entry(ubi, vol_id, lnum); + if (IS_ERR(le)) + return PTR_ERR(le); + down_read(&le->mutex); + return 0; +} + +/** + * leb_read_unlock - unlock logical eraseblock. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + */ +static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum) +{ + struct ubi_ltree_entry *le; + + spin_lock(&ubi->ltree_lock); + le = ltree_lookup(ubi, vol_id, lnum); + le->users -= 1; + ubi_assert(le->users >= 0); + up_read(&le->mutex); + if (le->users == 0) { + rb_erase(&le->rb, &ubi->ltree); + kfree(le); + } + spin_unlock(&ubi->ltree_lock); +} + +/** + * leb_write_lock - lock logical eraseblock for writing. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function locks a logical eraseblock for writing. Returns zero in case + * of success and a negative error code in case of failure. + */ +static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum) +{ + struct ubi_ltree_entry *le; + + le = ltree_add_entry(ubi, vol_id, lnum); + if (IS_ERR(le)) + return PTR_ERR(le); + down_write(&le->mutex); + return 0; +} + +/** + * leb_write_lock - lock logical eraseblock for writing. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function locks a logical eraseblock for writing if there is no + * contention and does nothing if there is contention. Returns %0 in case of + * success, %1 in case of contention, and and a negative error code in case of + * failure. + */ +static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum) +{ + struct ubi_ltree_entry *le; + + le = ltree_add_entry(ubi, vol_id, lnum); + if (IS_ERR(le)) + return PTR_ERR(le); + if (down_write_trylock(&le->mutex)) + return 0; + + /* Contention, cancel */ + spin_lock(&ubi->ltree_lock); + le->users -= 1; + ubi_assert(le->users >= 0); + if (le->users == 0) { + rb_erase(&le->rb, &ubi->ltree); + kfree(le); + } + spin_unlock(&ubi->ltree_lock); + + return 1; +} + +/** + * leb_write_unlock - unlock logical eraseblock. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + */ +static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum) +{ + struct ubi_ltree_entry *le; + + spin_lock(&ubi->ltree_lock); + le = ltree_lookup(ubi, vol_id, lnum); + le->users -= 1; + ubi_assert(le->users >= 0); + up_write(&le->mutex); + if (le->users == 0) { + rb_erase(&le->rb, &ubi->ltree); + kfree(le); + } + spin_unlock(&ubi->ltree_lock); +} + +/** + * ubi_eba_unmap_leb - un-map logical eraseblock. + * @ubi: UBI device description object + * @vol: volume description object + * @lnum: logical eraseblock number + * + * This function un-maps logical eraseblock @lnum and schedules corresponding + * physical eraseblock for erasure. Returns zero in case of success and a + * negative error code in case of failure. + */ +int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol, + int lnum) +{ + int err, pnum, vol_id = vol->vol_id; + + if (ubi->ro_mode) + return -EROFS; + + err = leb_write_lock(ubi, vol_id, lnum); + if (err) + return err; + + pnum = vol->eba_tbl[lnum]; + if (pnum < 0) + /* This logical eraseblock is already unmapped */ + goto out_unlock; + + dbg_eba("erase LEB %d:%d, PEB %d", vol_id, lnum, pnum); + + down_read(&ubi->fm_eba_sem); + vol->eba_tbl[lnum] = UBI_LEB_UNMAPPED; + up_read(&ubi->fm_eba_sem); + err = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 0); + +out_unlock: + leb_write_unlock(ubi, vol_id, lnum); + return err; +} + +/** + * ubi_eba_read_leb - read data. + * @ubi: UBI device description object + * @vol: volume description object + * @lnum: logical eraseblock number + * @buf: buffer to store the read data + * @offset: offset from where to read + * @len: how many bytes to read + * @check: data CRC check flag + * + * If the logical eraseblock @lnum is unmapped, @buf is filled with 0xFF + * bytes. The @check flag only makes sense for static volumes and forces + * eraseblock data CRC checking. + * + * In case of success this function returns zero. In case of a static volume, + * if data CRC mismatches - %-EBADMSG is returned. %-EBADMSG may also be + * returned for any volume type if an ECC error was detected by the MTD device + * driver. Other negative error cored may be returned in case of other errors. + */ +int ubi_eba_read_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, + void *buf, int offset, int len, int check) +{ + int err, pnum, scrub = 0, vol_id = vol->vol_id; + struct ubi_vid_hdr *vid_hdr; + uint32_t uninitialized_var(crc); + + err = leb_read_lock(ubi, vol_id, lnum); + if (err) + return err; + + pnum = vol->eba_tbl[lnum]; + if (pnum < 0) { + /* + * The logical eraseblock is not mapped, fill the whole buffer + * with 0xFF bytes. The exception is static volumes for which + * it is an error to read unmapped logical eraseblocks. + */ + dbg_eba("read %d bytes from offset %d of LEB %d:%d (unmapped)", + len, offset, vol_id, lnum); + leb_read_unlock(ubi, vol_id, lnum); + ubi_assert(vol->vol_type != UBI_STATIC_VOLUME); + memset(buf, 0xFF, len); + return 0; + } + + dbg_eba("read %d bytes from offset %d of LEB %d:%d, PEB %d", + len, offset, vol_id, lnum, pnum); + + if (vol->vol_type == UBI_DYNAMIC_VOLUME) + check = 0; + +retry: + if (check) { + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); + if (!vid_hdr) { + err = -ENOMEM; + goto out_unlock; + } + + err = ubi_io_read_vid_hdr(ubi, pnum, vid_hdr, 1); + if (err && err != UBI_IO_BITFLIPS) { + if (err > 0) { + /* + * The header is either absent or corrupted. + * The former case means there is a bug - + * switch to read-only mode just in case. + * The latter case means a real corruption - we + * may try to recover data. FIXME: but this is + * not implemented. + */ + if (err == UBI_IO_BAD_HDR_EBADMSG || + err == UBI_IO_BAD_HDR) { + ubi_warn(ubi, "corrupted VID header at PEB %d, LEB %d:%d", + pnum, vol_id, lnum); + err = -EBADMSG; + } else { + err = -EINVAL; + ubi_ro_mode(ubi); + } + } + goto out_free; + } else if (err == UBI_IO_BITFLIPS) + scrub = 1; + + ubi_assert(lnum < be32_to_cpu(vid_hdr->used_ebs)); + ubi_assert(len == be32_to_cpu(vid_hdr->data_size)); + + crc = be32_to_cpu(vid_hdr->data_crc); + ubi_free_vid_hdr(ubi, vid_hdr); + } + + err = ubi_io_read_data(ubi, buf, pnum, offset, len); + if (err) { + if (err == UBI_IO_BITFLIPS) + scrub = 1; + else if (mtd_is_eccerr(err)) { + if (vol->vol_type == UBI_DYNAMIC_VOLUME) + goto out_unlock; + scrub = 1; + if (!check) { + ubi_msg(ubi, "force data checking"); + check = 1; + goto retry; + } + } else + goto out_unlock; + } + + if (check) { + uint32_t crc1 = crc32(UBI_CRC32_INIT, buf, len); + if (crc1 != crc) { + ubi_warn(ubi, "CRC error: calculated %#08x, must be %#08x", + crc1, crc); + err = -EBADMSG; + goto out_unlock; + } + } + + if (scrub) + err = ubi_wl_scrub_peb(ubi, pnum); + + leb_read_unlock(ubi, vol_id, lnum); + return err; + +out_free: + ubi_free_vid_hdr(ubi, vid_hdr); +out_unlock: + leb_read_unlock(ubi, vol_id, lnum); + return err; +} + +/** + * ubi_eba_read_leb_sg - read data into a scatter gather list. + * @ubi: UBI device description object + * @vol: volume description object + * @lnum: logical eraseblock number + * @sgl: UBI scatter gather list to store the read data + * @offset: offset from where to read + * @len: how many bytes to read + * @check: data CRC check flag + * + * This function works exactly like ubi_eba_read_leb(). But instead of + * storing the read data into a buffer it writes to an UBI scatter gather + * list. + */ +int ubi_eba_read_leb_sg(struct ubi_device *ubi, struct ubi_volume *vol, + struct ubi_sgl *sgl, int lnum, int offset, int len, + int check) +{ + int to_read; + int ret; + struct scatterlist *sg; + + for (;;) { + ubi_assert(sgl->list_pos < UBI_MAX_SG_COUNT); + sg = &sgl->sg[sgl->list_pos]; + if (len < sg->length - sgl->page_pos) + to_read = len; + else + to_read = sg->length - sgl->page_pos; + + ret = ubi_eba_read_leb(ubi, vol, lnum, + sg_virt(sg) + sgl->page_pos, offset, + to_read, check); + if (ret < 0) + return ret; + + offset += to_read; + len -= to_read; + if (!len) { + sgl->page_pos += to_read; + if (sgl->page_pos == sg->length) { + sgl->list_pos++; + sgl->page_pos = 0; + } + + break; + } + + sgl->list_pos++; + sgl->page_pos = 0; + } + + return ret; +} + +/** + * recover_peb - recover from write failure. + * @ubi: UBI device description object + * @pnum: the physical eraseblock to recover + * @vol_id: volume ID + * @lnum: logical eraseblock number + * @buf: data which was not written because of the write failure + * @offset: offset of the failed write + * @len: how many bytes should have been written + * + * This function is called in case of a write failure and moves all good data + * from the potentially bad physical eraseblock to a good physical eraseblock. + * This function also writes the data which was not written due to the failure. + * Returns new physical eraseblock number in case of success, and a negative + * error code in case of failure. + */ +static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum, + const void *buf, int offset, int len) +{ + int err, idx = vol_id2idx(ubi, vol_id), new_pnum, data_size, tries = 0; + struct ubi_volume *vol = ubi->volumes[idx]; + struct ubi_vid_hdr *vid_hdr; + + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); + if (!vid_hdr) + return -ENOMEM; + +retry: + new_pnum = ubi_wl_get_peb(ubi); + if (new_pnum < 0) { + ubi_free_vid_hdr(ubi, vid_hdr); + up_read(&ubi->fm_eba_sem); + return new_pnum; + } + + ubi_msg(ubi, "recover PEB %d, move data to PEB %d", + pnum, new_pnum); + + err = ubi_io_read_vid_hdr(ubi, pnum, vid_hdr, 1); + if (err && err != UBI_IO_BITFLIPS) { + if (err > 0) + err = -EIO; + up_read(&ubi->fm_eba_sem); + goto out_put; + } + + vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); + err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr); + if (err) { + up_read(&ubi->fm_eba_sem); + goto write_error; + } + + data_size = offset + len; + mutex_lock(&ubi->buf_mutex); + memset(ubi->peb_buf + offset, 0xFF, len); + + /* Read everything before the area where the write failure happened */ + if (offset > 0) { + err = ubi_io_read_data(ubi, ubi->peb_buf, pnum, 0, offset); + if (err && err != UBI_IO_BITFLIPS) { + up_read(&ubi->fm_eba_sem); + goto out_unlock; + } + } + + memcpy(ubi->peb_buf + offset, buf, len); + + err = ubi_io_write_data(ubi, ubi->peb_buf, new_pnum, 0, data_size); + if (err) { + mutex_unlock(&ubi->buf_mutex); + up_read(&ubi->fm_eba_sem); + goto write_error; + } + + mutex_unlock(&ubi->buf_mutex); + ubi_free_vid_hdr(ubi, vid_hdr); + + vol->eba_tbl[lnum] = new_pnum; + up_read(&ubi->fm_eba_sem); + ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 1); + + ubi_msg(ubi, "data was successfully recovered"); + return 0; + +out_unlock: + mutex_unlock(&ubi->buf_mutex); +out_put: + ubi_wl_put_peb(ubi, vol_id, lnum, new_pnum, 1); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + +write_error: + /* + * Bad luck? This physical eraseblock is bad too? Crud. Let's try to + * get another one. + */ + ubi_warn(ubi, "failed to write to PEB %d", new_pnum); + ubi_wl_put_peb(ubi, vol_id, lnum, new_pnum, 1); + if (++tries > UBI_IO_RETRIES) { + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + ubi_msg(ubi, "try again"); + goto retry; +} + +/** + * ubi_eba_write_leb - write data to dynamic volume. + * @ubi: UBI device description object + * @vol: volume description object + * @lnum: logical eraseblock number + * @buf: the data to write + * @offset: offset within the logical eraseblock where to write + * @len: how many bytes to write + * + * This function writes data to logical eraseblock @lnum of a dynamic volume + * @vol. Returns zero in case of success and a negative error code in case + * of failure. In case of error, it is possible that something was still + * written to the flash media, but may be some garbage. + */ +int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, + const void *buf, int offset, int len) +{ + int err, pnum, tries = 0, vol_id = vol->vol_id; + struct ubi_vid_hdr *vid_hdr; + + if (ubi->ro_mode) + return -EROFS; + + err = leb_write_lock(ubi, vol_id, lnum); + if (err) + return err; + + pnum = vol->eba_tbl[lnum]; + if (pnum >= 0) { + dbg_eba("write %d bytes at offset %d of LEB %d:%d, PEB %d", + len, offset, vol_id, lnum, pnum); + + err = ubi_io_write_data(ubi, buf, pnum, offset, len); + if (err) { + ubi_warn(ubi, "failed to write data to PEB %d", pnum); + if (err == -EIO && ubi->bad_allowed) + err = recover_peb(ubi, pnum, vol_id, lnum, buf, + offset, len); + if (err) + ubi_ro_mode(ubi); + } + leb_write_unlock(ubi, vol_id, lnum); + return err; + } + + /* + * The logical eraseblock is not mapped. We have to get a free physical + * eraseblock and write the volume identifier header there first. + */ + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); + if (!vid_hdr) { + leb_write_unlock(ubi, vol_id, lnum); + return -ENOMEM; + } + + vid_hdr->vol_type = UBI_VID_DYNAMIC; + vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); + vid_hdr->vol_id = cpu_to_be32(vol_id); + vid_hdr->lnum = cpu_to_be32(lnum); + vid_hdr->compat = ubi_get_compat(ubi, vol_id); + vid_hdr->data_pad = cpu_to_be32(vol->data_pad); + +retry: + pnum = ubi_wl_get_peb(ubi); + if (pnum < 0) { + ubi_free_vid_hdr(ubi, vid_hdr); + leb_write_unlock(ubi, vol_id, lnum); + up_read(&ubi->fm_eba_sem); + return pnum; + } + + dbg_eba("write VID hdr and %d bytes at offset %d of LEB %d:%d, PEB %d", + len, offset, vol_id, lnum, pnum); + + err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr); + if (err) { + ubi_warn(ubi, "failed to write VID header to LEB %d:%d, PEB %d", + vol_id, lnum, pnum); + up_read(&ubi->fm_eba_sem); + goto write_error; + } + + if (len) { + err = ubi_io_write_data(ubi, buf, pnum, offset, len); + if (err) { + ubi_warn(ubi, "failed to write %d bytes at offset %d of LEB %d:%d, PEB %d", + len, offset, vol_id, lnum, pnum); + up_read(&ubi->fm_eba_sem); + goto write_error; + } + } + + vol->eba_tbl[lnum] = pnum; + up_read(&ubi->fm_eba_sem); + + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return 0; + +write_error: + if (err != -EIO || !ubi->bad_allowed) { + ubi_ro_mode(ubi); + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + /* + * Fortunately, this is the first write operation to this physical + * eraseblock, so just put it and request a new one. We assume that if + * this physical eraseblock went bad, the erase code will handle that. + */ + err = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 1); + if (err || ++tries > UBI_IO_RETRIES) { + ubi_ro_mode(ubi); + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); + ubi_msg(ubi, "try another PEB"); + goto retry; +} + +/** + * ubi_eba_write_leb_st - write data to static volume. + * @ubi: UBI device description object + * @vol: volume description object + * @lnum: logical eraseblock number + * @buf: data to write + * @len: how many bytes to write + * @used_ebs: how many logical eraseblocks will this volume contain + * + * This function writes data to logical eraseblock @lnum of static volume + * @vol. The @used_ebs argument should contain total number of logical + * eraseblock in this static volume. + * + * When writing to the last logical eraseblock, the @len argument doesn't have + * to be aligned to the minimal I/O unit size. Instead, it has to be equivalent + * to the real data size, although the @buf buffer has to contain the + * alignment. In all other cases, @len has to be aligned. + * + * It is prohibited to write more than once to logical eraseblocks of static + * volumes. This function returns zero in case of success and a negative error + * code in case of failure. + */ +int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol, + int lnum, const void *buf, int len, int used_ebs) +{ + int err, pnum, tries = 0, data_size = len, vol_id = vol->vol_id; + struct ubi_vid_hdr *vid_hdr; + uint32_t crc; + + if (ubi->ro_mode) + return -EROFS; + + if (lnum == used_ebs - 1) + /* If this is the last LEB @len may be unaligned */ + len = ALIGN(data_size, ubi->min_io_size); + else + ubi_assert(!(len & (ubi->min_io_size - 1))); + + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); + if (!vid_hdr) + return -ENOMEM; + + err = leb_write_lock(ubi, vol_id, lnum); + if (err) { + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); + vid_hdr->vol_id = cpu_to_be32(vol_id); + vid_hdr->lnum = cpu_to_be32(lnum); + vid_hdr->compat = ubi_get_compat(ubi, vol_id); + vid_hdr->data_pad = cpu_to_be32(vol->data_pad); + + crc = crc32(UBI_CRC32_INIT, buf, data_size); + vid_hdr->vol_type = UBI_VID_STATIC; + vid_hdr->data_size = cpu_to_be32(data_size); + vid_hdr->used_ebs = cpu_to_be32(used_ebs); + vid_hdr->data_crc = cpu_to_be32(crc); + +retry: + pnum = ubi_wl_get_peb(ubi); + if (pnum < 0) { + ubi_free_vid_hdr(ubi, vid_hdr); + leb_write_unlock(ubi, vol_id, lnum); + up_read(&ubi->fm_eba_sem); + return pnum; + } + + dbg_eba("write VID hdr and %d bytes at LEB %d:%d, PEB %d, used_ebs %d", + len, vol_id, lnum, pnum, used_ebs); + + err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr); + if (err) { + ubi_warn(ubi, "failed to write VID header to LEB %d:%d, PEB %d", + vol_id, lnum, pnum); + up_read(&ubi->fm_eba_sem); + goto write_error; + } + + err = ubi_io_write_data(ubi, buf, pnum, 0, len); + if (err) { + ubi_warn(ubi, "failed to write %d bytes of data to PEB %d", + len, pnum); + up_read(&ubi->fm_eba_sem); + goto write_error; + } + + ubi_assert(vol->eba_tbl[lnum] < 0); + vol->eba_tbl[lnum] = pnum; + up_read(&ubi->fm_eba_sem); + + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return 0; + +write_error: + if (err != -EIO || !ubi->bad_allowed) { + /* + * This flash device does not admit of bad eraseblocks or + * something nasty and unexpected happened. Switch to read-only + * mode just in case. + */ + ubi_ro_mode(ubi); + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + err = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 1); + if (err || ++tries > UBI_IO_RETRIES) { + ubi_ro_mode(ubi); + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); + ubi_msg(ubi, "try another PEB"); + goto retry; +} + +/* + * ubi_eba_atomic_leb_change - change logical eraseblock atomically. + * @ubi: UBI device description object + * @vol: volume description object + * @lnum: logical eraseblock number + * @buf: data to write + * @len: how many bytes to write + * + * This function changes the contents of a logical eraseblock atomically. @buf + * has to contain new logical eraseblock data, and @len - the length of the + * data, which has to be aligned. This function guarantees that in case of an + * unclean reboot the old contents is preserved. Returns zero in case of + * success and a negative error code in case of failure. + * + * UBI reserves one LEB for the "atomic LEB change" operation, so only one + * LEB change may be done at a time. This is ensured by @ubi->alc_mutex. + */ +int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, + int lnum, const void *buf, int len) +{ + int err, pnum, old_pnum, tries = 0, vol_id = vol->vol_id; + struct ubi_vid_hdr *vid_hdr; + uint32_t crc; + + if (ubi->ro_mode) + return -EROFS; + + if (len == 0) { + /* + * Special case when data length is zero. In this case the LEB + * has to be unmapped and mapped somewhere else. + */ + err = ubi_eba_unmap_leb(ubi, vol, lnum); + if (err) + return err; + return ubi_eba_write_leb(ubi, vol, lnum, NULL, 0, 0); + } + + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); + if (!vid_hdr) + return -ENOMEM; + + mutex_lock(&ubi->alc_mutex); + err = leb_write_lock(ubi, vol_id, lnum); + if (err) + goto out_mutex; + + vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); + vid_hdr->vol_id = cpu_to_be32(vol_id); + vid_hdr->lnum = cpu_to_be32(lnum); + vid_hdr->compat = ubi_get_compat(ubi, vol_id); + vid_hdr->data_pad = cpu_to_be32(vol->data_pad); + + crc = crc32(UBI_CRC32_INIT, buf, len); + vid_hdr->vol_type = UBI_VID_DYNAMIC; + vid_hdr->data_size = cpu_to_be32(len); + vid_hdr->copy_flag = 1; + vid_hdr->data_crc = cpu_to_be32(crc); + +retry: + pnum = ubi_wl_get_peb(ubi); + if (pnum < 0) { + err = pnum; + up_read(&ubi->fm_eba_sem); + goto out_leb_unlock; + } + + dbg_eba("change LEB %d:%d, PEB %d, write VID hdr to PEB %d", + vol_id, lnum, vol->eba_tbl[lnum], pnum); + + err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr); + if (err) { + ubi_warn(ubi, "failed to write VID header to LEB %d:%d, PEB %d", + vol_id, lnum, pnum); + up_read(&ubi->fm_eba_sem); + goto write_error; + } + + err = ubi_io_write_data(ubi, buf, pnum, 0, len); + if (err) { + ubi_warn(ubi, "failed to write %d bytes of data to PEB %d", + len, pnum); + up_read(&ubi->fm_eba_sem); + goto write_error; + } + + old_pnum = vol->eba_tbl[lnum]; + vol->eba_tbl[lnum] = pnum; + up_read(&ubi->fm_eba_sem); + + if (old_pnum >= 0) { + err = ubi_wl_put_peb(ubi, vol_id, lnum, old_pnum, 0); + if (err) + goto out_leb_unlock; + } + +out_leb_unlock: + leb_write_unlock(ubi, vol_id, lnum); +out_mutex: + mutex_unlock(&ubi->alc_mutex); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + +write_error: + if (err != -EIO || !ubi->bad_allowed) { + /* + * This flash device does not admit of bad eraseblocks or + * something nasty and unexpected happened. Switch to read-only + * mode just in case. + */ + ubi_ro_mode(ubi); + goto out_leb_unlock; + } + + err = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 1); + if (err || ++tries > UBI_IO_RETRIES) { + ubi_ro_mode(ubi); + goto out_leb_unlock; + } + + vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); + ubi_msg(ubi, "try another PEB"); + goto retry; +} + +/** + * is_error_sane - check whether a read error is sane. + * @err: code of the error happened during reading + * + * This is a helper function for 'ubi_eba_copy_leb()' which is called when we + * cannot read data from the target PEB (an error @err happened). If the error + * code is sane, then we treat this error as non-fatal. Otherwise the error is + * fatal and UBI will be switched to R/O mode later. + * + * The idea is that we try not to switch to R/O mode if the read error is + * something which suggests there was a real read problem. E.g., %-EIO. Or a + * memory allocation failed (-%ENOMEM). Otherwise, it is safer to switch to R/O + * mode, simply because we do not know what happened at the MTD level, and we + * cannot handle this. E.g., the underlying driver may have become crazy, and + * it is safer to switch to R/O mode to preserve the data. + * + * And bear in mind, this is about reading from the target PEB, i.e. the PEB + * which we have just written. + */ +static int is_error_sane(int err) +{ + if (err == -EIO || err == -ENOMEM || err == UBI_IO_BAD_HDR || + err == UBI_IO_BAD_HDR_EBADMSG || err == -ETIMEDOUT) + return 0; + return 1; +} + +/** + * ubi_eba_copy_leb - copy logical eraseblock. + * @ubi: UBI device description object + * @from: physical eraseblock number from where to copy + * @to: physical eraseblock number where to copy + * @vid_hdr: VID header of the @from physical eraseblock + * + * This function copies logical eraseblock from physical eraseblock @from to + * physical eraseblock @to. The @vid_hdr buffer may be changed by this + * function. Returns: + * o %0 in case of success; + * o %MOVE_CANCEL_RACE, %MOVE_TARGET_WR_ERR, %MOVE_TARGET_BITFLIPS, etc; + * o a negative error code in case of failure. + */ +int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, + struct ubi_vid_hdr *vid_hdr) +{ + int err, vol_id, lnum, data_size, aldata_size, idx; + struct ubi_volume *vol; + uint32_t crc; + + vol_id = be32_to_cpu(vid_hdr->vol_id); + lnum = be32_to_cpu(vid_hdr->lnum); + + dbg_wl("copy LEB %d:%d, PEB %d to PEB %d", vol_id, lnum, from, to); + + if (vid_hdr->vol_type == UBI_VID_STATIC) { + data_size = be32_to_cpu(vid_hdr->data_size); + aldata_size = ALIGN(data_size, ubi->min_io_size); + } else + data_size = aldata_size = + ubi->leb_size - be32_to_cpu(vid_hdr->data_pad); + + idx = vol_id2idx(ubi, vol_id); + spin_lock(&ubi->volumes_lock); + /* + * Note, we may race with volume deletion, which means that the volume + * this logical eraseblock belongs to might be being deleted. Since the + * volume deletion un-maps all the volume's logical eraseblocks, it will + * be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish. + */ + vol = ubi->volumes[idx]; + spin_unlock(&ubi->volumes_lock); + if (!vol) { + /* No need to do further work, cancel */ + dbg_wl("volume %d is being removed, cancel", vol_id); + return MOVE_CANCEL_RACE; + } + + /* + * We do not want anybody to write to this logical eraseblock while we + * are moving it, so lock it. + * + * Note, we are using non-waiting locking here, because we cannot sleep + * on the LEB, since it may cause deadlocks. Indeed, imagine a task is + * unmapping the LEB which is mapped to the PEB we are going to move + * (@from). This task locks the LEB and goes sleep in the + * 'ubi_wl_put_peb()' function on the @ubi->move_mutex. In turn, we are + * holding @ubi->move_mutex and go sleep on the LEB lock. So, if the + * LEB is already locked, we just do not move it and return + * %MOVE_RETRY. Note, we do not return %MOVE_CANCEL_RACE here because + * we do not know the reasons of the contention - it may be just a + * normal I/O on this LEB, so we want to re-try. + */ + err = leb_write_trylock(ubi, vol_id, lnum); + if (err) { + dbg_wl("contention on LEB %d:%d, cancel", vol_id, lnum); + return MOVE_RETRY; + } + + /* + * The LEB might have been put meanwhile, and the task which put it is + * probably waiting on @ubi->move_mutex. No need to continue the work, + * cancel it. + */ + if (vol->eba_tbl[lnum] != from) { + dbg_wl("LEB %d:%d is no longer mapped to PEB %d, mapped to PEB %d, cancel", + vol_id, lnum, from, vol->eba_tbl[lnum]); + err = MOVE_CANCEL_RACE; + goto out_unlock_leb; + } + + /* + * OK, now the LEB is locked and we can safely start moving it. Since + * this function utilizes the @ubi->peb_buf buffer which is shared + * with some other functions - we lock the buffer by taking the + * @ubi->buf_mutex. + */ + mutex_lock(&ubi->buf_mutex); + dbg_wl("read %d bytes of data", aldata_size); + err = ubi_io_read_data(ubi, ubi->peb_buf, from, 0, aldata_size); + if (err && err != UBI_IO_BITFLIPS) { + ubi_warn(ubi, "error %d while reading data from PEB %d", + err, from); + err = MOVE_SOURCE_RD_ERR; + goto out_unlock_buf; + } + + /* + * Now we have got to calculate how much data we have to copy. In + * case of a static volume it is fairly easy - the VID header contains + * the data size. In case of a dynamic volume it is more difficult - we + * have to read the contents, cut 0xFF bytes from the end and copy only + * the first part. We must do this to avoid writing 0xFF bytes as it + * may have some side-effects. And not only this. It is important not + * to include those 0xFFs to CRC because later the they may be filled + * by data. + */ + if (vid_hdr->vol_type == UBI_VID_DYNAMIC) + aldata_size = data_size = + ubi_calc_data_len(ubi, ubi->peb_buf, data_size); + + cond_resched(); + crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size); + cond_resched(); + + /* + * It may turn out to be that the whole @from physical eraseblock + * contains only 0xFF bytes. Then we have to only write the VID header + * and do not write any data. This also means we should not set + * @vid_hdr->copy_flag, @vid_hdr->data_size, and @vid_hdr->data_crc. + */ + if (data_size > 0) { + vid_hdr->copy_flag = 1; + vid_hdr->data_size = cpu_to_be32(data_size); + vid_hdr->data_crc = cpu_to_be32(crc); + } + vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); + + err = ubi_io_write_vid_hdr(ubi, to, vid_hdr); + if (err) { + if (err == -EIO) + err = MOVE_TARGET_WR_ERR; + goto out_unlock_buf; + } + + cond_resched(); + + /* Read the VID header back and check if it was written correctly */ + err = ubi_io_read_vid_hdr(ubi, to, vid_hdr, 1); + if (err) { + if (err != UBI_IO_BITFLIPS) { + ubi_warn(ubi, "error %d while reading VID header back from PEB %d", + err, to); + if (is_error_sane(err)) + err = MOVE_TARGET_RD_ERR; + } else + err = MOVE_TARGET_BITFLIPS; + goto out_unlock_buf; + } + + if (data_size > 0) { + err = ubi_io_write_data(ubi, ubi->peb_buf, to, 0, aldata_size); + if (err) { + if (err == -EIO) + err = MOVE_TARGET_WR_ERR; + goto out_unlock_buf; + } + + cond_resched(); + + /* + * We've written the data and are going to read it back to make + * sure it was written correctly. + */ + memset(ubi->peb_buf, 0xFF, aldata_size); + err = ubi_io_read_data(ubi, ubi->peb_buf, to, 0, aldata_size); + if (err) { + if (err != UBI_IO_BITFLIPS) { + ubi_warn(ubi, "error %d while reading data back from PEB %d", + err, to); + if (is_error_sane(err)) + err = MOVE_TARGET_RD_ERR; + } else + err = MOVE_TARGET_BITFLIPS; + goto out_unlock_buf; + } + + cond_resched(); + + if (crc != crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size)) { + ubi_warn(ubi, "read data back from PEB %d and it is different", + to); + err = -EINVAL; + goto out_unlock_buf; + } + } + + ubi_assert(vol->eba_tbl[lnum] == from); + down_read(&ubi->fm_eba_sem); + vol->eba_tbl[lnum] = to; + up_read(&ubi->fm_eba_sem); + +out_unlock_buf: + mutex_unlock(&ubi->buf_mutex); +out_unlock_leb: + leb_write_unlock(ubi, vol_id, lnum); + return err; +} + +/** + * print_rsvd_warning - warn about not having enough reserved PEBs. + * @ubi: UBI device description object + * + * This is a helper function for 'ubi_eba_init()' which is called when UBI + * cannot reserve enough PEBs for bad block handling. This function makes a + * decision whether we have to print a warning or not. The algorithm is as + * follows: + * o if this is a new UBI image, then just print the warning + * o if this is an UBI image which has already been used for some time, print + * a warning only if we can reserve less than 10% of the expected amount of + * the reserved PEB. + * + * The idea is that when UBI is used, PEBs become bad, and the reserved pool + * of PEBs becomes smaller, which is normal and we do not want to scare users + * with a warning every time they attach the MTD device. This was an issue + * reported by real users. + */ +static void print_rsvd_warning(struct ubi_device *ubi, + struct ubi_attach_info *ai) +{ + /* + * The 1 << 18 (256KiB) number is picked randomly, just a reasonably + * large number to distinguish between newly flashed and used images. + */ + if (ai->max_sqnum > (1 << 18)) { + int min = ubi->beb_rsvd_level / 10; + + if (!min) + min = 1; + if (ubi->beb_rsvd_pebs > min) + return; + } + + ubi_warn(ubi, "cannot reserve enough PEBs for bad PEB handling, reserved %d, need %d", + ubi->beb_rsvd_pebs, ubi->beb_rsvd_level); + if (ubi->corr_peb_count) + ubi_warn(ubi, "%d PEBs are corrupted and not used", + ubi->corr_peb_count); +} + +/** + * self_check_eba - run a self check on the EBA table constructed by fastmap. + * @ubi: UBI device description object + * @ai_fastmap: UBI attach info object created by fastmap + * @ai_scan: UBI attach info object created by scanning + * + * Returns < 0 in case of an internal error, 0 otherwise. + * If a bad EBA table entry was found it will be printed out and + * ubi_assert() triggers. + */ +int self_check_eba(struct ubi_device *ubi, struct ubi_attach_info *ai_fastmap, + struct ubi_attach_info *ai_scan) +{ + int i, j, num_volumes, ret = 0; + int **scan_eba, **fm_eba; + struct ubi_ainf_volume *av; + struct ubi_volume *vol; + struct ubi_ainf_peb *aeb; + struct rb_node *rb; + + num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; + + scan_eba = kmalloc(sizeof(*scan_eba) * num_volumes, GFP_KERNEL); + if (!scan_eba) + return -ENOMEM; + + fm_eba = kmalloc(sizeof(*fm_eba) * num_volumes, GFP_KERNEL); + if (!fm_eba) { + kfree(scan_eba); + return -ENOMEM; + } + + for (i = 0; i < num_volumes; i++) { + vol = ubi->volumes[i]; + if (!vol) + continue; + + scan_eba[i] = kmalloc(vol->reserved_pebs * sizeof(**scan_eba), + GFP_KERNEL); + if (!scan_eba[i]) { + ret = -ENOMEM; + goto out_free; + } + + fm_eba[i] = kmalloc(vol->reserved_pebs * sizeof(**fm_eba), + GFP_KERNEL); + if (!fm_eba[i]) { + ret = -ENOMEM; + goto out_free; + } + + for (j = 0; j < vol->reserved_pebs; j++) + scan_eba[i][j] = fm_eba[i][j] = UBI_LEB_UNMAPPED; + + av = ubi_find_av(ai_scan, idx2vol_id(ubi, i)); + if (!av) + continue; + + ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) + scan_eba[i][aeb->lnum] = aeb->pnum; + + av = ubi_find_av(ai_fastmap, idx2vol_id(ubi, i)); + if (!av) + continue; + + ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) + fm_eba[i][aeb->lnum] = aeb->pnum; + + for (j = 0; j < vol->reserved_pebs; j++) { + if (scan_eba[i][j] != fm_eba[i][j]) { + if (scan_eba[i][j] == UBI_LEB_UNMAPPED || + fm_eba[i][j] == UBI_LEB_UNMAPPED) + continue; + + ubi_err(ubi, "LEB:%i:%i is PEB:%i instead of %i!", + vol->vol_id, i, fm_eba[i][j], + scan_eba[i][j]); + ubi_assert(0); + } + } + } + +out_free: + for (i = 0; i < num_volumes; i++) { + if (!ubi->volumes[i]) + continue; + + kfree(scan_eba[i]); + kfree(fm_eba[i]); + } + + kfree(scan_eba); + kfree(fm_eba); + return ret; +} + +/** + * ubi_eba_init - initialize the EBA sub-system using attaching information. + * @ubi: UBI device description object + * @ai: attaching information + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +int ubi_eba_init(struct ubi_device *ubi, struct ubi_attach_info *ai) +{ + int i, j, err, num_volumes; + struct ubi_ainf_volume *av; + struct ubi_volume *vol; + struct ubi_ainf_peb *aeb; + struct rb_node *rb; + + dbg_eba("initialize EBA sub-system"); + + spin_lock_init(&ubi->ltree_lock); + mutex_init(&ubi->alc_mutex); + ubi->ltree = RB_ROOT; + + ubi->global_sqnum = ai->max_sqnum + 1; + num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; + + for (i = 0; i < num_volumes; i++) { + vol = ubi->volumes[i]; + if (!vol) + continue; + + cond_resched(); + + vol->eba_tbl = kmalloc(vol->reserved_pebs * sizeof(int), + GFP_KERNEL); + if (!vol->eba_tbl) { + err = -ENOMEM; + goto out_free; + } + + for (j = 0; j < vol->reserved_pebs; j++) + vol->eba_tbl[j] = UBI_LEB_UNMAPPED; + + av = ubi_find_av(ai, idx2vol_id(ubi, i)); + if (!av) + continue; + + ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) { + if (aeb->lnum >= vol->reserved_pebs) + /* + * This may happen in case of an unclean reboot + * during re-size. + */ + ubi_move_aeb_to_list(av, aeb, &ai->erase); + else + vol->eba_tbl[aeb->lnum] = aeb->pnum; + } + } + + if (ubi->avail_pebs < EBA_RESERVED_PEBS) { + ubi_err(ubi, "no enough physical eraseblocks (%d, need %d)", + ubi->avail_pebs, EBA_RESERVED_PEBS); + if (ubi->corr_peb_count) + ubi_err(ubi, "%d PEBs are corrupted and not used", + ubi->corr_peb_count); + err = -ENOSPC; + goto out_free; + } + ubi->avail_pebs -= EBA_RESERVED_PEBS; + ubi->rsvd_pebs += EBA_RESERVED_PEBS; + + if (ubi->bad_allowed) { + ubi_calculate_reserved(ubi); + + if (ubi->avail_pebs < ubi->beb_rsvd_level) { + /* No enough free physical eraseblocks */ + ubi->beb_rsvd_pebs = ubi->avail_pebs; + print_rsvd_warning(ubi, ai); + } else + ubi->beb_rsvd_pebs = ubi->beb_rsvd_level; + + ubi->avail_pebs -= ubi->beb_rsvd_pebs; + ubi->rsvd_pebs += ubi->beb_rsvd_pebs; + } + + dbg_eba("EBA sub-system is initialized"); + return 0; + +out_free: + for (i = 0; i < num_volumes; i++) { + if (!ubi->volumes[i]) + continue; + kfree(ubi->volumes[i]->eba_tbl); + ubi->volumes[i]->eba_tbl = NULL; + } + return err; +} diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c new file mode 100644 index 000000000..b2a665398 --- /dev/null +++ b/drivers/mtd/ubi/fastmap-wl.c @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2012 Linutronix GmbH + * Copyright (c) 2014 sigma star gmbh + * Author: Richard Weinberger <richard@nod.at> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + */ + +/** + * update_fastmap_work_fn - calls ubi_update_fastmap from a work queue + * @wrk: the work description object + */ +static void update_fastmap_work_fn(struct work_struct *wrk) +{ + struct ubi_device *ubi = container_of(wrk, struct ubi_device, fm_work); + + ubi_update_fastmap(ubi); + spin_lock(&ubi->wl_lock); + ubi->fm_work_scheduled = 0; + spin_unlock(&ubi->wl_lock); +} + +/** + * find_anchor_wl_entry - find wear-leveling entry to used as anchor PEB. + * @root: the RB-tree where to look for + */ +static struct ubi_wl_entry *find_anchor_wl_entry(struct rb_root *root) +{ + struct rb_node *p; + struct ubi_wl_entry *e, *victim = NULL; + int max_ec = UBI_MAX_ERASECOUNTER; + + ubi_rb_for_each_entry(p, e, root, u.rb) { + if (e->pnum < UBI_FM_MAX_START && e->ec < max_ec) { + victim = e; + max_ec = e->ec; + } + } + + return victim; +} + +/** + * return_unused_pool_pebs - returns unused PEB to the free tree. + * @ubi: UBI device description object + * @pool: fastmap pool description object + */ +static void return_unused_pool_pebs(struct ubi_device *ubi, + struct ubi_fm_pool *pool) +{ + int i; + struct ubi_wl_entry *e; + + for (i = pool->used; i < pool->size; i++) { + e = ubi->lookuptbl[pool->pebs[i]]; + wl_tree_add(e, &ubi->free); + ubi->free_count++; + } +} + +static int anchor_pebs_avalible(struct rb_root *root) +{ + struct rb_node *p; + struct ubi_wl_entry *e; + + ubi_rb_for_each_entry(p, e, root, u.rb) + if (e->pnum < UBI_FM_MAX_START) + return 1; + + return 0; +} + +/** + * ubi_wl_get_fm_peb - find a physical erase block with a given maximal number. + * @ubi: UBI device description object + * @anchor: This PEB will be used as anchor PEB by fastmap + * + * The function returns a physical erase block with a given maximal number + * and removes it from the wl subsystem. + * Must be called with wl_lock held! + */ +struct ubi_wl_entry *ubi_wl_get_fm_peb(struct ubi_device *ubi, int anchor) +{ + struct ubi_wl_entry *e = NULL; + + if (!ubi->free.rb_node || (ubi->free_count - ubi->beb_rsvd_pebs < 1)) + goto out; + + if (anchor) + e = find_anchor_wl_entry(&ubi->free); + else + e = find_mean_wl_entry(ubi, &ubi->free); + + if (!e) + goto out; + + self_check_in_wl_tree(ubi, e, &ubi->free); + + /* remove it from the free list, + * the wl subsystem does no longer know this erase block */ + rb_erase(&e->u.rb, &ubi->free); + ubi->free_count--; +out: + return e; +} + +/** + * ubi_refill_pools - refills all fastmap PEB pools. + * @ubi: UBI device description object + */ +void ubi_refill_pools(struct ubi_device *ubi) +{ + struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool; + struct ubi_fm_pool *pool = &ubi->fm_pool; + struct ubi_wl_entry *e; + int enough; + + spin_lock(&ubi->wl_lock); + + return_unused_pool_pebs(ubi, wl_pool); + return_unused_pool_pebs(ubi, pool); + + wl_pool->size = 0; + pool->size = 0; + + for (;;) { + enough = 0; + if (pool->size < pool->max_size) { + if (!ubi->free.rb_node) + break; + + e = wl_get_wle(ubi); + if (!e) + break; + + pool->pebs[pool->size] = e->pnum; + pool->size++; + } else + enough++; + + if (wl_pool->size < wl_pool->max_size) { + if (!ubi->free.rb_node || + (ubi->free_count - ubi->beb_rsvd_pebs < 5)) + break; + + e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); + self_check_in_wl_tree(ubi, e, &ubi->free); + rb_erase(&e->u.rb, &ubi->free); + ubi->free_count--; + + wl_pool->pebs[wl_pool->size] = e->pnum; + wl_pool->size++; + } else + enough++; + + if (enough == 2) + break; + } + + wl_pool->used = 0; + pool->used = 0; + + spin_unlock(&ubi->wl_lock); +} + +/** + * ubi_wl_get_peb - get a physical eraseblock. + * @ubi: UBI device description object + * + * This function returns a physical eraseblock in case of success and a + * negative error code in case of failure. + * Returns with ubi->fm_eba_sem held in read mode! + */ +int ubi_wl_get_peb(struct ubi_device *ubi) +{ + int ret, retried = 0; + struct ubi_fm_pool *pool = &ubi->fm_pool; + struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool; + +again: + down_read(&ubi->fm_eba_sem); + spin_lock(&ubi->wl_lock); + + /* We check here also for the WL pool because at this point we can + * refill the WL pool synchronous. */ + if (pool->used == pool->size || wl_pool->used == wl_pool->size) { + spin_unlock(&ubi->wl_lock); + up_read(&ubi->fm_eba_sem); + ret = ubi_update_fastmap(ubi); + if (ret) { + ubi_msg(ubi, "Unable to write a new fastmap: %i", ret); + down_read(&ubi->fm_eba_sem); + return -ENOSPC; + } + down_read(&ubi->fm_eba_sem); + spin_lock(&ubi->wl_lock); + } + + if (pool->used == pool->size) { + spin_unlock(&ubi->wl_lock); + if (retried) { + ubi_err(ubi, "Unable to get a free PEB from user WL pool"); + ret = -ENOSPC; + goto out; + } + retried = 1; + up_read(&ubi->fm_eba_sem); + goto again; + } + + ubi_assert(pool->used < pool->size); + ret = pool->pebs[pool->used++]; + prot_queue_add(ubi, ubi->lookuptbl[ret]); + spin_unlock(&ubi->wl_lock); +out: + return ret; +} + +/* get_peb_for_wl - returns a PEB to be used internally by the WL sub-system. + * + * @ubi: UBI device description object + */ +static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi) +{ + struct ubi_fm_pool *pool = &ubi->fm_wl_pool; + int pnum; + + if (pool->used == pool->size) { + /* We cannot update the fastmap here because this + * function is called in atomic context. + * Let's fail here and refill/update it as soon as possible. */ + if (!ubi->fm_work_scheduled) { + ubi->fm_work_scheduled = 1; + schedule_work(&ubi->fm_work); + } + return NULL; + } + + pnum = pool->pebs[pool->used++]; + return ubi->lookuptbl[pnum]; +} + +/** + * ubi_ensure_anchor_pebs - schedule wear-leveling to produce an anchor PEB. + * @ubi: UBI device description object + */ +int ubi_ensure_anchor_pebs(struct ubi_device *ubi) +{ + struct ubi_work *wrk; + + spin_lock(&ubi->wl_lock); + if (ubi->wl_scheduled) { + spin_unlock(&ubi->wl_lock); + return 0; + } + ubi->wl_scheduled = 1; + spin_unlock(&ubi->wl_lock); + + wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS); + if (!wrk) { + spin_lock(&ubi->wl_lock); + ubi->wl_scheduled = 0; + spin_unlock(&ubi->wl_lock); + return -ENOMEM; + } + + wrk->anchor = 1; + wrk->func = &wear_leveling_worker; + schedule_ubi_work(ubi, wrk); + return 0; +} + +/** + * ubi_wl_put_fm_peb - returns a PEB used in a fastmap to the wear-leveling + * sub-system. + * see: ubi_wl_put_peb() + * + * @ubi: UBI device description object + * @fm_e: physical eraseblock to return + * @lnum: the last used logical eraseblock number for the PEB + * @torture: if this physical eraseblock has to be tortured + */ +int ubi_wl_put_fm_peb(struct ubi_device *ubi, struct ubi_wl_entry *fm_e, + int lnum, int torture) +{ + struct ubi_wl_entry *e; + int vol_id, pnum = fm_e->pnum; + + dbg_wl("PEB %d", pnum); + + ubi_assert(pnum >= 0); + ubi_assert(pnum < ubi->peb_count); + + spin_lock(&ubi->wl_lock); + e = ubi->lookuptbl[pnum]; + + /* This can happen if we recovered from a fastmap the very + * first time and writing now a new one. In this case the wl system + * has never seen any PEB used by the original fastmap. + */ + if (!e) { + e = fm_e; + ubi_assert(e->ec >= 0); + ubi->lookuptbl[pnum] = e; + } + + spin_unlock(&ubi->wl_lock); + + vol_id = lnum ? UBI_FM_DATA_VOLUME_ID : UBI_FM_SB_VOLUME_ID; + return schedule_erase(ubi, e, vol_id, lnum, torture); +} + +/** + * ubi_is_erase_work - checks whether a work is erase work. + * @wrk: The work object to be checked + */ +int ubi_is_erase_work(struct ubi_work *wrk) +{ + return wrk->func == erase_worker; +} + +static void ubi_fastmap_close(struct ubi_device *ubi) +{ + int i; + + flush_work(&ubi->fm_work); + return_unused_pool_pebs(ubi, &ubi->fm_pool); + return_unused_pool_pebs(ubi, &ubi->fm_wl_pool); + + if (ubi->fm) { + for (i = 0; i < ubi->fm->used_blocks; i++) + kfree(ubi->fm->e[i]); + } + kfree(ubi->fm); +} + +/** + * may_reserve_for_fm - tests whether a PEB shall be reserved for fastmap. + * See find_mean_wl_entry() + * + * @ubi: UBI device description object + * @e: physical eraseblock to return + * @root: RB tree to test against. + */ +static struct ubi_wl_entry *may_reserve_for_fm(struct ubi_device *ubi, + struct ubi_wl_entry *e, + struct rb_root *root) { + if (e && !ubi->fm_disabled && !ubi->fm && + e->pnum < UBI_FM_MAX_START) + e = rb_entry(rb_next(root->rb_node), + struct ubi_wl_entry, u.rb); + + return e; +} diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c new file mode 100644 index 000000000..02a6de2f5 --- /dev/null +++ b/drivers/mtd/ubi/fastmap.c @@ -0,0 +1,1631 @@ +/* + * Copyright (c) 2012 Linutronix GmbH + * Copyright (c) 2014 sigma star gmbh + * Author: Richard Weinberger <richard@nod.at> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + */ + +#include <linux/crc32.h> +#include "ubi.h" + +/** + * init_seen - allocate memory for used for debugging. + * @ubi: UBI device description object + */ +static inline int *init_seen(struct ubi_device *ubi) +{ + int *ret; + + if (!ubi_dbg_chk_fastmap(ubi)) + return NULL; + + ret = kcalloc(ubi->peb_count, sizeof(int), GFP_KERNEL); + if (!ret) + return ERR_PTR(-ENOMEM); + + return ret; +} + +/** + * free_seen - free the seen logic integer array. + * @seen: integer array of @ubi->peb_count size + */ +static inline void free_seen(int *seen) +{ + kfree(seen); +} + +/** + * set_seen - mark a PEB as seen. + * @ubi: UBI device description object + * @pnum: The PEB to be makred as seen + * @seen: integer array of @ubi->peb_count size + */ +static inline void set_seen(struct ubi_device *ubi, int pnum, int *seen) +{ + if (!ubi_dbg_chk_fastmap(ubi) || !seen) + return; + + seen[pnum] = 1; +} + +/** + * self_check_seen - check whether all PEB have been seen by fastmap. + * @ubi: UBI device description object + * @seen: integer array of @ubi->peb_count size + */ +static int self_check_seen(struct ubi_device *ubi, int *seen) +{ + int pnum, ret = 0; + + if (!ubi_dbg_chk_fastmap(ubi) || !seen) + return 0; + + for (pnum = 0; pnum < ubi->peb_count; pnum++) { + if (!seen[pnum] && ubi->lookuptbl[pnum]) { + ubi_err(ubi, "self-check failed for PEB %d, fastmap didn't see it", pnum); + ret = -EINVAL; + } + } + + return ret; +} + +/** + * ubi_calc_fm_size - calculates the fastmap size in bytes for an UBI device. + * @ubi: UBI device description object + */ +size_t ubi_calc_fm_size(struct ubi_device *ubi) +{ + size_t size; + + size = sizeof(struct ubi_fm_sb) + \ + sizeof(struct ubi_fm_hdr) + \ + sizeof(struct ubi_fm_scan_pool) + \ + sizeof(struct ubi_fm_scan_pool) + \ + (ubi->peb_count * sizeof(struct ubi_fm_ec)) + \ + (sizeof(struct ubi_fm_eba) + \ + (ubi->peb_count * sizeof(__be32))) + \ + sizeof(struct ubi_fm_volhdr) * UBI_MAX_VOLUMES; + return roundup(size, ubi->leb_size); +} + + +/** + * new_fm_vhdr - allocate a new volume header for fastmap usage. + * @ubi: UBI device description object + * @vol_id: the VID of the new header + * + * Returns a new struct ubi_vid_hdr on success. + * NULL indicates out of memory. + */ +static struct ubi_vid_hdr *new_fm_vhdr(struct ubi_device *ubi, int vol_id) +{ + struct ubi_vid_hdr *new; + + new = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); + if (!new) + goto out; + + new->vol_type = UBI_VID_DYNAMIC; + new->vol_id = cpu_to_be32(vol_id); + + /* UBI implementations without fastmap support have to delete the + * fastmap. + */ + new->compat = UBI_COMPAT_DELETE; + +out: + return new; +} + +/** + * add_aeb - create and add a attach erase block to a given list. + * @ai: UBI attach info object + * @list: the target list + * @pnum: PEB number of the new attach erase block + * @ec: erease counter of the new LEB + * @scrub: scrub this PEB after attaching + * + * Returns 0 on success, < 0 indicates an internal error. + */ +static int add_aeb(struct ubi_attach_info *ai, struct list_head *list, + int pnum, int ec, int scrub) +{ + struct ubi_ainf_peb *aeb; + + aeb = kmem_cache_alloc(ai->aeb_slab_cache, GFP_KERNEL); + if (!aeb) + return -ENOMEM; + + aeb->pnum = pnum; + aeb->ec = ec; + aeb->lnum = -1; + aeb->scrub = scrub; + aeb->copy_flag = aeb->sqnum = 0; + + ai->ec_sum += aeb->ec; + ai->ec_count++; + + if (ai->max_ec < aeb->ec) + ai->max_ec = aeb->ec; + + if (ai->min_ec > aeb->ec) + ai->min_ec = aeb->ec; + + list_add_tail(&aeb->u.list, list); + + return 0; +} + +/** + * add_vol - create and add a new volume to ubi_attach_info. + * @ai: ubi_attach_info object + * @vol_id: VID of the new volume + * @used_ebs: number of used EBS + * @data_pad: data padding value of the new volume + * @vol_type: volume type + * @last_eb_bytes: number of bytes in the last LEB + * + * Returns the new struct ubi_ainf_volume on success. + * NULL indicates an error. + */ +static struct ubi_ainf_volume *add_vol(struct ubi_attach_info *ai, int vol_id, + int used_ebs, int data_pad, u8 vol_type, + int last_eb_bytes) +{ + struct ubi_ainf_volume *av; + struct rb_node **p = &ai->volumes.rb_node, *parent = NULL; + + while (*p) { + parent = *p; + av = rb_entry(parent, struct ubi_ainf_volume, rb); + + if (vol_id > av->vol_id) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + av = kmalloc(sizeof(struct ubi_ainf_volume), GFP_KERNEL); + if (!av) + goto out; + + av->highest_lnum = av->leb_count = av->used_ebs = 0; + av->vol_id = vol_id; + av->data_pad = data_pad; + av->last_data_size = last_eb_bytes; + av->compat = 0; + av->vol_type = vol_type; + av->root = RB_ROOT; + if (av->vol_type == UBI_STATIC_VOLUME) + av->used_ebs = used_ebs; + + dbg_bld("found volume (ID %i)", vol_id); + + rb_link_node(&av->rb, parent, p); + rb_insert_color(&av->rb, &ai->volumes); + +out: + return av; +} + +/** + * assign_aeb_to_av - assigns a SEB to a given ainf_volume and removes it + * from it's original list. + * @ai: ubi_attach_info object + * @aeb: the to be assigned SEB + * @av: target scan volume + */ +static void assign_aeb_to_av(struct ubi_attach_info *ai, + struct ubi_ainf_peb *aeb, + struct ubi_ainf_volume *av) +{ + struct ubi_ainf_peb *tmp_aeb; + struct rb_node **p = &ai->volumes.rb_node, *parent = NULL; + + p = &av->root.rb_node; + while (*p) { + parent = *p; + + tmp_aeb = rb_entry(parent, struct ubi_ainf_peb, u.rb); + if (aeb->lnum != tmp_aeb->lnum) { + if (aeb->lnum < tmp_aeb->lnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + + continue; + } else + break; + } + + list_del(&aeb->u.list); + av->leb_count++; + + rb_link_node(&aeb->u.rb, parent, p); + rb_insert_color(&aeb->u.rb, &av->root); +} + +/** + * update_vol - inserts or updates a LEB which was found a pool. + * @ubi: the UBI device object + * @ai: attach info object + * @av: the volume this LEB belongs to + * @new_vh: the volume header derived from new_aeb + * @new_aeb: the AEB to be examined + * + * Returns 0 on success, < 0 indicates an internal error. + */ +static int update_vol(struct ubi_device *ubi, struct ubi_attach_info *ai, + struct ubi_ainf_volume *av, struct ubi_vid_hdr *new_vh, + struct ubi_ainf_peb *new_aeb) +{ + struct rb_node **p = &av->root.rb_node, *parent = NULL; + struct ubi_ainf_peb *aeb, *victim; + int cmp_res; + + while (*p) { + parent = *p; + aeb = rb_entry(parent, struct ubi_ainf_peb, u.rb); + + if (be32_to_cpu(new_vh->lnum) != aeb->lnum) { + if (be32_to_cpu(new_vh->lnum) < aeb->lnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + + continue; + } + + /* This case can happen if the fastmap gets written + * because of a volume change (creation, deletion, ..). + * Then a PEB can be within the persistent EBA and the pool. + */ + if (aeb->pnum == new_aeb->pnum) { + ubi_assert(aeb->lnum == new_aeb->lnum); + kmem_cache_free(ai->aeb_slab_cache, new_aeb); + + return 0; + } + + cmp_res = ubi_compare_lebs(ubi, aeb, new_aeb->pnum, new_vh); + if (cmp_res < 0) + return cmp_res; + + /* new_aeb is newer */ + if (cmp_res & 1) { + victim = kmem_cache_alloc(ai->aeb_slab_cache, + GFP_KERNEL); + if (!victim) + return -ENOMEM; + + victim->ec = aeb->ec; + victim->pnum = aeb->pnum; + list_add_tail(&victim->u.list, &ai->erase); + + if (av->highest_lnum == be32_to_cpu(new_vh->lnum)) + av->last_data_size = \ + be32_to_cpu(new_vh->data_size); + + dbg_bld("vol %i: AEB %i's PEB %i is the newer", + av->vol_id, aeb->lnum, new_aeb->pnum); + + aeb->ec = new_aeb->ec; + aeb->pnum = new_aeb->pnum; + aeb->copy_flag = new_vh->copy_flag; + aeb->scrub = new_aeb->scrub; + kmem_cache_free(ai->aeb_slab_cache, new_aeb); + + /* new_aeb is older */ + } else { + dbg_bld("vol %i: AEB %i's PEB %i is old, dropping it", + av->vol_id, aeb->lnum, new_aeb->pnum); + list_add_tail(&new_aeb->u.list, &ai->erase); + } + + return 0; + } + /* This LEB is new, let's add it to the volume */ + + if (av->highest_lnum <= be32_to_cpu(new_vh->lnum)) { + av->highest_lnum = be32_to_cpu(new_vh->lnum); + av->last_data_size = be32_to_cpu(new_vh->data_size); + } + + if (av->vol_type == UBI_STATIC_VOLUME) + av->used_ebs = be32_to_cpu(new_vh->used_ebs); + + av->leb_count++; + + rb_link_node(&new_aeb->u.rb, parent, p); + rb_insert_color(&new_aeb->u.rb, &av->root); + + return 0; +} + +/** + * process_pool_aeb - we found a non-empty PEB in a pool. + * @ubi: UBI device object + * @ai: attach info object + * @new_vh: the volume header derived from new_aeb + * @new_aeb: the AEB to be examined + * + * Returns 0 on success, < 0 indicates an internal error. + */ +static int process_pool_aeb(struct ubi_device *ubi, struct ubi_attach_info *ai, + struct ubi_vid_hdr *new_vh, + struct ubi_ainf_peb *new_aeb) +{ + struct ubi_ainf_volume *av, *tmp_av = NULL; + struct rb_node **p = &ai->volumes.rb_node, *parent = NULL; + int found = 0; + + if (be32_to_cpu(new_vh->vol_id) == UBI_FM_SB_VOLUME_ID || + be32_to_cpu(new_vh->vol_id) == UBI_FM_DATA_VOLUME_ID) { + kmem_cache_free(ai->aeb_slab_cache, new_aeb); + + return 0; + } + + /* Find the volume this SEB belongs to */ + while (*p) { + parent = *p; + tmp_av = rb_entry(parent, struct ubi_ainf_volume, rb); + + if (be32_to_cpu(new_vh->vol_id) > tmp_av->vol_id) + p = &(*p)->rb_left; + else if (be32_to_cpu(new_vh->vol_id) < tmp_av->vol_id) + p = &(*p)->rb_right; + else { + found = 1; + break; + } + } + + if (found) + av = tmp_av; + else { + ubi_err(ubi, "orphaned volume in fastmap pool!"); + kmem_cache_free(ai->aeb_slab_cache, new_aeb); + return UBI_BAD_FASTMAP; + } + + ubi_assert(be32_to_cpu(new_vh->vol_id) == av->vol_id); + + return update_vol(ubi, ai, av, new_vh, new_aeb); +} + +/** + * unmap_peb - unmap a PEB. + * If fastmap detects a free PEB in the pool it has to check whether + * this PEB has been unmapped after writing the fastmap. + * + * @ai: UBI attach info object + * @pnum: The PEB to be unmapped + */ +static void unmap_peb(struct ubi_attach_info *ai, int pnum) +{ + struct ubi_ainf_volume *av; + struct rb_node *node, *node2; + struct ubi_ainf_peb *aeb; + + for (node = rb_first(&ai->volumes); node; node = rb_next(node)) { + av = rb_entry(node, struct ubi_ainf_volume, rb); + + for (node2 = rb_first(&av->root); node2; + node2 = rb_next(node2)) { + aeb = rb_entry(node2, struct ubi_ainf_peb, u.rb); + if (aeb->pnum == pnum) { + rb_erase(&aeb->u.rb, &av->root); + av->leb_count--; + kmem_cache_free(ai->aeb_slab_cache, aeb); + return; + } + } + } +} + +/** + * scan_pool - scans a pool for changed (no longer empty PEBs). + * @ubi: UBI device object + * @ai: attach info object + * @pebs: an array of all PEB numbers in the to be scanned pool + * @pool_size: size of the pool (number of entries in @pebs) + * @max_sqnum: pointer to the maximal sequence number + * @free: list of PEBs which are most likely free (and go into @ai->free) + * + * Returns 0 on success, if the pool is unusable UBI_BAD_FASTMAP is returned. + * < 0 indicates an internal error. + */ +static int scan_pool(struct ubi_device *ubi, struct ubi_attach_info *ai, + int *pebs, int pool_size, unsigned long long *max_sqnum, + struct list_head *free) +{ + struct ubi_vid_hdr *vh; + struct ubi_ec_hdr *ech; + struct ubi_ainf_peb *new_aeb; + int i, pnum, err, ret = 0; + + ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ech) + return -ENOMEM; + + vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); + if (!vh) { + kfree(ech); + return -ENOMEM; + } + + dbg_bld("scanning fastmap pool: size = %i", pool_size); + + /* + * Now scan all PEBs in the pool to find changes which have been made + * after the creation of the fastmap + */ + for (i = 0; i < pool_size; i++) { + int scrub = 0; + int image_seq; + + pnum = be32_to_cpu(pebs[i]); + + if (ubi_io_is_bad(ubi, pnum)) { + ubi_err(ubi, "bad PEB in fastmap pool!"); + ret = UBI_BAD_FASTMAP; + goto out; + } + + err = ubi_io_read_ec_hdr(ubi, pnum, ech, 0); + if (err && err != UBI_IO_BITFLIPS) { + ubi_err(ubi, "unable to read EC header! PEB:%i err:%i", + pnum, err); + ret = err > 0 ? UBI_BAD_FASTMAP : err; + goto out; + } else if (err == UBI_IO_BITFLIPS) + scrub = 1; + + /* + * Older UBI implementations have image_seq set to zero, so + * we shouldn't fail if image_seq == 0. + */ + image_seq = be32_to_cpu(ech->image_seq); + + if (image_seq && (image_seq != ubi->image_seq)) { + ubi_err(ubi, "bad image seq: 0x%x, expected: 0x%x", + be32_to_cpu(ech->image_seq), ubi->image_seq); + ret = UBI_BAD_FASTMAP; + goto out; + } + + err = ubi_io_read_vid_hdr(ubi, pnum, vh, 0); + if (err == UBI_IO_FF || err == UBI_IO_FF_BITFLIPS) { + unsigned long long ec = be64_to_cpu(ech->ec); + unmap_peb(ai, pnum); + dbg_bld("Adding PEB to free: %i", pnum); + if (err == UBI_IO_FF_BITFLIPS) + add_aeb(ai, free, pnum, ec, 1); + else + add_aeb(ai, free, pnum, ec, 0); + continue; + } else if (err == 0 || err == UBI_IO_BITFLIPS) { + dbg_bld("Found non empty PEB:%i in pool", pnum); + + if (err == UBI_IO_BITFLIPS) + scrub = 1; + + new_aeb = kmem_cache_alloc(ai->aeb_slab_cache, + GFP_KERNEL); + if (!new_aeb) { + ret = -ENOMEM; + goto out; + } + + new_aeb->ec = be64_to_cpu(ech->ec); + new_aeb->pnum = pnum; + new_aeb->lnum = be32_to_cpu(vh->lnum); + new_aeb->sqnum = be64_to_cpu(vh->sqnum); + new_aeb->copy_flag = vh->copy_flag; + new_aeb->scrub = scrub; + + if (*max_sqnum < new_aeb->sqnum) + *max_sqnum = new_aeb->sqnum; + + err = process_pool_aeb(ubi, ai, vh, new_aeb); + if (err) { + ret = err > 0 ? UBI_BAD_FASTMAP : err; + goto out; + } + } else { + /* We are paranoid and fall back to scanning mode */ + ubi_err(ubi, "fastmap pool PEBs contains damaged PEBs!"); + ret = err > 0 ? UBI_BAD_FASTMAP : err; + goto out; + } + + } + +out: + ubi_free_vid_hdr(ubi, vh); + kfree(ech); + return ret; +} + +/** + * count_fastmap_pebs - Counts the PEBs found by fastmap. + * @ai: The UBI attach info object + */ +static int count_fastmap_pebs(struct ubi_attach_info *ai) +{ + struct ubi_ainf_peb *aeb; + struct ubi_ainf_volume *av; + struct rb_node *rb1, *rb2; + int n = 0; + + list_for_each_entry(aeb, &ai->erase, u.list) + n++; + + list_for_each_entry(aeb, &ai->free, u.list) + n++; + + ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) + ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) + n++; + + return n; +} + +/** + * ubi_attach_fastmap - creates ubi_attach_info from a fastmap. + * @ubi: UBI device object + * @ai: UBI attach info object + * @fm: the fastmap to be attached + * + * Returns 0 on success, UBI_BAD_FASTMAP if the found fastmap was unusable. + * < 0 indicates an internal error. + */ +static int ubi_attach_fastmap(struct ubi_device *ubi, + struct ubi_attach_info *ai, + struct ubi_fastmap_layout *fm) +{ + struct list_head used, free; + struct ubi_ainf_volume *av; + struct ubi_ainf_peb *aeb, *tmp_aeb, *_tmp_aeb; + struct ubi_fm_sb *fmsb; + struct ubi_fm_hdr *fmhdr; + struct ubi_fm_scan_pool *fmpl1, *fmpl2; + struct ubi_fm_ec *fmec; + struct ubi_fm_volhdr *fmvhdr; + struct ubi_fm_eba *fm_eba; + int ret, i, j, pool_size, wl_pool_size; + size_t fm_pos = 0, fm_size = ubi->fm_size; + unsigned long long max_sqnum = 0; + void *fm_raw = ubi->fm_buf; + + INIT_LIST_HEAD(&used); + INIT_LIST_HEAD(&free); + ai->min_ec = UBI_MAX_ERASECOUNTER; + + fmsb = (struct ubi_fm_sb *)(fm_raw); + ai->max_sqnum = fmsb->sqnum; + fm_pos += sizeof(struct ubi_fm_sb); + if (fm_pos >= fm_size) + goto fail_bad; + + fmhdr = (struct ubi_fm_hdr *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmhdr); + if (fm_pos >= fm_size) + goto fail_bad; + + if (be32_to_cpu(fmhdr->magic) != UBI_FM_HDR_MAGIC) { + ubi_err(ubi, "bad fastmap header magic: 0x%x, expected: 0x%x", + be32_to_cpu(fmhdr->magic), UBI_FM_HDR_MAGIC); + goto fail_bad; + } + + fmpl1 = (struct ubi_fm_scan_pool *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmpl1); + if (fm_pos >= fm_size) + goto fail_bad; + if (be32_to_cpu(fmpl1->magic) != UBI_FM_POOL_MAGIC) { + ubi_err(ubi, "bad fastmap pool magic: 0x%x, expected: 0x%x", + be32_to_cpu(fmpl1->magic), UBI_FM_POOL_MAGIC); + goto fail_bad; + } + + fmpl2 = (struct ubi_fm_scan_pool *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmpl2); + if (fm_pos >= fm_size) + goto fail_bad; + if (be32_to_cpu(fmpl2->magic) != UBI_FM_POOL_MAGIC) { + ubi_err(ubi, "bad fastmap pool magic: 0x%x, expected: 0x%x", + be32_to_cpu(fmpl2->magic), UBI_FM_POOL_MAGIC); + goto fail_bad; + } + + pool_size = be16_to_cpu(fmpl1->size); + wl_pool_size = be16_to_cpu(fmpl2->size); + fm->max_pool_size = be16_to_cpu(fmpl1->max_size); + fm->max_wl_pool_size = be16_to_cpu(fmpl2->max_size); + + if (pool_size > UBI_FM_MAX_POOL_SIZE || pool_size < 0) { + ubi_err(ubi, "bad pool size: %i", pool_size); + goto fail_bad; + } + + if (wl_pool_size > UBI_FM_MAX_POOL_SIZE || wl_pool_size < 0) { + ubi_err(ubi, "bad WL pool size: %i", wl_pool_size); + goto fail_bad; + } + + + if (fm->max_pool_size > UBI_FM_MAX_POOL_SIZE || + fm->max_pool_size < 0) { + ubi_err(ubi, "bad maximal pool size: %i", fm->max_pool_size); + goto fail_bad; + } + + if (fm->max_wl_pool_size > UBI_FM_MAX_POOL_SIZE || + fm->max_wl_pool_size < 0) { + ubi_err(ubi, "bad maximal WL pool size: %i", + fm->max_wl_pool_size); + goto fail_bad; + } + + /* read EC values from free list */ + for (i = 0; i < be32_to_cpu(fmhdr->free_peb_count); i++) { + fmec = (struct ubi_fm_ec *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmec); + if (fm_pos >= fm_size) + goto fail_bad; + + add_aeb(ai, &ai->free, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 0); + } + + /* read EC values from used list */ + for (i = 0; i < be32_to_cpu(fmhdr->used_peb_count); i++) { + fmec = (struct ubi_fm_ec *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmec); + if (fm_pos >= fm_size) + goto fail_bad; + + add_aeb(ai, &used, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 0); + } + + /* read EC values from scrub list */ + for (i = 0; i < be32_to_cpu(fmhdr->scrub_peb_count); i++) { + fmec = (struct ubi_fm_ec *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmec); + if (fm_pos >= fm_size) + goto fail_bad; + + add_aeb(ai, &used, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 1); + } + + /* read EC values from erase list */ + for (i = 0; i < be32_to_cpu(fmhdr->erase_peb_count); i++) { + fmec = (struct ubi_fm_ec *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmec); + if (fm_pos >= fm_size) + goto fail_bad; + + add_aeb(ai, &ai->erase, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 1); + } + + ai->mean_ec = div_u64(ai->ec_sum, ai->ec_count); + ai->bad_peb_count = be32_to_cpu(fmhdr->bad_peb_count); + + /* Iterate over all volumes and read their EBA table */ + for (i = 0; i < be32_to_cpu(fmhdr->vol_count); i++) { + fmvhdr = (struct ubi_fm_volhdr *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmvhdr); + if (fm_pos >= fm_size) + goto fail_bad; + + if (be32_to_cpu(fmvhdr->magic) != UBI_FM_VHDR_MAGIC) { + ubi_err(ubi, "bad fastmap vol header magic: 0x%x, expected: 0x%x", + be32_to_cpu(fmvhdr->magic), UBI_FM_VHDR_MAGIC); + goto fail_bad; + } + + av = add_vol(ai, be32_to_cpu(fmvhdr->vol_id), + be32_to_cpu(fmvhdr->used_ebs), + be32_to_cpu(fmvhdr->data_pad), + fmvhdr->vol_type, + be32_to_cpu(fmvhdr->last_eb_bytes)); + + if (!av) + goto fail_bad; + + ai->vols_found++; + if (ai->highest_vol_id < be32_to_cpu(fmvhdr->vol_id)) + ai->highest_vol_id = be32_to_cpu(fmvhdr->vol_id); + + fm_eba = (struct ubi_fm_eba *)(fm_raw + fm_pos); + fm_pos += sizeof(*fm_eba); + fm_pos += (sizeof(__be32) * be32_to_cpu(fm_eba->reserved_pebs)); + if (fm_pos >= fm_size) + goto fail_bad; + + if (be32_to_cpu(fm_eba->magic) != UBI_FM_EBA_MAGIC) { + ubi_err(ubi, "bad fastmap EBA header magic: 0x%x, expected: 0x%x", + be32_to_cpu(fm_eba->magic), UBI_FM_EBA_MAGIC); + goto fail_bad; + } + + for (j = 0; j < be32_to_cpu(fm_eba->reserved_pebs); j++) { + int pnum = be32_to_cpu(fm_eba->pnum[j]); + + if ((int)be32_to_cpu(fm_eba->pnum[j]) < 0) + continue; + + aeb = NULL; + list_for_each_entry(tmp_aeb, &used, u.list) { + if (tmp_aeb->pnum == pnum) { + aeb = tmp_aeb; + break; + } + } + + if (!aeb) { + ubi_err(ubi, "PEB %i is in EBA but not in used list", pnum); + goto fail_bad; + } + + aeb->lnum = j; + + if (av->highest_lnum <= aeb->lnum) + av->highest_lnum = aeb->lnum; + + assign_aeb_to_av(ai, aeb, av); + + dbg_bld("inserting PEB:%i (LEB %i) to vol %i", + aeb->pnum, aeb->lnum, av->vol_id); + } + } + + ret = scan_pool(ubi, ai, fmpl1->pebs, pool_size, &max_sqnum, &free); + if (ret) + goto fail; + + ret = scan_pool(ubi, ai, fmpl2->pebs, wl_pool_size, &max_sqnum, &free); + if (ret) + goto fail; + + if (max_sqnum > ai->max_sqnum) + ai->max_sqnum = max_sqnum; + + list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &free, u.list) + list_move_tail(&tmp_aeb->u.list, &ai->free); + + list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &used, u.list) + list_move_tail(&tmp_aeb->u.list, &ai->erase); + + ubi_assert(list_empty(&free)); + + /* + * If fastmap is leaking PEBs (must not happen), raise a + * fat warning and fall back to scanning mode. + * We do this here because in ubi_wl_init() it's too late + * and we cannot fall back to scanning. + */ + if (WARN_ON(count_fastmap_pebs(ai) != ubi->peb_count - + ai->bad_peb_count - fm->used_blocks)) + goto fail_bad; + + return 0; + +fail_bad: + ret = UBI_BAD_FASTMAP; +fail: + list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &used, u.list) { + list_del(&tmp_aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, tmp_aeb); + } + list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &free, u.list) { + list_del(&tmp_aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, tmp_aeb); + } + + return ret; +} + +/** + * ubi_scan_fastmap - scan the fastmap. + * @ubi: UBI device object + * @ai: UBI attach info to be filled + * @fm_anchor: The fastmap starts at this PEB + * + * Returns 0 on success, UBI_NO_FASTMAP if no fastmap was found, + * UBI_BAD_FASTMAP if one was found but is not usable. + * < 0 indicates an internal error. + */ +int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai, + int fm_anchor) +{ + struct ubi_fm_sb *fmsb, *fmsb2; + struct ubi_vid_hdr *vh; + struct ubi_ec_hdr *ech; + struct ubi_fastmap_layout *fm; + int i, used_blocks, pnum, ret = 0; + size_t fm_size; + __be32 crc, tmp_crc; + unsigned long long sqnum = 0; + + down_write(&ubi->fm_protect); + memset(ubi->fm_buf, 0, ubi->fm_size); + + fmsb = kmalloc(sizeof(*fmsb), GFP_KERNEL); + if (!fmsb) { + ret = -ENOMEM; + goto out; + } + + fm = kzalloc(sizeof(*fm), GFP_KERNEL); + if (!fm) { + ret = -ENOMEM; + kfree(fmsb); + goto out; + } + + ret = ubi_io_read(ubi, fmsb, fm_anchor, ubi->leb_start, sizeof(*fmsb)); + if (ret && ret != UBI_IO_BITFLIPS) + goto free_fm_sb; + else if (ret == UBI_IO_BITFLIPS) + fm->to_be_tortured[0] = 1; + + if (be32_to_cpu(fmsb->magic) != UBI_FM_SB_MAGIC) { + ubi_err(ubi, "bad super block magic: 0x%x, expected: 0x%x", + be32_to_cpu(fmsb->magic), UBI_FM_SB_MAGIC); + ret = UBI_BAD_FASTMAP; + goto free_fm_sb; + } + + if (fmsb->version != UBI_FM_FMT_VERSION) { + ubi_err(ubi, "bad fastmap version: %i, expected: %i", + fmsb->version, UBI_FM_FMT_VERSION); + ret = UBI_BAD_FASTMAP; + goto free_fm_sb; + } + + used_blocks = be32_to_cpu(fmsb->used_blocks); + if (used_blocks > UBI_FM_MAX_BLOCKS || used_blocks < 1) { + ubi_err(ubi, "number of fastmap blocks is invalid: %i", + used_blocks); + ret = UBI_BAD_FASTMAP; + goto free_fm_sb; + } + + fm_size = ubi->leb_size * used_blocks; + if (fm_size != ubi->fm_size) { + ubi_err(ubi, "bad fastmap size: %zi, expected: %zi", + fm_size, ubi->fm_size); + ret = UBI_BAD_FASTMAP; + goto free_fm_sb; + } + + ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ech) { + ret = -ENOMEM; + goto free_fm_sb; + } + + vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); + if (!vh) { + ret = -ENOMEM; + goto free_hdr; + } + + for (i = 0; i < used_blocks; i++) { + int image_seq; + + pnum = be32_to_cpu(fmsb->block_loc[i]); + + if (ubi_io_is_bad(ubi, pnum)) { + ret = UBI_BAD_FASTMAP; + goto free_hdr; + } + + ret = ubi_io_read_ec_hdr(ubi, pnum, ech, 0); + if (ret && ret != UBI_IO_BITFLIPS) { + ubi_err(ubi, "unable to read fastmap block# %i EC (PEB: %i)", + i, pnum); + if (ret > 0) + ret = UBI_BAD_FASTMAP; + goto free_hdr; + } else if (ret == UBI_IO_BITFLIPS) + fm->to_be_tortured[i] = 1; + + image_seq = be32_to_cpu(ech->image_seq); + if (!ubi->image_seq) + ubi->image_seq = image_seq; + + /* + * Older UBI implementations have image_seq set to zero, so + * we shouldn't fail if image_seq == 0. + */ + if (image_seq && (image_seq != ubi->image_seq)) { + ubi_err(ubi, "wrong image seq:%d instead of %d", + be32_to_cpu(ech->image_seq), ubi->image_seq); + ret = UBI_BAD_FASTMAP; + goto free_hdr; + } + + ret = ubi_io_read_vid_hdr(ubi, pnum, vh, 0); + if (ret && ret != UBI_IO_BITFLIPS) { + ubi_err(ubi, "unable to read fastmap block# %i (PEB: %i)", + i, pnum); + goto free_hdr; + } + + if (i == 0) { + if (be32_to_cpu(vh->vol_id) != UBI_FM_SB_VOLUME_ID) { + ubi_err(ubi, "bad fastmap anchor vol_id: 0x%x, expected: 0x%x", + be32_to_cpu(vh->vol_id), + UBI_FM_SB_VOLUME_ID); + ret = UBI_BAD_FASTMAP; + goto free_hdr; + } + } else { + if (be32_to_cpu(vh->vol_id) != UBI_FM_DATA_VOLUME_ID) { + ubi_err(ubi, "bad fastmap data vol_id: 0x%x, expected: 0x%x", + be32_to_cpu(vh->vol_id), + UBI_FM_DATA_VOLUME_ID); + ret = UBI_BAD_FASTMAP; + goto free_hdr; + } + } + + if (sqnum < be64_to_cpu(vh->sqnum)) + sqnum = be64_to_cpu(vh->sqnum); + + ret = ubi_io_read(ubi, ubi->fm_buf + (ubi->leb_size * i), pnum, + ubi->leb_start, ubi->leb_size); + if (ret && ret != UBI_IO_BITFLIPS) { + ubi_err(ubi, "unable to read fastmap block# %i (PEB: %i, " + "err: %i)", i, pnum, ret); + goto free_hdr; + } + } + + kfree(fmsb); + fmsb = NULL; + + fmsb2 = (struct ubi_fm_sb *)(ubi->fm_buf); + tmp_crc = be32_to_cpu(fmsb2->data_crc); + fmsb2->data_crc = 0; + crc = crc32(UBI_CRC32_INIT, ubi->fm_buf, fm_size); + if (crc != tmp_crc) { + ubi_err(ubi, "fastmap data CRC is invalid"); + ubi_err(ubi, "CRC should be: 0x%x, calc: 0x%x", + tmp_crc, crc); + ret = UBI_BAD_FASTMAP; + goto free_hdr; + } + + fmsb2->sqnum = sqnum; + + fm->used_blocks = used_blocks; + + ret = ubi_attach_fastmap(ubi, ai, fm); + if (ret) { + if (ret > 0) + ret = UBI_BAD_FASTMAP; + goto free_hdr; + } + + for (i = 0; i < used_blocks; i++) { + struct ubi_wl_entry *e; + + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) { + while (i--) + kfree(fm->e[i]); + + ret = -ENOMEM; + goto free_hdr; + } + + e->pnum = be32_to_cpu(fmsb2->block_loc[i]); + e->ec = be32_to_cpu(fmsb2->block_ec[i]); + fm->e[i] = e; + } + + ubi->fm = fm; + ubi->fm_pool.max_size = ubi->fm->max_pool_size; + ubi->fm_wl_pool.max_size = ubi->fm->max_wl_pool_size; + ubi_msg(ubi, "attached by fastmap"); + ubi_msg(ubi, "fastmap pool size: %d", ubi->fm_pool.max_size); + ubi_msg(ubi, "fastmap WL pool size: %d", + ubi->fm_wl_pool.max_size); + ubi->fm_disabled = 0; + + ubi_free_vid_hdr(ubi, vh); + kfree(ech); +out: + up_write(&ubi->fm_protect); + if (ret == UBI_BAD_FASTMAP) + ubi_err(ubi, "Attach by fastmap failed, doing a full scan!"); + return ret; + +free_hdr: + ubi_free_vid_hdr(ubi, vh); + kfree(ech); +free_fm_sb: + kfree(fmsb); + kfree(fm); + goto out; +} + +/** + * ubi_write_fastmap - writes a fastmap. + * @ubi: UBI device object + * @new_fm: the to be written fastmap + * + * Returns 0 on success, < 0 indicates an internal error. + */ +static int ubi_write_fastmap(struct ubi_device *ubi, + struct ubi_fastmap_layout *new_fm) +{ + size_t fm_pos = 0; + void *fm_raw; + struct ubi_fm_sb *fmsb; + struct ubi_fm_hdr *fmh; + struct ubi_fm_scan_pool *fmpl1, *fmpl2; + struct ubi_fm_ec *fec; + struct ubi_fm_volhdr *fvh; + struct ubi_fm_eba *feba; + struct ubi_wl_entry *wl_e; + struct ubi_volume *vol; + struct ubi_vid_hdr *avhdr, *dvhdr; + struct ubi_work *ubi_wrk; + struct rb_node *tmp_rb; + int ret, i, j, free_peb_count, used_peb_count, vol_count; + int scrub_peb_count, erase_peb_count; + int *seen_pebs = NULL; + + fm_raw = ubi->fm_buf; + memset(ubi->fm_buf, 0, ubi->fm_size); + + avhdr = new_fm_vhdr(ubi, UBI_FM_SB_VOLUME_ID); + if (!avhdr) { + ret = -ENOMEM; + goto out; + } + + dvhdr = new_fm_vhdr(ubi, UBI_FM_DATA_VOLUME_ID); + if (!dvhdr) { + ret = -ENOMEM; + goto out_kfree; + } + + seen_pebs = init_seen(ubi); + if (IS_ERR(seen_pebs)) { + ret = PTR_ERR(seen_pebs); + goto out_kfree; + } + + spin_lock(&ubi->volumes_lock); + spin_lock(&ubi->wl_lock); + + fmsb = (struct ubi_fm_sb *)fm_raw; + fm_pos += sizeof(*fmsb); + ubi_assert(fm_pos <= ubi->fm_size); + + fmh = (struct ubi_fm_hdr *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmh); + ubi_assert(fm_pos <= ubi->fm_size); + + fmsb->magic = cpu_to_be32(UBI_FM_SB_MAGIC); + fmsb->version = UBI_FM_FMT_VERSION; + fmsb->used_blocks = cpu_to_be32(new_fm->used_blocks); + /* the max sqnum will be filled in while *reading* the fastmap */ + fmsb->sqnum = 0; + + fmh->magic = cpu_to_be32(UBI_FM_HDR_MAGIC); + free_peb_count = 0; + used_peb_count = 0; + scrub_peb_count = 0; + erase_peb_count = 0; + vol_count = 0; + + fmpl1 = (struct ubi_fm_scan_pool *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmpl1); + fmpl1->magic = cpu_to_be32(UBI_FM_POOL_MAGIC); + fmpl1->size = cpu_to_be16(ubi->fm_pool.size); + fmpl1->max_size = cpu_to_be16(ubi->fm_pool.max_size); + + for (i = 0; i < ubi->fm_pool.size; i++) { + fmpl1->pebs[i] = cpu_to_be32(ubi->fm_pool.pebs[i]); + set_seen(ubi, ubi->fm_pool.pebs[i], seen_pebs); + } + + fmpl2 = (struct ubi_fm_scan_pool *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmpl2); + fmpl2->magic = cpu_to_be32(UBI_FM_POOL_MAGIC); + fmpl2->size = cpu_to_be16(ubi->fm_wl_pool.size); + fmpl2->max_size = cpu_to_be16(ubi->fm_wl_pool.max_size); + + for (i = 0; i < ubi->fm_wl_pool.size; i++) { + fmpl2->pebs[i] = cpu_to_be32(ubi->fm_wl_pool.pebs[i]); + set_seen(ubi, ubi->fm_wl_pool.pebs[i], seen_pebs); + } + + ubi_for_each_free_peb(ubi, wl_e, tmp_rb) { + fec = (struct ubi_fm_ec *)(fm_raw + fm_pos); + + fec->pnum = cpu_to_be32(wl_e->pnum); + set_seen(ubi, wl_e->pnum, seen_pebs); + fec->ec = cpu_to_be32(wl_e->ec); + + free_peb_count++; + fm_pos += sizeof(*fec); + ubi_assert(fm_pos <= ubi->fm_size); + } + fmh->free_peb_count = cpu_to_be32(free_peb_count); + + ubi_for_each_used_peb(ubi, wl_e, tmp_rb) { + fec = (struct ubi_fm_ec *)(fm_raw + fm_pos); + + fec->pnum = cpu_to_be32(wl_e->pnum); + set_seen(ubi, wl_e->pnum, seen_pebs); + fec->ec = cpu_to_be32(wl_e->ec); + + used_peb_count++; + fm_pos += sizeof(*fec); + ubi_assert(fm_pos <= ubi->fm_size); + } + + ubi_for_each_protected_peb(ubi, i, wl_e) { + fec = (struct ubi_fm_ec *)(fm_raw + fm_pos); + + fec->pnum = cpu_to_be32(wl_e->pnum); + set_seen(ubi, wl_e->pnum, seen_pebs); + fec->ec = cpu_to_be32(wl_e->ec); + + used_peb_count++; + fm_pos += sizeof(*fec); + ubi_assert(fm_pos <= ubi->fm_size); + } + fmh->used_peb_count = cpu_to_be32(used_peb_count); + + ubi_for_each_scrub_peb(ubi, wl_e, tmp_rb) { + fec = (struct ubi_fm_ec *)(fm_raw + fm_pos); + + fec->pnum = cpu_to_be32(wl_e->pnum); + set_seen(ubi, wl_e->pnum, seen_pebs); + fec->ec = cpu_to_be32(wl_e->ec); + + scrub_peb_count++; + fm_pos += sizeof(*fec); + ubi_assert(fm_pos <= ubi->fm_size); + } + fmh->scrub_peb_count = cpu_to_be32(scrub_peb_count); + + + list_for_each_entry(ubi_wrk, &ubi->works, list) { + if (ubi_is_erase_work(ubi_wrk)) { + wl_e = ubi_wrk->e; + ubi_assert(wl_e); + + fec = (struct ubi_fm_ec *)(fm_raw + fm_pos); + + fec->pnum = cpu_to_be32(wl_e->pnum); + set_seen(ubi, wl_e->pnum, seen_pebs); + fec->ec = cpu_to_be32(wl_e->ec); + + erase_peb_count++; + fm_pos += sizeof(*fec); + ubi_assert(fm_pos <= ubi->fm_size); + } + } + fmh->erase_peb_count = cpu_to_be32(erase_peb_count); + + for (i = 0; i < UBI_MAX_VOLUMES + UBI_INT_VOL_COUNT; i++) { + vol = ubi->volumes[i]; + + if (!vol) + continue; + + vol_count++; + + fvh = (struct ubi_fm_volhdr *)(fm_raw + fm_pos); + fm_pos += sizeof(*fvh); + ubi_assert(fm_pos <= ubi->fm_size); + + fvh->magic = cpu_to_be32(UBI_FM_VHDR_MAGIC); + fvh->vol_id = cpu_to_be32(vol->vol_id); + fvh->vol_type = vol->vol_type; + fvh->used_ebs = cpu_to_be32(vol->used_ebs); + fvh->data_pad = cpu_to_be32(vol->data_pad); + fvh->last_eb_bytes = cpu_to_be32(vol->last_eb_bytes); + + ubi_assert(vol->vol_type == UBI_DYNAMIC_VOLUME || + vol->vol_type == UBI_STATIC_VOLUME); + + feba = (struct ubi_fm_eba *)(fm_raw + fm_pos); + fm_pos += sizeof(*feba) + (sizeof(__be32) * vol->reserved_pebs); + ubi_assert(fm_pos <= ubi->fm_size); + + for (j = 0; j < vol->reserved_pebs; j++) + feba->pnum[j] = cpu_to_be32(vol->eba_tbl[j]); + + feba->reserved_pebs = cpu_to_be32(j); + feba->magic = cpu_to_be32(UBI_FM_EBA_MAGIC); + } + fmh->vol_count = cpu_to_be32(vol_count); + fmh->bad_peb_count = cpu_to_be32(ubi->bad_peb_count); + + avhdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); + avhdr->lnum = 0; + + spin_unlock(&ubi->wl_lock); + spin_unlock(&ubi->volumes_lock); + + dbg_bld("writing fastmap SB to PEB %i", new_fm->e[0]->pnum); + ret = ubi_io_write_vid_hdr(ubi, new_fm->e[0]->pnum, avhdr); + if (ret) { + ubi_err(ubi, "unable to write vid_hdr to fastmap SB!"); + goto out_kfree; + } + + for (i = 0; i < new_fm->used_blocks; i++) { + fmsb->block_loc[i] = cpu_to_be32(new_fm->e[i]->pnum); + set_seen(ubi, new_fm->e[i]->pnum, seen_pebs); + fmsb->block_ec[i] = cpu_to_be32(new_fm->e[i]->ec); + } + + fmsb->data_crc = 0; + fmsb->data_crc = cpu_to_be32(crc32(UBI_CRC32_INIT, fm_raw, + ubi->fm_size)); + + for (i = 1; i < new_fm->used_blocks; i++) { + dvhdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); + dvhdr->lnum = cpu_to_be32(i); + dbg_bld("writing fastmap data to PEB %i sqnum %llu", + new_fm->e[i]->pnum, be64_to_cpu(dvhdr->sqnum)); + ret = ubi_io_write_vid_hdr(ubi, new_fm->e[i]->pnum, dvhdr); + if (ret) { + ubi_err(ubi, "unable to write vid_hdr to PEB %i!", + new_fm->e[i]->pnum); + goto out_kfree; + } + } + + for (i = 0; i < new_fm->used_blocks; i++) { + ret = ubi_io_write(ubi, fm_raw + (i * ubi->leb_size), + new_fm->e[i]->pnum, ubi->leb_start, ubi->leb_size); + if (ret) { + ubi_err(ubi, "unable to write fastmap to PEB %i!", + new_fm->e[i]->pnum); + goto out_kfree; + } + } + + ubi_assert(new_fm); + ubi->fm = new_fm; + + ret = self_check_seen(ubi, seen_pebs); + dbg_bld("fastmap written!"); + +out_kfree: + ubi_free_vid_hdr(ubi, avhdr); + ubi_free_vid_hdr(ubi, dvhdr); + free_seen(seen_pebs); +out: + return ret; +} + +/** + * erase_block - Manually erase a PEB. + * @ubi: UBI device object + * @pnum: PEB to be erased + * + * Returns the new EC value on success, < 0 indicates an internal error. + */ +static int erase_block(struct ubi_device *ubi, int pnum) +{ + int ret; + struct ubi_ec_hdr *ec_hdr; + long long ec; + + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ec_hdr) + return -ENOMEM; + + ret = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0); + if (ret < 0) + goto out; + else if (ret && ret != UBI_IO_BITFLIPS) { + ret = -EINVAL; + goto out; + } + + ret = ubi_io_sync_erase(ubi, pnum, 0); + if (ret < 0) + goto out; + + ec = be64_to_cpu(ec_hdr->ec); + ec += ret; + if (ec > UBI_MAX_ERASECOUNTER) { + ret = -EINVAL; + goto out; + } + + ec_hdr->ec = cpu_to_be64(ec); + ret = ubi_io_write_ec_hdr(ubi, pnum, ec_hdr); + if (ret < 0) + goto out; + + ret = ec; +out: + kfree(ec_hdr); + return ret; +} + +/** + * invalidate_fastmap - destroys a fastmap. + * @ubi: UBI device object + * + * This function ensures that upon next UBI attach a full scan + * is issued. We need this if UBI is about to write a new fastmap + * but is unable to do so. In this case we have two options: + * a) Make sure that the current fastmap will not be usued upon + * attach time and contine or b) fall back to RO mode to have the + * current fastmap in a valid state. + * Returns 0 on success, < 0 indicates an internal error. + */ +static int invalidate_fastmap(struct ubi_device *ubi) +{ + int ret; + struct ubi_fastmap_layout *fm; + struct ubi_wl_entry *e; + struct ubi_vid_hdr *vh = NULL; + + if (!ubi->fm) + return 0; + + ubi->fm = NULL; + + ret = -ENOMEM; + fm = kzalloc(sizeof(*fm), GFP_KERNEL); + if (!fm) + goto out; + + vh = new_fm_vhdr(ubi, UBI_FM_SB_VOLUME_ID); + if (!vh) + goto out_free_fm; + + ret = -ENOSPC; + e = ubi_wl_get_fm_peb(ubi, 1); + if (!e) + goto out_free_fm; + + /* + * Create fake fastmap such that UBI will fall back + * to scanning mode. + */ + vh->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); + ret = ubi_io_write_vid_hdr(ubi, e->pnum, vh); + if (ret < 0) { + ubi_wl_put_fm_peb(ubi, e, 0, 0); + goto out_free_fm; + } + + fm->used_blocks = 1; + fm->e[0] = e; + + ubi->fm = fm; + +out: + ubi_free_vid_hdr(ubi, vh); + return ret; + +out_free_fm: + kfree(fm); + goto out; +} + +/** + * return_fm_pebs - returns all PEBs used by a fastmap back to the + * WL sub-system. + * @ubi: UBI device object + * @fm: fastmap layout object + */ +static void return_fm_pebs(struct ubi_device *ubi, + struct ubi_fastmap_layout *fm) +{ + int i; + + if (!fm) + return; + + for (i = 0; i < fm->used_blocks; i++) { + if (fm->e[i]) { + ubi_wl_put_fm_peb(ubi, fm->e[i], i, + fm->to_be_tortured[i]); + fm->e[i] = NULL; + } + } +} + +/** + * ubi_update_fastmap - will be called by UBI if a volume changes or + * a fastmap pool becomes full. + * @ubi: UBI device object + * + * Returns 0 on success, < 0 indicates an internal error. + */ +int ubi_update_fastmap(struct ubi_device *ubi) +{ + int ret, i, j; + struct ubi_fastmap_layout *new_fm, *old_fm; + struct ubi_wl_entry *tmp_e; + + down_write(&ubi->fm_protect); + + ubi_refill_pools(ubi); + + if (ubi->ro_mode || ubi->fm_disabled) { + up_write(&ubi->fm_protect); + return 0; + } + + ret = ubi_ensure_anchor_pebs(ubi); + if (ret) { + up_write(&ubi->fm_protect); + return ret; + } + + new_fm = kzalloc(sizeof(*new_fm), GFP_KERNEL); + if (!new_fm) { + up_write(&ubi->fm_protect); + return -ENOMEM; + } + + new_fm->used_blocks = ubi->fm_size / ubi->leb_size; + old_fm = ubi->fm; + ubi->fm = NULL; + + if (new_fm->used_blocks > UBI_FM_MAX_BLOCKS) { + ubi_err(ubi, "fastmap too large"); + ret = -ENOSPC; + goto err; + } + + for (i = 1; i < new_fm->used_blocks; i++) { + spin_lock(&ubi->wl_lock); + tmp_e = ubi_wl_get_fm_peb(ubi, 0); + spin_unlock(&ubi->wl_lock); + + if (!tmp_e) { + if (old_fm && old_fm->e[i]) { + ret = erase_block(ubi, old_fm->e[i]->pnum); + if (ret < 0) { + ubi_err(ubi, "could not erase old fastmap PEB"); + + for (j = 1; j < i; j++) { + ubi_wl_put_fm_peb(ubi, new_fm->e[j], + j, 0); + new_fm->e[j] = NULL; + } + goto err; + } + new_fm->e[i] = old_fm->e[i]; + old_fm->e[i] = NULL; + } else { + ubi_err(ubi, "could not get any free erase block"); + + for (j = 1; j < i; j++) { + ubi_wl_put_fm_peb(ubi, new_fm->e[j], j, 0); + new_fm->e[j] = NULL; + } + + ret = -ENOSPC; + goto err; + } + } else { + new_fm->e[i] = tmp_e; + + if (old_fm && old_fm->e[i]) { + ubi_wl_put_fm_peb(ubi, old_fm->e[i], i, + old_fm->to_be_tortured[i]); + old_fm->e[i] = NULL; + } + } + } + + /* Old fastmap is larger than the new one */ + if (old_fm && new_fm->used_blocks < old_fm->used_blocks) { + for (i = new_fm->used_blocks; i < old_fm->used_blocks; i++) { + ubi_wl_put_fm_peb(ubi, old_fm->e[i], i, + old_fm->to_be_tortured[i]); + old_fm->e[i] = NULL; + } + } + + spin_lock(&ubi->wl_lock); + tmp_e = ubi_wl_get_fm_peb(ubi, 1); + spin_unlock(&ubi->wl_lock); + + if (old_fm) { + /* no fresh anchor PEB was found, reuse the old one */ + if (!tmp_e) { + ret = erase_block(ubi, old_fm->e[0]->pnum); + if (ret < 0) { + ubi_err(ubi, "could not erase old anchor PEB"); + + for (i = 1; i < new_fm->used_blocks; i++) { + ubi_wl_put_fm_peb(ubi, new_fm->e[i], + i, 0); + new_fm->e[i] = NULL; + } + goto err; + } + new_fm->e[0] = old_fm->e[0]; + new_fm->e[0]->ec = ret; + old_fm->e[0] = NULL; + } else { + /* we've got a new anchor PEB, return the old one */ + ubi_wl_put_fm_peb(ubi, old_fm->e[0], 0, + old_fm->to_be_tortured[0]); + new_fm->e[0] = tmp_e; + old_fm->e[0] = NULL; + } + } else { + if (!tmp_e) { + ubi_err(ubi, "could not find any anchor PEB"); + + for (i = 1; i < new_fm->used_blocks; i++) { + ubi_wl_put_fm_peb(ubi, new_fm->e[i], i, 0); + new_fm->e[i] = NULL; + } + + ret = -ENOSPC; + goto err; + } + new_fm->e[0] = tmp_e; + } + + down_write(&ubi->work_sem); + down_write(&ubi->fm_eba_sem); + ret = ubi_write_fastmap(ubi, new_fm); + up_write(&ubi->fm_eba_sem); + up_write(&ubi->work_sem); + + if (ret) + goto err; + +out_unlock: + up_write(&ubi->fm_protect); + kfree(old_fm); + return ret; + +err: + ubi_warn(ubi, "Unable to write new fastmap, err=%i", ret); + + ret = invalidate_fastmap(ubi); + if (ret < 0) { + ubi_err(ubi, "Unable to invalidiate current fastmap!"); + ubi_ro_mode(ubi); + } else { + return_fm_pebs(ubi, old_fm); + return_fm_pebs(ubi, new_fm); + ret = 0; + } + + kfree(new_fm); + goto out_unlock; +} diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c new file mode 100644 index 000000000..b93807b4c --- /dev/null +++ b/drivers/mtd/ubi/gluebi.c @@ -0,0 +1,521 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём), Joern Engel + */ + +/* + * This is a small driver which implements fake MTD devices on top of UBI + * volumes. This sounds strange, but it is in fact quite useful to make + * MTD-oriented software (including all the legacy software) work on top of + * UBI. + * + * Gluebi emulates MTD devices of "MTD_UBIVOLUME" type. Their minimal I/O unit + * size (@mtd->writesize) is equivalent to the UBI minimal I/O unit. The + * eraseblock size is equivalent to the logical eraseblock size of the volume. + */ + +#include <linux/err.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/math64.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/mtd/ubi.h> +#include <linux/mtd/mtd.h> +#include "ubi-media.h" + +#define err_msg(fmt, ...) \ + pr_err("gluebi (pid %d): %s: " fmt "\n", \ + current->pid, __func__, ##__VA_ARGS__) + +/** + * struct gluebi_device - a gluebi device description data structure. + * @mtd: emulated MTD device description object + * @refcnt: gluebi device reference count + * @desc: UBI volume descriptor + * @ubi_num: UBI device number this gluebi device works on + * @vol_id: ID of UBI volume this gluebi device works on + * @list: link in a list of gluebi devices + */ +struct gluebi_device { + struct mtd_info mtd; + int refcnt; + struct ubi_volume_desc *desc; + int ubi_num; + int vol_id; + struct list_head list; +}; + +/* List of all gluebi devices */ +static LIST_HEAD(gluebi_devices); +static DEFINE_MUTEX(devices_mutex); + +/** + * find_gluebi_nolock - find a gluebi device. + * @ubi_num: UBI device number + * @vol_id: volume ID + * + * This function seraches for gluebi device corresponding to UBI device + * @ubi_num and UBI volume @vol_id. Returns the gluebi device description + * object in case of success and %NULL in case of failure. The caller has to + * have the &devices_mutex locked. + */ +static struct gluebi_device *find_gluebi_nolock(int ubi_num, int vol_id) +{ + struct gluebi_device *gluebi; + + list_for_each_entry(gluebi, &gluebi_devices, list) + if (gluebi->ubi_num == ubi_num && gluebi->vol_id == vol_id) + return gluebi; + return NULL; +} + +/** + * gluebi_get_device - get MTD device reference. + * @mtd: the MTD device description object + * + * This function is called every time the MTD device is being opened and + * implements the MTD get_device() operation. Returns zero in case of success + * and a negative error code in case of failure. + */ +static int gluebi_get_device(struct mtd_info *mtd) +{ + struct gluebi_device *gluebi; + int ubi_mode = UBI_READONLY; + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + if (mtd->flags & MTD_WRITEABLE) + ubi_mode = UBI_READWRITE; + + gluebi = container_of(mtd, struct gluebi_device, mtd); + mutex_lock(&devices_mutex); + if (gluebi->refcnt > 0) { + /* + * The MTD device is already referenced and this is just one + * more reference. MTD allows many users to open the same + * volume simultaneously and do not distinguish between + * readers/writers/exclusive openers as UBI does. So we do not + * open the UBI volume again - just increase the reference + * counter and return. + */ + gluebi->refcnt += 1; + mutex_unlock(&devices_mutex); + return 0; + } + + /* + * This is the first reference to this UBI volume via the MTD device + * interface. Open the corresponding volume in read-write mode. + */ + gluebi->desc = ubi_open_volume(gluebi->ubi_num, gluebi->vol_id, + ubi_mode); + if (IS_ERR(gluebi->desc)) { + mutex_unlock(&devices_mutex); + module_put(THIS_MODULE); + return PTR_ERR(gluebi->desc); + } + gluebi->refcnt += 1; + mutex_unlock(&devices_mutex); + return 0; +} + +/** + * gluebi_put_device - put MTD device reference. + * @mtd: the MTD device description object + * + * This function is called every time the MTD device is being put. Returns + * zero in case of success and a negative error code in case of failure. + */ +static void gluebi_put_device(struct mtd_info *mtd) +{ + struct gluebi_device *gluebi; + + gluebi = container_of(mtd, struct gluebi_device, mtd); + mutex_lock(&devices_mutex); + gluebi->refcnt -= 1; + if (gluebi->refcnt == 0) + ubi_close_volume(gluebi->desc); + module_put(THIS_MODULE); + mutex_unlock(&devices_mutex); +} + +/** + * gluebi_read - read operation of emulated MTD devices. + * @mtd: MTD device description object + * @from: absolute offset from where to read + * @len: how many bytes to read + * @retlen: count of read bytes is returned here + * @buf: buffer to store the read data + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int gluebi_read(struct mtd_info *mtd, loff_t from, size_t len, + size_t *retlen, unsigned char *buf) +{ + int err = 0, lnum, offs, bytes_left; + struct gluebi_device *gluebi; + + gluebi = container_of(mtd, struct gluebi_device, mtd); + lnum = div_u64_rem(from, mtd->erasesize, &offs); + bytes_left = len; + while (bytes_left) { + size_t to_read = mtd->erasesize - offs; + + if (to_read > bytes_left) + to_read = bytes_left; + + err = ubi_read(gluebi->desc, lnum, buf, offs, to_read); + if (err) + break; + + lnum += 1; + offs = 0; + bytes_left -= to_read; + buf += to_read; + } + + *retlen = len - bytes_left; + return err; +} + +/** + * gluebi_write - write operation of emulated MTD devices. + * @mtd: MTD device description object + * @to: absolute offset where to write + * @len: how many bytes to write + * @retlen: count of written bytes is returned here + * @buf: buffer with data to write + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int gluebi_write(struct mtd_info *mtd, loff_t to, size_t len, + size_t *retlen, const u_char *buf) +{ + int err = 0, lnum, offs, bytes_left; + struct gluebi_device *gluebi; + + gluebi = container_of(mtd, struct gluebi_device, mtd); + lnum = div_u64_rem(to, mtd->erasesize, &offs); + + if (len % mtd->writesize || offs % mtd->writesize) + return -EINVAL; + + bytes_left = len; + while (bytes_left) { + size_t to_write = mtd->erasesize - offs; + + if (to_write > bytes_left) + to_write = bytes_left; + + err = ubi_leb_write(gluebi->desc, lnum, buf, offs, to_write); + if (err) + break; + + lnum += 1; + offs = 0; + bytes_left -= to_write; + buf += to_write; + } + + *retlen = len - bytes_left; + return err; +} + +/** + * gluebi_erase - erase operation of emulated MTD devices. + * @mtd: the MTD device description object + * @instr: the erase operation description + * + * This function calls the erase callback when finishes. Returns zero in case + * of success and a negative error code in case of failure. + */ +static int gluebi_erase(struct mtd_info *mtd, struct erase_info *instr) +{ + int err, i, lnum, count; + struct gluebi_device *gluebi; + + if (mtd_mod_by_ws(instr->addr, mtd) || mtd_mod_by_ws(instr->len, mtd)) + return -EINVAL; + + lnum = mtd_div_by_eb(instr->addr, mtd); + count = mtd_div_by_eb(instr->len, mtd); + gluebi = container_of(mtd, struct gluebi_device, mtd); + + for (i = 0; i < count - 1; i++) { + err = ubi_leb_unmap(gluebi->desc, lnum + i); + if (err) + goto out_err; + } + /* + * MTD erase operations are synchronous, so we have to make sure the + * physical eraseblock is wiped out. + * + * Thus, perform leb_erase instead of leb_unmap operation - leb_erase + * will wait for the end of operations + */ + err = ubi_leb_erase(gluebi->desc, lnum + i); + if (err) + goto out_err; + + instr->state = MTD_ERASE_DONE; + mtd_erase_callback(instr); + return 0; + +out_err: + instr->state = MTD_ERASE_FAILED; + instr->fail_addr = (long long)lnum * mtd->erasesize; + return err; +} + +/** + * gluebi_create - create a gluebi device for an UBI volume. + * @di: UBI device description object + * @vi: UBI volume description object + * + * This function is called when a new UBI volume is created in order to create + * corresponding fake MTD device. Returns zero in case of success and a + * negative error code in case of failure. + */ +static int gluebi_create(struct ubi_device_info *di, + struct ubi_volume_info *vi) +{ + struct gluebi_device *gluebi, *g; + struct mtd_info *mtd; + + gluebi = kzalloc(sizeof(struct gluebi_device), GFP_KERNEL); + if (!gluebi) + return -ENOMEM; + + mtd = &gluebi->mtd; + mtd->name = kmemdup(vi->name, vi->name_len + 1, GFP_KERNEL); + if (!mtd->name) { + kfree(gluebi); + return -ENOMEM; + } + + gluebi->vol_id = vi->vol_id; + gluebi->ubi_num = vi->ubi_num; + mtd->type = MTD_UBIVOLUME; + if (!di->ro_mode) + mtd->flags = MTD_WRITEABLE; + mtd->owner = THIS_MODULE; + mtd->writesize = di->min_io_size; + mtd->erasesize = vi->usable_leb_size; + mtd->_read = gluebi_read; + mtd->_write = gluebi_write; + mtd->_erase = gluebi_erase; + mtd->_get_device = gluebi_get_device; + mtd->_put_device = gluebi_put_device; + + /* + * In case of dynamic a volume, MTD device size is just volume size. In + * case of a static volume the size is equivalent to the amount of data + * bytes. + */ + if (vi->vol_type == UBI_DYNAMIC_VOLUME) + mtd->size = (unsigned long long)vi->usable_leb_size * vi->size; + else + mtd->size = vi->used_bytes; + + /* Just a sanity check - make sure this gluebi device does not exist */ + mutex_lock(&devices_mutex); + g = find_gluebi_nolock(vi->ubi_num, vi->vol_id); + if (g) + err_msg("gluebi MTD device %d form UBI device %d volume %d already exists", + g->mtd.index, vi->ubi_num, vi->vol_id); + mutex_unlock(&devices_mutex); + + if (mtd_device_register(mtd, NULL, 0)) { + err_msg("cannot add MTD device"); + kfree(mtd->name); + kfree(gluebi); + return -ENFILE; + } + + mutex_lock(&devices_mutex); + list_add_tail(&gluebi->list, &gluebi_devices); + mutex_unlock(&devices_mutex); + return 0; +} + +/** + * gluebi_remove - remove a gluebi device. + * @vi: UBI volume description object + * + * This function is called when an UBI volume is removed and it removes + * corresponding fake MTD device. Returns zero in case of success and a + * negative error code in case of failure. + */ +static int gluebi_remove(struct ubi_volume_info *vi) +{ + int err = 0; + struct mtd_info *mtd; + struct gluebi_device *gluebi; + + mutex_lock(&devices_mutex); + gluebi = find_gluebi_nolock(vi->ubi_num, vi->vol_id); + if (!gluebi) { + err_msg("got remove notification for unknown UBI device %d volume %d", + vi->ubi_num, vi->vol_id); + err = -ENOENT; + } else if (gluebi->refcnt) + err = -EBUSY; + else + list_del(&gluebi->list); + mutex_unlock(&devices_mutex); + if (err) + return err; + + mtd = &gluebi->mtd; + err = mtd_device_unregister(mtd); + if (err) { + err_msg("cannot remove fake MTD device %d, UBI device %d, volume %d, error %d", + mtd->index, gluebi->ubi_num, gluebi->vol_id, err); + mutex_lock(&devices_mutex); + list_add_tail(&gluebi->list, &gluebi_devices); + mutex_unlock(&devices_mutex); + return err; + } + + kfree(mtd->name); + kfree(gluebi); + return 0; +} + +/** + * gluebi_updated - UBI volume was updated notifier. + * @vi: volume info structure + * + * This function is called every time an UBI volume is updated. It does nothing + * if te volume @vol is dynamic, and changes MTD device size if the + * volume is static. This is needed because static volumes cannot be read past + * data they contain. This function returns zero in case of success and a + * negative error code in case of error. + */ +static int gluebi_updated(struct ubi_volume_info *vi) +{ + struct gluebi_device *gluebi; + + mutex_lock(&devices_mutex); + gluebi = find_gluebi_nolock(vi->ubi_num, vi->vol_id); + if (!gluebi) { + mutex_unlock(&devices_mutex); + err_msg("got update notification for unknown UBI device %d volume %d", + vi->ubi_num, vi->vol_id); + return -ENOENT; + } + + if (vi->vol_type == UBI_STATIC_VOLUME) + gluebi->mtd.size = vi->used_bytes; + mutex_unlock(&devices_mutex); + return 0; +} + +/** + * gluebi_resized - UBI volume was re-sized notifier. + * @vi: volume info structure + * + * This function is called every time an UBI volume is re-size. It changes the + * corresponding fake MTD device size. This function returns zero in case of + * success and a negative error code in case of error. + */ +static int gluebi_resized(struct ubi_volume_info *vi) +{ + struct gluebi_device *gluebi; + + mutex_lock(&devices_mutex); + gluebi = find_gluebi_nolock(vi->ubi_num, vi->vol_id); + if (!gluebi) { + mutex_unlock(&devices_mutex); + err_msg("got update notification for unknown UBI device %d volume %d", + vi->ubi_num, vi->vol_id); + return -ENOENT; + } + gluebi->mtd.size = vi->used_bytes; + mutex_unlock(&devices_mutex); + return 0; +} + +/** + * gluebi_notify - UBI notification handler. + * @nb: registered notifier block + * @l: notification type + * @ptr: pointer to the &struct ubi_notification object + */ +static int gluebi_notify(struct notifier_block *nb, unsigned long l, + void *ns_ptr) +{ + struct ubi_notification *nt = ns_ptr; + + switch (l) { + case UBI_VOLUME_ADDED: + gluebi_create(&nt->di, &nt->vi); + break; + case UBI_VOLUME_REMOVED: + gluebi_remove(&nt->vi); + break; + case UBI_VOLUME_RESIZED: + gluebi_resized(&nt->vi); + break; + case UBI_VOLUME_UPDATED: + gluebi_updated(&nt->vi); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block gluebi_notifier = { + .notifier_call = gluebi_notify, +}; + +static int __init ubi_gluebi_init(void) +{ + return ubi_register_volume_notifier(&gluebi_notifier, 0); +} + +static void __exit ubi_gluebi_exit(void) +{ + struct gluebi_device *gluebi, *g; + + list_for_each_entry_safe(gluebi, g, &gluebi_devices, list) { + int err; + struct mtd_info *mtd = &gluebi->mtd; + + err = mtd_device_unregister(mtd); + if (err) + err_msg("error %d while removing gluebi MTD device %d, UBI device %d, volume %d - ignoring", + err, mtd->index, gluebi->ubi_num, + gluebi->vol_id); + kfree(mtd->name); + kfree(gluebi); + } + ubi_unregister_volume_notifier(&gluebi_notifier); +} + +module_init(ubi_gluebi_init); +module_exit(ubi_gluebi_exit); +MODULE_DESCRIPTION("MTD emulation layer over UBI volumes"); +MODULE_AUTHOR("Artem Bityutskiy, Joern Engel"); +MODULE_LICENSE("GPL"); diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c new file mode 100644 index 000000000..5bbd1f094 --- /dev/null +++ b/drivers/mtd/ubi/io.c @@ -0,0 +1,1435 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2006, 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * UBI input/output sub-system. + * + * This sub-system provides a uniform way to work with all kinds of the + * underlying MTD devices. It also implements handy functions for reading and + * writing UBI headers. + * + * We are trying to have a paranoid mindset and not to trust to what we read + * from the flash media in order to be more secure and robust. So this + * sub-system validates every single header it reads from the flash media. + * + * Some words about how the eraseblock headers are stored. + * + * The erase counter header is always stored at offset zero. By default, the + * VID header is stored after the EC header at the closest aligned offset + * (i.e. aligned to the minimum I/O unit size). Data starts next to the VID + * header at the closest aligned offset. But this default layout may be + * changed. For example, for different reasons (e.g., optimization) UBI may be + * asked to put the VID header at further offset, and even at an unaligned + * offset. Of course, if the offset of the VID header is unaligned, UBI adds + * proper padding in front of it. Data offset may also be changed but it has to + * be aligned. + * + * About minimal I/O units. In general, UBI assumes flash device model where + * there is only one minimal I/O unit size. E.g., in case of NOR flash it is 1, + * in case of NAND flash it is a NAND page, etc. This is reported by MTD in the + * @ubi->mtd->writesize field. But as an exception, UBI admits of using another + * (smaller) minimal I/O unit size for EC and VID headers to make it possible + * to do different optimizations. + * + * This is extremely useful in case of NAND flashes which admit of several + * write operations to one NAND page. In this case UBI can fit EC and VID + * headers at one NAND page. Thus, UBI may use "sub-page" size as the minimal + * I/O unit for the headers (the @ubi->hdrs_min_io_size field). But it still + * reports NAND page size (@ubi->min_io_size) as a minimal I/O unit for the UBI + * users. + * + * Example: some Samsung NANDs with 2KiB pages allow 4x 512-byte writes, so + * although the minimal I/O unit is 2K, UBI uses 512 bytes for EC and VID + * headers. + * + * Q: why not just to treat sub-page as a minimal I/O unit of this flash + * device, e.g., make @ubi->min_io_size = 512 in the example above? + * + * A: because when writing a sub-page, MTD still writes a full 2K page but the + * bytes which are not relevant to the sub-page are 0xFF. So, basically, + * writing 4x512 sub-pages is 4 times slower than writing one 2KiB NAND page. + * Thus, we prefer to use sub-pages only for EC and VID headers. + * + * As it was noted above, the VID header may start at a non-aligned offset. + * For example, in case of a 2KiB page NAND flash with a 512 bytes sub-page, + * the VID header may reside at offset 1984 which is the last 64 bytes of the + * last sub-page (EC header is always at offset zero). This causes some + * difficulties when reading and writing VID headers. + * + * Suppose we have a 64-byte buffer and we read a VID header at it. We change + * the data and want to write this VID header out. As we can only write in + * 512-byte chunks, we have to allocate one more buffer and copy our VID header + * to offset 448 of this buffer. + * + * The I/O sub-system does the following trick in order to avoid this extra + * copy. It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID + * header and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. + * When the VID header is being written out, it shifts the VID header pointer + * back and writes the whole sub-page. + */ + +#include <linux/crc32.h> +#include <linux/err.h> +#include <linux/slab.h> +#include "ubi.h" + +static int self_check_not_bad(const struct ubi_device *ubi, int pnum); +static int self_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum); +static int self_check_ec_hdr(const struct ubi_device *ubi, int pnum, + const struct ubi_ec_hdr *ec_hdr); +static int self_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum); +static int self_check_vid_hdr(const struct ubi_device *ubi, int pnum, + const struct ubi_vid_hdr *vid_hdr); +static int self_check_write(struct ubi_device *ubi, const void *buf, int pnum, + int offset, int len); + +/** + * ubi_io_read - read data from a physical eraseblock. + * @ubi: UBI device description object + * @buf: buffer where to store the read data + * @pnum: physical eraseblock number to read from + * @offset: offset within the physical eraseblock from where to read + * @len: how many bytes to read + * + * This function reads data from offset @offset of physical eraseblock @pnum + * and stores the read data in the @buf buffer. The following return codes are + * possible: + * + * o %0 if all the requested data were successfully read; + * o %UBI_IO_BITFLIPS if all the requested data were successfully read, but + * correctable bit-flips were detected; this is harmless but may indicate + * that this eraseblock may become bad soon (but do not have to); + * o %-EBADMSG if the MTD subsystem reported about data integrity problems, for + * example it can be an ECC error in case of NAND; this most probably means + * that the data is corrupted; + * o %-EIO if some I/O error occurred; + * o other negative error codes in case of other errors. + */ +int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset, + int len) +{ + int err, retries = 0; + size_t read; + loff_t addr; + + dbg_io("read %d bytes from PEB %d:%d", len, pnum, offset); + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + ubi_assert(offset >= 0 && offset + len <= ubi->peb_size); + ubi_assert(len > 0); + + err = self_check_not_bad(ubi, pnum); + if (err) + return err; + + /* + * Deliberately corrupt the buffer to improve robustness. Indeed, if we + * do not do this, the following may happen: + * 1. The buffer contains data from previous operation, e.g., read from + * another PEB previously. The data looks like expected, e.g., if we + * just do not read anything and return - the caller would not + * notice this. E.g., if we are reading a VID header, the buffer may + * contain a valid VID header from another PEB. + * 2. The driver is buggy and returns us success or -EBADMSG or + * -EUCLEAN, but it does not actually put any data to the buffer. + * + * This may confuse UBI or upper layers - they may think the buffer + * contains valid data while in fact it is just old data. This is + * especially possible because UBI (and UBIFS) relies on CRC, and + * treats data as correct even in case of ECC errors if the CRC is + * correct. + * + * Try to prevent this situation by changing the first byte of the + * buffer. + */ + *((uint8_t *)buf) ^= 0xFF; + + addr = (loff_t)pnum * ubi->peb_size + offset; +retry: + err = mtd_read(ubi->mtd, addr, len, &read, buf); + if (err) { + const char *errstr = mtd_is_eccerr(err) ? " (ECC error)" : ""; + + if (mtd_is_bitflip(err)) { + /* + * -EUCLEAN is reported if there was a bit-flip which + * was corrected, so this is harmless. + * + * We do not report about it here unless debugging is + * enabled. A corresponding message will be printed + * later, when it is has been scrubbed. + */ + ubi_msg(ubi, "fixable bit-flip detected at PEB %d", + pnum); + ubi_assert(len == read); + return UBI_IO_BITFLIPS; + } + + if (retries++ < UBI_IO_RETRIES) { + ubi_warn(ubi, "error %d%s while reading %d bytes from PEB %d:%d, read only %zd bytes, retry", + err, errstr, len, pnum, offset, read); + yield(); + goto retry; + } + + ubi_err(ubi, "error %d%s while reading %d bytes from PEB %d:%d, read %zd bytes", + err, errstr, len, pnum, offset, read); + dump_stack(); + + /* + * The driver should never return -EBADMSG if it failed to read + * all the requested data. But some buggy drivers might do + * this, so we change it to -EIO. + */ + if (read != len && mtd_is_eccerr(err)) { + ubi_assert(0); + err = -EIO; + } + } else { + ubi_assert(len == read); + + if (ubi_dbg_is_bitflip(ubi)) { + dbg_gen("bit-flip (emulated)"); + err = UBI_IO_BITFLIPS; + } + } + + return err; +} + +/** + * ubi_io_write - write data to a physical eraseblock. + * @ubi: UBI device description object + * @buf: buffer with the data to write + * @pnum: physical eraseblock number to write to + * @offset: offset within the physical eraseblock where to write + * @len: how many bytes to write + * + * This function writes @len bytes of data from buffer @buf to offset @offset + * of physical eraseblock @pnum. If all the data were successfully written, + * zero is returned. If an error occurred, this function returns a negative + * error code. If %-EIO is returned, the physical eraseblock most probably went + * bad. + * + * Note, in case of an error, it is possible that something was still written + * to the flash media, but may be some garbage. + */ +int ubi_io_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, + int len) +{ + int err; + size_t written; + loff_t addr; + + dbg_io("write %d bytes to PEB %d:%d", len, pnum, offset); + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + ubi_assert(offset >= 0 && offset + len <= ubi->peb_size); + ubi_assert(offset % ubi->hdrs_min_io_size == 0); + ubi_assert(len > 0 && len % ubi->hdrs_min_io_size == 0); + + if (ubi->ro_mode) { + ubi_err(ubi, "read-only mode"); + return -EROFS; + } + + err = self_check_not_bad(ubi, pnum); + if (err) + return err; + + /* The area we are writing to has to contain all 0xFF bytes */ + err = ubi_self_check_all_ff(ubi, pnum, offset, len); + if (err) + return err; + + if (offset >= ubi->leb_start) { + /* + * We write to the data area of the physical eraseblock. Make + * sure it has valid EC and VID headers. + */ + err = self_check_peb_ec_hdr(ubi, pnum); + if (err) + return err; + err = self_check_peb_vid_hdr(ubi, pnum); + if (err) + return err; + } + + if (ubi_dbg_is_write_failure(ubi)) { + ubi_err(ubi, "cannot write %d bytes to PEB %d:%d (emulated)", + len, pnum, offset); + dump_stack(); + return -EIO; + } + + addr = (loff_t)pnum * ubi->peb_size + offset; + err = mtd_write(ubi->mtd, addr, len, &written, buf); + if (err) { + ubi_err(ubi, "error %d while writing %d bytes to PEB %d:%d, written %zd bytes", + err, len, pnum, offset, written); + dump_stack(); + ubi_dump_flash(ubi, pnum, offset, len); + } else + ubi_assert(written == len); + + if (!err) { + err = self_check_write(ubi, buf, pnum, offset, len); + if (err) + return err; + + /* + * Since we always write sequentially, the rest of the PEB has + * to contain only 0xFF bytes. + */ + offset += len; + len = ubi->peb_size - offset; + if (len) + err = ubi_self_check_all_ff(ubi, pnum, offset, len); + } + + return err; +} + +/** + * erase_callback - MTD erasure call-back. + * @ei: MTD erase information object. + * + * Note, even though MTD erase interface is asynchronous, all the current + * implementations are synchronous anyway. + */ +static void erase_callback(struct erase_info *ei) +{ + wake_up_interruptible((wait_queue_head_t *)ei->priv); +} + +/** + * do_sync_erase - synchronously erase a physical eraseblock. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to erase + * + * This function synchronously erases physical eraseblock @pnum and returns + * zero in case of success and a negative error code in case of failure. If + * %-EIO is returned, the physical eraseblock most probably went bad. + */ +static int do_sync_erase(struct ubi_device *ubi, int pnum) +{ + int err, retries = 0; + struct erase_info ei; + wait_queue_head_t wq; + + dbg_io("erase PEB %d", pnum); + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + if (ubi->ro_mode) { + ubi_err(ubi, "read-only mode"); + return -EROFS; + } + +retry: + init_waitqueue_head(&wq); + memset(&ei, 0, sizeof(struct erase_info)); + + ei.mtd = ubi->mtd; + ei.addr = (loff_t)pnum * ubi->peb_size; + ei.len = ubi->peb_size; + ei.callback = erase_callback; + ei.priv = (unsigned long)&wq; + + err = mtd_erase(ubi->mtd, &ei); + if (err) { + if (retries++ < UBI_IO_RETRIES) { + ubi_warn(ubi, "error %d while erasing PEB %d, retry", + err, pnum); + yield(); + goto retry; + } + ubi_err(ubi, "cannot erase PEB %d, error %d", pnum, err); + dump_stack(); + return err; + } + + err = wait_event_interruptible(wq, ei.state == MTD_ERASE_DONE || + ei.state == MTD_ERASE_FAILED); + if (err) { + ubi_err(ubi, "interrupted PEB %d erasure", pnum); + return -EINTR; + } + + if (ei.state == MTD_ERASE_FAILED) { + if (retries++ < UBI_IO_RETRIES) { + ubi_warn(ubi, "error while erasing PEB %d, retry", + pnum); + yield(); + goto retry; + } + ubi_err(ubi, "cannot erase PEB %d", pnum); + dump_stack(); + return -EIO; + } + + err = ubi_self_check_all_ff(ubi, pnum, 0, ubi->peb_size); + if (err) + return err; + + if (ubi_dbg_is_erase_failure(ubi)) { + ubi_err(ubi, "cannot erase PEB %d (emulated)", pnum); + return -EIO; + } + + return 0; +} + +/* Patterns to write to a physical eraseblock when torturing it */ +static uint8_t patterns[] = {0xa5, 0x5a, 0x0}; + +/** + * torture_peb - test a supposedly bad physical eraseblock. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to test + * + * This function returns %-EIO if the physical eraseblock did not pass the + * test, a positive number of erase operations done if the test was + * successfully passed, and other negative error codes in case of other errors. + */ +static int torture_peb(struct ubi_device *ubi, int pnum) +{ + int err, i, patt_count; + + ubi_msg(ubi, "run torture test for PEB %d", pnum); + patt_count = ARRAY_SIZE(patterns); + ubi_assert(patt_count > 0); + + mutex_lock(&ubi->buf_mutex); + for (i = 0; i < patt_count; i++) { + err = do_sync_erase(ubi, pnum); + if (err) + goto out; + + /* Make sure the PEB contains only 0xFF bytes */ + err = ubi_io_read(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); + if (err) + goto out; + + err = ubi_check_pattern(ubi->peb_buf, 0xFF, ubi->peb_size); + if (err == 0) { + ubi_err(ubi, "erased PEB %d, but a non-0xFF byte found", + pnum); + err = -EIO; + goto out; + } + + /* Write a pattern and check it */ + memset(ubi->peb_buf, patterns[i], ubi->peb_size); + err = ubi_io_write(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); + if (err) + goto out; + + memset(ubi->peb_buf, ~patterns[i], ubi->peb_size); + err = ubi_io_read(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); + if (err) + goto out; + + err = ubi_check_pattern(ubi->peb_buf, patterns[i], + ubi->peb_size); + if (err == 0) { + ubi_err(ubi, "pattern %x checking failed for PEB %d", + patterns[i], pnum); + err = -EIO; + goto out; + } + } + + err = patt_count; + ubi_msg(ubi, "PEB %d passed torture test, do not mark it as bad", pnum); + +out: + mutex_unlock(&ubi->buf_mutex); + if (err == UBI_IO_BITFLIPS || mtd_is_eccerr(err)) { + /* + * If a bit-flip or data integrity error was detected, the test + * has not passed because it happened on a freshly erased + * physical eraseblock which means something is wrong with it. + */ + ubi_err(ubi, "read problems on freshly erased PEB %d, must be bad", + pnum); + err = -EIO; + } + return err; +} + +/** + * nor_erase_prepare - prepare a NOR flash PEB for erasure. + * @ubi: UBI device description object + * @pnum: physical eraseblock number to prepare + * + * NOR flash, or at least some of them, have peculiar embedded PEB erasure + * algorithm: the PEB is first filled with zeroes, then it is erased. And + * filling with zeroes starts from the end of the PEB. This was observed with + * Spansion S29GL512N NOR flash. + * + * This means that in case of a power cut we may end up with intact data at the + * beginning of the PEB, and all zeroes at the end of PEB. In other words, the + * EC and VID headers are OK, but a large chunk of data at the end of PEB is + * zeroed. This makes UBI mistakenly treat this PEB as used and associate it + * with an LEB, which leads to subsequent failures (e.g., UBIFS fails). + * + * This function is called before erasing NOR PEBs and it zeroes out EC and VID + * magic numbers in order to invalidate them and prevent the failures. Returns + * zero in case of success and a negative error code in case of failure. + */ +static int nor_erase_prepare(struct ubi_device *ubi, int pnum) +{ + int err; + size_t written; + loff_t addr; + uint32_t data = 0; + struct ubi_ec_hdr ec_hdr; + + /* + * Note, we cannot generally define VID header buffers on stack, + * because of the way we deal with these buffers (see the header + * comment in this file). But we know this is a NOR-specific piece of + * code, so we can do this. But yes, this is error-prone and we should + * (pre-)allocate VID header buffer instead. + */ + struct ubi_vid_hdr vid_hdr; + + /* + * If VID or EC is valid, we have to corrupt them before erasing. + * It is important to first invalidate the EC header, and then the VID + * header. Otherwise a power cut may lead to valid EC header and + * invalid VID header, in which case UBI will treat this PEB as + * corrupted and will try to preserve it, and print scary warnings. + */ + addr = (loff_t)pnum * ubi->peb_size; + err = ubi_io_read_ec_hdr(ubi, pnum, &ec_hdr, 0); + if (err != UBI_IO_BAD_HDR_EBADMSG && err != UBI_IO_BAD_HDR && + err != UBI_IO_FF){ + err = mtd_write(ubi->mtd, addr, 4, &written, (void *)&data); + if(err) + goto error; + } + + err = ubi_io_read_vid_hdr(ubi, pnum, &vid_hdr, 0); + if (err != UBI_IO_BAD_HDR_EBADMSG && err != UBI_IO_BAD_HDR && + err != UBI_IO_FF){ + addr += ubi->vid_hdr_aloffset; + err = mtd_write(ubi->mtd, addr, 4, &written, (void *)&data); + if (err) + goto error; + } + return 0; + +error: + /* + * The PEB contains a valid VID or EC header, but we cannot invalidate + * it. Supposedly the flash media or the driver is screwed up, so + * return an error. + */ + ubi_err(ubi, "cannot invalidate PEB %d, write returned %d", pnum, err); + ubi_dump_flash(ubi, pnum, 0, ubi->peb_size); + return -EIO; +} + +/** + * ubi_io_sync_erase - synchronously erase a physical eraseblock. + * @ubi: UBI device description object + * @pnum: physical eraseblock number to erase + * @torture: if this physical eraseblock has to be tortured + * + * This function synchronously erases physical eraseblock @pnum. If @torture + * flag is not zero, the physical eraseblock is checked by means of writing + * different patterns to it and reading them back. If the torturing is enabled, + * the physical eraseblock is erased more than once. + * + * This function returns the number of erasures made in case of success, %-EIO + * if the erasure failed or the torturing test failed, and other negative error + * codes in case of other errors. Note, %-EIO means that the physical + * eraseblock is bad. + */ +int ubi_io_sync_erase(struct ubi_device *ubi, int pnum, int torture) +{ + int err, ret = 0; + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + err = self_check_not_bad(ubi, pnum); + if (err != 0) + return err; + + if (ubi->ro_mode) { + ubi_err(ubi, "read-only mode"); + return -EROFS; + } + + if (ubi->nor_flash) { + err = nor_erase_prepare(ubi, pnum); + if (err) + return err; + } + + if (torture) { + ret = torture_peb(ubi, pnum); + if (ret < 0) + return ret; + } + + err = do_sync_erase(ubi, pnum); + if (err) + return err; + + return ret + 1; +} + +/** + * ubi_io_is_bad - check if a physical eraseblock is bad. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * + * This function returns a positive number if the physical eraseblock is bad, + * zero if not, and a negative error code if an error occurred. + */ +int ubi_io_is_bad(const struct ubi_device *ubi, int pnum) +{ + struct mtd_info *mtd = ubi->mtd; + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + if (ubi->bad_allowed) { + int ret; + + ret = mtd_block_isbad(mtd, (loff_t)pnum * ubi->peb_size); + if (ret < 0) + ubi_err(ubi, "error %d while checking if PEB %d is bad", + ret, pnum); + else if (ret) + dbg_io("PEB %d is bad", pnum); + return ret; + } + + return 0; +} + +/** + * ubi_io_mark_bad - mark a physical eraseblock as bad. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to mark + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +int ubi_io_mark_bad(const struct ubi_device *ubi, int pnum) +{ + int err; + struct mtd_info *mtd = ubi->mtd; + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + if (ubi->ro_mode) { + ubi_err(ubi, "read-only mode"); + return -EROFS; + } + + if (!ubi->bad_allowed) + return 0; + + err = mtd_block_markbad(mtd, (loff_t)pnum * ubi->peb_size); + if (err) + ubi_err(ubi, "cannot mark PEB %d bad, error %d", pnum, err); + return err; +} + +/** + * validate_ec_hdr - validate an erase counter header. + * @ubi: UBI device description object + * @ec_hdr: the erase counter header to check + * + * This function returns zero if the erase counter header is OK, and %1 if + * not. + */ +static int validate_ec_hdr(const struct ubi_device *ubi, + const struct ubi_ec_hdr *ec_hdr) +{ + long long ec; + int vid_hdr_offset, leb_start; + + ec = be64_to_cpu(ec_hdr->ec); + vid_hdr_offset = be32_to_cpu(ec_hdr->vid_hdr_offset); + leb_start = be32_to_cpu(ec_hdr->data_offset); + + if (ec_hdr->version != UBI_VERSION) { + ubi_err(ubi, "node with incompatible UBI version found: this UBI version is %d, image version is %d", + UBI_VERSION, (int)ec_hdr->version); + goto bad; + } + + if (vid_hdr_offset != ubi->vid_hdr_offset) { + ubi_err(ubi, "bad VID header offset %d, expected %d", + vid_hdr_offset, ubi->vid_hdr_offset); + goto bad; + } + + if (leb_start != ubi->leb_start) { + ubi_err(ubi, "bad data offset %d, expected %d", + leb_start, ubi->leb_start); + goto bad; + } + + if (ec < 0 || ec > UBI_MAX_ERASECOUNTER) { + ubi_err(ubi, "bad erase counter %lld", ec); + goto bad; + } + + return 0; + +bad: + ubi_err(ubi, "bad EC header"); + ubi_dump_ec_hdr(ec_hdr); + dump_stack(); + return 1; +} + +/** + * ubi_io_read_ec_hdr - read and check an erase counter header. + * @ubi: UBI device description object + * @pnum: physical eraseblock to read from + * @ec_hdr: a &struct ubi_ec_hdr object where to store the read erase counter + * header + * @verbose: be verbose if the header is corrupted or was not found + * + * This function reads erase counter header from physical eraseblock @pnum and + * stores it in @ec_hdr. This function also checks CRC checksum of the read + * erase counter header. The following codes may be returned: + * + * o %0 if the CRC checksum is correct and the header was successfully read; + * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected + * and corrected by the flash driver; this is harmless but may indicate that + * this eraseblock may become bad soon (but may be not); + * o %UBI_IO_BAD_HDR if the erase counter header is corrupted (a CRC error); + * o %UBI_IO_BAD_HDR_EBADMSG is the same as %UBI_IO_BAD_HDR, but there also was + * a data integrity error (uncorrectable ECC error in case of NAND); + * o %UBI_IO_FF if only 0xFF bytes were read (the PEB is supposedly empty) + * o a negative error code in case of failure. + */ +int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, + struct ubi_ec_hdr *ec_hdr, int verbose) +{ + int err, read_err; + uint32_t crc, magic, hdr_crc; + + dbg_io("read EC header from PEB %d", pnum); + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + read_err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); + if (read_err) { + if (read_err != UBI_IO_BITFLIPS && !mtd_is_eccerr(read_err)) + return read_err; + + /* + * We read all the data, but either a correctable bit-flip + * occurred, or MTD reported a data integrity error + * (uncorrectable ECC error in case of NAND). The former is + * harmless, the later may mean that the read data is + * corrupted. But we have a CRC check-sum and we will detect + * this. If the EC header is still OK, we just report this as + * there was a bit-flip, to force scrubbing. + */ + } + + magic = be32_to_cpu(ec_hdr->magic); + if (magic != UBI_EC_HDR_MAGIC) { + if (mtd_is_eccerr(read_err)) + return UBI_IO_BAD_HDR_EBADMSG; + + /* + * The magic field is wrong. Let's check if we have read all + * 0xFF. If yes, this physical eraseblock is assumed to be + * empty. + */ + if (ubi_check_pattern(ec_hdr, 0xFF, UBI_EC_HDR_SIZE)) { + /* The physical eraseblock is supposedly empty */ + if (verbose) + ubi_warn(ubi, "no EC header found at PEB %d, only 0xFF bytes", + pnum); + dbg_bld("no EC header found at PEB %d, only 0xFF bytes", + pnum); + if (!read_err) + return UBI_IO_FF; + else + return UBI_IO_FF_BITFLIPS; + } + + /* + * This is not a valid erase counter header, and these are not + * 0xFF bytes. Report that the header is corrupted. + */ + if (verbose) { + ubi_warn(ubi, "bad magic number at PEB %d: %08x instead of %08x", + pnum, magic, UBI_EC_HDR_MAGIC); + ubi_dump_ec_hdr(ec_hdr); + } + dbg_bld("bad magic number at PEB %d: %08x instead of %08x", + pnum, magic, UBI_EC_HDR_MAGIC); + return UBI_IO_BAD_HDR; + } + + crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); + hdr_crc = be32_to_cpu(ec_hdr->hdr_crc); + + if (hdr_crc != crc) { + if (verbose) { + ubi_warn(ubi, "bad EC header CRC at PEB %d, calculated %#08x, read %#08x", + pnum, crc, hdr_crc); + ubi_dump_ec_hdr(ec_hdr); + } + dbg_bld("bad EC header CRC at PEB %d, calculated %#08x, read %#08x", + pnum, crc, hdr_crc); + + if (!read_err) + return UBI_IO_BAD_HDR; + else + return UBI_IO_BAD_HDR_EBADMSG; + } + + /* And of course validate what has just been read from the media */ + err = validate_ec_hdr(ubi, ec_hdr); + if (err) { + ubi_err(ubi, "validation failed for PEB %d", pnum); + return -EINVAL; + } + + /* + * If there was %-EBADMSG, but the header CRC is still OK, report about + * a bit-flip to force scrubbing on this PEB. + */ + return read_err ? UBI_IO_BITFLIPS : 0; +} + +/** + * ubi_io_write_ec_hdr - write an erase counter header. + * @ubi: UBI device description object + * @pnum: physical eraseblock to write to + * @ec_hdr: the erase counter header to write + * + * This function writes erase counter header described by @ec_hdr to physical + * eraseblock @pnum. It also fills most fields of @ec_hdr before writing, so + * the caller do not have to fill them. Callers must only fill the @ec_hdr->ec + * field. + * + * This function returns zero in case of success and a negative error code in + * case of failure. If %-EIO is returned, the physical eraseblock most probably + * went bad. + */ +int ubi_io_write_ec_hdr(struct ubi_device *ubi, int pnum, + struct ubi_ec_hdr *ec_hdr) +{ + int err; + uint32_t crc; + + dbg_io("write EC header to PEB %d", pnum); + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + ec_hdr->magic = cpu_to_be32(UBI_EC_HDR_MAGIC); + ec_hdr->version = UBI_VERSION; + ec_hdr->vid_hdr_offset = cpu_to_be32(ubi->vid_hdr_offset); + ec_hdr->data_offset = cpu_to_be32(ubi->leb_start); + ec_hdr->image_seq = cpu_to_be32(ubi->image_seq); + crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); + ec_hdr->hdr_crc = cpu_to_be32(crc); + + err = self_check_ec_hdr(ubi, pnum, ec_hdr); + if (err) + return err; + + if (ubi_dbg_power_cut(ubi, POWER_CUT_EC_WRITE)) + return -EROFS; + + err = ubi_io_write(ubi, ec_hdr, pnum, 0, ubi->ec_hdr_alsize); + return err; +} + +/** + * validate_vid_hdr - validate a volume identifier header. + * @ubi: UBI device description object + * @vid_hdr: the volume identifier header to check + * + * This function checks that data stored in the volume identifier header + * @vid_hdr. Returns zero if the VID header is OK and %1 if not. + */ +static int validate_vid_hdr(const struct ubi_device *ubi, + const struct ubi_vid_hdr *vid_hdr) +{ + int vol_type = vid_hdr->vol_type; + int copy_flag = vid_hdr->copy_flag; + int vol_id = be32_to_cpu(vid_hdr->vol_id); + int lnum = be32_to_cpu(vid_hdr->lnum); + int compat = vid_hdr->compat; + int data_size = be32_to_cpu(vid_hdr->data_size); + int used_ebs = be32_to_cpu(vid_hdr->used_ebs); + int data_pad = be32_to_cpu(vid_hdr->data_pad); + int data_crc = be32_to_cpu(vid_hdr->data_crc); + int usable_leb_size = ubi->leb_size - data_pad; + + if (copy_flag != 0 && copy_flag != 1) { + ubi_err(ubi, "bad copy_flag"); + goto bad; + } + + if (vol_id < 0 || lnum < 0 || data_size < 0 || used_ebs < 0 || + data_pad < 0) { + ubi_err(ubi, "negative values"); + goto bad; + } + + if (vol_id >= UBI_MAX_VOLUMES && vol_id < UBI_INTERNAL_VOL_START) { + ubi_err(ubi, "bad vol_id"); + goto bad; + } + + if (vol_id < UBI_INTERNAL_VOL_START && compat != 0) { + ubi_err(ubi, "bad compat"); + goto bad; + } + + if (vol_id >= UBI_INTERNAL_VOL_START && compat != UBI_COMPAT_DELETE && + compat != UBI_COMPAT_RO && compat != UBI_COMPAT_PRESERVE && + compat != UBI_COMPAT_REJECT) { + ubi_err(ubi, "bad compat"); + goto bad; + } + + if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) { + ubi_err(ubi, "bad vol_type"); + goto bad; + } + + if (data_pad >= ubi->leb_size / 2) { + ubi_err(ubi, "bad data_pad"); + goto bad; + } + + if (vol_type == UBI_VID_STATIC) { + /* + * Although from high-level point of view static volumes may + * contain zero bytes of data, but no VID headers can contain + * zero at these fields, because they empty volumes do not have + * mapped logical eraseblocks. + */ + if (used_ebs == 0) { + ubi_err(ubi, "zero used_ebs"); + goto bad; + } + if (data_size == 0) { + ubi_err(ubi, "zero data_size"); + goto bad; + } + if (lnum < used_ebs - 1) { + if (data_size != usable_leb_size) { + ubi_err(ubi, "bad data_size"); + goto bad; + } + } else if (lnum == used_ebs - 1) { + if (data_size == 0) { + ubi_err(ubi, "bad data_size at last LEB"); + goto bad; + } + } else { + ubi_err(ubi, "too high lnum"); + goto bad; + } + } else { + if (copy_flag == 0) { + if (data_crc != 0) { + ubi_err(ubi, "non-zero data CRC"); + goto bad; + } + if (data_size != 0) { + ubi_err(ubi, "non-zero data_size"); + goto bad; + } + } else { + if (data_size == 0) { + ubi_err(ubi, "zero data_size of copy"); + goto bad; + } + } + if (used_ebs != 0) { + ubi_err(ubi, "bad used_ebs"); + goto bad; + } + } + + return 0; + +bad: + ubi_err(ubi, "bad VID header"); + ubi_dump_vid_hdr(vid_hdr); + dump_stack(); + return 1; +} + +/** + * ubi_io_read_vid_hdr - read and check a volume identifier header. + * @ubi: UBI device description object + * @pnum: physical eraseblock number to read from + * @vid_hdr: &struct ubi_vid_hdr object where to store the read volume + * identifier header + * @verbose: be verbose if the header is corrupted or wasn't found + * + * This function reads the volume identifier header from physical eraseblock + * @pnum and stores it in @vid_hdr. It also checks CRC checksum of the read + * volume identifier header. The error codes are the same as in + * 'ubi_io_read_ec_hdr()'. + * + * Note, the implementation of this function is also very similar to + * 'ubi_io_read_ec_hdr()', so refer commentaries in 'ubi_io_read_ec_hdr()'. + */ +int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, + struct ubi_vid_hdr *vid_hdr, int verbose) +{ + int err, read_err; + uint32_t crc, magic, hdr_crc; + void *p; + + dbg_io("read VID header from PEB %d", pnum); + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + p = (char *)vid_hdr - ubi->vid_hdr_shift; + read_err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, + ubi->vid_hdr_alsize); + if (read_err && read_err != UBI_IO_BITFLIPS && !mtd_is_eccerr(read_err)) + return read_err; + + magic = be32_to_cpu(vid_hdr->magic); + if (magic != UBI_VID_HDR_MAGIC) { + if (mtd_is_eccerr(read_err)) + return UBI_IO_BAD_HDR_EBADMSG; + + if (ubi_check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) { + if (verbose) + ubi_warn(ubi, "no VID header found at PEB %d, only 0xFF bytes", + pnum); + dbg_bld("no VID header found at PEB %d, only 0xFF bytes", + pnum); + if (!read_err) + return UBI_IO_FF; + else + return UBI_IO_FF_BITFLIPS; + } + + if (verbose) { + ubi_warn(ubi, "bad magic number at PEB %d: %08x instead of %08x", + pnum, magic, UBI_VID_HDR_MAGIC); + ubi_dump_vid_hdr(vid_hdr); + } + dbg_bld("bad magic number at PEB %d: %08x instead of %08x", + pnum, magic, UBI_VID_HDR_MAGIC); + return UBI_IO_BAD_HDR; + } + + crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC); + hdr_crc = be32_to_cpu(vid_hdr->hdr_crc); + + if (hdr_crc != crc) { + if (verbose) { + ubi_warn(ubi, "bad CRC at PEB %d, calculated %#08x, read %#08x", + pnum, crc, hdr_crc); + ubi_dump_vid_hdr(vid_hdr); + } + dbg_bld("bad CRC at PEB %d, calculated %#08x, read %#08x", + pnum, crc, hdr_crc); + if (!read_err) + return UBI_IO_BAD_HDR; + else + return UBI_IO_BAD_HDR_EBADMSG; + } + + err = validate_vid_hdr(ubi, vid_hdr); + if (err) { + ubi_err(ubi, "validation failed for PEB %d", pnum); + return -EINVAL; + } + + return read_err ? UBI_IO_BITFLIPS : 0; +} + +/** + * ubi_io_write_vid_hdr - write a volume identifier header. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to write to + * @vid_hdr: the volume identifier header to write + * + * This function writes the volume identifier header described by @vid_hdr to + * physical eraseblock @pnum. This function automatically fills the + * @vid_hdr->magic and the @vid_hdr->version fields, as well as calculates + * header CRC checksum and stores it at vid_hdr->hdr_crc. + * + * This function returns zero in case of success and a negative error code in + * case of failure. If %-EIO is returned, the physical eraseblock probably went + * bad. + */ +int ubi_io_write_vid_hdr(struct ubi_device *ubi, int pnum, + struct ubi_vid_hdr *vid_hdr) +{ + int err; + uint32_t crc; + void *p; + + dbg_io("write VID header to PEB %d", pnum); + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + err = self_check_peb_ec_hdr(ubi, pnum); + if (err) + return err; + + vid_hdr->magic = cpu_to_be32(UBI_VID_HDR_MAGIC); + vid_hdr->version = UBI_VERSION; + crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC); + vid_hdr->hdr_crc = cpu_to_be32(crc); + + err = self_check_vid_hdr(ubi, pnum, vid_hdr); + if (err) + return err; + + if (ubi_dbg_power_cut(ubi, POWER_CUT_VID_WRITE)) + return -EROFS; + + p = (char *)vid_hdr - ubi->vid_hdr_shift; + err = ubi_io_write(ubi, p, pnum, ubi->vid_hdr_aloffset, + ubi->vid_hdr_alsize); + return err; +} + +/** + * self_check_not_bad - ensure that a physical eraseblock is not bad. + * @ubi: UBI device description object + * @pnum: physical eraseblock number to check + * + * This function returns zero if the physical eraseblock is good, %-EINVAL if + * it is bad and a negative error code if an error occurred. + */ +static int self_check_not_bad(const struct ubi_device *ubi, int pnum) +{ + int err; + + if (!ubi_dbg_chk_io(ubi)) + return 0; + + err = ubi_io_is_bad(ubi, pnum); + if (!err) + return err; + + ubi_err(ubi, "self-check failed for PEB %d", pnum); + dump_stack(); + return err > 0 ? -EINVAL : err; +} + +/** + * self_check_ec_hdr - check if an erase counter header is all right. + * @ubi: UBI device description object + * @pnum: physical eraseblock number the erase counter header belongs to + * @ec_hdr: the erase counter header to check + * + * This function returns zero if the erase counter header contains valid + * values, and %-EINVAL if not. + */ +static int self_check_ec_hdr(const struct ubi_device *ubi, int pnum, + const struct ubi_ec_hdr *ec_hdr) +{ + int err; + uint32_t magic; + + if (!ubi_dbg_chk_io(ubi)) + return 0; + + magic = be32_to_cpu(ec_hdr->magic); + if (magic != UBI_EC_HDR_MAGIC) { + ubi_err(ubi, "bad magic %#08x, must be %#08x", + magic, UBI_EC_HDR_MAGIC); + goto fail; + } + + err = validate_ec_hdr(ubi, ec_hdr); + if (err) { + ubi_err(ubi, "self-check failed for PEB %d", pnum); + goto fail; + } + + return 0; + +fail: + ubi_dump_ec_hdr(ec_hdr); + dump_stack(); + return -EINVAL; +} + +/** + * self_check_peb_ec_hdr - check erase counter header. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * + * This function returns zero if the erase counter header is all right and and + * a negative error code if not or if an error occurred. + */ +static int self_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum) +{ + int err; + uint32_t crc, hdr_crc; + struct ubi_ec_hdr *ec_hdr; + + if (!ubi_dbg_chk_io(ubi)) + return 0; + + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); + if (!ec_hdr) + return -ENOMEM; + + err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); + if (err && err != UBI_IO_BITFLIPS && !mtd_is_eccerr(err)) + goto exit; + + crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); + hdr_crc = be32_to_cpu(ec_hdr->hdr_crc); + if (hdr_crc != crc) { + ubi_err(ubi, "bad CRC, calculated %#08x, read %#08x", + crc, hdr_crc); + ubi_err(ubi, "self-check failed for PEB %d", pnum); + ubi_dump_ec_hdr(ec_hdr); + dump_stack(); + err = -EINVAL; + goto exit; + } + + err = self_check_ec_hdr(ubi, pnum, ec_hdr); + +exit: + kfree(ec_hdr); + return err; +} + +/** + * self_check_vid_hdr - check that a volume identifier header is all right. + * @ubi: UBI device description object + * @pnum: physical eraseblock number the volume identifier header belongs to + * @vid_hdr: the volume identifier header to check + * + * This function returns zero if the volume identifier header is all right, and + * %-EINVAL if not. + */ +static int self_check_vid_hdr(const struct ubi_device *ubi, int pnum, + const struct ubi_vid_hdr *vid_hdr) +{ + int err; + uint32_t magic; + + if (!ubi_dbg_chk_io(ubi)) + return 0; + + magic = be32_to_cpu(vid_hdr->magic); + if (magic != UBI_VID_HDR_MAGIC) { + ubi_err(ubi, "bad VID header magic %#08x at PEB %d, must be %#08x", + magic, pnum, UBI_VID_HDR_MAGIC); + goto fail; + } + + err = validate_vid_hdr(ubi, vid_hdr); + if (err) { + ubi_err(ubi, "self-check failed for PEB %d", pnum); + goto fail; + } + + return err; + +fail: + ubi_err(ubi, "self-check failed for PEB %d", pnum); + ubi_dump_vid_hdr(vid_hdr); + dump_stack(); + return -EINVAL; + +} + +/** + * self_check_peb_vid_hdr - check volume identifier header. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * + * This function returns zero if the volume identifier header is all right, + * and a negative error code if not or if an error occurred. + */ +static int self_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum) +{ + int err; + uint32_t crc, hdr_crc; + struct ubi_vid_hdr *vid_hdr; + void *p; + + if (!ubi_dbg_chk_io(ubi)) + return 0; + + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); + if (!vid_hdr) + return -ENOMEM; + + p = (char *)vid_hdr - ubi->vid_hdr_shift; + err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, + ubi->vid_hdr_alsize); + if (err && err != UBI_IO_BITFLIPS && !mtd_is_eccerr(err)) + goto exit; + + crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_EC_HDR_SIZE_CRC); + hdr_crc = be32_to_cpu(vid_hdr->hdr_crc); + if (hdr_crc != crc) { + ubi_err(ubi, "bad VID header CRC at PEB %d, calculated %#08x, read %#08x", + pnum, crc, hdr_crc); + ubi_err(ubi, "self-check failed for PEB %d", pnum); + ubi_dump_vid_hdr(vid_hdr); + dump_stack(); + err = -EINVAL; + goto exit; + } + + err = self_check_vid_hdr(ubi, pnum, vid_hdr); + +exit: + ubi_free_vid_hdr(ubi, vid_hdr); + return err; +} + +/** + * self_check_write - make sure write succeeded. + * @ubi: UBI device description object + * @buf: buffer with data which were written + * @pnum: physical eraseblock number the data were written to + * @offset: offset within the physical eraseblock the data were written to + * @len: how many bytes were written + * + * This functions reads data which were recently written and compares it with + * the original data buffer - the data have to match. Returns zero if the data + * match and a negative error code if not or in case of failure. + */ +static int self_check_write(struct ubi_device *ubi, const void *buf, int pnum, + int offset, int len) +{ + int err, i; + size_t read; + void *buf1; + loff_t addr = (loff_t)pnum * ubi->peb_size + offset; + + if (!ubi_dbg_chk_io(ubi)) + return 0; + + buf1 = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); + if (!buf1) { + ubi_err(ubi, "cannot allocate memory to check writes"); + return 0; + } + + err = mtd_read(ubi->mtd, addr, len, &read, buf1); + if (err && !mtd_is_bitflip(err)) + goto out_free; + + for (i = 0; i < len; i++) { + uint8_t c = ((uint8_t *)buf)[i]; + uint8_t c1 = ((uint8_t *)buf1)[i]; + int dump_len; + + if (c == c1) + continue; + + ubi_err(ubi, "self-check failed for PEB %d:%d, len %d", + pnum, offset, len); + ubi_msg(ubi, "data differ at position %d", i); + dump_len = max_t(int, 128, len - i); + ubi_msg(ubi, "hex dump of the original buffer from %d to %d", + i, i + dump_len); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, + buf + i, dump_len, 1); + ubi_msg(ubi, "hex dump of the read buffer from %d to %d", + i, i + dump_len); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, + buf1 + i, dump_len, 1); + dump_stack(); + err = -EINVAL; + goto out_free; + } + + vfree(buf1); + return 0; + +out_free: + vfree(buf1); + return err; +} + +/** + * ubi_self_check_all_ff - check that a region of flash is empty. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * @offset: the starting offset within the physical eraseblock to check + * @len: the length of the region to check + * + * This function returns zero if only 0xFF bytes are present at offset + * @offset of the physical eraseblock @pnum, and a negative error code if not + * or if an error occurred. + */ +int ubi_self_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len) +{ + size_t read; + int err; + void *buf; + loff_t addr = (loff_t)pnum * ubi->peb_size + offset; + + if (!ubi_dbg_chk_io(ubi)) + return 0; + + buf = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); + if (!buf) { + ubi_err(ubi, "cannot allocate memory to check for 0xFFs"); + return 0; + } + + err = mtd_read(ubi->mtd, addr, len, &read, buf); + if (err && !mtd_is_bitflip(err)) { + ubi_err(ubi, "err %d while reading %d bytes from PEB %d:%d, read %zd bytes", + err, len, pnum, offset, read); + goto error; + } + + err = ubi_check_pattern(buf, 0xFF, len); + if (err == 0) { + ubi_err(ubi, "flash region at PEB %d:%d, length %d does not contain all 0xFF bytes", + pnum, offset, len); + goto fail; + } + + vfree(buf); + return 0; + +fail: + ubi_err(ubi, "self-check failed for PEB %d", pnum); + ubi_msg(ubi, "hex dump of the %d-%d region", offset, offset + len); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf, len, 1); + err = -EINVAL; +error: + dump_stack(); + vfree(buf); + return err; +} diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c new file mode 100644 index 000000000..e84488773 --- /dev/null +++ b/drivers/mtd/ubi/kapi.c @@ -0,0 +1,864 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* This file mostly implements UBI kernel API functions */ + +#include <linux/module.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/namei.h> +#include <linux/fs.h> +#include <asm/div64.h> +#include "ubi.h" + +/** + * ubi_do_get_device_info - get information about UBI device. + * @ubi: UBI device description object + * @di: the information is stored here + * + * This function is the same as 'ubi_get_device_info()', but it assumes the UBI + * device is locked and cannot disappear. + */ +void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di) +{ + di->ubi_num = ubi->ubi_num; + di->leb_size = ubi->leb_size; + di->leb_start = ubi->leb_start; + di->min_io_size = ubi->min_io_size; + di->max_write_size = ubi->max_write_size; + di->ro_mode = ubi->ro_mode; + di->cdev = ubi->cdev.dev; +} +EXPORT_SYMBOL_GPL(ubi_do_get_device_info); + +/** + * ubi_get_device_info - get information about UBI device. + * @ubi_num: UBI device number + * @di: the information is stored here + * + * This function returns %0 in case of success, %-EINVAL if the UBI device + * number is invalid, and %-ENODEV if there is no such UBI device. + */ +int ubi_get_device_info(int ubi_num, struct ubi_device_info *di) +{ + struct ubi_device *ubi; + + if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) + return -EINVAL; + ubi = ubi_get_device(ubi_num); + if (!ubi) + return -ENODEV; + ubi_do_get_device_info(ubi, di); + ubi_put_device(ubi); + return 0; +} +EXPORT_SYMBOL_GPL(ubi_get_device_info); + +/** + * ubi_do_get_volume_info - get information about UBI volume. + * @ubi: UBI device description object + * @vol: volume description object + * @vi: the information is stored here + */ +void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol, + struct ubi_volume_info *vi) +{ + vi->vol_id = vol->vol_id; + vi->ubi_num = ubi->ubi_num; + vi->size = vol->reserved_pebs; + vi->used_bytes = vol->used_bytes; + vi->vol_type = vol->vol_type; + vi->corrupted = vol->corrupted; + vi->upd_marker = vol->upd_marker; + vi->alignment = vol->alignment; + vi->usable_leb_size = vol->usable_leb_size; + vi->name_len = vol->name_len; + vi->name = vol->name; + vi->cdev = vol->cdev.dev; +} + +/** + * ubi_get_volume_info - get information about UBI volume. + * @desc: volume descriptor + * @vi: the information is stored here + */ +void ubi_get_volume_info(struct ubi_volume_desc *desc, + struct ubi_volume_info *vi) +{ + ubi_do_get_volume_info(desc->vol->ubi, desc->vol, vi); +} +EXPORT_SYMBOL_GPL(ubi_get_volume_info); + +/** + * ubi_open_volume - open UBI volume. + * @ubi_num: UBI device number + * @vol_id: volume ID + * @mode: open mode + * + * The @mode parameter specifies if the volume should be opened in read-only + * mode, read-write mode, or exclusive mode. The exclusive mode guarantees that + * nobody else will be able to open this volume. UBI allows to have many volume + * readers and one writer at a time. + * + * If a static volume is being opened for the first time since boot, it will be + * checked by this function, which means it will be fully read and the CRC + * checksum of each logical eraseblock will be checked. + * + * This function returns volume descriptor in case of success and a negative + * error code in case of failure. + */ +struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode) +{ + int err; + struct ubi_volume_desc *desc; + struct ubi_device *ubi; + struct ubi_volume *vol; + + dbg_gen("open device %d, volume %d, mode %d", ubi_num, vol_id, mode); + + if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) + return ERR_PTR(-EINVAL); + + if (mode != UBI_READONLY && mode != UBI_READWRITE && + mode != UBI_EXCLUSIVE && mode != UBI_METAONLY) + return ERR_PTR(-EINVAL); + + /* + * First of all, we have to get the UBI device to prevent its removal. + */ + ubi = ubi_get_device(ubi_num); + if (!ubi) + return ERR_PTR(-ENODEV); + + if (vol_id < 0 || vol_id >= ubi->vtbl_slots) { + err = -EINVAL; + goto out_put_ubi; + } + + desc = kmalloc(sizeof(struct ubi_volume_desc), GFP_KERNEL); + if (!desc) { + err = -ENOMEM; + goto out_put_ubi; + } + + err = -ENODEV; + if (!try_module_get(THIS_MODULE)) + goto out_free; + + spin_lock(&ubi->volumes_lock); + vol = ubi->volumes[vol_id]; + if (!vol) + goto out_unlock; + + err = -EBUSY; + switch (mode) { + case UBI_READONLY: + if (vol->exclusive) + goto out_unlock; + vol->readers += 1; + break; + + case UBI_READWRITE: + if (vol->exclusive || vol->writers > 0) + goto out_unlock; + vol->writers += 1; + break; + + case UBI_EXCLUSIVE: + if (vol->exclusive || vol->writers || vol->readers || + vol->metaonly) + goto out_unlock; + vol->exclusive = 1; + break; + + case UBI_METAONLY: + if (vol->metaonly || vol->exclusive) + goto out_unlock; + vol->metaonly = 1; + break; + } + get_device(&vol->dev); + vol->ref_count += 1; + spin_unlock(&ubi->volumes_lock); + + desc->vol = vol; + desc->mode = mode; + + mutex_lock(&ubi->ckvol_mutex); + if (!vol->checked) { + /* This is the first open - check the volume */ + err = ubi_check_volume(ubi, vol_id); + if (err < 0) { + mutex_unlock(&ubi->ckvol_mutex); + ubi_close_volume(desc); + return ERR_PTR(err); + } + if (err == 1) { + ubi_warn(ubi, "volume %d on UBI device %d is corrupted", + vol_id, ubi->ubi_num); + vol->corrupted = 1; + } + vol->checked = 1; + } + mutex_unlock(&ubi->ckvol_mutex); + + return desc; + +out_unlock: + spin_unlock(&ubi->volumes_lock); + module_put(THIS_MODULE); +out_free: + kfree(desc); +out_put_ubi: + ubi_put_device(ubi); + ubi_err(ubi, "cannot open device %d, volume %d, error %d", + ubi_num, vol_id, err); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(ubi_open_volume); + +/** + * ubi_open_volume_nm - open UBI volume by name. + * @ubi_num: UBI device number + * @name: volume name + * @mode: open mode + * + * This function is similar to 'ubi_open_volume()', but opens a volume by name. + */ +struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name, + int mode) +{ + int i, vol_id = -1, len; + struct ubi_device *ubi; + struct ubi_volume_desc *ret; + + dbg_gen("open device %d, volume %s, mode %d", ubi_num, name, mode); + + if (!name) + return ERR_PTR(-EINVAL); + + len = strnlen(name, UBI_VOL_NAME_MAX + 1); + if (len > UBI_VOL_NAME_MAX) + return ERR_PTR(-EINVAL); + + if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) + return ERR_PTR(-EINVAL); + + ubi = ubi_get_device(ubi_num); + if (!ubi) + return ERR_PTR(-ENODEV); + + spin_lock(&ubi->volumes_lock); + /* Walk all volumes of this UBI device */ + for (i = 0; i < ubi->vtbl_slots; i++) { + struct ubi_volume *vol = ubi->volumes[i]; + + if (vol && len == vol->name_len && !strcmp(name, vol->name)) { + vol_id = i; + break; + } + } + spin_unlock(&ubi->volumes_lock); + + if (vol_id >= 0) + ret = ubi_open_volume(ubi_num, vol_id, mode); + else + ret = ERR_PTR(-ENODEV); + + /* + * We should put the UBI device even in case of success, because + * 'ubi_open_volume()' took a reference as well. + */ + ubi_put_device(ubi); + return ret; +} +EXPORT_SYMBOL_GPL(ubi_open_volume_nm); + +/** + * ubi_open_volume_path - open UBI volume by its character device node path. + * @pathname: volume character device node path + * @mode: open mode + * + * This function is similar to 'ubi_open_volume()', but opens a volume the path + * to its character device node. + */ +struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode) +{ + int error, ubi_num, vol_id, mod; + struct inode *inode; + struct path path; + + dbg_gen("open volume %s, mode %d", pathname, mode); + + if (!pathname || !*pathname) + return ERR_PTR(-EINVAL); + + error = kern_path(pathname, LOOKUP_FOLLOW, &path); + if (error) + return ERR_PTR(error); + + inode = d_backing_inode(path.dentry); + mod = inode->i_mode; + ubi_num = ubi_major2num(imajor(inode)); + vol_id = iminor(inode) - 1; + path_put(&path); + + if (!S_ISCHR(mod)) + return ERR_PTR(-EINVAL); + if (vol_id >= 0 && ubi_num >= 0) + return ubi_open_volume(ubi_num, vol_id, mode); + return ERR_PTR(-ENODEV); +} +EXPORT_SYMBOL_GPL(ubi_open_volume_path); + +/** + * ubi_close_volume - close UBI volume. + * @desc: volume descriptor + */ +void ubi_close_volume(struct ubi_volume_desc *desc) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + + dbg_gen("close device %d, volume %d, mode %d", + ubi->ubi_num, vol->vol_id, desc->mode); + + spin_lock(&ubi->volumes_lock); + switch (desc->mode) { + case UBI_READONLY: + vol->readers -= 1; + break; + case UBI_READWRITE: + vol->writers -= 1; + break; + case UBI_EXCLUSIVE: + vol->exclusive = 0; + break; + case UBI_METAONLY: + vol->metaonly = 0; + break; + } + vol->ref_count -= 1; + spin_unlock(&ubi->volumes_lock); + + kfree(desc); + put_device(&vol->dev); + ubi_put_device(ubi); + module_put(THIS_MODULE); +} +EXPORT_SYMBOL_GPL(ubi_close_volume); + +/** + * leb_read_sanity_check - does sanity checks on read requests. + * @desc: volume descriptor + * @lnum: logical eraseblock number to read from + * @offset: offset within the logical eraseblock to read from + * @len: how many bytes to read + * + * This function is used by ubi_leb_read() and ubi_leb_read_sg() + * to perform sanity checks. + */ +static int leb_read_sanity_check(struct ubi_volume_desc *desc, int lnum, + int offset, int len) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int vol_id = vol->vol_id; + + if (vol_id < 0 || vol_id >= ubi->vtbl_slots || lnum < 0 || + lnum >= vol->used_ebs || offset < 0 || len < 0 || + offset + len > vol->usable_leb_size) + return -EINVAL; + + if (vol->vol_type == UBI_STATIC_VOLUME) { + if (vol->used_ebs == 0) + /* Empty static UBI volume */ + return 0; + if (lnum == vol->used_ebs - 1 && + offset + len > vol->last_eb_bytes) + return -EINVAL; + } + + if (vol->upd_marker) + return -EBADF; + + return 0; +} + +/** + * ubi_leb_read - read data. + * @desc: volume descriptor + * @lnum: logical eraseblock number to read from + * @buf: buffer where to store the read data + * @offset: offset within the logical eraseblock to read from + * @len: how many bytes to read + * @check: whether UBI has to check the read data's CRC or not. + * + * This function reads data from offset @offset of logical eraseblock @lnum and + * stores the data at @buf. When reading from static volumes, @check specifies + * whether the data has to be checked or not. If yes, the whole logical + * eraseblock will be read and its CRC checksum will be checked (i.e., the CRC + * checksum is per-eraseblock). So checking may substantially slow down the + * read speed. The @check argument is ignored for dynamic volumes. + * + * In case of success, this function returns zero. In case of failure, this + * function returns a negative error code. + * + * %-EBADMSG error code is returned: + * o for both static and dynamic volumes if MTD driver has detected a data + * integrity problem (unrecoverable ECC checksum mismatch in case of NAND); + * o for static volumes in case of data CRC mismatch. + * + * If the volume is damaged because of an interrupted update this function just + * returns immediately with %-EBADF error code. + */ +int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, + int len, int check) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int err, vol_id = vol->vol_id; + + dbg_gen("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset); + + err = leb_read_sanity_check(desc, lnum, offset, len); + if (err < 0) + return err; + + if (len == 0) + return 0; + + err = ubi_eba_read_leb(ubi, vol, lnum, buf, offset, len, check); + if (err && mtd_is_eccerr(err) && vol->vol_type == UBI_STATIC_VOLUME) { + ubi_warn(ubi, "mark volume %d as corrupted", vol_id); + vol->corrupted = 1; + } + + return err; +} +EXPORT_SYMBOL_GPL(ubi_leb_read); + + +/** + * ubi_leb_read_sg - read data into a scatter gather list. + * @desc: volume descriptor + * @lnum: logical eraseblock number to read from + * @buf: buffer where to store the read data + * @offset: offset within the logical eraseblock to read from + * @len: how many bytes to read + * @check: whether UBI has to check the read data's CRC or not. + * + * This function works exactly like ubi_leb_read_sg(). But instead of + * storing the read data into a buffer it writes to an UBI scatter gather + * list. + */ +int ubi_leb_read_sg(struct ubi_volume_desc *desc, int lnum, struct ubi_sgl *sgl, + int offset, int len, int check) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int err, vol_id = vol->vol_id; + + dbg_gen("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset); + + err = leb_read_sanity_check(desc, lnum, offset, len); + if (err < 0) + return err; + + if (len == 0) + return 0; + + err = ubi_eba_read_leb_sg(ubi, vol, sgl, lnum, offset, len, check); + if (err && mtd_is_eccerr(err) && vol->vol_type == UBI_STATIC_VOLUME) { + ubi_warn(ubi, "mark volume %d as corrupted", vol_id); + vol->corrupted = 1; + } + + return err; +} +EXPORT_SYMBOL_GPL(ubi_leb_read_sg); + +/** + * ubi_leb_write - write data. + * @desc: volume descriptor + * @lnum: logical eraseblock number to write to + * @buf: data to write + * @offset: offset within the logical eraseblock where to write + * @len: how many bytes to write + * + * This function writes @len bytes of data from @buf to offset @offset of + * logical eraseblock @lnum. + * + * This function takes care of physical eraseblock write failures. If write to + * the physical eraseblock write operation fails, the logical eraseblock is + * re-mapped to another physical eraseblock, the data is recovered, and the + * write finishes. UBI has a pool of reserved physical eraseblocks for this. + * + * If all the data were successfully written, zero is returned. If an error + * occurred and UBI has not been able to recover from it, this function returns + * a negative error code. Note, in case of an error, it is possible that + * something was still written to the flash media, but that may be some + * garbage. + * + * If the volume is damaged because of an interrupted update this function just + * returns immediately with %-EBADF code. + */ +int ubi_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, + int offset, int len) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int vol_id = vol->vol_id; + + dbg_gen("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset); + + if (vol_id < 0 || vol_id >= ubi->vtbl_slots) + return -EINVAL; + + if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; + + if (lnum < 0 || lnum >= vol->reserved_pebs || offset < 0 || len < 0 || + offset + len > vol->usable_leb_size || + offset & (ubi->min_io_size - 1) || len & (ubi->min_io_size - 1)) + return -EINVAL; + + if (vol->upd_marker) + return -EBADF; + + if (len == 0) + return 0; + + return ubi_eba_write_leb(ubi, vol, lnum, buf, offset, len); +} +EXPORT_SYMBOL_GPL(ubi_leb_write); + +/* + * ubi_leb_change - change logical eraseblock atomically. + * @desc: volume descriptor + * @lnum: logical eraseblock number to change + * @buf: data to write + * @len: how many bytes to write + * + * This function changes the contents of a logical eraseblock atomically. @buf + * has to contain new logical eraseblock data, and @len - the length of the + * data, which has to be aligned. The length may be shorter than the logical + * eraseblock size, ant the logical eraseblock may be appended to more times + * later on. This function guarantees that in case of an unclean reboot the old + * contents is preserved. Returns zero in case of success and a negative error + * code in case of failure. + */ +int ubi_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, + int len) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int vol_id = vol->vol_id; + + dbg_gen("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum); + + if (vol_id < 0 || vol_id >= ubi->vtbl_slots) + return -EINVAL; + + if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; + + if (lnum < 0 || lnum >= vol->reserved_pebs || len < 0 || + len > vol->usable_leb_size || len & (ubi->min_io_size - 1)) + return -EINVAL; + + if (vol->upd_marker) + return -EBADF; + + if (len == 0) + return 0; + + return ubi_eba_atomic_leb_change(ubi, vol, lnum, buf, len); +} +EXPORT_SYMBOL_GPL(ubi_leb_change); + +/** + * ubi_leb_erase - erase logical eraseblock. + * @desc: volume descriptor + * @lnum: logical eraseblock number + * + * This function un-maps logical eraseblock @lnum and synchronously erases the + * correspondent physical eraseblock. Returns zero in case of success and a + * negative error code in case of failure. + * + * If the volume is damaged because of an interrupted update this function just + * returns immediately with %-EBADF code. + */ +int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int err; + + dbg_gen("erase LEB %d:%d", vol->vol_id, lnum); + + if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; + + if (lnum < 0 || lnum >= vol->reserved_pebs) + return -EINVAL; + + if (vol->upd_marker) + return -EBADF; + + err = ubi_eba_unmap_leb(ubi, vol, lnum); + if (err) + return err; + + return ubi_wl_flush(ubi, vol->vol_id, lnum); +} +EXPORT_SYMBOL_GPL(ubi_leb_erase); + +/** + * ubi_leb_unmap - un-map logical eraseblock. + * @desc: volume descriptor + * @lnum: logical eraseblock number + * + * This function un-maps logical eraseblock @lnum and schedules the + * corresponding physical eraseblock for erasure, so that it will eventually be + * physically erased in background. This operation is much faster than the + * erase operation. + * + * Unlike erase, the un-map operation does not guarantee that the logical + * eraseblock will contain all 0xFF bytes when UBI is initialized again. For + * example, if several logical eraseblocks are un-mapped, and an unclean reboot + * happens after this, the logical eraseblocks will not necessarily be + * un-mapped again when this MTD device is attached. They may actually be + * mapped to the same physical eraseblocks again. So, this function has to be + * used with care. + * + * In other words, when un-mapping a logical eraseblock, UBI does not store + * any information about this on the flash media, it just marks the logical + * eraseblock as "un-mapped" in RAM. If UBI is detached before the physical + * eraseblock is physically erased, it will be mapped again to the same logical + * eraseblock when the MTD device is attached again. + * + * The main and obvious use-case of this function is when the contents of a + * logical eraseblock has to be re-written. Then it is much more efficient to + * first un-map it, then write new data, rather than first erase it, then write + * new data. Note, once new data has been written to the logical eraseblock, + * UBI guarantees that the old contents has gone forever. In other words, if an + * unclean reboot happens after the logical eraseblock has been un-mapped and + * then written to, it will contain the last written data. + * + * This function returns zero in case of success and a negative error code in + * case of failure. If the volume is damaged because of an interrupted update + * this function just returns immediately with %-EBADF code. + */ +int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + + dbg_gen("unmap LEB %d:%d", vol->vol_id, lnum); + + if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; + + if (lnum < 0 || lnum >= vol->reserved_pebs) + return -EINVAL; + + if (vol->upd_marker) + return -EBADF; + + return ubi_eba_unmap_leb(ubi, vol, lnum); +} +EXPORT_SYMBOL_GPL(ubi_leb_unmap); + +/** + * ubi_leb_map - map logical eraseblock to a physical eraseblock. + * @desc: volume descriptor + * @lnum: logical eraseblock number + * + * This function maps an un-mapped logical eraseblock @lnum to a physical + * eraseblock. This means, that after a successful invocation of this + * function the logical eraseblock @lnum will be empty (contain only %0xFF + * bytes) and be mapped to a physical eraseblock, even if an unclean reboot + * happens. + * + * This function returns zero in case of success, %-EBADF if the volume is + * damaged because of an interrupted update, %-EBADMSG if the logical + * eraseblock is already mapped, and other negative error codes in case of + * other failures. + */ +int ubi_leb_map(struct ubi_volume_desc *desc, int lnum) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + + dbg_gen("unmap LEB %d:%d", vol->vol_id, lnum); + + if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; + + if (lnum < 0 || lnum >= vol->reserved_pebs) + return -EINVAL; + + if (vol->upd_marker) + return -EBADF; + + if (vol->eba_tbl[lnum] >= 0) + return -EBADMSG; + + return ubi_eba_write_leb(ubi, vol, lnum, NULL, 0, 0); +} +EXPORT_SYMBOL_GPL(ubi_leb_map); + +/** + * ubi_is_mapped - check if logical eraseblock is mapped. + * @desc: volume descriptor + * @lnum: logical eraseblock number + * + * This function checks if logical eraseblock @lnum is mapped to a physical + * eraseblock. If a logical eraseblock is un-mapped, this does not necessarily + * mean it will still be un-mapped after the UBI device is re-attached. The + * logical eraseblock may become mapped to the physical eraseblock it was last + * mapped to. + * + * This function returns %1 if the LEB is mapped, %0 if not, and a negative + * error code in case of failure. If the volume is damaged because of an + * interrupted update this function just returns immediately with %-EBADF error + * code. + */ +int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum) +{ + struct ubi_volume *vol = desc->vol; + + dbg_gen("test LEB %d:%d", vol->vol_id, lnum); + + if (lnum < 0 || lnum >= vol->reserved_pebs) + return -EINVAL; + + if (vol->upd_marker) + return -EBADF; + + return vol->eba_tbl[lnum] >= 0; +} +EXPORT_SYMBOL_GPL(ubi_is_mapped); + +/** + * ubi_sync - synchronize UBI device buffers. + * @ubi_num: UBI device to synchronize + * + * The underlying MTD device may cache data in hardware or in software. This + * function ensures the caches are flushed. Returns zero in case of success and + * a negative error code in case of failure. + */ +int ubi_sync(int ubi_num) +{ + struct ubi_device *ubi; + + ubi = ubi_get_device(ubi_num); + if (!ubi) + return -ENODEV; + + mtd_sync(ubi->mtd); + ubi_put_device(ubi); + return 0; +} +EXPORT_SYMBOL_GPL(ubi_sync); + +/** + * ubi_flush - flush UBI work queue. + * @ubi_num: UBI device to flush work queue + * @vol_id: volume id to flush for + * @lnum: logical eraseblock number to flush for + * + * This function executes all pending works for a particular volume id / logical + * eraseblock number pair. If either value is set to %UBI_ALL, then it acts as + * a wildcard for all of the corresponding volume numbers or logical + * eraseblock numbers. It returns zero in case of success and a negative error + * code in case of failure. + */ +int ubi_flush(int ubi_num, int vol_id, int lnum) +{ + struct ubi_device *ubi; + int err = 0; + + ubi = ubi_get_device(ubi_num); + if (!ubi) + return -ENODEV; + + err = ubi_wl_flush(ubi, vol_id, lnum); + ubi_put_device(ubi); + return err; +} +EXPORT_SYMBOL_GPL(ubi_flush); + +BLOCKING_NOTIFIER_HEAD(ubi_notifiers); + +/** + * ubi_register_volume_notifier - register a volume notifier. + * @nb: the notifier description object + * @ignore_existing: if non-zero, do not send "added" notification for all + * already existing volumes + * + * This function registers a volume notifier, which means that + * 'nb->notifier_call()' will be invoked when an UBI volume is created, + * removed, re-sized, re-named, or updated. The first argument of the function + * is the notification type. The second argument is pointer to a + * &struct ubi_notification object which describes the notification event. + * Using UBI API from the volume notifier is prohibited. + * + * This function returns zero in case of success and a negative error code + * in case of failure. + */ +int ubi_register_volume_notifier(struct notifier_block *nb, + int ignore_existing) +{ + int err; + + err = blocking_notifier_chain_register(&ubi_notifiers, nb); + if (err != 0) + return err; + if (ignore_existing) + return 0; + + /* + * We are going to walk all UBI devices and all volumes, and + * notify the user about existing volumes by the %UBI_VOLUME_ADDED + * event. We have to lock the @ubi_devices_mutex to make sure UBI + * devices do not disappear. + */ + mutex_lock(&ubi_devices_mutex); + ubi_enumerate_volumes(nb); + mutex_unlock(&ubi_devices_mutex); + + return err; +} +EXPORT_SYMBOL_GPL(ubi_register_volume_notifier); + +/** + * ubi_unregister_volume_notifier - unregister the volume notifier. + * @nb: the notifier description object + * + * This function unregisters volume notifier @nm and returns zero in case of + * success and a negative error code in case of failure. + */ +int ubi_unregister_volume_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&ubi_notifiers, nb); +} +EXPORT_SYMBOL_GPL(ubi_unregister_volume_notifier); diff --git a/drivers/mtd/ubi/misc.c b/drivers/mtd/ubi/misc.c new file mode 100644 index 000000000..2a45ac210 --- /dev/null +++ b/drivers/mtd/ubi/misc.c @@ -0,0 +1,155 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* Here we keep miscellaneous functions which are used all over the UBI code */ + +#include "ubi.h" + +/** + * calc_data_len - calculate how much real data is stored in a buffer. + * @ubi: UBI device description object + * @buf: a buffer with the contents of the physical eraseblock + * @length: the buffer length + * + * This function calculates how much "real data" is stored in @buf and returnes + * the length. Continuous 0xFF bytes at the end of the buffer are not + * considered as "real data". + */ +int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, + int length) +{ + int i; + + ubi_assert(!(length & (ubi->min_io_size - 1))); + + for (i = length - 1; i >= 0; i--) + if (((const uint8_t *)buf)[i] != 0xFF) + break; + + /* The resulting length must be aligned to the minimum flash I/O size */ + length = ALIGN(i + 1, ubi->min_io_size); + return length; +} + +/** + * ubi_check_volume - check the contents of a static volume. + * @ubi: UBI device description object + * @vol_id: ID of the volume to check + * + * This function checks if static volume @vol_id is corrupted by fully reading + * it and checking data CRC. This function returns %0 if the volume is not + * corrupted, %1 if it is corrupted and a negative error code in case of + * failure. Dynamic volumes are not checked and zero is returned immediately. + */ +int ubi_check_volume(struct ubi_device *ubi, int vol_id) +{ + void *buf; + int err = 0, i; + struct ubi_volume *vol = ubi->volumes[vol_id]; + + if (vol->vol_type != UBI_STATIC_VOLUME) + return 0; + + buf = vmalloc(vol->usable_leb_size); + if (!buf) + return -ENOMEM; + + for (i = 0; i < vol->used_ebs; i++) { + int size; + + cond_resched(); + + if (i == vol->used_ebs - 1) + size = vol->last_eb_bytes; + else + size = vol->usable_leb_size; + + err = ubi_eba_read_leb(ubi, vol, i, buf, 0, size, 1); + if (err) { + if (mtd_is_eccerr(err)) + err = 1; + break; + } + } + + vfree(buf); + return err; +} + +/** + * ubi_update_reserved - update bad eraseblock handling accounting data. + * @ubi: UBI device description object + * + * This function calculates the gap between current number of PEBs reserved for + * bad eraseblock handling and the required level of PEBs that must be + * reserved, and if necessary, reserves more PEBs to fill that gap, according + * to availability. Should be called with ubi->volumes_lock held. + */ +void ubi_update_reserved(struct ubi_device *ubi) +{ + int need = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs; + + if (need <= 0 || ubi->avail_pebs == 0) + return; + + need = min_t(int, need, ubi->avail_pebs); + ubi->avail_pebs -= need; + ubi->rsvd_pebs += need; + ubi->beb_rsvd_pebs += need; + ubi_msg(ubi, "reserved more %d PEBs for bad PEB handling", need); +} + +/** + * ubi_calculate_reserved - calculate how many PEBs must be reserved for bad + * eraseblock handling. + * @ubi: UBI device description object + */ +void ubi_calculate_reserved(struct ubi_device *ubi) +{ + /* + * Calculate the actual number of PEBs currently needed to be reserved + * for future bad eraseblock handling. + */ + ubi->beb_rsvd_level = ubi->bad_peb_limit - ubi->bad_peb_count; + if (ubi->beb_rsvd_level < 0) { + ubi->beb_rsvd_level = 0; + ubi_warn(ubi, "number of bad PEBs (%d) is above the expected limit (%d), not reserving any PEBs for bad PEB handling, will use available PEBs (if any)", + ubi->bad_peb_count, ubi->bad_peb_limit); + } +} + +/** + * ubi_check_pattern - check if buffer contains only a certain byte pattern. + * @buf: buffer to check + * @patt: the pattern to check + * @size: buffer size in bytes + * + * This function returns %1 in there are only @patt bytes in @buf, and %0 if + * something else was also found. + */ +int ubi_check_pattern(const void *buf, uint8_t patt, int size) +{ + int i; + + for (i = 0; i < size; i++) + if (((const uint8_t *)buf)[i] != patt) + return 0; + return 1; +} diff --git a/drivers/mtd/ubi/ubi-media.h b/drivers/mtd/ubi/ubi-media.h new file mode 100644 index 000000000..d0d072e7c --- /dev/null +++ b/drivers/mtd/ubi/ubi-media.h @@ -0,0 +1,513 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Thomas Gleixner + * Frank Haverkamp + * Oliver Lohmann + * Andreas Arnez + */ + +/* + * This file defines the layout of UBI headers and all the other UBI on-flash + * data structures. + */ + +#ifndef __UBI_MEDIA_H__ +#define __UBI_MEDIA_H__ + +#include <asm/byteorder.h> + +/* The version of UBI images supported by this implementation */ +#define UBI_VERSION 1 + +/* The highest erase counter value supported by this implementation */ +#define UBI_MAX_ERASECOUNTER 0x7FFFFFFF + +/* The initial CRC32 value used when calculating CRC checksums */ +#define UBI_CRC32_INIT 0xFFFFFFFFU + +/* Erase counter header magic number (ASCII "UBI#") */ +#define UBI_EC_HDR_MAGIC 0x55424923 +/* Volume identifier header magic number (ASCII "UBI!") */ +#define UBI_VID_HDR_MAGIC 0x55424921 + +/* + * Volume type constants used in the volume identifier header. + * + * @UBI_VID_DYNAMIC: dynamic volume + * @UBI_VID_STATIC: static volume + */ +enum { + UBI_VID_DYNAMIC = 1, + UBI_VID_STATIC = 2 +}; + +/* + * Volume flags used in the volume table record. + * + * @UBI_VTBL_AUTORESIZE_FLG: auto-resize this volume + * + * %UBI_VTBL_AUTORESIZE_FLG flag can be set only for one volume in the volume + * table. UBI automatically re-sizes the volume which has this flag and makes + * the volume to be of largest possible size. This means that if after the + * initialization UBI finds out that there are available physical eraseblocks + * present on the device, it automatically appends all of them to the volume + * (the physical eraseblocks reserved for bad eraseblocks handling and other + * reserved physical eraseblocks are not taken). So, if there is a volume with + * the %UBI_VTBL_AUTORESIZE_FLG flag set, the amount of available logical + * eraseblocks will be zero after UBI is loaded, because all of them will be + * reserved for this volume. Note, the %UBI_VTBL_AUTORESIZE_FLG bit is cleared + * after the volume had been initialized. + * + * The auto-resize feature is useful for device production purposes. For + * example, different NAND flash chips may have different amount of initial bad + * eraseblocks, depending of particular chip instance. Manufacturers of NAND + * chips usually guarantee that the amount of initial bad eraseblocks does not + * exceed certain percent, e.g. 2%. When one creates an UBI image which will be + * flashed to the end devices in production, he does not know the exact amount + * of good physical eraseblocks the NAND chip on the device will have, but this + * number is required to calculate the volume sized and put them to the volume + * table of the UBI image. In this case, one of the volumes (e.g., the one + * which will store the root file system) is marked as "auto-resizable", and + * UBI will adjust its size on the first boot if needed. + * + * Note, first UBI reserves some amount of physical eraseblocks for bad + * eraseblock handling, and then re-sizes the volume, not vice-versa. This + * means that the pool of reserved physical eraseblocks will always be present. + */ +enum { + UBI_VTBL_AUTORESIZE_FLG = 0x01, +}; + +/* + * Compatibility constants used by internal volumes. + * + * @UBI_COMPAT_DELETE: delete this internal volume before anything is written + * to the flash + * @UBI_COMPAT_RO: attach this device in read-only mode + * @UBI_COMPAT_PRESERVE: preserve this internal volume - do not touch its + * physical eraseblocks, don't allow the wear-leveling + * sub-system to move them + * @UBI_COMPAT_REJECT: reject this UBI image + */ +enum { + UBI_COMPAT_DELETE = 1, + UBI_COMPAT_RO = 2, + UBI_COMPAT_PRESERVE = 4, + UBI_COMPAT_REJECT = 5 +}; + +/* Sizes of UBI headers */ +#define UBI_EC_HDR_SIZE sizeof(struct ubi_ec_hdr) +#define UBI_VID_HDR_SIZE sizeof(struct ubi_vid_hdr) + +/* Sizes of UBI headers without the ending CRC */ +#define UBI_EC_HDR_SIZE_CRC (UBI_EC_HDR_SIZE - sizeof(__be32)) +#define UBI_VID_HDR_SIZE_CRC (UBI_VID_HDR_SIZE - sizeof(__be32)) + +/** + * struct ubi_ec_hdr - UBI erase counter header. + * @magic: erase counter header magic number (%UBI_EC_HDR_MAGIC) + * @version: version of UBI implementation which is supposed to accept this + * UBI image + * @padding1: reserved for future, zeroes + * @ec: the erase counter + * @vid_hdr_offset: where the VID header starts + * @data_offset: where the user data start + * @image_seq: image sequence number + * @padding2: reserved for future, zeroes + * @hdr_crc: erase counter header CRC checksum + * + * The erase counter header takes 64 bytes and has a plenty of unused space for + * future usage. The unused fields are zeroed. The @version field is used to + * indicate the version of UBI implementation which is supposed to be able to + * work with this UBI image. If @version is greater than the current UBI + * version, the image is rejected. This may be useful in future if something + * is changed radically. This field is duplicated in the volume identifier + * header. + * + * The @vid_hdr_offset and @data_offset fields contain the offset of the the + * volume identifier header and user data, relative to the beginning of the + * physical eraseblock. These values have to be the same for all physical + * eraseblocks. + * + * The @image_seq field is used to validate a UBI image that has been prepared + * for a UBI device. The @image_seq value can be any value, but it must be the + * same on all eraseblocks. UBI will ensure that all new erase counter headers + * also contain this value, and will check the value when attaching the flash. + * One way to make use of @image_seq is to increase its value by one every time + * an image is flashed over an existing image, then, if the flashing does not + * complete, UBI will detect the error when attaching the media. + */ +struct ubi_ec_hdr { + __be32 magic; + __u8 version; + __u8 padding1[3]; + __be64 ec; /* Warning: the current limit is 31-bit anyway! */ + __be32 vid_hdr_offset; + __be32 data_offset; + __be32 image_seq; + __u8 padding2[32]; + __be32 hdr_crc; +} __packed; + +/** + * struct ubi_vid_hdr - on-flash UBI volume identifier header. + * @magic: volume identifier header magic number (%UBI_VID_HDR_MAGIC) + * @version: UBI implementation version which is supposed to accept this UBI + * image (%UBI_VERSION) + * @vol_type: volume type (%UBI_VID_DYNAMIC or %UBI_VID_STATIC) + * @copy_flag: if this logical eraseblock was copied from another physical + * eraseblock (for wear-leveling reasons) + * @compat: compatibility of this volume (%0, %UBI_COMPAT_DELETE, + * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT) + * @vol_id: ID of this volume + * @lnum: logical eraseblock number + * @padding1: reserved for future, zeroes + * @data_size: how many bytes of data this logical eraseblock contains + * @used_ebs: total number of used logical eraseblocks in this volume + * @data_pad: how many bytes at the end of this physical eraseblock are not + * used + * @data_crc: CRC checksum of the data stored in this logical eraseblock + * @padding2: reserved for future, zeroes + * @sqnum: sequence number + * @padding3: reserved for future, zeroes + * @hdr_crc: volume identifier header CRC checksum + * + * The @sqnum is the value of the global sequence counter at the time when this + * VID header was created. The global sequence counter is incremented each time + * UBI writes a new VID header to the flash, i.e. when it maps a logical + * eraseblock to a new physical eraseblock. The global sequence counter is an + * unsigned 64-bit integer and we assume it never overflows. The @sqnum + * (sequence number) is used to distinguish between older and newer versions of + * logical eraseblocks. + * + * There are 2 situations when there may be more than one physical eraseblock + * corresponding to the same logical eraseblock, i.e., having the same @vol_id + * and @lnum values in the volume identifier header. Suppose we have a logical + * eraseblock L and it is mapped to the physical eraseblock P. + * + * 1. Because UBI may erase physical eraseblocks asynchronously, the following + * situation is possible: L is asynchronously erased, so P is scheduled for + * erasure, then L is written to,i.e. mapped to another physical eraseblock P1, + * so P1 is written to, then an unclean reboot happens. Result - there are 2 + * physical eraseblocks P and P1 corresponding to the same logical eraseblock + * L. But P1 has greater sequence number, so UBI picks P1 when it attaches the + * flash. + * + * 2. From time to time UBI moves logical eraseblocks to other physical + * eraseblocks for wear-leveling reasons. If, for example, UBI moves L from P + * to P1, and an unclean reboot happens before P is physically erased, there + * are two physical eraseblocks P and P1 corresponding to L and UBI has to + * select one of them when the flash is attached. The @sqnum field says which + * PEB is the original (obviously P will have lower @sqnum) and the copy. But + * it is not enough to select the physical eraseblock with the higher sequence + * number, because the unclean reboot could have happen in the middle of the + * copying process, so the data in P is corrupted. It is also not enough to + * just select the physical eraseblock with lower sequence number, because the + * data there may be old (consider a case if more data was added to P1 after + * the copying). Moreover, the unclean reboot may happen when the erasure of P + * was just started, so it result in unstable P, which is "mostly" OK, but + * still has unstable bits. + * + * UBI uses the @copy_flag field to indicate that this logical eraseblock is a + * copy. UBI also calculates data CRC when the data is moved and stores it at + * the @data_crc field of the copy (P1). So when UBI needs to pick one physical + * eraseblock of two (P or P1), the @copy_flag of the newer one (P1) is + * examined. If it is cleared, the situation* is simple and the newer one is + * picked. If it is set, the data CRC of the copy (P1) is examined. If the CRC + * checksum is correct, this physical eraseblock is selected (P1). Otherwise + * the older one (P) is selected. + * + * There are 2 sorts of volumes in UBI: user volumes and internal volumes. + * Internal volumes are not seen from outside and are used for various internal + * UBI purposes. In this implementation there is only one internal volume - the + * layout volume. Internal volumes are the main mechanism of UBI extensions. + * For example, in future one may introduce a journal internal volume. Internal + * volumes have their own reserved range of IDs. + * + * The @compat field is only used for internal volumes and contains the "degree + * of their compatibility". It is always zero for user volumes. This field + * provides a mechanism to introduce UBI extensions and to be still compatible + * with older UBI binaries. For example, if someone introduced a journal in + * future, he would probably use %UBI_COMPAT_DELETE compatibility for the + * journal volume. And in this case, older UBI binaries, which know nothing + * about the journal volume, would just delete this volume and work perfectly + * fine. This is similar to what Ext2fs does when it is fed by an Ext3fs image + * - it just ignores the Ext3fs journal. + * + * The @data_crc field contains the CRC checksum of the contents of the logical + * eraseblock if this is a static volume. In case of dynamic volumes, it does + * not contain the CRC checksum as a rule. The only exception is when the + * data of the physical eraseblock was moved by the wear-leveling sub-system, + * then the wear-leveling sub-system calculates the data CRC and stores it in + * the @data_crc field. And of course, the @copy_flag is %in this case. + * + * The @data_size field is used only for static volumes because UBI has to know + * how many bytes of data are stored in this eraseblock. For dynamic volumes, + * this field usually contains zero. The only exception is when the data of the + * physical eraseblock was moved to another physical eraseblock for + * wear-leveling reasons. In this case, UBI calculates CRC checksum of the + * contents and uses both @data_crc and @data_size fields. In this case, the + * @data_size field contains data size. + * + * The @used_ebs field is used only for static volumes and indicates how many + * eraseblocks the data of the volume takes. For dynamic volumes this field is + * not used and always contains zero. + * + * The @data_pad is calculated when volumes are created using the alignment + * parameter. So, effectively, the @data_pad field reduces the size of logical + * eraseblocks of this volume. This is very handy when one uses block-oriented + * software (say, cramfs) on top of the UBI volume. + */ +struct ubi_vid_hdr { + __be32 magic; + __u8 version; + __u8 vol_type; + __u8 copy_flag; + __u8 compat; + __be32 vol_id; + __be32 lnum; + __u8 padding1[4]; + __be32 data_size; + __be32 used_ebs; + __be32 data_pad; + __be32 data_crc; + __u8 padding2[4]; + __be64 sqnum; + __u8 padding3[12]; + __be32 hdr_crc; +} __packed; + +/* Internal UBI volumes count */ +#define UBI_INT_VOL_COUNT 1 + +/* + * Starting ID of internal volumes: 0x7fffefff. + * There is reserved room for 4096 internal volumes. + */ +#define UBI_INTERNAL_VOL_START (0x7FFFFFFF - 4096) + +/* The layout volume contains the volume table */ + +#define UBI_LAYOUT_VOLUME_ID UBI_INTERNAL_VOL_START +#define UBI_LAYOUT_VOLUME_TYPE UBI_VID_DYNAMIC +#define UBI_LAYOUT_VOLUME_ALIGN 1 +#define UBI_LAYOUT_VOLUME_EBS 2 +#define UBI_LAYOUT_VOLUME_NAME "layout volume" +#define UBI_LAYOUT_VOLUME_COMPAT UBI_COMPAT_REJECT + +/* The maximum number of volumes per one UBI device */ +#define UBI_MAX_VOLUMES 128 + +/* The maximum volume name length */ +#define UBI_VOL_NAME_MAX 127 + +/* Size of the volume table record */ +#define UBI_VTBL_RECORD_SIZE sizeof(struct ubi_vtbl_record) + +/* Size of the volume table record without the ending CRC */ +#define UBI_VTBL_RECORD_SIZE_CRC (UBI_VTBL_RECORD_SIZE - sizeof(__be32)) + +/** + * struct ubi_vtbl_record - a record in the volume table. + * @reserved_pebs: how many physical eraseblocks are reserved for this volume + * @alignment: volume alignment + * @data_pad: how many bytes are unused at the end of the each physical + * eraseblock to satisfy the requested alignment + * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) + * @upd_marker: if volume update was started but not finished + * @name_len: volume name length + * @name: the volume name + * @flags: volume flags (%UBI_VTBL_AUTORESIZE_FLG) + * @padding: reserved, zeroes + * @crc: a CRC32 checksum of the record + * + * The volume table records are stored in the volume table, which is stored in + * the layout volume. The layout volume consists of 2 logical eraseblock, each + * of which contains a copy of the volume table (i.e., the volume table is + * duplicated). The volume table is an array of &struct ubi_vtbl_record + * objects indexed by the volume ID. + * + * If the size of the logical eraseblock is large enough to fit + * %UBI_MAX_VOLUMES records, the volume table contains %UBI_MAX_VOLUMES + * records. Otherwise, it contains as many records as it can fit (i.e., size of + * logical eraseblock divided by sizeof(struct ubi_vtbl_record)). + * + * The @upd_marker flag is used to implement volume update. It is set to %1 + * before update and set to %0 after the update. So if the update operation was + * interrupted, UBI knows that the volume is corrupted. + * + * The @alignment field is specified when the volume is created and cannot be + * later changed. It may be useful, for example, when a block-oriented file + * system works on top of UBI. The @data_pad field is calculated using the + * logical eraseblock size and @alignment. The alignment must be multiple to the + * minimal flash I/O unit. If @alignment is 1, all the available space of + * the physical eraseblocks is used. + * + * Empty records contain all zeroes and the CRC checksum of those zeroes. + */ +struct ubi_vtbl_record { + __be32 reserved_pebs; + __be32 alignment; + __be32 data_pad; + __u8 vol_type; + __u8 upd_marker; + __be16 name_len; + __u8 name[UBI_VOL_NAME_MAX+1]; + __u8 flags; + __u8 padding[23]; + __be32 crc; +} __packed; + +/* UBI fastmap on-flash data structures */ + +#define UBI_FM_SB_VOLUME_ID (UBI_LAYOUT_VOLUME_ID + 1) +#define UBI_FM_DATA_VOLUME_ID (UBI_LAYOUT_VOLUME_ID + 2) + +/* fastmap on-flash data structure format version */ +#define UBI_FM_FMT_VERSION 1 + +#define UBI_FM_SB_MAGIC 0x7B11D69F +#define UBI_FM_HDR_MAGIC 0xD4B82EF7 +#define UBI_FM_VHDR_MAGIC 0xFA370ED1 +#define UBI_FM_POOL_MAGIC 0x67AF4D08 +#define UBI_FM_EBA_MAGIC 0xf0c040a8 + +/* A fastmap supber block can be located between PEB 0 and + * UBI_FM_MAX_START */ +#define UBI_FM_MAX_START 64 + +/* A fastmap can use up to UBI_FM_MAX_BLOCKS PEBs */ +#define UBI_FM_MAX_BLOCKS 32 + +/* 5% of the total number of PEBs have to be scanned while attaching + * from a fastmap. + * But the size of this pool is limited to be between UBI_FM_MIN_POOL_SIZE and + * UBI_FM_MAX_POOL_SIZE */ +#define UBI_FM_MIN_POOL_SIZE 8 +#define UBI_FM_MAX_POOL_SIZE 256 + +/** + * struct ubi_fm_sb - UBI fastmap super block + * @magic: fastmap super block magic number (%UBI_FM_SB_MAGIC) + * @version: format version of this fastmap + * @data_crc: CRC over the fastmap data + * @used_blocks: number of PEBs used by this fastmap + * @block_loc: an array containing the location of all PEBs of the fastmap + * @block_ec: the erase counter of each used PEB + * @sqnum: highest sequence number value at the time while taking the fastmap + * + */ +struct ubi_fm_sb { + __be32 magic; + __u8 version; + __u8 padding1[3]; + __be32 data_crc; + __be32 used_blocks; + __be32 block_loc[UBI_FM_MAX_BLOCKS]; + __be32 block_ec[UBI_FM_MAX_BLOCKS]; + __be64 sqnum; + __u8 padding2[32]; +} __packed; + +/** + * struct ubi_fm_hdr - header of the fastmap data set + * @magic: fastmap header magic number (%UBI_FM_HDR_MAGIC) + * @free_peb_count: number of free PEBs known by this fastmap + * @used_peb_count: number of used PEBs known by this fastmap + * @scrub_peb_count: number of to be scrubbed PEBs known by this fastmap + * @bad_peb_count: number of bad PEBs known by this fastmap + * @erase_peb_count: number of bad PEBs which have to be erased + * @vol_count: number of UBI volumes known by this fastmap + */ +struct ubi_fm_hdr { + __be32 magic; + __be32 free_peb_count; + __be32 used_peb_count; + __be32 scrub_peb_count; + __be32 bad_peb_count; + __be32 erase_peb_count; + __be32 vol_count; + __u8 padding[4]; +} __packed; + +/* struct ubi_fm_hdr is followed by two struct ubi_fm_scan_pool */ + +/** + * struct ubi_fm_scan_pool - Fastmap pool PEBs to be scanned while attaching + * @magic: pool magic numer (%UBI_FM_POOL_MAGIC) + * @size: current pool size + * @max_size: maximal pool size + * @pebs: an array containing the location of all PEBs in this pool + */ +struct ubi_fm_scan_pool { + __be32 magic; + __be16 size; + __be16 max_size; + __be32 pebs[UBI_FM_MAX_POOL_SIZE]; + __be32 padding[4]; +} __packed; + +/* ubi_fm_scan_pool is followed by nfree+nused struct ubi_fm_ec records */ + +/** + * struct ubi_fm_ec - stores the erase counter of a PEB + * @pnum: PEB number + * @ec: ec of this PEB + */ +struct ubi_fm_ec { + __be32 pnum; + __be32 ec; +} __packed; + +/** + * struct ubi_fm_volhdr - Fastmap volume header + * it identifies the start of an eba table + * @magic: Fastmap volume header magic number (%UBI_FM_VHDR_MAGIC) + * @vol_id: volume id of the fastmapped volume + * @vol_type: type of the fastmapped volume + * @data_pad: data_pad value of the fastmapped volume + * @used_ebs: number of used LEBs within this volume + * @last_eb_bytes: number of bytes used in the last LEB + */ +struct ubi_fm_volhdr { + __be32 magic; + __be32 vol_id; + __u8 vol_type; + __u8 padding1[3]; + __be32 data_pad; + __be32 used_ebs; + __be32 last_eb_bytes; + __u8 padding2[8]; +} __packed; + +/* struct ubi_fm_volhdr is followed by one struct ubi_fm_eba records */ + +/** + * struct ubi_fm_eba - denotes an association beween a PEB and LEB + * @magic: EBA table magic number + * @reserved_pebs: number of table entries + * @pnum: PEB number of LEB (LEB is the index) + */ +struct ubi_fm_eba { + __be32 magic; + __be32 reserved_pebs; + __be32 pnum[0]; +} __packed; +#endif /* !__UBI_MEDIA_H__ */ diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h new file mode 100644 index 000000000..c998212fc --- /dev/null +++ b/drivers/mtd/ubi/ubi.h @@ -0,0 +1,1102 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2006, 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +#ifndef __UBI_UBI_H__ +#define __UBI_UBI_H__ + +#include <linux/types.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/mutex.h> +#include <linux/rwsem.h> +#include <linux/spinlock.h> +#include <linux/fs.h> +#include <linux/cdev.h> +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/vmalloc.h> +#include <linux/notifier.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/ubi.h> +#include <asm/pgtable.h> + +#include "ubi-media.h" + +/* Maximum number of supported UBI devices */ +#define UBI_MAX_DEVICES 32 + +/* UBI name used for character devices, sysfs, etc */ +#define UBI_NAME_STR "ubi" + +/* Normal UBI messages */ +#define ubi_msg(ubi, fmt, ...) pr_notice(UBI_NAME_STR "%d: " fmt "\n", \ + ubi->ubi_num, ##__VA_ARGS__) +/* UBI warning messages */ +#define ubi_warn(ubi, fmt, ...) pr_warn(UBI_NAME_STR "%d warning: %s: " fmt "\n", \ + ubi->ubi_num, __func__, ##__VA_ARGS__) +/* UBI error messages */ +#define ubi_err(ubi, fmt, ...) pr_err(UBI_NAME_STR "%d error: %s: " fmt "\n", \ + ubi->ubi_num, __func__, ##__VA_ARGS__) + +/* Background thread name pattern */ +#define UBI_BGT_NAME_PATTERN "ubi_bgt%dd" + +/* + * This marker in the EBA table means that the LEB is um-mapped. + * NOTE! It has to have the same value as %UBI_ALL. + */ +#define UBI_LEB_UNMAPPED -1 + +/* + * In case of errors, UBI tries to repeat the operation several times before + * returning error. The below constant defines how many times UBI re-tries. + */ +#define UBI_IO_RETRIES 3 + +/* + * Length of the protection queue. The length is effectively equivalent to the + * number of (global) erase cycles PEBs are protected from the wear-leveling + * worker. + */ +#define UBI_PROT_QUEUE_LEN 10 + +/* The volume ID/LEB number/erase counter is unknown */ +#define UBI_UNKNOWN -1 + +/* + * The UBI debugfs directory name pattern and maximum name length (3 for "ubi" + * + 2 for the number plus 1 for the trailing zero byte. + */ +#define UBI_DFS_DIR_NAME "ubi%d" +#define UBI_DFS_DIR_LEN (3 + 2 + 1) + +/* + * Error codes returned by the I/O sub-system. + * + * UBI_IO_FF: the read region of flash contains only 0xFFs + * UBI_IO_FF_BITFLIPS: the same as %UBI_IO_FF, but also also there was a data + * integrity error reported by the MTD driver + * (uncorrectable ECC error in case of NAND) + * UBI_IO_BAD_HDR: the EC or VID header is corrupted (bad magic or CRC) + * UBI_IO_BAD_HDR_EBADMSG: the same as %UBI_IO_BAD_HDR, but also there was a + * data integrity error reported by the MTD driver + * (uncorrectable ECC error in case of NAND) + * UBI_IO_BITFLIPS: bit-flips were detected and corrected + * + * Note, it is probably better to have bit-flip and ebadmsg as flags which can + * be or'ed with other error code. But this is a big change because there are + * may callers, so it does not worth the risk of introducing a bug + */ +enum { + UBI_IO_FF = 1, + UBI_IO_FF_BITFLIPS, + UBI_IO_BAD_HDR, + UBI_IO_BAD_HDR_EBADMSG, + UBI_IO_BITFLIPS, +}; + +/* + * Return codes of the 'ubi_eba_copy_leb()' function. + * + * MOVE_CANCEL_RACE: canceled because the volume is being deleted, the source + * PEB was put meanwhile, or there is I/O on the source PEB + * MOVE_SOURCE_RD_ERR: canceled because there was a read error from the source + * PEB + * MOVE_TARGET_RD_ERR: canceled because there was a read error from the target + * PEB + * MOVE_TARGET_WR_ERR: canceled because there was a write error to the target + * PEB + * MOVE_TARGET_BITFLIPS: canceled because a bit-flip was detected in the + * target PEB + * MOVE_RETRY: retry scrubbing the PEB + */ +enum { + MOVE_CANCEL_RACE = 1, + MOVE_SOURCE_RD_ERR, + MOVE_TARGET_RD_ERR, + MOVE_TARGET_WR_ERR, + MOVE_TARGET_BITFLIPS, + MOVE_RETRY, +}; + +/* + * Return codes of the fastmap sub-system + * + * UBI_NO_FASTMAP: No fastmap super block was found + * UBI_BAD_FASTMAP: A fastmap was found but it's unusable + */ +enum { + UBI_NO_FASTMAP = 1, + UBI_BAD_FASTMAP, +}; + +/* + * Flags for emulate_power_cut in ubi_debug_info + * + * POWER_CUT_EC_WRITE: Emulate a power cut when writing an EC header + * POWER_CUT_VID_WRITE: Emulate a power cut when writing a VID header + */ +enum { + POWER_CUT_EC_WRITE = 0x01, + POWER_CUT_VID_WRITE = 0x02, +}; + +/** + * struct ubi_wl_entry - wear-leveling entry. + * @u.rb: link in the corresponding (free/used) RB-tree + * @u.list: link in the protection queue + * @ec: erase counter + * @pnum: physical eraseblock number + * + * This data structure is used in the WL sub-system. Each physical eraseblock + * has a corresponding &struct wl_entry object which may be kept in different + * RB-trees. See WL sub-system for details. + */ +struct ubi_wl_entry { + union { + struct rb_node rb; + struct list_head list; + } u; + int ec; + int pnum; +}; + +/** + * struct ubi_ltree_entry - an entry in the lock tree. + * @rb: links RB-tree nodes + * @vol_id: volume ID of the locked logical eraseblock + * @lnum: locked logical eraseblock number + * @users: how many tasks are using this logical eraseblock or wait for it + * @mutex: read/write mutex to implement read/write access serialization to + * the (@vol_id, @lnum) logical eraseblock + * + * This data structure is used in the EBA sub-system to implement per-LEB + * locking. When a logical eraseblock is being locked - corresponding + * &struct ubi_ltree_entry object is inserted to the lock tree (@ubi->ltree). + * See EBA sub-system for details. + */ +struct ubi_ltree_entry { + struct rb_node rb; + int vol_id; + int lnum; + int users; + struct rw_semaphore mutex; +}; + +/** + * struct ubi_rename_entry - volume re-name description data structure. + * @new_name_len: new volume name length + * @new_name: new volume name + * @remove: if not zero, this volume should be removed, not re-named + * @desc: descriptor of the volume + * @list: links re-name entries into a list + * + * This data structure is utilized in the multiple volume re-name code. Namely, + * UBI first creates a list of &struct ubi_rename_entry objects from the + * &struct ubi_rnvol_req request object, and then utilizes this list to do all + * the job. + */ +struct ubi_rename_entry { + int new_name_len; + char new_name[UBI_VOL_NAME_MAX + 1]; + int remove; + struct ubi_volume_desc *desc; + struct list_head list; +}; + +struct ubi_volume_desc; + +/** + * struct ubi_fastmap_layout - in-memory fastmap data structure. + * @e: PEBs used by the current fastmap + * @to_be_tortured: if non-zero tortured this PEB + * @used_blocks: number of used PEBs + * @max_pool_size: maximal size of the user pool + * @max_wl_pool_size: maximal size of the pool used by the WL sub-system + */ +struct ubi_fastmap_layout { + struct ubi_wl_entry *e[UBI_FM_MAX_BLOCKS]; + int to_be_tortured[UBI_FM_MAX_BLOCKS]; + int used_blocks; + int max_pool_size; + int max_wl_pool_size; +}; + +/** + * struct ubi_fm_pool - in-memory fastmap pool + * @pebs: PEBs in this pool + * @used: number of used PEBs + * @size: total number of PEBs in this pool + * @max_size: maximal size of the pool + * + * A pool gets filled with up to max_size. + * If all PEBs within the pool are used a new fastmap will be written + * to the flash and the pool gets refilled with empty PEBs. + * + */ +struct ubi_fm_pool { + int pebs[UBI_FM_MAX_POOL_SIZE]; + int used; + int size; + int max_size; +}; + +/** + * struct ubi_volume - UBI volume description data structure. + * @dev: device object to make use of the the Linux device model + * @cdev: character device object to create character device + * @ubi: reference to the UBI device description object + * @vol_id: volume ID + * @ref_count: volume reference count + * @readers: number of users holding this volume in read-only mode + * @writers: number of users holding this volume in read-write mode + * @exclusive: whether somebody holds this volume in exclusive mode + * @metaonly: whether somebody is altering only meta data of this volume + * + * @reserved_pebs: how many physical eraseblocks are reserved for this volume + * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) + * @usable_leb_size: logical eraseblock size without padding + * @used_ebs: how many logical eraseblocks in this volume contain data + * @last_eb_bytes: how many bytes are stored in the last logical eraseblock + * @used_bytes: how many bytes of data this volume contains + * @alignment: volume alignment + * @data_pad: how many bytes are not used at the end of physical eraseblocks to + * satisfy the requested alignment + * @name_len: volume name length + * @name: volume name + * + * @upd_ebs: how many eraseblocks are expected to be updated + * @ch_lnum: LEB number which is being changing by the atomic LEB change + * operation + * @upd_bytes: how many bytes are expected to be received for volume update or + * atomic LEB change + * @upd_received: how many bytes were already received for volume update or + * atomic LEB change + * @upd_buf: update buffer which is used to collect update data or data for + * atomic LEB change + * + * @eba_tbl: EBA table of this volume (LEB->PEB mapping) + * @checked: %1 if this static volume was checked + * @corrupted: %1 if the volume is corrupted (static volumes only) + * @upd_marker: %1 if the update marker is set for this volume + * @updating: %1 if the volume is being updated + * @changing_leb: %1 if the atomic LEB change ioctl command is in progress + * @direct_writes: %1 if direct writes are enabled for this volume + * + * The @corrupted field indicates that the volume's contents is corrupted. + * Since UBI protects only static volumes, this field is not relevant to + * dynamic volumes - it is user's responsibility to assure their data + * integrity. + * + * The @upd_marker flag indicates that this volume is either being updated at + * the moment or is damaged because of an unclean reboot. + */ +struct ubi_volume { + struct device dev; + struct cdev cdev; + struct ubi_device *ubi; + int vol_id; + int ref_count; + int readers; + int writers; + int exclusive; + int metaonly; + + int reserved_pebs; + int vol_type; + int usable_leb_size; + int used_ebs; + int last_eb_bytes; + long long used_bytes; + int alignment; + int data_pad; + int name_len; + char name[UBI_VOL_NAME_MAX + 1]; + + int upd_ebs; + int ch_lnum; + long long upd_bytes; + long long upd_received; + void *upd_buf; + + int *eba_tbl; + unsigned int checked:1; + unsigned int corrupted:1; + unsigned int upd_marker:1; + unsigned int updating:1; + unsigned int changing_leb:1; + unsigned int direct_writes:1; +}; + +/** + * struct ubi_volume_desc - UBI volume descriptor returned when it is opened. + * @vol: reference to the corresponding volume description object + * @mode: open mode (%UBI_READONLY, %UBI_READWRITE, %UBI_EXCLUSIVE + * or %UBI_METAONLY) + */ +struct ubi_volume_desc { + struct ubi_volume *vol; + int mode; +}; + +struct ubi_wl_entry; + +/** + * struct ubi_debug_info - debugging information for an UBI device. + * + * @chk_gen: if UBI general extra checks are enabled + * @chk_io: if UBI I/O extra checks are enabled + * @chk_fastmap: if UBI fastmap extra checks are enabled + * @disable_bgt: disable the background task for testing purposes + * @emulate_bitflips: emulate bit-flips for testing purposes + * @emulate_io_failures: emulate write/erase failures for testing purposes + * @emulate_power_cut: emulate power cut for testing purposes + * @power_cut_counter: count down for writes left until emulated power cut + * @power_cut_min: minimum number of writes before emulating a power cut + * @power_cut_max: maximum number of writes until emulating a power cut + * @dfs_dir_name: name of debugfs directory containing files of this UBI device + * @dfs_dir: direntry object of the UBI device debugfs directory + * @dfs_chk_gen: debugfs knob to enable UBI general extra checks + * @dfs_chk_io: debugfs knob to enable UBI I/O extra checks + * @dfs_chk_fastmap: debugfs knob to enable UBI fastmap extra checks + * @dfs_disable_bgt: debugfs knob to disable the background task + * @dfs_emulate_bitflips: debugfs knob to emulate bit-flips + * @dfs_emulate_io_failures: debugfs knob to emulate write/erase failures + * @dfs_emulate_power_cut: debugfs knob to emulate power cuts + * @dfs_power_cut_min: debugfs knob for minimum writes before power cut + * @dfs_power_cut_max: debugfs knob for maximum writes until power cut + */ +struct ubi_debug_info { + unsigned int chk_gen:1; + unsigned int chk_io:1; + unsigned int chk_fastmap:1; + unsigned int disable_bgt:1; + unsigned int emulate_bitflips:1; + unsigned int emulate_io_failures:1; + unsigned int emulate_power_cut:2; + unsigned int power_cut_counter; + unsigned int power_cut_min; + unsigned int power_cut_max; + char dfs_dir_name[UBI_DFS_DIR_LEN + 1]; + struct dentry *dfs_dir; + struct dentry *dfs_chk_gen; + struct dentry *dfs_chk_io; + struct dentry *dfs_chk_fastmap; + struct dentry *dfs_disable_bgt; + struct dentry *dfs_emulate_bitflips; + struct dentry *dfs_emulate_io_failures; + struct dentry *dfs_emulate_power_cut; + struct dentry *dfs_power_cut_min; + struct dentry *dfs_power_cut_max; +}; + +/** + * struct ubi_device - UBI device description structure + * @dev: UBI device object to use the the Linux device model + * @cdev: character device object to create character device + * @ubi_num: UBI device number + * @ubi_name: UBI device name + * @vol_count: number of volumes in this UBI device + * @volumes: volumes of this UBI device + * @volumes_lock: protects @volumes, @rsvd_pebs, @avail_pebs, beb_rsvd_pebs, + * @beb_rsvd_level, @bad_peb_count, @good_peb_count, @vol_count, + * @vol->readers, @vol->writers, @vol->exclusive, + * @vol->metaonly, @vol->ref_count, @vol->mapping and + * @vol->eba_tbl. + * @ref_count: count of references on the UBI device + * @image_seq: image sequence number recorded on EC headers + * + * @rsvd_pebs: count of reserved physical eraseblocks + * @avail_pebs: count of available physical eraseblocks + * @beb_rsvd_pebs: how many physical eraseblocks are reserved for bad PEB + * handling + * @beb_rsvd_level: normal level of PEBs reserved for bad PEB handling + * + * @autoresize_vol_id: ID of the volume which has to be auto-resized at the end + * of UBI initialization + * @vtbl_slots: how many slots are available in the volume table + * @vtbl_size: size of the volume table in bytes + * @vtbl: in-RAM volume table copy + * @device_mutex: protects on-flash volume table and serializes volume + * creation, deletion, update, re-size, re-name and set + * property + * + * @max_ec: current highest erase counter value + * @mean_ec: current mean erase counter value + * + * @global_sqnum: global sequence number + * @ltree_lock: protects the lock tree and @global_sqnum + * @ltree: the lock tree + * @alc_mutex: serializes "atomic LEB change" operations + * + * @fm_disabled: non-zero if fastmap is disabled (default) + * @fm: in-memory data structure of the currently used fastmap + * @fm_pool: in-memory data structure of the fastmap pool + * @fm_wl_pool: in-memory data structure of the fastmap pool used by the WL + * sub-system + * @fm_protect: serializes ubi_update_fastmap(), protects @fm_buf and makes sure + * that critical sections cannot be interrupted by ubi_update_fastmap() + * @fm_buf: vmalloc()'d buffer which holds the raw fastmap + * @fm_size: fastmap size in bytes + * @fm_eba_sem: allows ubi_update_fastmap() to block EBA table changes + * @fm_work: fastmap work queue + * @fm_work_scheduled: non-zero if fastmap work was scheduled + * + * @used: RB-tree of used physical eraseblocks + * @erroneous: RB-tree of erroneous used physical eraseblocks + * @free: RB-tree of free physical eraseblocks + * @free_count: Contains the number of elements in @free + * @scrub: RB-tree of physical eraseblocks which need scrubbing + * @pq: protection queue (contain physical eraseblocks which are temporarily + * protected from the wear-leveling worker) + * @pq_head: protection queue head + * @wl_lock: protects the @used, @free, @pq, @pq_head, @lookuptbl, @move_from, + * @move_to, @move_to_put @erase_pending, @wl_scheduled, @works, + * @erroneous, @erroneous_peb_count, @fm_work_scheduled, @fm_pool, + * and @fm_wl_pool fields + * @move_mutex: serializes eraseblock moves + * @work_sem: used to wait for all the scheduled works to finish and prevent + * new works from being submitted + * @wl_scheduled: non-zero if the wear-leveling was scheduled + * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any + * physical eraseblock + * @move_from: physical eraseblock from where the data is being moved + * @move_to: physical eraseblock where the data is being moved to + * @move_to_put: if the "to" PEB was put + * @works: list of pending works + * @works_count: count of pending works + * @bgt_thread: background thread description object + * @thread_enabled: if the background thread is enabled + * @bgt_name: background thread name + * + * @flash_size: underlying MTD device size (in bytes) + * @peb_count: count of physical eraseblocks on the MTD device + * @peb_size: physical eraseblock size + * @bad_peb_limit: top limit of expected bad physical eraseblocks + * @bad_peb_count: count of bad physical eraseblocks + * @good_peb_count: count of good physical eraseblocks + * @corr_peb_count: count of corrupted physical eraseblocks (preserved and not + * used by UBI) + * @erroneous_peb_count: count of erroneous physical eraseblocks in @erroneous + * @max_erroneous: maximum allowed amount of erroneous physical eraseblocks + * @min_io_size: minimal input/output unit size of the underlying MTD device + * @hdrs_min_io_size: minimal I/O unit size used for VID and EC headers + * @ro_mode: if the UBI device is in read-only mode + * @leb_size: logical eraseblock size + * @leb_start: starting offset of logical eraseblocks within physical + * eraseblocks + * @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size + * @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size + * @vid_hdr_offset: starting offset of the volume identifier header (might be + * unaligned) + * @vid_hdr_aloffset: starting offset of the VID header aligned to + * @hdrs_min_io_size + * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset + * @bad_allowed: whether the MTD device admits of bad physical eraseblocks or + * not + * @nor_flash: non-zero if working on top of NOR flash + * @max_write_size: maximum amount of bytes the underlying flash can write at a + * time (MTD write buffer size) + * @mtd: MTD device descriptor + * + * @peb_buf: a buffer of PEB size used for different purposes + * @buf_mutex: protects @peb_buf + * @ckvol_mutex: serializes static volume checking when opening + * + * @dbg: debugging information for this UBI device + */ +struct ubi_device { + struct cdev cdev; + struct device dev; + int ubi_num; + char ubi_name[sizeof(UBI_NAME_STR)+5]; + int vol_count; + struct ubi_volume *volumes[UBI_MAX_VOLUMES+UBI_INT_VOL_COUNT]; + spinlock_t volumes_lock; + int ref_count; + int image_seq; + + int rsvd_pebs; + int avail_pebs; + int beb_rsvd_pebs; + int beb_rsvd_level; + int bad_peb_limit; + + int autoresize_vol_id; + int vtbl_slots; + int vtbl_size; + struct ubi_vtbl_record *vtbl; + struct mutex device_mutex; + + int max_ec; + /* Note, mean_ec is not updated run-time - should be fixed */ + int mean_ec; + + /* EBA sub-system's stuff */ + unsigned long long global_sqnum; + spinlock_t ltree_lock; + struct rb_root ltree; + struct mutex alc_mutex; + + /* Fastmap stuff */ + int fm_disabled; + struct ubi_fastmap_layout *fm; + struct ubi_fm_pool fm_pool; + struct ubi_fm_pool fm_wl_pool; + struct rw_semaphore fm_eba_sem; + struct rw_semaphore fm_protect; + void *fm_buf; + size_t fm_size; + struct work_struct fm_work; + int fm_work_scheduled; + + /* Wear-leveling sub-system's stuff */ + struct rb_root used; + struct rb_root erroneous; + struct rb_root free; + int free_count; + struct rb_root scrub; + struct list_head pq[UBI_PROT_QUEUE_LEN]; + int pq_head; + spinlock_t wl_lock; + struct mutex move_mutex; + struct rw_semaphore work_sem; + int wl_scheduled; + struct ubi_wl_entry **lookuptbl; + struct ubi_wl_entry *move_from; + struct ubi_wl_entry *move_to; + int move_to_put; + struct list_head works; + int works_count; + struct task_struct *bgt_thread; + int thread_enabled; + char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2]; + + /* I/O sub-system's stuff */ + long long flash_size; + int peb_count; + int peb_size; + int bad_peb_count; + int good_peb_count; + int corr_peb_count; + int erroneous_peb_count; + int max_erroneous; + int min_io_size; + int hdrs_min_io_size; + int ro_mode; + int leb_size; + int leb_start; + int ec_hdr_alsize; + int vid_hdr_alsize; + int vid_hdr_offset; + int vid_hdr_aloffset; + int vid_hdr_shift; + unsigned int bad_allowed:1; + unsigned int nor_flash:1; + int max_write_size; + struct mtd_info *mtd; + + void *peb_buf; + struct mutex buf_mutex; + struct mutex ckvol_mutex; + + struct ubi_debug_info dbg; +}; + +/** + * struct ubi_ainf_peb - attach information about a physical eraseblock. + * @ec: erase counter (%UBI_UNKNOWN if it is unknown) + * @pnum: physical eraseblock number + * @vol_id: ID of the volume this LEB belongs to + * @lnum: logical eraseblock number + * @scrub: if this physical eraseblock needs scrubbing + * @copy_flag: this LEB is a copy (@copy_flag is set in VID header of this LEB) + * @sqnum: sequence number + * @u: unions RB-tree or @list links + * @u.rb: link in the per-volume RB-tree of &struct ubi_ainf_peb objects + * @u.list: link in one of the eraseblock lists + * + * One object of this type is allocated for each physical eraseblock when + * attaching an MTD device. Note, if this PEB does not belong to any LEB / + * volume, the @vol_id and @lnum fields are initialized to %UBI_UNKNOWN. + */ +struct ubi_ainf_peb { + int ec; + int pnum; + int vol_id; + int lnum; + unsigned int scrub:1; + unsigned int copy_flag:1; + unsigned long long sqnum; + union { + struct rb_node rb; + struct list_head list; + } u; +}; + +/** + * struct ubi_ainf_volume - attaching information about a volume. + * @vol_id: volume ID + * @highest_lnum: highest logical eraseblock number in this volume + * @leb_count: number of logical eraseblocks in this volume + * @vol_type: volume type + * @used_ebs: number of used logical eraseblocks in this volume (only for + * static volumes) + * @last_data_size: amount of data in the last logical eraseblock of this + * volume (always equivalent to the usable logical eraseblock + * size in case of dynamic volumes) + * @data_pad: how many bytes at the end of logical eraseblocks of this volume + * are not used (due to volume alignment) + * @compat: compatibility flags of this volume + * @rb: link in the volume RB-tree + * @root: root of the RB-tree containing all the eraseblock belonging to this + * volume (&struct ubi_ainf_peb objects) + * + * One object of this type is allocated for each volume when attaching an MTD + * device. + */ +struct ubi_ainf_volume { + int vol_id; + int highest_lnum; + int leb_count; + int vol_type; + int used_ebs; + int last_data_size; + int data_pad; + int compat; + struct rb_node rb; + struct rb_root root; +}; + +/** + * struct ubi_attach_info - MTD device attaching information. + * @volumes: root of the volume RB-tree + * @corr: list of corrupted physical eraseblocks + * @free: list of free physical eraseblocks + * @erase: list of physical eraseblocks which have to be erased + * @alien: list of physical eraseblocks which should not be used by UBI (e.g., + * those belonging to "preserve"-compatible internal volumes) + * @corr_peb_count: count of PEBs in the @corr list + * @empty_peb_count: count of PEBs which are presumably empty (contain only + * 0xFF bytes) + * @alien_peb_count: count of PEBs in the @alien list + * @bad_peb_count: count of bad physical eraseblocks + * @maybe_bad_peb_count: count of bad physical eraseblocks which are not marked + * as bad yet, but which look like bad + * @vols_found: number of volumes found + * @highest_vol_id: highest volume ID + * @is_empty: flag indicating whether the MTD device is empty or not + * @min_ec: lowest erase counter value + * @max_ec: highest erase counter value + * @max_sqnum: highest sequence number value + * @mean_ec: mean erase counter value + * @ec_sum: a temporary variable used when calculating @mean_ec + * @ec_count: a temporary variable used when calculating @mean_ec + * @aeb_slab_cache: slab cache for &struct ubi_ainf_peb objects + * + * This data structure contains the result of attaching an MTD device and may + * be used by other UBI sub-systems to build final UBI data structures, further + * error-recovery and so on. + */ +struct ubi_attach_info { + struct rb_root volumes; + struct list_head corr; + struct list_head free; + struct list_head erase; + struct list_head alien; + int corr_peb_count; + int empty_peb_count; + int alien_peb_count; + int bad_peb_count; + int maybe_bad_peb_count; + int vols_found; + int highest_vol_id; + int is_empty; + int min_ec; + int max_ec; + unsigned long long max_sqnum; + int mean_ec; + uint64_t ec_sum; + int ec_count; + struct kmem_cache *aeb_slab_cache; +}; + +/** + * struct ubi_work - UBI work description data structure. + * @list: a link in the list of pending works + * @func: worker function + * @e: physical eraseblock to erase + * @vol_id: the volume ID on which this erasure is being performed + * @lnum: the logical eraseblock number + * @torture: if the physical eraseblock has to be tortured + * @anchor: produce a anchor PEB to by used by fastmap + * + * The @func pointer points to the worker function. If the @shutdown argument is + * not zero, the worker has to free the resources and exit immediately as the + * WL sub-system is shutting down. + * The worker has to return zero in case of success and a negative error code in + * case of failure. + */ +struct ubi_work { + struct list_head list; + int (*func)(struct ubi_device *ubi, struct ubi_work *wrk, int shutdown); + /* The below fields are only relevant to erasure works */ + struct ubi_wl_entry *e; + int vol_id; + int lnum; + int torture; + int anchor; +}; + +#include "debug.h" + +extern struct kmem_cache *ubi_wl_entry_slab; +extern const struct file_operations ubi_ctrl_cdev_operations; +extern const struct file_operations ubi_cdev_operations; +extern const struct file_operations ubi_vol_cdev_operations; +extern struct class *ubi_class; +extern struct mutex ubi_devices_mutex; +extern struct blocking_notifier_head ubi_notifiers; + +/* attach.c */ +int ubi_add_to_av(struct ubi_device *ubi, struct ubi_attach_info *ai, int pnum, + int ec, const struct ubi_vid_hdr *vid_hdr, int bitflips); +struct ubi_ainf_volume *ubi_find_av(const struct ubi_attach_info *ai, + int vol_id); +void ubi_remove_av(struct ubi_attach_info *ai, struct ubi_ainf_volume *av); +struct ubi_ainf_peb *ubi_early_get_peb(struct ubi_device *ubi, + struct ubi_attach_info *ai); +int ubi_attach(struct ubi_device *ubi, int force_scan); +void ubi_destroy_ai(struct ubi_attach_info *ai); + +/* vtbl.c */ +int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, + struct ubi_vtbl_record *vtbl_rec); +int ubi_vtbl_rename_volumes(struct ubi_device *ubi, + struct list_head *rename_list); +int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_attach_info *ai); + +/* vmt.c */ +int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req); +int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl); +int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs); +int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list); +int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol); +void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol); + +/* upd.c */ +int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol, + long long bytes); +int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol, + const void __user *buf, int count); +int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, + const struct ubi_leb_change_req *req); +int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol, + const void __user *buf, int count); + +/* misc.c */ +int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, + int length); +int ubi_check_volume(struct ubi_device *ubi, int vol_id); +void ubi_update_reserved(struct ubi_device *ubi); +void ubi_calculate_reserved(struct ubi_device *ubi); +int ubi_check_pattern(const void *buf, uint8_t patt, int size); + +/* eba.c */ +int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol, + int lnum); +int ubi_eba_read_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, + void *buf, int offset, int len, int check); +int ubi_eba_read_leb_sg(struct ubi_device *ubi, struct ubi_volume *vol, + struct ubi_sgl *sgl, int lnum, int offset, int len, + int check); +int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, + const void *buf, int offset, int len); +int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol, + int lnum, const void *buf, int len, int used_ebs); +int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, + int lnum, const void *buf, int len); +int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, + struct ubi_vid_hdr *vid_hdr); +int ubi_eba_init(struct ubi_device *ubi, struct ubi_attach_info *ai); +unsigned long long ubi_next_sqnum(struct ubi_device *ubi); +int self_check_eba(struct ubi_device *ubi, struct ubi_attach_info *ai_fastmap, + struct ubi_attach_info *ai_scan); + +/* wl.c */ +int ubi_wl_get_peb(struct ubi_device *ubi); +int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum, + int pnum, int torture); +int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum); +int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum); +int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai); +void ubi_wl_close(struct ubi_device *ubi); +int ubi_thread(void *u); +struct ubi_wl_entry *ubi_wl_get_fm_peb(struct ubi_device *ubi, int anchor); +int ubi_wl_put_fm_peb(struct ubi_device *ubi, struct ubi_wl_entry *used_e, + int lnum, int torture); +int ubi_is_erase_work(struct ubi_work *wrk); +void ubi_refill_pools(struct ubi_device *ubi); +int ubi_ensure_anchor_pebs(struct ubi_device *ubi); + +/* io.c */ +int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset, + int len); +int ubi_io_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, + int len); +int ubi_io_sync_erase(struct ubi_device *ubi, int pnum, int torture); +int ubi_io_is_bad(const struct ubi_device *ubi, int pnum); +int ubi_io_mark_bad(const struct ubi_device *ubi, int pnum); +int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, + struct ubi_ec_hdr *ec_hdr, int verbose); +int ubi_io_write_ec_hdr(struct ubi_device *ubi, int pnum, + struct ubi_ec_hdr *ec_hdr); +int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, + struct ubi_vid_hdr *vid_hdr, int verbose); +int ubi_io_write_vid_hdr(struct ubi_device *ubi, int pnum, + struct ubi_vid_hdr *vid_hdr); + +/* build.c */ +int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, + int vid_hdr_offset, int max_beb_per1024); +int ubi_detach_mtd_dev(int ubi_num, int anyway); +struct ubi_device *ubi_get_device(int ubi_num); +void ubi_put_device(struct ubi_device *ubi); +struct ubi_device *ubi_get_by_major(int major); +int ubi_major2num(int major); +int ubi_volume_notify(struct ubi_device *ubi, struct ubi_volume *vol, + int ntype); +int ubi_notify_all(struct ubi_device *ubi, int ntype, + struct notifier_block *nb); +int ubi_enumerate_volumes(struct notifier_block *nb); +void ubi_free_internal_volumes(struct ubi_device *ubi); + +/* kapi.c */ +void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di); +void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol, + struct ubi_volume_info *vi); +/* scan.c */ +int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb, + int pnum, const struct ubi_vid_hdr *vid_hdr); + +/* fastmap.c */ +#ifdef CONFIG_MTD_UBI_FASTMAP +size_t ubi_calc_fm_size(struct ubi_device *ubi); +int ubi_update_fastmap(struct ubi_device *ubi); +int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai, + int fm_anchor); +#else +static inline int ubi_update_fastmap(struct ubi_device *ubi) { return 0; } +#endif + +/* block.c */ +#ifdef CONFIG_MTD_UBI_BLOCK +int ubiblock_init(void); +void ubiblock_exit(void); +int ubiblock_create(struct ubi_volume_info *vi); +int ubiblock_remove(struct ubi_volume_info *vi); +#else +static inline int ubiblock_init(void) { return 0; } +static inline void ubiblock_exit(void) {} +static inline int ubiblock_create(struct ubi_volume_info *vi) +{ + return -ENOSYS; +} +static inline int ubiblock_remove(struct ubi_volume_info *vi) +{ + return -ENOSYS; +} +#endif + +/* + * ubi_for_each_free_peb - walk the UBI free RB tree. + * @ubi: UBI device description object + * @e: a pointer to a ubi_wl_entry to use as cursor + * @pos: a pointer to RB-tree entry type to use as a loop counter + */ +#define ubi_for_each_free_peb(ubi, e, tmp_rb) \ + ubi_rb_for_each_entry((tmp_rb), (e), &(ubi)->free, u.rb) + +/* + * ubi_for_each_used_peb - walk the UBI used RB tree. + * @ubi: UBI device description object + * @e: a pointer to a ubi_wl_entry to use as cursor + * @pos: a pointer to RB-tree entry type to use as a loop counter + */ +#define ubi_for_each_used_peb(ubi, e, tmp_rb) \ + ubi_rb_for_each_entry((tmp_rb), (e), &(ubi)->used, u.rb) + +/* + * ubi_for_each_scub_peb - walk the UBI scub RB tree. + * @ubi: UBI device description object + * @e: a pointer to a ubi_wl_entry to use as cursor + * @pos: a pointer to RB-tree entry type to use as a loop counter + */ +#define ubi_for_each_scrub_peb(ubi, e, tmp_rb) \ + ubi_rb_for_each_entry((tmp_rb), (e), &(ubi)->scrub, u.rb) + +/* + * ubi_for_each_protected_peb - walk the UBI protection queue. + * @ubi: UBI device description object + * @i: a integer used as counter + * @e: a pointer to a ubi_wl_entry to use as cursor + */ +#define ubi_for_each_protected_peb(ubi, i, e) \ + for ((i) = 0; (i) < UBI_PROT_QUEUE_LEN; (i)++) \ + list_for_each_entry((e), &(ubi->pq[(i)]), u.list) + +/* + * ubi_rb_for_each_entry - walk an RB-tree. + * @rb: a pointer to type 'struct rb_node' to use as a loop counter + * @pos: a pointer to RB-tree entry type to use as a loop counter + * @root: RB-tree's root + * @member: the name of the 'struct rb_node' within the RB-tree entry + */ +#define ubi_rb_for_each_entry(rb, pos, root, member) \ + for (rb = rb_first(root), \ + pos = (rb ? container_of(rb, typeof(*pos), member) : NULL); \ + rb; \ + rb = rb_next(rb), \ + pos = (rb ? container_of(rb, typeof(*pos), member) : NULL)) + +/* + * ubi_move_aeb_to_list - move a PEB from the volume tree to a list. + * + * @av: volume attaching information + * @aeb: attaching eraseblock information + * @list: the list to move to + */ +static inline void ubi_move_aeb_to_list(struct ubi_ainf_volume *av, + struct ubi_ainf_peb *aeb, + struct list_head *list) +{ + rb_erase(&aeb->u.rb, &av->root); + list_add_tail(&aeb->u.list, list); +} + +/** + * ubi_zalloc_vid_hdr - allocate a volume identifier header object. + * @ubi: UBI device description object + * @gfp_flags: GFP flags to allocate with + * + * This function returns a pointer to the newly allocated and zero-filled + * volume identifier header object in case of success and %NULL in case of + * failure. + */ +static inline struct ubi_vid_hdr * +ubi_zalloc_vid_hdr(const struct ubi_device *ubi, gfp_t gfp_flags) +{ + void *vid_hdr; + + vid_hdr = kzalloc(ubi->vid_hdr_alsize, gfp_flags); + if (!vid_hdr) + return NULL; + + /* + * VID headers may be stored at un-aligned flash offsets, so we shift + * the pointer. + */ + return vid_hdr + ubi->vid_hdr_shift; +} + +/** + * ubi_free_vid_hdr - free a volume identifier header object. + * @ubi: UBI device description object + * @vid_hdr: the object to free + */ +static inline void ubi_free_vid_hdr(const struct ubi_device *ubi, + struct ubi_vid_hdr *vid_hdr) +{ + void *p = vid_hdr; + + if (!p) + return; + + kfree(p - ubi->vid_hdr_shift); +} + +/* + * This function is equivalent to 'ubi_io_read()', but @offset is relative to + * the beginning of the logical eraseblock, not to the beginning of the + * physical eraseblock. + */ +static inline int ubi_io_read_data(const struct ubi_device *ubi, void *buf, + int pnum, int offset, int len) +{ + ubi_assert(offset >= 0); + return ubi_io_read(ubi, buf, pnum, offset + ubi->leb_start, len); +} + +/* + * This function is equivalent to 'ubi_io_write()', but @offset is relative to + * the beginning of the logical eraseblock, not to the beginning of the + * physical eraseblock. + */ +static inline int ubi_io_write_data(struct ubi_device *ubi, const void *buf, + int pnum, int offset, int len) +{ + ubi_assert(offset >= 0); + return ubi_io_write(ubi, buf, pnum, offset + ubi->leb_start, len); +} + +/** + * ubi_ro_mode - switch to read-only mode. + * @ubi: UBI device description object + */ +static inline void ubi_ro_mode(struct ubi_device *ubi) +{ + if (!ubi->ro_mode) { + ubi->ro_mode = 1; + ubi_warn(ubi, "switch to read-only mode"); + dump_stack(); + } +} + +/** + * vol_id2idx - get table index by volume ID. + * @ubi: UBI device description object + * @vol_id: volume ID + */ +static inline int vol_id2idx(const struct ubi_device *ubi, int vol_id) +{ + if (vol_id >= UBI_INTERNAL_VOL_START) + return vol_id - UBI_INTERNAL_VOL_START + ubi->vtbl_slots; + else + return vol_id; +} + +/** + * idx2vol_id - get volume ID by table index. + * @ubi: UBI device description object + * @idx: table index + */ +static inline int idx2vol_id(const struct ubi_device *ubi, int idx) +{ + if (idx >= ubi->vtbl_slots) + return idx - ubi->vtbl_slots + UBI_INTERNAL_VOL_START; + else + return idx; +} + +#endif /* !__UBI_UBI_H__ */ diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c new file mode 100644 index 000000000..2a1b6e037 --- /dev/null +++ b/drivers/mtd/ubi/upd.c @@ -0,0 +1,433 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + * + * Jan 2007: Alexander Schmidt, hacked per-volume update. + */ + +/* + * This file contains implementation of the volume update and atomic LEB change + * functionality. + * + * The update operation is based on the per-volume update marker which is + * stored in the volume table. The update marker is set before the update + * starts, and removed after the update has been finished. So if the update was + * interrupted by an unclean re-boot or due to some other reasons, the update + * marker stays on the flash media and UBI finds it when it attaches the MTD + * device next time. If the update marker is set for a volume, the volume is + * treated as damaged and most I/O operations are prohibited. Only a new update + * operation is allowed. + * + * Note, in general it is possible to implement the update operation as a + * transaction with a roll-back capability. + */ + +#include <linux/err.h> +#include <linux/uaccess.h> +#include <linux/math64.h> +#include "ubi.h" + +/** + * set_update_marker - set update marker. + * @ubi: UBI device description object + * @vol: volume description object + * + * This function sets the update marker flag for volume @vol. Returns zero + * in case of success and a negative error code in case of failure. + */ +static int set_update_marker(struct ubi_device *ubi, struct ubi_volume *vol) +{ + int err; + struct ubi_vtbl_record vtbl_rec; + + dbg_gen("set update marker for volume %d", vol->vol_id); + + if (vol->upd_marker) { + ubi_assert(ubi->vtbl[vol->vol_id].upd_marker); + dbg_gen("already set"); + return 0; + } + + vtbl_rec = ubi->vtbl[vol->vol_id]; + vtbl_rec.upd_marker = 1; + + mutex_lock(&ubi->device_mutex); + err = ubi_change_vtbl_record(ubi, vol->vol_id, &vtbl_rec); + vol->upd_marker = 1; + mutex_unlock(&ubi->device_mutex); + return err; +} + +/** + * clear_update_marker - clear update marker. + * @ubi: UBI device description object + * @vol: volume description object + * @bytes: new data size in bytes + * + * This function clears the update marker for volume @vol, sets new volume + * data size and clears the "corrupted" flag (static volumes only). Returns + * zero in case of success and a negative error code in case of failure. + */ +static int clear_update_marker(struct ubi_device *ubi, struct ubi_volume *vol, + long long bytes) +{ + int err; + struct ubi_vtbl_record vtbl_rec; + + dbg_gen("clear update marker for volume %d", vol->vol_id); + + vtbl_rec = ubi->vtbl[vol->vol_id]; + ubi_assert(vol->upd_marker && vtbl_rec.upd_marker); + vtbl_rec.upd_marker = 0; + + if (vol->vol_type == UBI_STATIC_VOLUME) { + vol->corrupted = 0; + vol->used_bytes = bytes; + vol->used_ebs = div_u64_rem(bytes, vol->usable_leb_size, + &vol->last_eb_bytes); + if (vol->last_eb_bytes) + vol->used_ebs += 1; + else + vol->last_eb_bytes = vol->usable_leb_size; + } + + mutex_lock(&ubi->device_mutex); + err = ubi_change_vtbl_record(ubi, vol->vol_id, &vtbl_rec); + vol->upd_marker = 0; + mutex_unlock(&ubi->device_mutex); + return err; +} + +/** + * ubi_start_update - start volume update. + * @ubi: UBI device description object + * @vol: volume description object + * @bytes: update bytes + * + * This function starts volume update operation. If @bytes is zero, the volume + * is just wiped out. Returns zero in case of success and a negative error code + * in case of failure. + */ +int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol, + long long bytes) +{ + int i, err; + + dbg_gen("start update of volume %d, %llu bytes", vol->vol_id, bytes); + ubi_assert(!vol->updating && !vol->changing_leb); + vol->updating = 1; + + vol->upd_buf = vmalloc(ubi->leb_size); + if (!vol->upd_buf) + return -ENOMEM; + + err = set_update_marker(ubi, vol); + if (err) + return err; + + /* Before updating - wipe out the volume */ + for (i = 0; i < vol->reserved_pebs; i++) { + err = ubi_eba_unmap_leb(ubi, vol, i); + if (err) + return err; + } + + if (bytes == 0) { + err = ubi_wl_flush(ubi, UBI_ALL, UBI_ALL); + if (err) + return err; + + err = clear_update_marker(ubi, vol, 0); + if (err) + return err; + + vfree(vol->upd_buf); + vol->updating = 0; + return 0; + } + + vol->upd_ebs = div_u64(bytes + vol->usable_leb_size - 1, + vol->usable_leb_size); + vol->upd_bytes = bytes; + vol->upd_received = 0; + return 0; +} + +/** + * ubi_start_leb_change - start atomic LEB change. + * @ubi: UBI device description object + * @vol: volume description object + * @req: operation request + * + * This function starts atomic LEB change operation. Returns zero in case of + * success and a negative error code in case of failure. + */ +int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, + const struct ubi_leb_change_req *req) +{ + ubi_assert(!vol->updating && !vol->changing_leb); + + dbg_gen("start changing LEB %d:%d, %u bytes", + vol->vol_id, req->lnum, req->bytes); + if (req->bytes == 0) + return ubi_eba_atomic_leb_change(ubi, vol, req->lnum, NULL, 0); + + vol->upd_bytes = req->bytes; + vol->upd_received = 0; + vol->changing_leb = 1; + vol->ch_lnum = req->lnum; + + vol->upd_buf = vmalloc(req->bytes); + if (!vol->upd_buf) + return -ENOMEM; + + return 0; +} + +/** + * write_leb - write update data. + * @ubi: UBI device description object + * @vol: volume description object + * @lnum: logical eraseblock number + * @buf: data to write + * @len: data size + * @used_ebs: how many logical eraseblocks will this volume contain (static + * volumes only) + * + * This function writes update data to corresponding logical eraseblock. In + * case of dynamic volume, this function checks if the data contains 0xFF bytes + * at the end. If yes, the 0xFF bytes are cut and not written. So if the whole + * buffer contains only 0xFF bytes, the LEB is left unmapped. + * + * The reason why we skip the trailing 0xFF bytes in case of dynamic volume is + * that we want to make sure that more data may be appended to the logical + * eraseblock in future. Indeed, writing 0xFF bytes may have side effects and + * this PEB won't be writable anymore. So if one writes the file-system image + * to the UBI volume where 0xFFs mean free space - UBI makes sure this free + * space is writable after the update. + * + * We do not do this for static volumes because they are read-only. But this + * also cannot be done because we have to store per-LEB CRC and the correct + * data length. + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, + void *buf, int len, int used_ebs) +{ + int err; + + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + int l = ALIGN(len, ubi->min_io_size); + + memset(buf + len, 0xFF, l - len); + len = ubi_calc_data_len(ubi, buf, l); + if (len == 0) { + dbg_gen("all %d bytes contain 0xFF - skip", len); + return 0; + } + + err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len); + } else { + /* + * When writing static volume, and this is the last logical + * eraseblock, the length (@len) does not have to be aligned to + * the minimal flash I/O unit. The 'ubi_eba_write_leb_st()' + * function accepts exact (unaligned) length and stores it in + * the VID header. And it takes care of proper alignment by + * padding the buffer. Here we just make sure the padding will + * contain zeros, not random trash. + */ + memset(buf + len, 0, vol->usable_leb_size - len); + err = ubi_eba_write_leb_st(ubi, vol, lnum, buf, len, used_ebs); + } + + return err; +} + +/** + * ubi_more_update_data - write more update data. + * @ubi: UBI device description object + * @vol: volume description object + * @buf: write data (user-space memory buffer) + * @count: how much bytes to write + * + * This function writes more data to the volume which is being updated. It may + * be called arbitrary number of times until all the update data arriveis. This + * function returns %0 in case of success, number of bytes written during the + * last call if the whole volume update has been successfully finished, and a + * negative error code in case of failure. + */ +int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol, + const void __user *buf, int count) +{ + int lnum, offs, err = 0, len, to_write = count; + + dbg_gen("write %d of %lld bytes, %lld already passed", + count, vol->upd_bytes, vol->upd_received); + + if (ubi->ro_mode) + return -EROFS; + + lnum = div_u64_rem(vol->upd_received, vol->usable_leb_size, &offs); + if (vol->upd_received + count > vol->upd_bytes) + to_write = count = vol->upd_bytes - vol->upd_received; + + /* + * When updating volumes, we accumulate whole logical eraseblock of + * data and write it at once. + */ + if (offs != 0) { + /* + * This is a write to the middle of the logical eraseblock. We + * copy the data to our update buffer and wait for more data or + * flush it if the whole eraseblock is written or the update + * is finished. + */ + + len = vol->usable_leb_size - offs; + if (len > count) + len = count; + + err = copy_from_user(vol->upd_buf + offs, buf, len); + if (err) + return -EFAULT; + + if (offs + len == vol->usable_leb_size || + vol->upd_received + len == vol->upd_bytes) { + int flush_len = offs + len; + + /* + * OK, we gathered either the whole eraseblock or this + * is the last chunk, it's time to flush the buffer. + */ + ubi_assert(flush_len <= vol->usable_leb_size); + err = write_leb(ubi, vol, lnum, vol->upd_buf, flush_len, + vol->upd_ebs); + if (err) + return err; + } + + vol->upd_received += len; + count -= len; + buf += len; + lnum += 1; + } + + /* + * If we've got more to write, let's continue. At this point we know we + * are starting from the beginning of an eraseblock. + */ + while (count) { + if (count > vol->usable_leb_size) + len = vol->usable_leb_size; + else + len = count; + + err = copy_from_user(vol->upd_buf, buf, len); + if (err) + return -EFAULT; + + if (len == vol->usable_leb_size || + vol->upd_received + len == vol->upd_bytes) { + err = write_leb(ubi, vol, lnum, vol->upd_buf, + len, vol->upd_ebs); + if (err) + break; + } + + vol->upd_received += len; + count -= len; + lnum += 1; + buf += len; + } + + ubi_assert(vol->upd_received <= vol->upd_bytes); + if (vol->upd_received == vol->upd_bytes) { + err = ubi_wl_flush(ubi, UBI_ALL, UBI_ALL); + if (err) + return err; + /* The update is finished, clear the update marker */ + err = clear_update_marker(ubi, vol, vol->upd_bytes); + if (err) + return err; + vol->updating = 0; + err = to_write; + vfree(vol->upd_buf); + } + + return err; +} + +/** + * ubi_more_leb_change_data - accept more data for atomic LEB change. + * @ubi: UBI device description object + * @vol: volume description object + * @buf: write data (user-space memory buffer) + * @count: how much bytes to write + * + * This function accepts more data to the volume which is being under the + * "atomic LEB change" operation. It may be called arbitrary number of times + * until all data arrives. This function returns %0 in case of success, number + * of bytes written during the last call if the whole "atomic LEB change" + * operation has been successfully finished, and a negative error code in case + * of failure. + */ +int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol, + const void __user *buf, int count) +{ + int err; + + dbg_gen("write %d of %lld bytes, %lld already passed", + count, vol->upd_bytes, vol->upd_received); + + if (ubi->ro_mode) + return -EROFS; + + if (vol->upd_received + count > vol->upd_bytes) + count = vol->upd_bytes - vol->upd_received; + + err = copy_from_user(vol->upd_buf + vol->upd_received, buf, count); + if (err) + return -EFAULT; + + vol->upd_received += count; + + if (vol->upd_received == vol->upd_bytes) { + int len = ALIGN((int)vol->upd_bytes, ubi->min_io_size); + + memset(vol->upd_buf + vol->upd_bytes, 0xFF, + len - vol->upd_bytes); + len = ubi_calc_data_len(ubi, vol->upd_buf, len); + err = ubi_eba_atomic_leb_change(ubi, vol, vol->ch_lnum, + vol->upd_buf, len); + if (err) + return err; + } + + ubi_assert(vol->upd_received <= vol->upd_bytes); + if (vol->upd_received == vol->upd_bytes) { + vol->changing_leb = 0; + err = count; + vfree(vol->upd_buf); + } + + return err; +} diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c new file mode 100644 index 000000000..ff4d97848 --- /dev/null +++ b/drivers/mtd/ubi/vmt.c @@ -0,0 +1,863 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file contains implementation of volume creation, deletion, updating and + * resizing. + */ + +#include <linux/err.h> +#include <linux/math64.h> +#include <linux/slab.h> +#include <linux/export.h> +#include "ubi.h" + +static int self_check_volumes(struct ubi_device *ubi); + +static ssize_t vol_attribute_show(struct device *dev, + struct device_attribute *attr, char *buf); + +/* Device attributes corresponding to files in '/<sysfs>/class/ubi/ubiX_Y' */ +static struct device_attribute attr_vol_reserved_ebs = + __ATTR(reserved_ebs, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_type = + __ATTR(type, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_name = + __ATTR(name, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_corrupted = + __ATTR(corrupted, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_alignment = + __ATTR(alignment, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_usable_eb_size = + __ATTR(usable_eb_size, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_data_bytes = + __ATTR(data_bytes, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_upd_marker = + __ATTR(upd_marker, S_IRUGO, vol_attribute_show, NULL); + +/* + * "Show" method for files in '/<sysfs>/class/ubi/ubiX_Y/'. + * + * Consider a situation: + * A. process 1 opens a sysfs file related to volume Y, say + * /<sysfs>/class/ubi/ubiX_Y/reserved_ebs; + * B. process 2 removes volume Y; + * C. process 1 starts reading the /<sysfs>/class/ubi/ubiX_Y/reserved_ebs file; + * + * In this situation, this function will return %-ENODEV because it will find + * out that the volume was removed from the @ubi->volumes array. + */ +static ssize_t vol_attribute_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret; + struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); + struct ubi_device *ubi; + + ubi = ubi_get_device(vol->ubi->ubi_num); + if (!ubi) + return -ENODEV; + + spin_lock(&ubi->volumes_lock); + if (!ubi->volumes[vol->vol_id]) { + spin_unlock(&ubi->volumes_lock); + ubi_put_device(ubi); + return -ENODEV; + } + /* Take a reference to prevent volume removal */ + vol->ref_count += 1; + spin_unlock(&ubi->volumes_lock); + + if (attr == &attr_vol_reserved_ebs) + ret = sprintf(buf, "%d\n", vol->reserved_pebs); + else if (attr == &attr_vol_type) { + const char *tp; + + if (vol->vol_type == UBI_DYNAMIC_VOLUME) + tp = "dynamic"; + else + tp = "static"; + ret = sprintf(buf, "%s\n", tp); + } else if (attr == &attr_vol_name) + ret = sprintf(buf, "%s\n", vol->name); + else if (attr == &attr_vol_corrupted) + ret = sprintf(buf, "%d\n", vol->corrupted); + else if (attr == &attr_vol_alignment) + ret = sprintf(buf, "%d\n", vol->alignment); + else if (attr == &attr_vol_usable_eb_size) + ret = sprintf(buf, "%d\n", vol->usable_leb_size); + else if (attr == &attr_vol_data_bytes) + ret = sprintf(buf, "%lld\n", vol->used_bytes); + else if (attr == &attr_vol_upd_marker) + ret = sprintf(buf, "%d\n", vol->upd_marker); + else + /* This must be a bug */ + ret = -EINVAL; + + /* We've done the operation, drop volume and UBI device references */ + spin_lock(&ubi->volumes_lock); + vol->ref_count -= 1; + ubi_assert(vol->ref_count >= 0); + spin_unlock(&ubi->volumes_lock); + ubi_put_device(ubi); + return ret; +} + +/* Release method for volume devices */ +static void vol_release(struct device *dev) +{ + struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); + + kfree(vol->eba_tbl); + kfree(vol); +} + +/** + * volume_sysfs_init - initialize sysfs for new volume. + * @ubi: UBI device description object + * @vol: volume description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + * + * Note, this function does not free allocated resources in case of failure - + * the caller does it. This is because this would cause release() here and the + * caller would oops. + */ +static int volume_sysfs_init(struct ubi_device *ubi, struct ubi_volume *vol) +{ + int err; + + err = device_create_file(&vol->dev, &attr_vol_reserved_ebs); + if (err) + return err; + err = device_create_file(&vol->dev, &attr_vol_type); + if (err) + return err; + err = device_create_file(&vol->dev, &attr_vol_name); + if (err) + return err; + err = device_create_file(&vol->dev, &attr_vol_corrupted); + if (err) + return err; + err = device_create_file(&vol->dev, &attr_vol_alignment); + if (err) + return err; + err = device_create_file(&vol->dev, &attr_vol_usable_eb_size); + if (err) + return err; + err = device_create_file(&vol->dev, &attr_vol_data_bytes); + if (err) + return err; + err = device_create_file(&vol->dev, &attr_vol_upd_marker); + return err; +} + +/** + * volume_sysfs_close - close sysfs for a volume. + * @vol: volume description object + */ +static void volume_sysfs_close(struct ubi_volume *vol) +{ + device_remove_file(&vol->dev, &attr_vol_upd_marker); + device_remove_file(&vol->dev, &attr_vol_data_bytes); + device_remove_file(&vol->dev, &attr_vol_usable_eb_size); + device_remove_file(&vol->dev, &attr_vol_alignment); + device_remove_file(&vol->dev, &attr_vol_corrupted); + device_remove_file(&vol->dev, &attr_vol_name); + device_remove_file(&vol->dev, &attr_vol_type); + device_remove_file(&vol->dev, &attr_vol_reserved_ebs); + device_unregister(&vol->dev); +} + +/** + * ubi_create_volume - create volume. + * @ubi: UBI device description object + * @req: volume creation request + * + * This function creates volume described by @req. If @req->vol_id id + * %UBI_VOL_NUM_AUTO, this function automatically assign ID to the new volume + * and saves it in @req->vol_id. Returns zero in case of success and a negative + * error code in case of failure. Note, the caller has to have the + * @ubi->device_mutex locked. + */ +int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) +{ + int i, err, vol_id = req->vol_id, do_free = 1; + struct ubi_volume *vol; + struct ubi_vtbl_record vtbl_rec; + dev_t dev; + + if (ubi->ro_mode) + return -EROFS; + + vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); + if (!vol) + return -ENOMEM; + + spin_lock(&ubi->volumes_lock); + if (vol_id == UBI_VOL_NUM_AUTO) { + /* Find unused volume ID */ + dbg_gen("search for vacant volume ID"); + for (i = 0; i < ubi->vtbl_slots; i++) + if (!ubi->volumes[i]) { + vol_id = i; + break; + } + + if (vol_id == UBI_VOL_NUM_AUTO) { + ubi_err(ubi, "out of volume IDs"); + err = -ENFILE; + goto out_unlock; + } + req->vol_id = vol_id; + } + + dbg_gen("create device %d, volume %d, %llu bytes, type %d, name %s", + ubi->ubi_num, vol_id, (unsigned long long)req->bytes, + (int)req->vol_type, req->name); + + /* Ensure that this volume does not exist */ + err = -EEXIST; + if (ubi->volumes[vol_id]) { + ubi_err(ubi, "volume %d already exists", vol_id); + goto out_unlock; + } + + /* Ensure that the name is unique */ + for (i = 0; i < ubi->vtbl_slots; i++) + if (ubi->volumes[i] && + ubi->volumes[i]->name_len == req->name_len && + !strcmp(ubi->volumes[i]->name, req->name)) { + ubi_err(ubi, "volume \"%s\" exists (ID %d)", + req->name, i); + goto out_unlock; + } + + /* Calculate how many eraseblocks are requested */ + vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment; + vol->reserved_pebs += div_u64(req->bytes + vol->usable_leb_size - 1, + vol->usable_leb_size); + + /* Reserve physical eraseblocks */ + if (vol->reserved_pebs > ubi->avail_pebs) { + ubi_err(ubi, "not enough PEBs, only %d available", + ubi->avail_pebs); + if (ubi->corr_peb_count) + ubi_err(ubi, "%d PEBs are corrupted and not used", + ubi->corr_peb_count); + err = -ENOSPC; + goto out_unlock; + } + ubi->avail_pebs -= vol->reserved_pebs; + ubi->rsvd_pebs += vol->reserved_pebs; + spin_unlock(&ubi->volumes_lock); + + vol->vol_id = vol_id; + vol->alignment = req->alignment; + vol->data_pad = ubi->leb_size % vol->alignment; + vol->vol_type = req->vol_type; + vol->name_len = req->name_len; + memcpy(vol->name, req->name, vol->name_len); + vol->ubi = ubi; + + /* + * Finish all pending erases because there may be some LEBs belonging + * to the same volume ID. + */ + err = ubi_wl_flush(ubi, vol_id, UBI_ALL); + if (err) + goto out_acc; + + vol->eba_tbl = kmalloc(vol->reserved_pebs * sizeof(int), GFP_KERNEL); + if (!vol->eba_tbl) { + err = -ENOMEM; + goto out_acc; + } + + for (i = 0; i < vol->reserved_pebs; i++) + vol->eba_tbl[i] = UBI_LEB_UNMAPPED; + + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + vol->used_ebs = vol->reserved_pebs; + vol->last_eb_bytes = vol->usable_leb_size; + vol->used_bytes = + (long long)vol->used_ebs * vol->usable_leb_size; + } else { + vol->used_ebs = div_u64_rem(vol->used_bytes, + vol->usable_leb_size, + &vol->last_eb_bytes); + if (vol->last_eb_bytes != 0) + vol->used_ebs += 1; + else + vol->last_eb_bytes = vol->usable_leb_size; + } + + /* Register character device for the volume */ + cdev_init(&vol->cdev, &ubi_vol_cdev_operations); + vol->cdev.owner = THIS_MODULE; + dev = MKDEV(MAJOR(ubi->cdev.dev), vol_id + 1); + err = cdev_add(&vol->cdev, dev, 1); + if (err) { + ubi_err(ubi, "cannot add character device"); + goto out_mapping; + } + + vol->dev.release = vol_release; + vol->dev.parent = &ubi->dev; + vol->dev.devt = dev; + vol->dev.class = ubi_class; + + dev_set_name(&vol->dev, "%s_%d", ubi->ubi_name, vol->vol_id); + err = device_register(&vol->dev); + if (err) { + ubi_err(ubi, "cannot register device"); + goto out_cdev; + } + + err = volume_sysfs_init(ubi, vol); + if (err) + goto out_sysfs; + + /* Fill volume table record */ + memset(&vtbl_rec, 0, sizeof(struct ubi_vtbl_record)); + vtbl_rec.reserved_pebs = cpu_to_be32(vol->reserved_pebs); + vtbl_rec.alignment = cpu_to_be32(vol->alignment); + vtbl_rec.data_pad = cpu_to_be32(vol->data_pad); + vtbl_rec.name_len = cpu_to_be16(vol->name_len); + if (vol->vol_type == UBI_DYNAMIC_VOLUME) + vtbl_rec.vol_type = UBI_VID_DYNAMIC; + else + vtbl_rec.vol_type = UBI_VID_STATIC; + memcpy(vtbl_rec.name, vol->name, vol->name_len); + + err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); + if (err) + goto out_sysfs; + + spin_lock(&ubi->volumes_lock); + ubi->volumes[vol_id] = vol; + ubi->vol_count += 1; + spin_unlock(&ubi->volumes_lock); + + ubi_volume_notify(ubi, vol, UBI_VOLUME_ADDED); + self_check_volumes(ubi); + return err; + +out_sysfs: + /* + * We have registered our device, we should not free the volume + * description object in this function in case of an error - it is + * freed by the release function. + * + * Get device reference to prevent the release function from being + * called just after sysfs has been closed. + */ + do_free = 0; + get_device(&vol->dev); + volume_sysfs_close(vol); +out_cdev: + cdev_del(&vol->cdev); +out_mapping: + if (do_free) + kfree(vol->eba_tbl); +out_acc: + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs -= vol->reserved_pebs; + ubi->avail_pebs += vol->reserved_pebs; +out_unlock: + spin_unlock(&ubi->volumes_lock); + if (do_free) + kfree(vol); + else + put_device(&vol->dev); + ubi_err(ubi, "cannot create volume %d, error %d", vol_id, err); + return err; +} + +/** + * ubi_remove_volume - remove volume. + * @desc: volume descriptor + * @no_vtbl: do not change volume table if not zero + * + * This function removes volume described by @desc. The volume has to be opened + * in "exclusive" mode. Returns zero in case of success and a negative error + * code in case of failure. The caller has to have the @ubi->device_mutex + * locked. + */ +int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs; + + dbg_gen("remove device %d, volume %d", ubi->ubi_num, vol_id); + ubi_assert(desc->mode == UBI_EXCLUSIVE); + ubi_assert(vol == ubi->volumes[vol_id]); + + if (ubi->ro_mode) + return -EROFS; + + spin_lock(&ubi->volumes_lock); + if (vol->ref_count > 1) { + /* + * The volume is busy, probably someone is reading one of its + * sysfs files. + */ + err = -EBUSY; + goto out_unlock; + } + ubi->volumes[vol_id] = NULL; + spin_unlock(&ubi->volumes_lock); + + if (!no_vtbl) { + err = ubi_change_vtbl_record(ubi, vol_id, NULL); + if (err) + goto out_err; + } + + for (i = 0; i < vol->reserved_pebs; i++) { + err = ubi_eba_unmap_leb(ubi, vol, i); + if (err) + goto out_err; + } + + cdev_del(&vol->cdev); + volume_sysfs_close(vol); + + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs -= reserved_pebs; + ubi->avail_pebs += reserved_pebs; + ubi_update_reserved(ubi); + ubi->vol_count -= 1; + spin_unlock(&ubi->volumes_lock); + + ubi_volume_notify(ubi, vol, UBI_VOLUME_REMOVED); + if (!no_vtbl) + self_check_volumes(ubi); + + return err; + +out_err: + ubi_err(ubi, "cannot remove volume %d, error %d", vol_id, err); + spin_lock(&ubi->volumes_lock); + ubi->volumes[vol_id] = vol; +out_unlock: + spin_unlock(&ubi->volumes_lock); + return err; +} + +/** + * ubi_resize_volume - re-size volume. + * @desc: volume descriptor + * @reserved_pebs: new size in physical eraseblocks + * + * This function re-sizes the volume and returns zero in case of success, and a + * negative error code in case of failure. The caller has to have the + * @ubi->device_mutex locked. + */ +int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) +{ + int i, err, pebs, *new_mapping; + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + struct ubi_vtbl_record vtbl_rec; + int vol_id = vol->vol_id; + + if (ubi->ro_mode) + return -EROFS; + + dbg_gen("re-size device %d, volume %d to from %d to %d PEBs", + ubi->ubi_num, vol_id, vol->reserved_pebs, reserved_pebs); + + if (vol->vol_type == UBI_STATIC_VOLUME && + reserved_pebs < vol->used_ebs) { + ubi_err(ubi, "too small size %d, %d LEBs contain data", + reserved_pebs, vol->used_ebs); + return -EINVAL; + } + + /* If the size is the same, we have nothing to do */ + if (reserved_pebs == vol->reserved_pebs) + return 0; + + new_mapping = kmalloc(reserved_pebs * sizeof(int), GFP_KERNEL); + if (!new_mapping) + return -ENOMEM; + + for (i = 0; i < reserved_pebs; i++) + new_mapping[i] = UBI_LEB_UNMAPPED; + + spin_lock(&ubi->volumes_lock); + if (vol->ref_count > 1) { + spin_unlock(&ubi->volumes_lock); + err = -EBUSY; + goto out_free; + } + spin_unlock(&ubi->volumes_lock); + + /* Reserve physical eraseblocks */ + pebs = reserved_pebs - vol->reserved_pebs; + if (pebs > 0) { + spin_lock(&ubi->volumes_lock); + if (pebs > ubi->avail_pebs) { + ubi_err(ubi, "not enough PEBs: requested %d, available %d", + pebs, ubi->avail_pebs); + if (ubi->corr_peb_count) + ubi_err(ubi, "%d PEBs are corrupted and not used", + ubi->corr_peb_count); + spin_unlock(&ubi->volumes_lock); + err = -ENOSPC; + goto out_free; + } + ubi->avail_pebs -= pebs; + ubi->rsvd_pebs += pebs; + for (i = 0; i < vol->reserved_pebs; i++) + new_mapping[i] = vol->eba_tbl[i]; + kfree(vol->eba_tbl); + vol->eba_tbl = new_mapping; + spin_unlock(&ubi->volumes_lock); + } + + /* Change volume table record */ + vtbl_rec = ubi->vtbl[vol_id]; + vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs); + err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); + if (err) + goto out_acc; + + if (pebs < 0) { + for (i = 0; i < -pebs; i++) { + err = ubi_eba_unmap_leb(ubi, vol, reserved_pebs + i); + if (err) + goto out_acc; + } + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs += pebs; + ubi->avail_pebs -= pebs; + ubi_update_reserved(ubi); + for (i = 0; i < reserved_pebs; i++) + new_mapping[i] = vol->eba_tbl[i]; + kfree(vol->eba_tbl); + vol->eba_tbl = new_mapping; + spin_unlock(&ubi->volumes_lock); + } + + vol->reserved_pebs = reserved_pebs; + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + vol->used_ebs = reserved_pebs; + vol->last_eb_bytes = vol->usable_leb_size; + vol->used_bytes = + (long long)vol->used_ebs * vol->usable_leb_size; + } + + ubi_volume_notify(ubi, vol, UBI_VOLUME_RESIZED); + self_check_volumes(ubi); + return err; + +out_acc: + if (pebs > 0) { + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs -= pebs; + ubi->avail_pebs += pebs; + spin_unlock(&ubi->volumes_lock); + } +out_free: + kfree(new_mapping); + return err; +} + +/** + * ubi_rename_volumes - re-name UBI volumes. + * @ubi: UBI device description object + * @rename_list: list of &struct ubi_rename_entry objects + * + * This function re-names or removes volumes specified in the re-name list. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list) +{ + int err; + struct ubi_rename_entry *re; + + err = ubi_vtbl_rename_volumes(ubi, rename_list); + if (err) + return err; + + list_for_each_entry(re, rename_list, list) { + if (re->remove) { + err = ubi_remove_volume(re->desc, 1); + if (err) + break; + } else { + struct ubi_volume *vol = re->desc->vol; + + spin_lock(&ubi->volumes_lock); + vol->name_len = re->new_name_len; + memcpy(vol->name, re->new_name, re->new_name_len + 1); + spin_unlock(&ubi->volumes_lock); + ubi_volume_notify(ubi, vol, UBI_VOLUME_RENAMED); + } + } + + if (!err) + self_check_volumes(ubi); + return err; +} + +/** + * ubi_add_volume - add volume. + * @ubi: UBI device description object + * @vol: volume description object + * + * This function adds an existing volume and initializes all its data + * structures. Returns zero in case of success and a negative error code in + * case of failure. + */ +int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol) +{ + int err, vol_id = vol->vol_id; + dev_t dev; + + dbg_gen("add volume %d", vol_id); + + /* Register character device for the volume */ + cdev_init(&vol->cdev, &ubi_vol_cdev_operations); + vol->cdev.owner = THIS_MODULE; + dev = MKDEV(MAJOR(ubi->cdev.dev), vol->vol_id + 1); + err = cdev_add(&vol->cdev, dev, 1); + if (err) { + ubi_err(ubi, "cannot add character device for volume %d, error %d", + vol_id, err); + return err; + } + + vol->dev.release = vol_release; + vol->dev.parent = &ubi->dev; + vol->dev.devt = dev; + vol->dev.class = ubi_class; + dev_set_name(&vol->dev, "%s_%d", ubi->ubi_name, vol->vol_id); + err = device_register(&vol->dev); + if (err) + goto out_cdev; + + err = volume_sysfs_init(ubi, vol); + if (err) { + cdev_del(&vol->cdev); + volume_sysfs_close(vol); + return err; + } + + self_check_volumes(ubi); + return err; + +out_cdev: + cdev_del(&vol->cdev); + return err; +} + +/** + * ubi_free_volume - free volume. + * @ubi: UBI device description object + * @vol: volume description object + * + * This function frees all resources for volume @vol but does not remove it. + * Used only when the UBI device is detached. + */ +void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol) +{ + dbg_gen("free volume %d", vol->vol_id); + + ubi->volumes[vol->vol_id] = NULL; + cdev_del(&vol->cdev); + volume_sysfs_close(vol); +} + +/** + * self_check_volume - check volume information. + * @ubi: UBI device description object + * @vol_id: volume ID + * + * Returns zero if volume is all right and a a negative error code if not. + */ +static int self_check_volume(struct ubi_device *ubi, int vol_id) +{ + int idx = vol_id2idx(ubi, vol_id); + int reserved_pebs, alignment, data_pad, vol_type, name_len, upd_marker; + const struct ubi_volume *vol; + long long n; + const char *name; + + spin_lock(&ubi->volumes_lock); + reserved_pebs = be32_to_cpu(ubi->vtbl[vol_id].reserved_pebs); + vol = ubi->volumes[idx]; + + if (!vol) { + if (reserved_pebs) { + ubi_err(ubi, "no volume info, but volume exists"); + goto fail; + } + spin_unlock(&ubi->volumes_lock); + return 0; + } + + if (vol->reserved_pebs < 0 || vol->alignment < 0 || vol->data_pad < 0 || + vol->name_len < 0) { + ubi_err(ubi, "negative values"); + goto fail; + } + if (vol->alignment > ubi->leb_size || vol->alignment == 0) { + ubi_err(ubi, "bad alignment"); + goto fail; + } + + n = vol->alignment & (ubi->min_io_size - 1); + if (vol->alignment != 1 && n) { + ubi_err(ubi, "alignment is not multiple of min I/O unit"); + goto fail; + } + + n = ubi->leb_size % vol->alignment; + if (vol->data_pad != n) { + ubi_err(ubi, "bad data_pad, has to be %lld", n); + goto fail; + } + + if (vol->vol_type != UBI_DYNAMIC_VOLUME && + vol->vol_type != UBI_STATIC_VOLUME) { + ubi_err(ubi, "bad vol_type"); + goto fail; + } + + if (vol->upd_marker && vol->corrupted) { + ubi_err(ubi, "update marker and corrupted simultaneously"); + goto fail; + } + + if (vol->reserved_pebs > ubi->good_peb_count) { + ubi_err(ubi, "too large reserved_pebs"); + goto fail; + } + + n = ubi->leb_size - vol->data_pad; + if (vol->usable_leb_size != ubi->leb_size - vol->data_pad) { + ubi_err(ubi, "bad usable_leb_size, has to be %lld", n); + goto fail; + } + + if (vol->name_len > UBI_VOL_NAME_MAX) { + ubi_err(ubi, "too long volume name, max is %d", + UBI_VOL_NAME_MAX); + goto fail; + } + + n = strnlen(vol->name, vol->name_len + 1); + if (n != vol->name_len) { + ubi_err(ubi, "bad name_len %lld", n); + goto fail; + } + + n = (long long)vol->used_ebs * vol->usable_leb_size; + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + if (vol->corrupted) { + ubi_err(ubi, "corrupted dynamic volume"); + goto fail; + } + if (vol->used_ebs != vol->reserved_pebs) { + ubi_err(ubi, "bad used_ebs"); + goto fail; + } + if (vol->last_eb_bytes != vol->usable_leb_size) { + ubi_err(ubi, "bad last_eb_bytes"); + goto fail; + } + if (vol->used_bytes != n) { + ubi_err(ubi, "bad used_bytes"); + goto fail; + } + } else { + if (vol->used_ebs < 0 || vol->used_ebs > vol->reserved_pebs) { + ubi_err(ubi, "bad used_ebs"); + goto fail; + } + if (vol->last_eb_bytes < 0 || + vol->last_eb_bytes > vol->usable_leb_size) { + ubi_err(ubi, "bad last_eb_bytes"); + goto fail; + } + if (vol->used_bytes < 0 || vol->used_bytes > n || + vol->used_bytes < n - vol->usable_leb_size) { + ubi_err(ubi, "bad used_bytes"); + goto fail; + } + } + + alignment = be32_to_cpu(ubi->vtbl[vol_id].alignment); + data_pad = be32_to_cpu(ubi->vtbl[vol_id].data_pad); + name_len = be16_to_cpu(ubi->vtbl[vol_id].name_len); + upd_marker = ubi->vtbl[vol_id].upd_marker; + name = &ubi->vtbl[vol_id].name[0]; + if (ubi->vtbl[vol_id].vol_type == UBI_VID_DYNAMIC) + vol_type = UBI_DYNAMIC_VOLUME; + else + vol_type = UBI_STATIC_VOLUME; + + if (alignment != vol->alignment || data_pad != vol->data_pad || + upd_marker != vol->upd_marker || vol_type != vol->vol_type || + name_len != vol->name_len || strncmp(name, vol->name, name_len)) { + ubi_err(ubi, "volume info is different"); + goto fail; + } + + spin_unlock(&ubi->volumes_lock); + return 0; + +fail: + ubi_err(ubi, "self-check failed for volume %d", vol_id); + if (vol) + ubi_dump_vol_info(vol); + ubi_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id); + dump_stack(); + spin_unlock(&ubi->volumes_lock); + return -EINVAL; +} + +/** + * self_check_volumes - check information about all volumes. + * @ubi: UBI device description object + * + * Returns zero if volumes are all right and a a negative error code if not. + */ +static int self_check_volumes(struct ubi_device *ubi) +{ + int i, err = 0; + + if (!ubi_dbg_chk_gen(ubi)) + return 0; + + for (i = 0; i < ubi->vtbl_slots; i++) { + err = self_check_volume(ubi, i); + if (err) + break; + } + + return err; +} diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c new file mode 100644 index 000000000..68c9c5ea6 --- /dev/null +++ b/drivers/mtd/ubi/vtbl.c @@ -0,0 +1,864 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2006, 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file includes volume table manipulation code. The volume table is an + * on-flash table containing volume meta-data like name, number of reserved + * physical eraseblocks, type, etc. The volume table is stored in the so-called + * "layout volume". + * + * The layout volume is an internal volume which is organized as follows. It + * consists of two logical eraseblocks - LEB 0 and LEB 1. Each logical + * eraseblock stores one volume table copy, i.e. LEB 0 and LEB 1 duplicate each + * other. This redundancy guarantees robustness to unclean reboots. The volume + * table is basically an array of volume table records. Each record contains + * full information about the volume and protected by a CRC checksum. Note, + * nowadays we use the atomic LEB change operation when updating the volume + * table, so we do not really need 2 LEBs anymore, but we preserve the older + * design for the backward compatibility reasons. + * + * When the volume table is changed, it is first changed in RAM. Then LEB 0 is + * erased, and the updated volume table is written back to LEB 0. Then same for + * LEB 1. This scheme guarantees recoverability from unclean reboots. + * + * In this UBI implementation the on-flash volume table does not contain any + * information about how much data static volumes contain. + * + * But it would still be beneficial to store this information in the volume + * table. For example, suppose we have a static volume X, and all its physical + * eraseblocks became bad for some reasons. Suppose we are attaching the + * corresponding MTD device, for some reason we find no logical eraseblocks + * corresponding to the volume X. According to the volume table volume X does + * exist. So we don't know whether it is just empty or all its physical + * eraseblocks went bad. So we cannot alarm the user properly. + * + * The volume table also stores so-called "update marker", which is used for + * volume updates. Before updating the volume, the update marker is set, and + * after the update operation is finished, the update marker is cleared. So if + * the update operation was interrupted (e.g. by an unclean reboot) - the + * update marker is still there and we know that the volume's contents is + * damaged. + */ + +#include <linux/crc32.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <asm/div64.h> +#include "ubi.h" + +static void self_vtbl_check(const struct ubi_device *ubi); + +/* Empty volume table record */ +static struct ubi_vtbl_record empty_vtbl_record; + +/** + * ubi_change_vtbl_record - change volume table record. + * @ubi: UBI device description object + * @idx: table index to change + * @vtbl_rec: new volume table record + * + * This function changes volume table record @idx. If @vtbl_rec is %NULL, empty + * volume table record is written. The caller does not have to calculate CRC of + * the record as it is done by this function. Returns zero in case of success + * and a negative error code in case of failure. + */ +int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, + struct ubi_vtbl_record *vtbl_rec) +{ + int i, err; + uint32_t crc; + struct ubi_volume *layout_vol; + + ubi_assert(idx >= 0 && idx < ubi->vtbl_slots); + layout_vol = ubi->volumes[vol_id2idx(ubi, UBI_LAYOUT_VOLUME_ID)]; + + if (!vtbl_rec) + vtbl_rec = &empty_vtbl_record; + else { + crc = crc32(UBI_CRC32_INIT, vtbl_rec, UBI_VTBL_RECORD_SIZE_CRC); + vtbl_rec->crc = cpu_to_be32(crc); + } + + memcpy(&ubi->vtbl[idx], vtbl_rec, sizeof(struct ubi_vtbl_record)); + for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { + err = ubi_eba_atomic_leb_change(ubi, layout_vol, i, ubi->vtbl, + ubi->vtbl_size); + if (err) + return err; + } + + self_vtbl_check(ubi); + return 0; +} + +/** + * ubi_vtbl_rename_volumes - rename UBI volumes in the volume table. + * @ubi: UBI device description object + * @rename_list: list of &struct ubi_rename_entry objects + * + * This function re-names multiple volumes specified in @req in the volume + * table. Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubi_vtbl_rename_volumes(struct ubi_device *ubi, + struct list_head *rename_list) +{ + int i, err; + struct ubi_rename_entry *re; + struct ubi_volume *layout_vol; + + list_for_each_entry(re, rename_list, list) { + uint32_t crc; + struct ubi_volume *vol = re->desc->vol; + struct ubi_vtbl_record *vtbl_rec = &ubi->vtbl[vol->vol_id]; + + if (re->remove) { + memcpy(vtbl_rec, &empty_vtbl_record, + sizeof(struct ubi_vtbl_record)); + continue; + } + + vtbl_rec->name_len = cpu_to_be16(re->new_name_len); + memcpy(vtbl_rec->name, re->new_name, re->new_name_len); + memset(vtbl_rec->name + re->new_name_len, 0, + UBI_VOL_NAME_MAX + 1 - re->new_name_len); + crc = crc32(UBI_CRC32_INIT, vtbl_rec, + UBI_VTBL_RECORD_SIZE_CRC); + vtbl_rec->crc = cpu_to_be32(crc); + } + + layout_vol = ubi->volumes[vol_id2idx(ubi, UBI_LAYOUT_VOLUME_ID)]; + for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { + err = ubi_eba_atomic_leb_change(ubi, layout_vol, i, ubi->vtbl, + ubi->vtbl_size); + if (err) + return err; + } + + return 0; +} + +/** + * vtbl_check - check if volume table is not corrupted and sensible. + * @ubi: UBI device description object + * @vtbl: volume table + * + * This function returns zero if @vtbl is all right, %1 if CRC is incorrect, + * and %-EINVAL if it contains inconsistent data. + */ +static int vtbl_check(const struct ubi_device *ubi, + const struct ubi_vtbl_record *vtbl) +{ + int i, n, reserved_pebs, alignment, data_pad, vol_type, name_len; + int upd_marker, err; + uint32_t crc; + const char *name; + + for (i = 0; i < ubi->vtbl_slots; i++) { + cond_resched(); + + reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs); + alignment = be32_to_cpu(vtbl[i].alignment); + data_pad = be32_to_cpu(vtbl[i].data_pad); + upd_marker = vtbl[i].upd_marker; + vol_type = vtbl[i].vol_type; + name_len = be16_to_cpu(vtbl[i].name_len); + name = &vtbl[i].name[0]; + + crc = crc32(UBI_CRC32_INIT, &vtbl[i], UBI_VTBL_RECORD_SIZE_CRC); + if (be32_to_cpu(vtbl[i].crc) != crc) { + ubi_err(ubi, "bad CRC at record %u: %#08x, not %#08x", + i, crc, be32_to_cpu(vtbl[i].crc)); + ubi_dump_vtbl_record(&vtbl[i], i); + return 1; + } + + if (reserved_pebs == 0) { + if (memcmp(&vtbl[i], &empty_vtbl_record, + UBI_VTBL_RECORD_SIZE)) { + err = 2; + goto bad; + } + continue; + } + + if (reserved_pebs < 0 || alignment < 0 || data_pad < 0 || + name_len < 0) { + err = 3; + goto bad; + } + + if (alignment > ubi->leb_size || alignment == 0) { + err = 4; + goto bad; + } + + n = alignment & (ubi->min_io_size - 1); + if (alignment != 1 && n) { + err = 5; + goto bad; + } + + n = ubi->leb_size % alignment; + if (data_pad != n) { + ubi_err(ubi, "bad data_pad, has to be %d", n); + err = 6; + goto bad; + } + + if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) { + err = 7; + goto bad; + } + + if (upd_marker != 0 && upd_marker != 1) { + err = 8; + goto bad; + } + + if (reserved_pebs > ubi->good_peb_count) { + ubi_err(ubi, "too large reserved_pebs %d, good PEBs %d", + reserved_pebs, ubi->good_peb_count); + err = 9; + goto bad; + } + + if (name_len > UBI_VOL_NAME_MAX) { + err = 10; + goto bad; + } + + if (name[0] == '\0') { + err = 11; + goto bad; + } + + if (name_len != strnlen(name, name_len + 1)) { + err = 12; + goto bad; + } + } + + /* Checks that all names are unique */ + for (i = 0; i < ubi->vtbl_slots - 1; i++) { + for (n = i + 1; n < ubi->vtbl_slots; n++) { + int len1 = be16_to_cpu(vtbl[i].name_len); + int len2 = be16_to_cpu(vtbl[n].name_len); + + if (len1 > 0 && len1 == len2 && + !strncmp(vtbl[i].name, vtbl[n].name, len1)) { + ubi_err(ubi, "volumes %d and %d have the same name \"%s\"", + i, n, vtbl[i].name); + ubi_dump_vtbl_record(&vtbl[i], i); + ubi_dump_vtbl_record(&vtbl[n], n); + return -EINVAL; + } + } + } + + return 0; + +bad: + ubi_err(ubi, "volume table check failed: record %d, error %d", i, err); + ubi_dump_vtbl_record(&vtbl[i], i); + return -EINVAL; +} + +/** + * create_vtbl - create a copy of volume table. + * @ubi: UBI device description object + * @ai: attaching information + * @copy: number of the volume table copy + * @vtbl: contents of the volume table + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int create_vtbl(struct ubi_device *ubi, struct ubi_attach_info *ai, + int copy, void *vtbl) +{ + int err, tries = 0; + struct ubi_vid_hdr *vid_hdr; + struct ubi_ainf_peb *new_aeb; + + dbg_gen("create volume table (copy #%d)", copy + 1); + + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); + if (!vid_hdr) + return -ENOMEM; + +retry: + new_aeb = ubi_early_get_peb(ubi, ai); + if (IS_ERR(new_aeb)) { + err = PTR_ERR(new_aeb); + goto out_free; + } + + vid_hdr->vol_type = UBI_LAYOUT_VOLUME_TYPE; + vid_hdr->vol_id = cpu_to_be32(UBI_LAYOUT_VOLUME_ID); + vid_hdr->compat = UBI_LAYOUT_VOLUME_COMPAT; + vid_hdr->data_size = vid_hdr->used_ebs = + vid_hdr->data_pad = cpu_to_be32(0); + vid_hdr->lnum = cpu_to_be32(copy); + vid_hdr->sqnum = cpu_to_be64(++ai->max_sqnum); + + /* The EC header is already there, write the VID header */ + err = ubi_io_write_vid_hdr(ubi, new_aeb->pnum, vid_hdr); + if (err) + goto write_error; + + /* Write the layout volume contents */ + err = ubi_io_write_data(ubi, vtbl, new_aeb->pnum, 0, ubi->vtbl_size); + if (err) + goto write_error; + + /* + * And add it to the attaching information. Don't delete the old version + * of this LEB as it will be deleted and freed in 'ubi_add_to_av()'. + */ + err = ubi_add_to_av(ubi, ai, new_aeb->pnum, new_aeb->ec, vid_hdr, 0); + kmem_cache_free(ai->aeb_slab_cache, new_aeb); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + +write_error: + if (err == -EIO && ++tries <= 5) { + /* + * Probably this physical eraseblock went bad, try to pick + * another one. + */ + list_add(&new_aeb->u.list, &ai->erase); + goto retry; + } + kmem_cache_free(ai->aeb_slab_cache, new_aeb); +out_free: + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + +} + +/** + * process_lvol - process the layout volume. + * @ubi: UBI device description object + * @ai: attaching information + * @av: layout volume attaching information + * + * This function is responsible for reading the layout volume, ensuring it is + * not corrupted, and recovering from corruptions if needed. Returns volume + * table in case of success and a negative error code in case of failure. + */ +static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi, + struct ubi_attach_info *ai, + struct ubi_ainf_volume *av) +{ + int err; + struct rb_node *rb; + struct ubi_ainf_peb *aeb; + struct ubi_vtbl_record *leb[UBI_LAYOUT_VOLUME_EBS] = { NULL, NULL }; + int leb_corrupted[UBI_LAYOUT_VOLUME_EBS] = {1, 1}; + + /* + * UBI goes through the following steps when it changes the layout + * volume: + * a. erase LEB 0; + * b. write new data to LEB 0; + * c. erase LEB 1; + * d. write new data to LEB 1. + * + * Before the change, both LEBs contain the same data. + * + * Due to unclean reboots, the contents of LEB 0 may be lost, but there + * should LEB 1. So it is OK if LEB 0 is corrupted while LEB 1 is not. + * Similarly, LEB 1 may be lost, but there should be LEB 0. And + * finally, unclean reboots may result in a situation when neither LEB + * 0 nor LEB 1 are corrupted, but they are different. In this case, LEB + * 0 contains more recent information. + * + * So the plan is to first check LEB 0. Then + * a. if LEB 0 is OK, it must be containing the most recent data; then + * we compare it with LEB 1, and if they are different, we copy LEB + * 0 to LEB 1; + * b. if LEB 0 is corrupted, but LEB 1 has to be OK, and we copy LEB 1 + * to LEB 0. + */ + + dbg_gen("check layout volume"); + + /* Read both LEB 0 and LEB 1 into memory */ + ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) { + leb[aeb->lnum] = vzalloc(ubi->vtbl_size); + if (!leb[aeb->lnum]) { + err = -ENOMEM; + goto out_free; + } + + err = ubi_io_read_data(ubi, leb[aeb->lnum], aeb->pnum, 0, + ubi->vtbl_size); + if (err == UBI_IO_BITFLIPS || mtd_is_eccerr(err)) + /* + * Scrub the PEB later. Note, -EBADMSG indicates an + * uncorrectable ECC error, but we have our own CRC and + * the data will be checked later. If the data is OK, + * the PEB will be scrubbed (because we set + * aeb->scrub). If the data is not OK, the contents of + * the PEB will be recovered from the second copy, and + * aeb->scrub will be cleared in + * 'ubi_add_to_av()'. + */ + aeb->scrub = 1; + else if (err) + goto out_free; + } + + err = -EINVAL; + if (leb[0]) { + leb_corrupted[0] = vtbl_check(ubi, leb[0]); + if (leb_corrupted[0] < 0) + goto out_free; + } + + if (!leb_corrupted[0]) { + /* LEB 0 is OK */ + if (leb[1]) + leb_corrupted[1] = memcmp(leb[0], leb[1], + ubi->vtbl_size); + if (leb_corrupted[1]) { + ubi_warn(ubi, "volume table copy #2 is corrupted"); + err = create_vtbl(ubi, ai, 1, leb[0]); + if (err) + goto out_free; + ubi_msg(ubi, "volume table was restored"); + } + + /* Both LEB 1 and LEB 2 are OK and consistent */ + vfree(leb[1]); + return leb[0]; + } else { + /* LEB 0 is corrupted or does not exist */ + if (leb[1]) { + leb_corrupted[1] = vtbl_check(ubi, leb[1]); + if (leb_corrupted[1] < 0) + goto out_free; + } + if (leb_corrupted[1]) { + /* Both LEB 0 and LEB 1 are corrupted */ + ubi_err(ubi, "both volume tables are corrupted"); + goto out_free; + } + + ubi_warn(ubi, "volume table copy #1 is corrupted"); + err = create_vtbl(ubi, ai, 0, leb[1]); + if (err) + goto out_free; + ubi_msg(ubi, "volume table was restored"); + + vfree(leb[0]); + return leb[1]; + } + +out_free: + vfree(leb[0]); + vfree(leb[1]); + return ERR_PTR(err); +} + +/** + * create_empty_lvol - create empty layout volume. + * @ubi: UBI device description object + * @ai: attaching information + * + * This function returns volume table contents in case of success and a + * negative error code in case of failure. + */ +static struct ubi_vtbl_record *create_empty_lvol(struct ubi_device *ubi, + struct ubi_attach_info *ai) +{ + int i; + struct ubi_vtbl_record *vtbl; + + vtbl = vzalloc(ubi->vtbl_size); + if (!vtbl) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < ubi->vtbl_slots; i++) + memcpy(&vtbl[i], &empty_vtbl_record, UBI_VTBL_RECORD_SIZE); + + for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { + int err; + + err = create_vtbl(ubi, ai, i, vtbl); + if (err) { + vfree(vtbl); + return ERR_PTR(err); + } + } + + return vtbl; +} + +/** + * init_volumes - initialize volume information for existing volumes. + * @ubi: UBI device description object + * @ai: scanning information + * @vtbl: volume table + * + * This function allocates volume description objects for existing volumes. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +static int init_volumes(struct ubi_device *ubi, + const struct ubi_attach_info *ai, + const struct ubi_vtbl_record *vtbl) +{ + int i, reserved_pebs = 0; + struct ubi_ainf_volume *av; + struct ubi_volume *vol; + + for (i = 0; i < ubi->vtbl_slots; i++) { + cond_resched(); + + if (be32_to_cpu(vtbl[i].reserved_pebs) == 0) + continue; /* Empty record */ + + vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); + if (!vol) + return -ENOMEM; + + vol->reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs); + vol->alignment = be32_to_cpu(vtbl[i].alignment); + vol->data_pad = be32_to_cpu(vtbl[i].data_pad); + vol->upd_marker = vtbl[i].upd_marker; + vol->vol_type = vtbl[i].vol_type == UBI_VID_DYNAMIC ? + UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME; + vol->name_len = be16_to_cpu(vtbl[i].name_len); + vol->usable_leb_size = ubi->leb_size - vol->data_pad; + memcpy(vol->name, vtbl[i].name, vol->name_len); + vol->name[vol->name_len] = '\0'; + vol->vol_id = i; + + if (vtbl[i].flags & UBI_VTBL_AUTORESIZE_FLG) { + /* Auto re-size flag may be set only for one volume */ + if (ubi->autoresize_vol_id != -1) { + ubi_err(ubi, "more than one auto-resize volume (%d and %d)", + ubi->autoresize_vol_id, i); + kfree(vol); + return -EINVAL; + } + + ubi->autoresize_vol_id = i; + } + + ubi_assert(!ubi->volumes[i]); + ubi->volumes[i] = vol; + ubi->vol_count += 1; + vol->ubi = ubi; + reserved_pebs += vol->reserved_pebs; + + /* + * In case of dynamic volume UBI knows nothing about how many + * data is stored there. So assume the whole volume is used. + */ + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + vol->used_ebs = vol->reserved_pebs; + vol->last_eb_bytes = vol->usable_leb_size; + vol->used_bytes = + (long long)vol->used_ebs * vol->usable_leb_size; + continue; + } + + /* Static volumes only */ + av = ubi_find_av(ai, i); + if (!av || !av->leb_count) { + /* + * No eraseblocks belonging to this volume found. We + * don't actually know whether this static volume is + * completely corrupted or just contains no data. And + * we cannot know this as long as data size is not + * stored on flash. So we just assume the volume is + * empty. FIXME: this should be handled. + */ + continue; + } + + if (av->leb_count != av->used_ebs) { + /* + * We found a static volume which misses several + * eraseblocks. Treat it as corrupted. + */ + ubi_warn(ubi, "static volume %d misses %d LEBs - corrupted", + av->vol_id, av->used_ebs - av->leb_count); + vol->corrupted = 1; + continue; + } + + vol->used_ebs = av->used_ebs; + vol->used_bytes = + (long long)(vol->used_ebs - 1) * vol->usable_leb_size; + vol->used_bytes += av->last_data_size; + vol->last_eb_bytes = av->last_data_size; + } + + /* And add the layout volume */ + vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); + if (!vol) + return -ENOMEM; + + vol->reserved_pebs = UBI_LAYOUT_VOLUME_EBS; + vol->alignment = UBI_LAYOUT_VOLUME_ALIGN; + vol->vol_type = UBI_DYNAMIC_VOLUME; + vol->name_len = sizeof(UBI_LAYOUT_VOLUME_NAME) - 1; + memcpy(vol->name, UBI_LAYOUT_VOLUME_NAME, vol->name_len + 1); + vol->usable_leb_size = ubi->leb_size; + vol->used_ebs = vol->reserved_pebs; + vol->last_eb_bytes = vol->reserved_pebs; + vol->used_bytes = + (long long)vol->used_ebs * (ubi->leb_size - vol->data_pad); + vol->vol_id = UBI_LAYOUT_VOLUME_ID; + vol->ref_count = 1; + + ubi_assert(!ubi->volumes[i]); + ubi->volumes[vol_id2idx(ubi, vol->vol_id)] = vol; + reserved_pebs += vol->reserved_pebs; + ubi->vol_count += 1; + vol->ubi = ubi; + + if (reserved_pebs > ubi->avail_pebs) { + ubi_err(ubi, "not enough PEBs, required %d, available %d", + reserved_pebs, ubi->avail_pebs); + if (ubi->corr_peb_count) + ubi_err(ubi, "%d PEBs are corrupted and not used", + ubi->corr_peb_count); + } + ubi->rsvd_pebs += reserved_pebs; + ubi->avail_pebs -= reserved_pebs; + + return 0; +} + +/** + * check_av - check volume attaching information. + * @vol: UBI volume description object + * @av: volume attaching information + * + * This function returns zero if the volume attaching information is consistent + * to the data read from the volume tabla, and %-EINVAL if not. + */ +static int check_av(const struct ubi_volume *vol, + const struct ubi_ainf_volume *av) +{ + int err; + + if (av->highest_lnum >= vol->reserved_pebs) { + err = 1; + goto bad; + } + if (av->leb_count > vol->reserved_pebs) { + err = 2; + goto bad; + } + if (av->vol_type != vol->vol_type) { + err = 3; + goto bad; + } + if (av->used_ebs > vol->reserved_pebs) { + err = 4; + goto bad; + } + if (av->data_pad != vol->data_pad) { + err = 5; + goto bad; + } + return 0; + +bad: + ubi_err(vol->ubi, "bad attaching information, error %d", err); + ubi_dump_av(av); + ubi_dump_vol_info(vol); + return -EINVAL; +} + +/** + * check_attaching_info - check that attaching information. + * @ubi: UBI device description object + * @ai: attaching information + * + * Even though we protect on-flash data by CRC checksums, we still don't trust + * the media. This function ensures that attaching information is consistent to + * the information read from the volume table. Returns zero if the attaching + * information is OK and %-EINVAL if it is not. + */ +static int check_attaching_info(const struct ubi_device *ubi, + struct ubi_attach_info *ai) +{ + int err, i; + struct ubi_ainf_volume *av; + struct ubi_volume *vol; + + if (ai->vols_found > UBI_INT_VOL_COUNT + ubi->vtbl_slots) { + ubi_err(ubi, "found %d volumes while attaching, maximum is %d + %d", + ai->vols_found, UBI_INT_VOL_COUNT, ubi->vtbl_slots); + return -EINVAL; + } + + if (ai->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT && + ai->highest_vol_id < UBI_INTERNAL_VOL_START) { + ubi_err(ubi, "too large volume ID %d found", + ai->highest_vol_id); + return -EINVAL; + } + + for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { + cond_resched(); + + av = ubi_find_av(ai, i); + vol = ubi->volumes[i]; + if (!vol) { + if (av) + ubi_remove_av(ai, av); + continue; + } + + if (vol->reserved_pebs == 0) { + ubi_assert(i < ubi->vtbl_slots); + + if (!av) + continue; + + /* + * During attaching we found a volume which does not + * exist according to the information in the volume + * table. This must have happened due to an unclean + * reboot while the volume was being removed. Discard + * these eraseblocks. + */ + ubi_msg(ubi, "finish volume %d removal", av->vol_id); + ubi_remove_av(ai, av); + } else if (av) { + err = check_av(vol, av); + if (err) + return err; + } + } + + return 0; +} + +/** + * ubi_read_volume_table - read the volume table. + * @ubi: UBI device description object + * @ai: attaching information + * + * This function reads volume table, checks it, recover from errors if needed, + * or creates it if needed. Returns zero in case of success and a negative + * error code in case of failure. + */ +int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_attach_info *ai) +{ + int i, err; + struct ubi_ainf_volume *av; + + empty_vtbl_record.crc = cpu_to_be32(0xf116c36b); + + /* + * The number of supported volumes is limited by the eraseblock size + * and by the UBI_MAX_VOLUMES constant. + */ + ubi->vtbl_slots = ubi->leb_size / UBI_VTBL_RECORD_SIZE; + if (ubi->vtbl_slots > UBI_MAX_VOLUMES) + ubi->vtbl_slots = UBI_MAX_VOLUMES; + + ubi->vtbl_size = ubi->vtbl_slots * UBI_VTBL_RECORD_SIZE; + ubi->vtbl_size = ALIGN(ubi->vtbl_size, ubi->min_io_size); + + av = ubi_find_av(ai, UBI_LAYOUT_VOLUME_ID); + if (!av) { + /* + * No logical eraseblocks belonging to the layout volume were + * found. This could mean that the flash is just empty. In + * this case we create empty layout volume. + * + * But if flash is not empty this must be a corruption or the + * MTD device just contains garbage. + */ + if (ai->is_empty) { + ubi->vtbl = create_empty_lvol(ubi, ai); + if (IS_ERR(ubi->vtbl)) + return PTR_ERR(ubi->vtbl); + } else { + ubi_err(ubi, "the layout volume was not found"); + return -EINVAL; + } + } else { + if (av->leb_count > UBI_LAYOUT_VOLUME_EBS) { + /* This must not happen with proper UBI images */ + ubi_err(ubi, "too many LEBs (%d) in layout volume", + av->leb_count); + return -EINVAL; + } + + ubi->vtbl = process_lvol(ubi, ai, av); + if (IS_ERR(ubi->vtbl)) + return PTR_ERR(ubi->vtbl); + } + + ubi->avail_pebs = ubi->good_peb_count - ubi->corr_peb_count; + + /* + * The layout volume is OK, initialize the corresponding in-RAM data + * structures. + */ + err = init_volumes(ubi, ai, ubi->vtbl); + if (err) + goto out_free; + + /* + * Make sure that the attaching information is consistent to the + * information stored in the volume table. + */ + err = check_attaching_info(ubi, ai); + if (err) + goto out_free; + + return 0; + +out_free: + vfree(ubi->vtbl); + for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { + kfree(ubi->volumes[i]); + ubi->volumes[i] = NULL; + } + return err; +} + +/** + * self_vtbl_check - check volume table. + * @ubi: UBI device description object + */ +static void self_vtbl_check(const struct ubi_device *ubi) +{ + if (!ubi_dbg_chk_gen(ubi)) + return; + + if (vtbl_check(ubi, ubi->vtbl)) { + ubi_err(ubi, "self-check failed"); + BUG(); + } +} diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c new file mode 100644 index 000000000..16214d3d5 --- /dev/null +++ b/drivers/mtd/ubi/wl.c @@ -0,0 +1,1844 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём), Thomas Gleixner + */ + +/* + * UBI wear-leveling sub-system. + * + * This sub-system is responsible for wear-leveling. It works in terms of + * physical eraseblocks and erase counters and knows nothing about logical + * eraseblocks, volumes, etc. From this sub-system's perspective all physical + * eraseblocks are of two types - used and free. Used physical eraseblocks are + * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical + * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function. + * + * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter + * header. The rest of the physical eraseblock contains only %0xFF bytes. + * + * When physical eraseblocks are returned to the WL sub-system by means of the + * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is + * done asynchronously in context of the per-UBI device background thread, + * which is also managed by the WL sub-system. + * + * The wear-leveling is ensured by means of moving the contents of used + * physical eraseblocks with low erase counter to free physical eraseblocks + * with high erase counter. + * + * If the WL sub-system fails to erase a physical eraseblock, it marks it as + * bad. + * + * This sub-system is also responsible for scrubbing. If a bit-flip is detected + * in a physical eraseblock, it has to be moved. Technically this is the same + * as moving it for wear-leveling reasons. + * + * As it was said, for the UBI sub-system all physical eraseblocks are either + * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while + * used eraseblocks are kept in @wl->used, @wl->erroneous, or @wl->scrub + * RB-trees, as well as (temporarily) in the @wl->pq queue. + * + * When the WL sub-system returns a physical eraseblock, the physical + * eraseblock is protected from being moved for some "time". For this reason, + * the physical eraseblock is not directly moved from the @wl->free tree to the + * @wl->used tree. There is a protection queue in between where this + * physical eraseblock is temporarily stored (@wl->pq). + * + * All this protection stuff is needed because: + * o we don't want to move physical eraseblocks just after we have given them + * to the user; instead, we first want to let users fill them up with data; + * + * o there is a chance that the user will put the physical eraseblock very + * soon, so it makes sense not to move it for some time, but wait. + * + * Physical eraseblocks stay protected only for limited time. But the "time" is + * measured in erase cycles in this case. This is implemented with help of the + * protection queue. Eraseblocks are put to the tail of this queue when they + * are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the + * head of the queue on each erase operation (for any eraseblock). So the + * length of the queue defines how may (global) erase cycles PEBs are protected. + * + * To put it differently, each physical eraseblock has 2 main states: free and + * used. The former state corresponds to the @wl->free tree. The latter state + * is split up on several sub-states: + * o the WL movement is allowed (@wl->used tree); + * o the WL movement is disallowed (@wl->erroneous) because the PEB is + * erroneous - e.g., there was a read error; + * o the WL movement is temporarily prohibited (@wl->pq queue); + * o scrubbing is needed (@wl->scrub tree). + * + * Depending on the sub-state, wear-leveling entries of the used physical + * eraseblocks may be kept in one of those structures. + * + * Note, in this implementation, we keep a small in-RAM object for each physical + * eraseblock. This is surely not a scalable solution. But it appears to be good + * enough for moderately large flashes and it is simple. In future, one may + * re-work this sub-system and make it more scalable. + * + * At the moment this sub-system does not utilize the sequence number, which + * was introduced relatively recently. But it would be wise to do this because + * the sequence number of a logical eraseblock characterizes how old is it. For + * example, when we move a PEB with low erase counter, and we need to pick the + * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we + * pick target PEB with an average EC if our PEB is not very "old". This is a + * room for future re-works of the WL sub-system. + */ + +#include <linux/slab.h> +#include <linux/crc32.h> +#include <linux/freezer.h> +#include <linux/kthread.h> +#include "ubi.h" +#include "wl.h" + +/* Number of physical eraseblocks reserved for wear-leveling purposes */ +#define WL_RESERVED_PEBS 1 + +/* + * Maximum difference between two erase counters. If this threshold is + * exceeded, the WL sub-system starts moving data from used physical + * eraseblocks with low erase counter to free physical eraseblocks with high + * erase counter. + */ +#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD + +/* + * When a physical eraseblock is moved, the WL sub-system has to pick the target + * physical eraseblock to move to. The simplest way would be just to pick the + * one with the highest erase counter. But in certain workloads this could lead + * to an unlimited wear of one or few physical eraseblock. Indeed, imagine a + * situation when the picked physical eraseblock is constantly erased after the + * data is written to it. So, we have a constant which limits the highest erase + * counter of the free physical eraseblock to pick. Namely, the WL sub-system + * does not pick eraseblocks with erase counter greater than the lowest erase + * counter plus %WL_FREE_MAX_DIFF. + */ +#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) + +/* + * Maximum number of consecutive background thread failures which is enough to + * switch to read-only mode. + */ +#define WL_MAX_FAILURES 32 + +static int self_check_ec(struct ubi_device *ubi, int pnum, int ec); +static int self_check_in_wl_tree(const struct ubi_device *ubi, + struct ubi_wl_entry *e, struct rb_root *root); +static int self_check_in_pq(const struct ubi_device *ubi, + struct ubi_wl_entry *e); + +/** + * wl_tree_add - add a wear-leveling entry to a WL RB-tree. + * @e: the wear-leveling entry to add + * @root: the root of the tree + * + * Note, we use (erase counter, physical eraseblock number) pairs as keys in + * the @ubi->used and @ubi->free RB-trees. + */ +static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root) +{ + struct rb_node **p, *parent = NULL; + + p = &root->rb_node; + while (*p) { + struct ubi_wl_entry *e1; + + parent = *p; + e1 = rb_entry(parent, struct ubi_wl_entry, u.rb); + + if (e->ec < e1->ec) + p = &(*p)->rb_left; + else if (e->ec > e1->ec) + p = &(*p)->rb_right; + else { + ubi_assert(e->pnum != e1->pnum); + if (e->pnum < e1->pnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + } + + rb_link_node(&e->u.rb, parent, p); + rb_insert_color(&e->u.rb, root); +} + +/** + * wl_tree_destroy - destroy a wear-leveling entry. + * @ubi: UBI device description object + * @e: the wear-leveling entry to add + * + * This function destroys a wear leveling entry and removes + * the reference from the lookup table. + */ +static void wl_entry_destroy(struct ubi_device *ubi, struct ubi_wl_entry *e) +{ + ubi->lookuptbl[e->pnum] = NULL; + kmem_cache_free(ubi_wl_entry_slab, e); +} + +/** + * do_work - do one pending work. + * @ubi: UBI device description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int do_work(struct ubi_device *ubi) +{ + int err; + struct ubi_work *wrk; + + cond_resched(); + + /* + * @ubi->work_sem is used to synchronize with the workers. Workers take + * it in read mode, so many of them may be doing works at a time. But + * the queue flush code has to be sure the whole queue of works is + * done, and it takes the mutex in write mode. + */ + down_read(&ubi->work_sem); + spin_lock(&ubi->wl_lock); + if (list_empty(&ubi->works)) { + spin_unlock(&ubi->wl_lock); + up_read(&ubi->work_sem); + return 0; + } + + wrk = list_entry(ubi->works.next, struct ubi_work, list); + list_del(&wrk->list); + ubi->works_count -= 1; + ubi_assert(ubi->works_count >= 0); + spin_unlock(&ubi->wl_lock); + + /* + * Call the worker function. Do not touch the work structure + * after this call as it will have been freed or reused by that + * time by the worker function. + */ + err = wrk->func(ubi, wrk, 0); + if (err) + ubi_err(ubi, "work failed with error code %d", err); + up_read(&ubi->work_sem); + + return err; +} + +/** + * in_wl_tree - check if wear-leveling entry is present in a WL RB-tree. + * @e: the wear-leveling entry to check + * @root: the root of the tree + * + * This function returns non-zero if @e is in the @root RB-tree and zero if it + * is not. + */ +static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root) +{ + struct rb_node *p; + + p = root->rb_node; + while (p) { + struct ubi_wl_entry *e1; + + e1 = rb_entry(p, struct ubi_wl_entry, u.rb); + + if (e->pnum == e1->pnum) { + ubi_assert(e == e1); + return 1; + } + + if (e->ec < e1->ec) + p = p->rb_left; + else if (e->ec > e1->ec) + p = p->rb_right; + else { + ubi_assert(e->pnum != e1->pnum); + if (e->pnum < e1->pnum) + p = p->rb_left; + else + p = p->rb_right; + } + } + + return 0; +} + +/** + * prot_queue_add - add physical eraseblock to the protection queue. + * @ubi: UBI device description object + * @e: the physical eraseblock to add + * + * This function adds @e to the tail of the protection queue @ubi->pq, where + * @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be + * temporarily protected from the wear-leveling worker. Note, @wl->lock has to + * be locked. + */ +static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e) +{ + int pq_tail = ubi->pq_head - 1; + + if (pq_tail < 0) + pq_tail = UBI_PROT_QUEUE_LEN - 1; + ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN); + list_add_tail(&e->u.list, &ubi->pq[pq_tail]); + dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec); +} + +/** + * find_wl_entry - find wear-leveling entry closest to certain erase counter. + * @ubi: UBI device description object + * @root: the RB-tree where to look for + * @diff: maximum possible difference from the smallest erase counter + * + * This function looks for a wear leveling entry with erase counter closest to + * min + @diff, where min is the smallest erase counter. + */ +static struct ubi_wl_entry *find_wl_entry(struct ubi_device *ubi, + struct rb_root *root, int diff) +{ + struct rb_node *p; + struct ubi_wl_entry *e, *prev_e = NULL; + int max; + + e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); + max = e->ec + diff; + + p = root->rb_node; + while (p) { + struct ubi_wl_entry *e1; + + e1 = rb_entry(p, struct ubi_wl_entry, u.rb); + if (e1->ec >= max) + p = p->rb_left; + else { + p = p->rb_right; + prev_e = e; + e = e1; + } + } + + /* If no fastmap has been written and this WL entry can be used + * as anchor PEB, hold it back and return the second best WL entry + * such that fastmap can use the anchor PEB later. */ + if (prev_e && !ubi->fm_disabled && + !ubi->fm && e->pnum < UBI_FM_MAX_START) + return prev_e; + + return e; +} + +/** + * find_mean_wl_entry - find wear-leveling entry with medium erase counter. + * @ubi: UBI device description object + * @root: the RB-tree where to look for + * + * This function looks for a wear leveling entry with medium erase counter, + * but not greater or equivalent than the lowest erase counter plus + * %WL_FREE_MAX_DIFF/2. + */ +static struct ubi_wl_entry *find_mean_wl_entry(struct ubi_device *ubi, + struct rb_root *root) +{ + struct ubi_wl_entry *e, *first, *last; + + first = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); + last = rb_entry(rb_last(root), struct ubi_wl_entry, u.rb); + + if (last->ec - first->ec < WL_FREE_MAX_DIFF) { + e = rb_entry(root->rb_node, struct ubi_wl_entry, u.rb); + + /* If no fastmap has been written and this WL entry can be used + * as anchor PEB, hold it back and return the second best + * WL entry such that fastmap can use the anchor PEB later. */ + e = may_reserve_for_fm(ubi, e, root); + } else + e = find_wl_entry(ubi, root, WL_FREE_MAX_DIFF/2); + + return e; +} + +/** + * wl_get_wle - get a mean wl entry to be used by ubi_wl_get_peb() or + * refill_wl_user_pool(). + * @ubi: UBI device description object + * + * This function returns a a wear leveling entry in case of success and + * NULL in case of failure. + */ +static struct ubi_wl_entry *wl_get_wle(struct ubi_device *ubi) +{ + struct ubi_wl_entry *e; + + e = find_mean_wl_entry(ubi, &ubi->free); + if (!e) { + ubi_err(ubi, "no free eraseblocks"); + return NULL; + } + + self_check_in_wl_tree(ubi, e, &ubi->free); + + /* + * Move the physical eraseblock to the protection queue where it will + * be protected from being moved for some time. + */ + rb_erase(&e->u.rb, &ubi->free); + ubi->free_count--; + dbg_wl("PEB %d EC %d", e->pnum, e->ec); + + return e; +} + +/** + * prot_queue_del - remove a physical eraseblock from the protection queue. + * @ubi: UBI device description object + * @pnum: the physical eraseblock to remove + * + * This function deletes PEB @pnum from the protection queue and returns zero + * in case of success and %-ENODEV if the PEB was not found. + */ +static int prot_queue_del(struct ubi_device *ubi, int pnum) +{ + struct ubi_wl_entry *e; + + e = ubi->lookuptbl[pnum]; + if (!e) + return -ENODEV; + + if (self_check_in_pq(ubi, e)) + return -ENODEV; + + list_del(&e->u.list); + dbg_wl("deleted PEB %d from the protection queue", e->pnum); + return 0; +} + +/** + * sync_erase - synchronously erase a physical eraseblock. + * @ubi: UBI device description object + * @e: the the physical eraseblock to erase + * @torture: if the physical eraseblock has to be tortured + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, + int torture) +{ + int err; + struct ubi_ec_hdr *ec_hdr; + unsigned long long ec = e->ec; + + dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec); + + err = self_check_ec(ubi, e->pnum, e->ec); + if (err) + return -EINVAL; + + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); + if (!ec_hdr) + return -ENOMEM; + + err = ubi_io_sync_erase(ubi, e->pnum, torture); + if (err < 0) + goto out_free; + + ec += err; + if (ec > UBI_MAX_ERASECOUNTER) { + /* + * Erase counter overflow. Upgrade UBI and use 64-bit + * erase counters internally. + */ + ubi_err(ubi, "erase counter overflow at PEB %d, EC %llu", + e->pnum, ec); + err = -EINVAL; + goto out_free; + } + + dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec); + + ec_hdr->ec = cpu_to_be64(ec); + + err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr); + if (err) + goto out_free; + + e->ec = ec; + spin_lock(&ubi->wl_lock); + if (e->ec > ubi->max_ec) + ubi->max_ec = e->ec; + spin_unlock(&ubi->wl_lock); + +out_free: + kfree(ec_hdr); + return err; +} + +/** + * serve_prot_queue - check if it is time to stop protecting PEBs. + * @ubi: UBI device description object + * + * This function is called after each erase operation and removes PEBs from the + * tail of the protection queue. These PEBs have been protected for long enough + * and should be moved to the used tree. + */ +static void serve_prot_queue(struct ubi_device *ubi) +{ + struct ubi_wl_entry *e, *tmp; + int count; + + /* + * There may be several protected physical eraseblock to remove, + * process them all. + */ +repeat: + count = 0; + spin_lock(&ubi->wl_lock); + list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) { + dbg_wl("PEB %d EC %d protection over, move to used tree", + e->pnum, e->ec); + + list_del(&e->u.list); + wl_tree_add(e, &ubi->used); + if (count++ > 32) { + /* + * Let's be nice and avoid holding the spinlock for + * too long. + */ + spin_unlock(&ubi->wl_lock); + cond_resched(); + goto repeat; + } + } + + ubi->pq_head += 1; + if (ubi->pq_head == UBI_PROT_QUEUE_LEN) + ubi->pq_head = 0; + ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN); + spin_unlock(&ubi->wl_lock); +} + +/** + * __schedule_ubi_work - schedule a work. + * @ubi: UBI device description object + * @wrk: the work to schedule + * + * This function adds a work defined by @wrk to the tail of the pending works + * list. Can only be used if ubi->work_sem is already held in read mode! + */ +static void __schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) +{ + spin_lock(&ubi->wl_lock); + list_add_tail(&wrk->list, &ubi->works); + ubi_assert(ubi->works_count >= 0); + ubi->works_count += 1; + if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled(ubi)) + wake_up_process(ubi->bgt_thread); + spin_unlock(&ubi->wl_lock); +} + +/** + * schedule_ubi_work - schedule a work. + * @ubi: UBI device description object + * @wrk: the work to schedule + * + * This function adds a work defined by @wrk to the tail of the pending works + * list. + */ +static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) +{ + down_read(&ubi->work_sem); + __schedule_ubi_work(ubi, wrk); + up_read(&ubi->work_sem); +} + +static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, + int shutdown); + +/** + * schedule_erase - schedule an erase work. + * @ubi: UBI device description object + * @e: the WL entry of the physical eraseblock to erase + * @vol_id: the volume ID that last used this PEB + * @lnum: the last used logical eraseblock number for the PEB + * @torture: if the physical eraseblock has to be tortured + * + * This function returns zero in case of success and a %-ENOMEM in case of + * failure. + */ +static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, + int vol_id, int lnum, int torture) +{ + struct ubi_work *wl_wrk; + + ubi_assert(e); + + dbg_wl("schedule erasure of PEB %d, EC %d, torture %d", + e->pnum, e->ec, torture); + + wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS); + if (!wl_wrk) + return -ENOMEM; + + wl_wrk->func = &erase_worker; + wl_wrk->e = e; + wl_wrk->vol_id = vol_id; + wl_wrk->lnum = lnum; + wl_wrk->torture = torture; + + schedule_ubi_work(ubi, wl_wrk); + return 0; +} + +/** + * do_sync_erase - run the erase worker synchronously. + * @ubi: UBI device description object + * @e: the WL entry of the physical eraseblock to erase + * @vol_id: the volume ID that last used this PEB + * @lnum: the last used logical eraseblock number for the PEB + * @torture: if the physical eraseblock has to be tortured + * + */ +static int do_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, + int vol_id, int lnum, int torture) +{ + struct ubi_work *wl_wrk; + + dbg_wl("sync erase of PEB %i", e->pnum); + + wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS); + if (!wl_wrk) + return -ENOMEM; + + wl_wrk->e = e; + wl_wrk->vol_id = vol_id; + wl_wrk->lnum = lnum; + wl_wrk->torture = torture; + + return erase_worker(ubi, wl_wrk, 0); +} + +/** + * wear_leveling_worker - wear-leveling worker function. + * @ubi: UBI device description object + * @wrk: the work object + * @shutdown: non-zero if the worker has to free memory and exit + * because the WL-subsystem is shutting down + * + * This function copies a more worn out physical eraseblock to a less worn out + * one. Returns zero in case of success and a negative error code in case of + * failure. + */ +static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, + int shutdown) +{ + int err, scrubbing = 0, torture = 0, protect = 0, erroneous = 0; + int vol_id = -1, lnum = -1; +#ifdef CONFIG_MTD_UBI_FASTMAP + int anchor = wrk->anchor; +#endif + struct ubi_wl_entry *e1, *e2; + struct ubi_vid_hdr *vid_hdr; + + kfree(wrk); + if (shutdown) + return 0; + + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); + if (!vid_hdr) + return -ENOMEM; + + mutex_lock(&ubi->move_mutex); + spin_lock(&ubi->wl_lock); + ubi_assert(!ubi->move_from && !ubi->move_to); + ubi_assert(!ubi->move_to_put); + + if (!ubi->free.rb_node || + (!ubi->used.rb_node && !ubi->scrub.rb_node)) { + /* + * No free physical eraseblocks? Well, they must be waiting in + * the queue to be erased. Cancel movement - it will be + * triggered again when a free physical eraseblock appears. + * + * No used physical eraseblocks? They must be temporarily + * protected from being moved. They will be moved to the + * @ubi->used tree later and the wear-leveling will be + * triggered again. + */ + dbg_wl("cancel WL, a list is empty: free %d, used %d", + !ubi->free.rb_node, !ubi->used.rb_node); + goto out_cancel; + } + +#ifdef CONFIG_MTD_UBI_FASTMAP + /* Check whether we need to produce an anchor PEB */ + if (!anchor) + anchor = !anchor_pebs_avalible(&ubi->free); + + if (anchor) { + e1 = find_anchor_wl_entry(&ubi->used); + if (!e1) + goto out_cancel; + e2 = get_peb_for_wl(ubi); + if (!e2) + goto out_cancel; + + self_check_in_wl_tree(ubi, e1, &ubi->used); + rb_erase(&e1->u.rb, &ubi->used); + dbg_wl("anchor-move PEB %d to PEB %d", e1->pnum, e2->pnum); + } else if (!ubi->scrub.rb_node) { +#else + if (!ubi->scrub.rb_node) { +#endif + /* + * Now pick the least worn-out used physical eraseblock and a + * highly worn-out free physical eraseblock. If the erase + * counters differ much enough, start wear-leveling. + */ + e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); + e2 = get_peb_for_wl(ubi); + if (!e2) + goto out_cancel; + + if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) { + dbg_wl("no WL needed: min used EC %d, max free EC %d", + e1->ec, e2->ec); + + /* Give the unused PEB back */ + wl_tree_add(e2, &ubi->free); + ubi->free_count++; + goto out_cancel; + } + self_check_in_wl_tree(ubi, e1, &ubi->used); + rb_erase(&e1->u.rb, &ubi->used); + dbg_wl("move PEB %d EC %d to PEB %d EC %d", + e1->pnum, e1->ec, e2->pnum, e2->ec); + } else { + /* Perform scrubbing */ + scrubbing = 1; + e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb); + e2 = get_peb_for_wl(ubi); + if (!e2) + goto out_cancel; + + self_check_in_wl_tree(ubi, e1, &ubi->scrub); + rb_erase(&e1->u.rb, &ubi->scrub); + dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum); + } + + ubi->move_from = e1; + ubi->move_to = e2; + spin_unlock(&ubi->wl_lock); + + /* + * Now we are going to copy physical eraseblock @e1->pnum to @e2->pnum. + * We so far do not know which logical eraseblock our physical + * eraseblock (@e1) belongs to. We have to read the volume identifier + * header first. + * + * Note, we are protected from this PEB being unmapped and erased. The + * 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB + * which is being moved was unmapped. + */ + + err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0); + if (err && err != UBI_IO_BITFLIPS) { + if (err == UBI_IO_FF) { + /* + * We are trying to move PEB without a VID header. UBI + * always write VID headers shortly after the PEB was + * given, so we have a situation when it has not yet + * had a chance to write it, because it was preempted. + * So add this PEB to the protection queue so far, + * because presumably more data will be written there + * (including the missing VID header), and then we'll + * move it. + */ + dbg_wl("PEB %d has no VID header", e1->pnum); + protect = 1; + goto out_not_moved; + } else if (err == UBI_IO_FF_BITFLIPS) { + /* + * The same situation as %UBI_IO_FF, but bit-flips were + * detected. It is better to schedule this PEB for + * scrubbing. + */ + dbg_wl("PEB %d has no VID header but has bit-flips", + e1->pnum); + scrubbing = 1; + goto out_not_moved; + } + + ubi_err(ubi, "error %d while reading VID header from PEB %d", + err, e1->pnum); + goto out_error; + } + + vol_id = be32_to_cpu(vid_hdr->vol_id); + lnum = be32_to_cpu(vid_hdr->lnum); + + err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr); + if (err) { + if (err == MOVE_CANCEL_RACE) { + /* + * The LEB has not been moved because the volume is + * being deleted or the PEB has been put meanwhile. We + * should prevent this PEB from being selected for + * wear-leveling movement again, so put it to the + * protection queue. + */ + protect = 1; + goto out_not_moved; + } + if (err == MOVE_RETRY) { + scrubbing = 1; + goto out_not_moved; + } + if (err == MOVE_TARGET_BITFLIPS || err == MOVE_TARGET_WR_ERR || + err == MOVE_TARGET_RD_ERR) { + /* + * Target PEB had bit-flips or write error - torture it. + */ + torture = 1; + goto out_not_moved; + } + + if (err == MOVE_SOURCE_RD_ERR) { + /* + * An error happened while reading the source PEB. Do + * not switch to R/O mode in this case, and give the + * upper layers a possibility to recover from this, + * e.g. by unmapping corresponding LEB. Instead, just + * put this PEB to the @ubi->erroneous list to prevent + * UBI from trying to move it over and over again. + */ + if (ubi->erroneous_peb_count > ubi->max_erroneous) { + ubi_err(ubi, "too many erroneous eraseblocks (%d)", + ubi->erroneous_peb_count); + goto out_error; + } + erroneous = 1; + goto out_not_moved; + } + + if (err < 0) + goto out_error; + + ubi_assert(0); + } + + /* The PEB has been successfully moved */ + if (scrubbing) + ubi_msg(ubi, "scrubbed PEB %d (LEB %d:%d), data moved to PEB %d", + e1->pnum, vol_id, lnum, e2->pnum); + ubi_free_vid_hdr(ubi, vid_hdr); + + spin_lock(&ubi->wl_lock); + if (!ubi->move_to_put) { + wl_tree_add(e2, &ubi->used); + e2 = NULL; + } + ubi->move_from = ubi->move_to = NULL; + ubi->move_to_put = ubi->wl_scheduled = 0; + spin_unlock(&ubi->wl_lock); + + err = do_sync_erase(ubi, e1, vol_id, lnum, 0); + if (err) { + if (e2) + wl_entry_destroy(ubi, e2); + goto out_ro; + } + + if (e2) { + /* + * Well, the target PEB was put meanwhile, schedule it for + * erasure. + */ + dbg_wl("PEB %d (LEB %d:%d) was put meanwhile, erase", + e2->pnum, vol_id, lnum); + err = do_sync_erase(ubi, e2, vol_id, lnum, 0); + if (err) + goto out_ro; + } + + dbg_wl("done"); + mutex_unlock(&ubi->move_mutex); + return 0; + + /* + * For some reasons the LEB was not moved, might be an error, might be + * something else. @e1 was not changed, so return it back. @e2 might + * have been changed, schedule it for erasure. + */ +out_not_moved: + if (vol_id != -1) + dbg_wl("cancel moving PEB %d (LEB %d:%d) to PEB %d (%d)", + e1->pnum, vol_id, lnum, e2->pnum, err); + else + dbg_wl("cancel moving PEB %d to PEB %d (%d)", + e1->pnum, e2->pnum, err); + spin_lock(&ubi->wl_lock); + if (protect) + prot_queue_add(ubi, e1); + else if (erroneous) { + wl_tree_add(e1, &ubi->erroneous); + ubi->erroneous_peb_count += 1; + } else if (scrubbing) + wl_tree_add(e1, &ubi->scrub); + else + wl_tree_add(e1, &ubi->used); + ubi_assert(!ubi->move_to_put); + ubi->move_from = ubi->move_to = NULL; + ubi->wl_scheduled = 0; + spin_unlock(&ubi->wl_lock); + + ubi_free_vid_hdr(ubi, vid_hdr); + err = do_sync_erase(ubi, e2, vol_id, lnum, torture); + if (err) + goto out_ro; + + mutex_unlock(&ubi->move_mutex); + return 0; + +out_error: + if (vol_id != -1) + ubi_err(ubi, "error %d while moving PEB %d to PEB %d", + err, e1->pnum, e2->pnum); + else + ubi_err(ubi, "error %d while moving PEB %d (LEB %d:%d) to PEB %d", + err, e1->pnum, vol_id, lnum, e2->pnum); + spin_lock(&ubi->wl_lock); + ubi->move_from = ubi->move_to = NULL; + ubi->move_to_put = ubi->wl_scheduled = 0; + spin_unlock(&ubi->wl_lock); + + ubi_free_vid_hdr(ubi, vid_hdr); + wl_entry_destroy(ubi, e1); + wl_entry_destroy(ubi, e2); + +out_ro: + ubi_ro_mode(ubi); + mutex_unlock(&ubi->move_mutex); + ubi_assert(err != 0); + return err < 0 ? err : -EIO; + +out_cancel: + ubi->wl_scheduled = 0; + spin_unlock(&ubi->wl_lock); + mutex_unlock(&ubi->move_mutex); + ubi_free_vid_hdr(ubi, vid_hdr); + return 0; +} + +/** + * ensure_wear_leveling - schedule wear-leveling if it is needed. + * @ubi: UBI device description object + * @nested: set to non-zero if this function is called from UBI worker + * + * This function checks if it is time to start wear-leveling and schedules it + * if yes. This function returns zero in case of success and a negative error + * code in case of failure. + */ +static int ensure_wear_leveling(struct ubi_device *ubi, int nested) +{ + int err = 0; + struct ubi_wl_entry *e1; + struct ubi_wl_entry *e2; + struct ubi_work *wrk; + + spin_lock(&ubi->wl_lock); + if (ubi->wl_scheduled) + /* Wear-leveling is already in the work queue */ + goto out_unlock; + + /* + * If the ubi->scrub tree is not empty, scrubbing is needed, and the + * the WL worker has to be scheduled anyway. + */ + if (!ubi->scrub.rb_node) { + if (!ubi->used.rb_node || !ubi->free.rb_node) + /* No physical eraseblocks - no deal */ + goto out_unlock; + + /* + * We schedule wear-leveling only if the difference between the + * lowest erase counter of used physical eraseblocks and a high + * erase counter of free physical eraseblocks is greater than + * %UBI_WL_THRESHOLD. + */ + e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); + e2 = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); + + if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) + goto out_unlock; + dbg_wl("schedule wear-leveling"); + } else + dbg_wl("schedule scrubbing"); + + ubi->wl_scheduled = 1; + spin_unlock(&ubi->wl_lock); + + wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS); + if (!wrk) { + err = -ENOMEM; + goto out_cancel; + } + + wrk->anchor = 0; + wrk->func = &wear_leveling_worker; + if (nested) + __schedule_ubi_work(ubi, wrk); + else + schedule_ubi_work(ubi, wrk); + return err; + +out_cancel: + spin_lock(&ubi->wl_lock); + ubi->wl_scheduled = 0; +out_unlock: + spin_unlock(&ubi->wl_lock); + return err; +} + +/** + * erase_worker - physical eraseblock erase worker function. + * @ubi: UBI device description object + * @wl_wrk: the work object + * @shutdown: non-zero if the worker has to free memory and exit + * because the WL sub-system is shutting down + * + * This function erases a physical eraseblock and perform torture testing if + * needed. It also takes care about marking the physical eraseblock bad if + * needed. Returns zero in case of success and a negative error code in case of + * failure. + */ +static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, + int shutdown) +{ + struct ubi_wl_entry *e = wl_wrk->e; + int pnum = e->pnum; + int vol_id = wl_wrk->vol_id; + int lnum = wl_wrk->lnum; + int err, available_consumed = 0; + + if (shutdown) { + dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec); + kfree(wl_wrk); + wl_entry_destroy(ubi, e); + return 0; + } + + dbg_wl("erase PEB %d EC %d LEB %d:%d", + pnum, e->ec, wl_wrk->vol_id, wl_wrk->lnum); + + err = sync_erase(ubi, e, wl_wrk->torture); + if (!err) { + /* Fine, we've erased it successfully */ + kfree(wl_wrk); + + spin_lock(&ubi->wl_lock); + wl_tree_add(e, &ubi->free); + ubi->free_count++; + spin_unlock(&ubi->wl_lock); + + /* + * One more erase operation has happened, take care about + * protected physical eraseblocks. + */ + serve_prot_queue(ubi); + + /* And take care about wear-leveling */ + err = ensure_wear_leveling(ubi, 1); + return err; + } + + ubi_err(ubi, "failed to erase PEB %d, error %d", pnum, err); + kfree(wl_wrk); + + if (err == -EINTR || err == -ENOMEM || err == -EAGAIN || + err == -EBUSY) { + int err1; + + /* Re-schedule the LEB for erasure */ + err1 = schedule_erase(ubi, e, vol_id, lnum, 0); + if (err1) { + err = err1; + goto out_ro; + } + return err; + } + + wl_entry_destroy(ubi, e); + if (err != -EIO) + /* + * If this is not %-EIO, we have no idea what to do. Scheduling + * this physical eraseblock for erasure again would cause + * errors again and again. Well, lets switch to R/O mode. + */ + goto out_ro; + + /* It is %-EIO, the PEB went bad */ + + if (!ubi->bad_allowed) { + ubi_err(ubi, "bad physical eraseblock %d detected", pnum); + goto out_ro; + } + + spin_lock(&ubi->volumes_lock); + if (ubi->beb_rsvd_pebs == 0) { + if (ubi->avail_pebs == 0) { + spin_unlock(&ubi->volumes_lock); + ubi_err(ubi, "no reserved/available physical eraseblocks"); + goto out_ro; + } + ubi->avail_pebs -= 1; + available_consumed = 1; + } + spin_unlock(&ubi->volumes_lock); + + ubi_msg(ubi, "mark PEB %d as bad", pnum); + err = ubi_io_mark_bad(ubi, pnum); + if (err) + goto out_ro; + + spin_lock(&ubi->volumes_lock); + if (ubi->beb_rsvd_pebs > 0) { + if (available_consumed) { + /* + * The amount of reserved PEBs increased since we last + * checked. + */ + ubi->avail_pebs += 1; + available_consumed = 0; + } + ubi->beb_rsvd_pebs -= 1; + } + ubi->bad_peb_count += 1; + ubi->good_peb_count -= 1; + ubi_calculate_reserved(ubi); + if (available_consumed) + ubi_warn(ubi, "no PEBs in the reserved pool, used an available PEB"); + else if (ubi->beb_rsvd_pebs) + ubi_msg(ubi, "%d PEBs left in the reserve", + ubi->beb_rsvd_pebs); + else + ubi_warn(ubi, "last PEB from the reserve was used"); + spin_unlock(&ubi->volumes_lock); + + return err; + +out_ro: + if (available_consumed) { + spin_lock(&ubi->volumes_lock); + ubi->avail_pebs += 1; + spin_unlock(&ubi->volumes_lock); + } + ubi_ro_mode(ubi); + return err; +} + +/** + * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system. + * @ubi: UBI device description object + * @vol_id: the volume ID that last used this PEB + * @lnum: the last used logical eraseblock number for the PEB + * @pnum: physical eraseblock to return + * @torture: if this physical eraseblock has to be tortured + * + * This function is called to return physical eraseblock @pnum to the pool of + * free physical eraseblocks. The @torture flag has to be set if an I/O error + * occurred to this @pnum and it has to be tested. This function returns zero + * in case of success, and a negative error code in case of failure. + */ +int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum, + int pnum, int torture) +{ + int err; + struct ubi_wl_entry *e; + + dbg_wl("PEB %d", pnum); + ubi_assert(pnum >= 0); + ubi_assert(pnum < ubi->peb_count); + + down_read(&ubi->fm_protect); + +retry: + spin_lock(&ubi->wl_lock); + e = ubi->lookuptbl[pnum]; + if (e == ubi->move_from) { + /* + * User is putting the physical eraseblock which was selected to + * be moved. It will be scheduled for erasure in the + * wear-leveling worker. + */ + dbg_wl("PEB %d is being moved, wait", pnum); + spin_unlock(&ubi->wl_lock); + + /* Wait for the WL worker by taking the @ubi->move_mutex */ + mutex_lock(&ubi->move_mutex); + mutex_unlock(&ubi->move_mutex); + goto retry; + } else if (e == ubi->move_to) { + /* + * User is putting the physical eraseblock which was selected + * as the target the data is moved to. It may happen if the EBA + * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()' + * but the WL sub-system has not put the PEB to the "used" tree + * yet, but it is about to do this. So we just set a flag which + * will tell the WL worker that the PEB is not needed anymore + * and should be scheduled for erasure. + */ + dbg_wl("PEB %d is the target of data moving", pnum); + ubi_assert(!ubi->move_to_put); + ubi->move_to_put = 1; + spin_unlock(&ubi->wl_lock); + up_read(&ubi->fm_protect); + return 0; + } else { + if (in_wl_tree(e, &ubi->used)) { + self_check_in_wl_tree(ubi, e, &ubi->used); + rb_erase(&e->u.rb, &ubi->used); + } else if (in_wl_tree(e, &ubi->scrub)) { + self_check_in_wl_tree(ubi, e, &ubi->scrub); + rb_erase(&e->u.rb, &ubi->scrub); + } else if (in_wl_tree(e, &ubi->erroneous)) { + self_check_in_wl_tree(ubi, e, &ubi->erroneous); + rb_erase(&e->u.rb, &ubi->erroneous); + ubi->erroneous_peb_count -= 1; + ubi_assert(ubi->erroneous_peb_count >= 0); + /* Erroneous PEBs should be tortured */ + torture = 1; + } else { + err = prot_queue_del(ubi, e->pnum); + if (err) { + ubi_err(ubi, "PEB %d not found", pnum); + ubi_ro_mode(ubi); + spin_unlock(&ubi->wl_lock); + up_read(&ubi->fm_protect); + return err; + } + } + } + spin_unlock(&ubi->wl_lock); + + err = schedule_erase(ubi, e, vol_id, lnum, torture); + if (err) { + spin_lock(&ubi->wl_lock); + wl_tree_add(e, &ubi->used); + spin_unlock(&ubi->wl_lock); + } + + up_read(&ubi->fm_protect); + return err; +} + +/** + * ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing. + * @ubi: UBI device description object + * @pnum: the physical eraseblock to schedule + * + * If a bit-flip in a physical eraseblock is detected, this physical eraseblock + * needs scrubbing. This function schedules a physical eraseblock for + * scrubbing which is done in background. This function returns zero in case of + * success and a negative error code in case of failure. + */ +int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum) +{ + struct ubi_wl_entry *e; + + ubi_msg(ubi, "schedule PEB %d for scrubbing", pnum); + +retry: + spin_lock(&ubi->wl_lock); + e = ubi->lookuptbl[pnum]; + if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub) || + in_wl_tree(e, &ubi->erroneous)) { + spin_unlock(&ubi->wl_lock); + return 0; + } + + if (e == ubi->move_to) { + /* + * This physical eraseblock was used to move data to. The data + * was moved but the PEB was not yet inserted to the proper + * tree. We should just wait a little and let the WL worker + * proceed. + */ + spin_unlock(&ubi->wl_lock); + dbg_wl("the PEB %d is not in proper tree, retry", pnum); + yield(); + goto retry; + } + + if (in_wl_tree(e, &ubi->used)) { + self_check_in_wl_tree(ubi, e, &ubi->used); + rb_erase(&e->u.rb, &ubi->used); + } else { + int err; + + err = prot_queue_del(ubi, e->pnum); + if (err) { + ubi_err(ubi, "PEB %d not found", pnum); + ubi_ro_mode(ubi); + spin_unlock(&ubi->wl_lock); + return err; + } + } + + wl_tree_add(e, &ubi->scrub); + spin_unlock(&ubi->wl_lock); + + /* + * Technically scrubbing is the same as wear-leveling, so it is done + * by the WL worker. + */ + return ensure_wear_leveling(ubi, 0); +} + +/** + * ubi_wl_flush - flush all pending works. + * @ubi: UBI device description object + * @vol_id: the volume id to flush for + * @lnum: the logical eraseblock number to flush for + * + * This function executes all pending works for a particular volume id / + * logical eraseblock number pair. If either value is set to %UBI_ALL, then it + * acts as a wildcard for all of the corresponding volume numbers or logical + * eraseblock numbers. It returns zero in case of success and a negative error + * code in case of failure. + */ +int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum) +{ + int err = 0; + int found = 1; + + /* + * Erase while the pending works queue is not empty, but not more than + * the number of currently pending works. + */ + dbg_wl("flush pending work for LEB %d:%d (%d pending works)", + vol_id, lnum, ubi->works_count); + + while (found) { + struct ubi_work *wrk, *tmp; + found = 0; + + down_read(&ubi->work_sem); + spin_lock(&ubi->wl_lock); + list_for_each_entry_safe(wrk, tmp, &ubi->works, list) { + if ((vol_id == UBI_ALL || wrk->vol_id == vol_id) && + (lnum == UBI_ALL || wrk->lnum == lnum)) { + list_del(&wrk->list); + ubi->works_count -= 1; + ubi_assert(ubi->works_count >= 0); + spin_unlock(&ubi->wl_lock); + + err = wrk->func(ubi, wrk, 0); + if (err) { + up_read(&ubi->work_sem); + return err; + } + + spin_lock(&ubi->wl_lock); + found = 1; + break; + } + } + spin_unlock(&ubi->wl_lock); + up_read(&ubi->work_sem); + } + + /* + * Make sure all the works which have been done in parallel are + * finished. + */ + down_write(&ubi->work_sem); + up_write(&ubi->work_sem); + + return err; +} + +/** + * tree_destroy - destroy an RB-tree. + * @ubi: UBI device description object + * @root: the root of the tree to destroy + */ +static void tree_destroy(struct ubi_device *ubi, struct rb_root *root) +{ + struct rb_node *rb; + struct ubi_wl_entry *e; + + rb = root->rb_node; + while (rb) { + if (rb->rb_left) + rb = rb->rb_left; + else if (rb->rb_right) + rb = rb->rb_right; + else { + e = rb_entry(rb, struct ubi_wl_entry, u.rb); + + rb = rb_parent(rb); + if (rb) { + if (rb->rb_left == &e->u.rb) + rb->rb_left = NULL; + else + rb->rb_right = NULL; + } + + wl_entry_destroy(ubi, e); + } + } +} + +/** + * ubi_thread - UBI background thread. + * @u: the UBI device description object pointer + */ +int ubi_thread(void *u) +{ + int failures = 0; + struct ubi_device *ubi = u; + + ubi_msg(ubi, "background thread \"%s\" started, PID %d", + ubi->bgt_name, task_pid_nr(current)); + + set_freezable(); + for (;;) { + int err; + + if (kthread_should_stop()) + break; + + if (try_to_freeze()) + continue; + + spin_lock(&ubi->wl_lock); + if (list_empty(&ubi->works) || ubi->ro_mode || + !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) { + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock(&ubi->wl_lock); + schedule(); + continue; + } + spin_unlock(&ubi->wl_lock); + + err = do_work(ubi); + if (err) { + ubi_err(ubi, "%s: work failed with error code %d", + ubi->bgt_name, err); + if (failures++ > WL_MAX_FAILURES) { + /* + * Too many failures, disable the thread and + * switch to read-only mode. + */ + ubi_msg(ubi, "%s: %d consecutive failures", + ubi->bgt_name, WL_MAX_FAILURES); + ubi_ro_mode(ubi); + ubi->thread_enabled = 0; + continue; + } + } else + failures = 0; + + cond_resched(); + } + + dbg_wl("background thread \"%s\" is killed", ubi->bgt_name); + return 0; +} + +/** + * shutdown_work - shutdown all pending works. + * @ubi: UBI device description object + */ +static void shutdown_work(struct ubi_device *ubi) +{ +#ifdef CONFIG_MTD_UBI_FASTMAP + flush_work(&ubi->fm_work); +#endif + while (!list_empty(&ubi->works)) { + struct ubi_work *wrk; + + wrk = list_entry(ubi->works.next, struct ubi_work, list); + list_del(&wrk->list); + wrk->func(ubi, wrk, 1); + ubi->works_count -= 1; + ubi_assert(ubi->works_count >= 0); + } +} + +/** + * ubi_wl_init - initialize the WL sub-system using attaching information. + * @ubi: UBI device description object + * @ai: attaching information + * + * This function returns zero in case of success, and a negative error code in + * case of failure. + */ +int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) +{ + int err, i, reserved_pebs, found_pebs = 0; + struct rb_node *rb1, *rb2; + struct ubi_ainf_volume *av; + struct ubi_ainf_peb *aeb, *tmp; + struct ubi_wl_entry *e; + + ubi->used = ubi->erroneous = ubi->free = ubi->scrub = RB_ROOT; + spin_lock_init(&ubi->wl_lock); + mutex_init(&ubi->move_mutex); + init_rwsem(&ubi->work_sem); + ubi->max_ec = ai->max_ec; + INIT_LIST_HEAD(&ubi->works); + + sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num); + + err = -ENOMEM; + ubi->lookuptbl = kzalloc(ubi->peb_count * sizeof(void *), GFP_KERNEL); + if (!ubi->lookuptbl) + return err; + + for (i = 0; i < UBI_PROT_QUEUE_LEN; i++) + INIT_LIST_HEAD(&ubi->pq[i]); + ubi->pq_head = 0; + + list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) { + cond_resched(); + + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + goto out_free; + + e->pnum = aeb->pnum; + e->ec = aeb->ec; + ubi->lookuptbl[e->pnum] = e; + if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) { + wl_entry_destroy(ubi, e); + goto out_free; + } + + found_pebs++; + } + + ubi->free_count = 0; + list_for_each_entry(aeb, &ai->free, u.list) { + cond_resched(); + + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + goto out_free; + + e->pnum = aeb->pnum; + e->ec = aeb->ec; + ubi_assert(e->ec >= 0); + + wl_tree_add(e, &ubi->free); + ubi->free_count++; + + ubi->lookuptbl[e->pnum] = e; + + found_pebs++; + } + + ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) { + ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) { + cond_resched(); + + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + goto out_free; + + e->pnum = aeb->pnum; + e->ec = aeb->ec; + ubi->lookuptbl[e->pnum] = e; + + if (!aeb->scrub) { + dbg_wl("add PEB %d EC %d to the used tree", + e->pnum, e->ec); + wl_tree_add(e, &ubi->used); + } else { + dbg_wl("add PEB %d EC %d to the scrub tree", + e->pnum, e->ec); + wl_tree_add(e, &ubi->scrub); + } + + found_pebs++; + } + } + + dbg_wl("found %i PEBs", found_pebs); + + if (ubi->fm) { + ubi_assert(ubi->good_peb_count == \ + found_pebs + ubi->fm->used_blocks); + + for (i = 0; i < ubi->fm->used_blocks; i++) { + e = ubi->fm->e[i]; + ubi->lookuptbl[e->pnum] = e; + } + } + else + ubi_assert(ubi->good_peb_count == found_pebs); + + reserved_pebs = WL_RESERVED_PEBS; + ubi_fastmap_init(ubi, &reserved_pebs); + + if (ubi->avail_pebs < reserved_pebs) { + ubi_err(ubi, "no enough physical eraseblocks (%d, need %d)", + ubi->avail_pebs, reserved_pebs); + if (ubi->corr_peb_count) + ubi_err(ubi, "%d PEBs are corrupted and not used", + ubi->corr_peb_count); + goto out_free; + } + ubi->avail_pebs -= reserved_pebs; + ubi->rsvd_pebs += reserved_pebs; + + /* Schedule wear-leveling if needed */ + err = ensure_wear_leveling(ubi, 0); + if (err) + goto out_free; + + return 0; + +out_free: + shutdown_work(ubi); + tree_destroy(ubi, &ubi->used); + tree_destroy(ubi, &ubi->free); + tree_destroy(ubi, &ubi->scrub); + kfree(ubi->lookuptbl); + return err; +} + +/** + * protection_queue_destroy - destroy the protection queue. + * @ubi: UBI device description object + */ +static void protection_queue_destroy(struct ubi_device *ubi) +{ + int i; + struct ubi_wl_entry *e, *tmp; + + for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) { + list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) { + list_del(&e->u.list); + wl_entry_destroy(ubi, e); + } + } +} + +/** + * ubi_wl_close - close the wear-leveling sub-system. + * @ubi: UBI device description object + */ +void ubi_wl_close(struct ubi_device *ubi) +{ + dbg_wl("close the WL sub-system"); + ubi_fastmap_close(ubi); + shutdown_work(ubi); + protection_queue_destroy(ubi); + tree_destroy(ubi, &ubi->used); + tree_destroy(ubi, &ubi->erroneous); + tree_destroy(ubi, &ubi->free); + tree_destroy(ubi, &ubi->scrub); + kfree(ubi->lookuptbl); +} + +/** + * self_check_ec - make sure that the erase counter of a PEB is correct. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * @ec: the erase counter to check + * + * This function returns zero if the erase counter of physical eraseblock @pnum + * is equivalent to @ec, and a negative error code if not or if an error + * occurred. + */ +static int self_check_ec(struct ubi_device *ubi, int pnum, int ec) +{ + int err; + long long read_ec; + struct ubi_ec_hdr *ec_hdr; + + if (!ubi_dbg_chk_gen(ubi)) + return 0; + + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); + if (!ec_hdr) + return -ENOMEM; + + err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0); + if (err && err != UBI_IO_BITFLIPS) { + /* The header does not have to exist */ + err = 0; + goto out_free; + } + + read_ec = be64_to_cpu(ec_hdr->ec); + if (ec != read_ec && read_ec - ec > 1) { + ubi_err(ubi, "self-check failed for PEB %d", pnum); + ubi_err(ubi, "read EC is %lld, should be %d", read_ec, ec); + dump_stack(); + err = 1; + } else + err = 0; + +out_free: + kfree(ec_hdr); + return err; +} + +/** + * self_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree. + * @ubi: UBI device description object + * @e: the wear-leveling entry to check + * @root: the root of the tree + * + * This function returns zero if @e is in the @root RB-tree and %-EINVAL if it + * is not. + */ +static int self_check_in_wl_tree(const struct ubi_device *ubi, + struct ubi_wl_entry *e, struct rb_root *root) +{ + if (!ubi_dbg_chk_gen(ubi)) + return 0; + + if (in_wl_tree(e, root)) + return 0; + + ubi_err(ubi, "self-check failed for PEB %d, EC %d, RB-tree %p ", + e->pnum, e->ec, root); + dump_stack(); + return -EINVAL; +} + +/** + * self_check_in_pq - check if wear-leveling entry is in the protection + * queue. + * @ubi: UBI device description object + * @e: the wear-leveling entry to check + * + * This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not. + */ +static int self_check_in_pq(const struct ubi_device *ubi, + struct ubi_wl_entry *e) +{ + struct ubi_wl_entry *p; + int i; + + if (!ubi_dbg_chk_gen(ubi)) + return 0; + + for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) + list_for_each_entry(p, &ubi->pq[i], u.list) + if (p == e) + return 0; + + ubi_err(ubi, "self-check failed for PEB %d, EC %d, Protect queue", + e->pnum, e->ec); + dump_stack(); + return -EINVAL; +} +#ifndef CONFIG_MTD_UBI_FASTMAP +static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi) +{ + struct ubi_wl_entry *e; + + e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); + self_check_in_wl_tree(ubi, e, &ubi->free); + ubi->free_count--; + ubi_assert(ubi->free_count >= 0); + rb_erase(&e->u.rb, &ubi->free); + + return e; +} + +/** + * produce_free_peb - produce a free physical eraseblock. + * @ubi: UBI device description object + * + * This function tries to make a free PEB by means of synchronous execution of + * pending works. This may be needed if, for example the background thread is + * disabled. Returns zero in case of success and a negative error code in case + * of failure. + */ +static int produce_free_peb(struct ubi_device *ubi) +{ + int err; + + while (!ubi->free.rb_node && ubi->works_count) { + spin_unlock(&ubi->wl_lock); + + dbg_wl("do one work synchronously"); + err = do_work(ubi); + + spin_lock(&ubi->wl_lock); + if (err) + return err; + } + + return 0; +} + +/** + * ubi_wl_get_peb - get a physical eraseblock. + * @ubi: UBI device description object + * + * This function returns a physical eraseblock in case of success and a + * negative error code in case of failure. + * Returns with ubi->fm_eba_sem held in read mode! + */ +int ubi_wl_get_peb(struct ubi_device *ubi) +{ + int err; + struct ubi_wl_entry *e; + +retry: + down_read(&ubi->fm_eba_sem); + spin_lock(&ubi->wl_lock); + if (!ubi->free.rb_node) { + if (ubi->works_count == 0) { + ubi_err(ubi, "no free eraseblocks"); + ubi_assert(list_empty(&ubi->works)); + spin_unlock(&ubi->wl_lock); + return -ENOSPC; + } + + err = produce_free_peb(ubi); + if (err < 0) { + spin_unlock(&ubi->wl_lock); + return err; + } + spin_unlock(&ubi->wl_lock); + up_read(&ubi->fm_eba_sem); + goto retry; + + } + e = wl_get_wle(ubi); + prot_queue_add(ubi, e); + spin_unlock(&ubi->wl_lock); + + err = ubi_self_check_all_ff(ubi, e->pnum, ubi->vid_hdr_aloffset, + ubi->peb_size - ubi->vid_hdr_aloffset); + if (err) { + ubi_err(ubi, "new PEB %d does not contain all 0xFF bytes", e->pnum); + return err; + } + + return e->pnum; +} +#else +#include "fastmap-wl.c" +#endif diff --git a/drivers/mtd/ubi/wl.h b/drivers/mtd/ubi/wl.h new file mode 100644 index 000000000..bd1f07e5c --- /dev/null +++ b/drivers/mtd/ubi/wl.h @@ -0,0 +1,28 @@ +#ifndef UBI_WL_H +#define UBI_WL_H +#ifdef CONFIG_MTD_UBI_FASTMAP +static int anchor_pebs_avalible(struct rb_root *root); +static void update_fastmap_work_fn(struct work_struct *wrk); +static struct ubi_wl_entry *find_anchor_wl_entry(struct rb_root *root); +static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi); +static void ubi_fastmap_close(struct ubi_device *ubi); +static inline void ubi_fastmap_init(struct ubi_device *ubi, int *count) +{ + /* Reserve enough LEBs to store two fastmaps. */ + *count += (ubi->fm_size / ubi->leb_size) * 2; + INIT_WORK(&ubi->fm_work, update_fastmap_work_fn); +} +static struct ubi_wl_entry *may_reserve_for_fm(struct ubi_device *ubi, + struct ubi_wl_entry *e, + struct rb_root *root); +#else /* !CONFIG_MTD_UBI_FASTMAP */ +static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi); +static inline void ubi_fastmap_close(struct ubi_device *ubi) { } +static inline void ubi_fastmap_init(struct ubi_device *ubi, int *count) { } +static struct ubi_wl_entry *may_reserve_for_fm(struct ubi_device *ubi, + struct ubi_wl_entry *e, + struct rb_root *root) { + return e; +} +#endif /* CONFIG_MTD_UBI_FASTMAP */ +#endif /* UBI_WL_H */ |