diff options
Diffstat (limited to 'drivers/block/drbd/drbd_actlog.c')
-rw-r--r-- | drivers/block/drbd/drbd_actlog.c | 323 |
1 file changed, 167 insertions, 156 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index b3868e7a1..10459a145 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -288,7 +288,162 @@ bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval * return need_transaction; } -static int al_write_transaction(struct drbd_device *device); +#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT) +/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT + * are still coupled, or assume too much about their relation. + * Code below will not work if this is violated. + * Will be cleaned up with some followup patch. + */ +# error FIXME +#endif + +static unsigned int al_extent_to_bm_page(unsigned int al_enr) +{ + return al_enr >> + /* bit to page */ + ((PAGE_SHIFT + 3) - + /* al extent number to bit */ + (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)); +} + +static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device) +{ + const unsigned int stripes = device->ldev->md.al_stripes; + const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k; + + /* transaction number, modulo on-disk ring buffer wrap around */ + unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k); + + /* ... to aligned 4k on disk block */ + t = ((t % stripes) * stripe_size_4kB) + t/stripes; + + /* ... to 512 byte sector in activity log */ + t *= 8; + + /* ... 
plus offset to the on disk position */ + return device->ldev->md.md_offset + device->ldev->md.al_offset + t; +} + +static int __al_write_transaction(struct drbd_device *device, struct al_transaction_on_disk *buffer) +{ + struct lc_element *e; + sector_t sector; + int i, mx; + unsigned extent_nr; + unsigned crc = 0; + int err = 0; + + memset(buffer, 0, sizeof(*buffer)); + buffer->magic = cpu_to_be32(DRBD_AL_MAGIC); + buffer->tr_number = cpu_to_be32(device->al_tr_number); + + i = 0; + + /* Even though no one can start to change this list + * once we set the LC_LOCKED -- from drbd_al_begin_io(), + * lc_try_lock_for_transaction() --, someone may still + * be in the process of changing it. */ + spin_lock_irq(&device->al_lock); + list_for_each_entry(e, &device->act_log->to_be_changed, list) { + if (i == AL_UPDATES_PER_TRANSACTION) { + i++; + break; + } + buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index); + buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number); + if (e->lc_number != LC_FREE) + drbd_bm_mark_for_writeout(device, + al_extent_to_bm_page(e->lc_number)); + i++; + } + spin_unlock_irq(&device->al_lock); + BUG_ON(i > AL_UPDATES_PER_TRANSACTION); + + buffer->n_updates = cpu_to_be16(i); + for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) { + buffer->update_slot_nr[i] = cpu_to_be16(-1); + buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE); + } + + buffer->context_size = cpu_to_be16(device->act_log->nr_elements); + buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle); + + mx = min_t(int, AL_CONTEXT_PER_TRANSACTION, + device->act_log->nr_elements - device->al_tr_cycle); + for (i = 0; i < mx; i++) { + unsigned idx = device->al_tr_cycle + i; + extent_nr = lc_element_by_index(device->act_log, idx)->lc_number; + buffer->context[i] = cpu_to_be32(extent_nr); + } + for (; i < AL_CONTEXT_PER_TRANSACTION; i++) + buffer->context[i] = cpu_to_be32(LC_FREE); + + device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION; + if (device->al_tr_cycle >= 
device->act_log->nr_elements) + device->al_tr_cycle = 0; + + sector = al_tr_number_to_on_disk_sector(device); + + crc = crc32c(0, buffer, 4096); + buffer->crc32c = cpu_to_be32(crc); + + if (drbd_bm_write_hinted(device)) + err = -EIO; + else { + bool write_al_updates; + rcu_read_lock(); + write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates; + rcu_read_unlock(); + if (write_al_updates) { + if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) { + err = -EIO; + drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR); + } else { + device->al_tr_number++; + device->al_writ_cnt++; + } + } + } + + return err; +} + +static int al_write_transaction(struct drbd_device *device) +{ + struct al_transaction_on_disk *buffer; + int err; + + if (!get_ldev(device)) { + drbd_err(device, "disk is %s, cannot start al transaction\n", + drbd_disk_str(device->state.disk)); + return -EIO; + } + + /* The bitmap write may have failed, causing a state change. */ + if (device->state.disk < D_INCONSISTENT) { + drbd_err(device, + "disk is %s, cannot write al transaction\n", + drbd_disk_str(device->state.disk)); + put_ldev(device); + return -EIO; + } + + /* protects md_io_buffer, al_tr_cycle, ... */ + buffer = drbd_md_get_buffer(device, __func__); + if (!buffer) { + drbd_err(device, "disk failed while waiting for md_io buffer\n"); + put_ldev(device); + return -ENODEV; + } + + err = __al_write_transaction(device, buffer); + + drbd_md_put_buffer(device); + put_ldev(device); + + return err; +} + void drbd_al_begin_io_commit(struct drbd_device *device) { @@ -420,153 +575,6 @@ void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i) wake_up(&device->al_wait); } -#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT) -/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT - * are still coupled, or assume too much about their relation. - * Code below will not work if this is violated. - * Will be cleaned up with some followup patch. 
- */ -# error FIXME -#endif - -static unsigned int al_extent_to_bm_page(unsigned int al_enr) -{ - return al_enr >> - /* bit to page */ - ((PAGE_SHIFT + 3) - - /* al extent number to bit */ - (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)); -} - -static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device) -{ - const unsigned int stripes = device->ldev->md.al_stripes; - const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k; - - /* transaction number, modulo on-disk ring buffer wrap around */ - unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k); - - /* ... to aligned 4k on disk block */ - t = ((t % stripes) * stripe_size_4kB) + t/stripes; - - /* ... to 512 byte sector in activity log */ - t *= 8; - - /* ... plus offset to the on disk position */ - return device->ldev->md.md_offset + device->ldev->md.al_offset + t; -} - -int al_write_transaction(struct drbd_device *device) -{ - struct al_transaction_on_disk *buffer; - struct lc_element *e; - sector_t sector; - int i, mx; - unsigned extent_nr; - unsigned crc = 0; - int err = 0; - - if (!get_ldev(device)) { - drbd_err(device, "disk is %s, cannot start al transaction\n", - drbd_disk_str(device->state.disk)); - return -EIO; - } - - /* The bitmap write may have failed, causing a state change. */ - if (device->state.disk < D_INCONSISTENT) { - drbd_err(device, - "disk is %s, cannot write al transaction\n", - drbd_disk_str(device->state.disk)); - put_ldev(device); - return -EIO; - } - - /* protects md_io_buffer, al_tr_cycle, ... 
*/ - buffer = drbd_md_get_buffer(device, __func__); - if (!buffer) { - drbd_err(device, "disk failed while waiting for md_io buffer\n"); - put_ldev(device); - return -ENODEV; - } - - memset(buffer, 0, sizeof(*buffer)); - buffer->magic = cpu_to_be32(DRBD_AL_MAGIC); - buffer->tr_number = cpu_to_be32(device->al_tr_number); - - i = 0; - - /* Even though no one can start to change this list - * once we set the LC_LOCKED -- from drbd_al_begin_io(), - * lc_try_lock_for_transaction() --, someone may still - * be in the process of changing it. */ - spin_lock_irq(&device->al_lock); - list_for_each_entry(e, &device->act_log->to_be_changed, list) { - if (i == AL_UPDATES_PER_TRANSACTION) { - i++; - break; - } - buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index); - buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number); - if (e->lc_number != LC_FREE) - drbd_bm_mark_for_writeout(device, - al_extent_to_bm_page(e->lc_number)); - i++; - } - spin_unlock_irq(&device->al_lock); - BUG_ON(i > AL_UPDATES_PER_TRANSACTION); - - buffer->n_updates = cpu_to_be16(i); - for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) { - buffer->update_slot_nr[i] = cpu_to_be16(-1); - buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE); - } - - buffer->context_size = cpu_to_be16(device->act_log->nr_elements); - buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle); - - mx = min_t(int, AL_CONTEXT_PER_TRANSACTION, - device->act_log->nr_elements - device->al_tr_cycle); - for (i = 0; i < mx; i++) { - unsigned idx = device->al_tr_cycle + i; - extent_nr = lc_element_by_index(device->act_log, idx)->lc_number; - buffer->context[i] = cpu_to_be32(extent_nr); - } - for (; i < AL_CONTEXT_PER_TRANSACTION; i++) - buffer->context[i] = cpu_to_be32(LC_FREE); - - device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION; - if (device->al_tr_cycle >= device->act_log->nr_elements) - device->al_tr_cycle = 0; - - sector = al_tr_number_to_on_disk_sector(device); - - crc = crc32c(0, buffer, 4096); - buffer->crc32c = 
cpu_to_be32(crc); - - if (drbd_bm_write_hinted(device)) - err = -EIO; - else { - bool write_al_updates; - rcu_read_lock(); - write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates; - rcu_read_unlock(); - if (write_al_updates) { - if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) { - err = -EIO; - drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR); - } else { - device->al_tr_number++; - device->al_writ_cnt++; - } - } - } - - drbd_md_put_buffer(device); - put_ldev(device); - - return err; -} - static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext) { int rv; @@ -606,21 +614,24 @@ void drbd_al_shrink(struct drbd_device *device) wake_up(&device->al_wait); } -int drbd_initialize_al(struct drbd_device *device, void *buffer) +int drbd_al_initialize(struct drbd_device *device, void *buffer) { struct al_transaction_on_disk *al = buffer; struct drbd_md *md = &device->ldev->md; - sector_t al_base = md->md_offset + md->al_offset; int al_size_4k = md->al_stripes * md->al_stripe_size_4k; int i; - memset(al, 0, 4096); - al->magic = cpu_to_be32(DRBD_AL_MAGIC); - al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED); - al->crc32c = cpu_to_be32(crc32c(0, al, 4096)); + __al_write_transaction(device, al); + /* There may or may not have been a pending transaction. */ + spin_lock_irq(&device->al_lock); + lc_committed(device->act_log); + spin_unlock_irq(&device->al_lock); - for (i = 0; i < al_size_4k; i++) { - int err = drbd_md_sync_page_io(device, device->ldev, al_base + i * 8, WRITE); + /* The rest of the transactions will have an empty "updates" list, and + * are written out only to provide the context, and to initialize the + * on-disk ring buffer. */ + for (i = 1; i < al_size_4k; i++) { + int err = __al_write_transaction(device, al); if (err) return err; } |