Diffstat (limited to 'fs/afs/vlocation.c')
-rw-r--r-- | fs/afs/vlocation.c | 718
1 files changed, 718 insertions, 0 deletions
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
new file mode 100644
index 000000000..52976785a
--- /dev/null
+++ b/fs/afs/vlocation.c
@@ -0,0 +1,718 @@
+/* AFS volume location management
+ *
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include "internal.h"
+
+static unsigned afs_vlocation_timeout = 10;	/* volume location timeout in seconds */
+static unsigned afs_vlocation_update_timeout = 10 * 60;
+
+static void afs_vlocation_reaper(struct work_struct *);
+static void afs_vlocation_updater(struct work_struct *);
+
+static LIST_HEAD(afs_vlocation_updates);
+static LIST_HEAD(afs_vlocation_graveyard);
+static DEFINE_SPINLOCK(afs_vlocation_updates_lock);
+static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock);
+static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper);
+static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater);
+static struct workqueue_struct *afs_vlocation_update_worker;
+
+/*
+ * iterate through the VL servers in a cell until one of them admits knowing
+ * about the volume in question
+ */
+static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
+					   struct key *key,
+					   struct afs_cache_vlocation *vldb)
+{
+	struct afs_cell *cell = vl->cell;
+	struct in_addr addr;
+	int count, ret;
+
+	_enter("%s,%s", cell->name, vl->vldb.name);
+
+	down_write(&vl->cell->vl_sem);
+	ret = -ENOMEDIUM;
+	for (count = cell->vl_naddrs; count > 0; count--) {
+		addr = cell->vl_addrs[cell->vl_curr_svix];
+
+		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
+
+		/* attempt to access the VL server */
+		ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb,
+					       &afs_sync_call);
+		switch (ret) {
+		case 0:
+			goto out;
+		case -ENOMEM:
+		case -ENONET:
+		case -ENETUNREACH:
+		case -EHOSTUNREACH:
+		case -ECONNREFUSED:
+			if (ret == -ENOMEM || ret == -ENONET)
+				goto out;
+			goto rotate;
+		case -ENOMEDIUM:
+		case -EKEYREJECTED:
+		case -EKEYEXPIRED:
+			goto out;
+		default:
+			ret = -EIO;
+			goto rotate;
+		}
+
+		/* rotate the server records upon lookup failure */
+	rotate:
+		cell->vl_curr_svix++;
+		cell->vl_curr_svix %= cell->vl_naddrs;
+	}
+
+out:
+	up_write(&vl->cell->vl_sem);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * iterate through the VL servers in a cell until one of them admits knowing
+ * about the volume in question
+ */
+static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
+					 struct key *key,
+					 afs_volid_t volid,
+					 afs_voltype_t voltype,
+					 struct afs_cache_vlocation *vldb)
+{
+	struct afs_cell *cell = vl->cell;
+	struct in_addr addr;
+	int count, ret;
+
+	_enter("%s,%x,%d,", cell->name, volid, voltype);
+
+	down_write(&vl->cell->vl_sem);
+	ret = -ENOMEDIUM;
+	for (count = cell->vl_naddrs; count > 0; count--) {
+		addr = cell->vl_addrs[cell->vl_curr_svix];
+
+		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
+
+		/* attempt to access the VL server */
+		ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb,
+					     &afs_sync_call);
+		switch (ret) {
+		case 0:
+			goto out;
+		case -ENOMEM:
+		case -ENONET:
+		case -ENETUNREACH:
+		case -EHOSTUNREACH:
+		case -ECONNREFUSED:
+			if (ret == -ENOMEM || ret == -ENONET)
+				goto out;
+			goto rotate;
+		case -EBUSY:
+			vl->upd_busy_cnt++;
+			if (vl->upd_busy_cnt <= 3) {
+				if (vl->upd_busy_cnt > 1) {
+					/* second+ BUSY - sleep a little bit */
+					set_current_state(TASK_UNINTERRUPTIBLE);
+					schedule_timeout(1);
+				}
+				continue;
+			}
+			break;
+		case -ENOMEDIUM:
+			vl->upd_rej_cnt++;
+			goto rotate;
+		default:
+			ret = -EIO;
+			goto rotate;
+		}
+
+		/* rotate the server records upon lookup failure */
+	rotate:
+		cell->vl_curr_svix++;
+		cell->vl_curr_svix %= cell->vl_naddrs;
+		vl->upd_busy_cnt = 0;
+	}
+
+out:
+	if (ret < 0 && vl->upd_rej_cnt > 0) {
+		printk(KERN_NOTICE "kAFS:"
+		       " Active volume no longer valid '%s'\n",
+		       vl->vldb.name);
+		vl->valid = 0;
+		ret = -ENOMEDIUM;
+	}
+
+	up_write(&vl->cell->vl_sem);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * allocate a volume location record
+ */
+static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
+						 const char *name,
+						 size_t namesz)
+{
+	struct afs_vlocation *vl;
+
+	vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
+	if (vl) {
+		vl->cell = cell;
+		vl->state = AFS_VL_NEW;
+		atomic_set(&vl->usage, 1);
+		INIT_LIST_HEAD(&vl->link);
+		INIT_LIST_HEAD(&vl->grave);
+		INIT_LIST_HEAD(&vl->update);
+		init_waitqueue_head(&vl->waitq);
+		spin_lock_init(&vl->lock);
+		memcpy(vl->vldb.name, name, namesz);
+	}
+
+	_leave(" = %p", vl);
+	return vl;
+}
+
+/*
+ * update record if we found it in the cache
+ */
+static int afs_vlocation_update_record(struct afs_vlocation *vl,
+				       struct key *key,
+				       struct afs_cache_vlocation *vldb)
+{
+	afs_voltype_t voltype;
+	afs_volid_t vid;
+	int ret;
+
+	/* try to look up a cached volume in the cell VL databases by ID */
+	_debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
+	       vl->vldb.name,
+	       vl->vldb.vidmask,
+	       ntohl(vl->vldb.servers[0].s_addr),
+	       vl->vldb.srvtmask[0],
+	       ntohl(vl->vldb.servers[1].s_addr),
+	       vl->vldb.srvtmask[1],
+	       ntohl(vl->vldb.servers[2].s_addr),
+	       vl->vldb.srvtmask[2]);
+
+	_debug("Vids: %08x %08x %08x",
+	       vl->vldb.vid[0],
+	       vl->vldb.vid[1],
+	       vl->vldb.vid[2]);
+
+	if (vl->vldb.vidmask & AFS_VOL_VTM_RW) {
+		vid = vl->vldb.vid[0];
+		voltype = AFSVL_RWVOL;
+	} else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) {
+		vid = vl->vldb.vid[1];
+		voltype = AFSVL_ROVOL;
+	} else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) {
+		vid = vl->vldb.vid[2];
+		voltype = AFSVL_BACKVOL;
+	} else {
+		BUG();
+		vid = 0;
+		voltype = 0;
+	}
+
+	/* contact the server to make sure the volume is still available
+	 * - TODO: need to handle disconnected operation here
+	 */
+	ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb);
+	switch (ret) {
+		/* net error */
+	default:
+		printk(KERN_WARNING "kAFS:"
+		       " failed to update volume '%s' (%x) up in '%s': %d\n",
+		       vl->vldb.name, vid, vl->cell->name, ret);
+		_leave(" = %d", ret);
+		return ret;
+
+		/* pulled from local cache into memory */
+	case 0:
+		_leave(" = 0");
+		return 0;
+
+		/* uh oh... looks like the volume got deleted */
+	case -ENOMEDIUM:
+		printk(KERN_ERR "kAFS:"
+		       " volume '%s' (%x) does not exist '%s'\n",
+		       vl->vldb.name, vid, vl->cell->name);
+
+		/* TODO: make existing record unavailable */
+		_leave(" = %d", ret);
+		return ret;
+	}
+}
+
+/*
+ * apply the update to a VL record
+ */
+static void afs_vlocation_apply_update(struct afs_vlocation *vl,
+				       struct afs_cache_vlocation *vldb)
+{
+	_debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
+	       vldb->name, vldb->vidmask,
+	       ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0],
+	       ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1],
+	       ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]);
+
+	_debug("Vids: %08x %08x %08x",
+	       vldb->vid[0], vldb->vid[1], vldb->vid[2]);
+
+	if (strcmp(vldb->name, vl->vldb.name) != 0)
+		printk(KERN_NOTICE "kAFS:"
+		       " name of volume '%s' changed to '%s' on server\n",
+		       vl->vldb.name, vldb->name);
+
+	vl->vldb = *vldb;
+
+#ifdef CONFIG_AFS_FSCACHE
+	fscache_update_cookie(vl->cache);
+#endif
+}
+
+/*
+ * fill in a volume location record, consulting the cache and the VL server
+ * both
+ */
+static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
+					struct key *key)
+{
+	struct afs_cache_vlocation vldb;
+	int ret;
+
+	_enter("");
+
+	ASSERTCMP(vl->valid, ==, 0);
+
+	memset(&vldb, 0, sizeof(vldb));
+
+	/* see if we have an in-cache copy (will set vl->valid if there is) */
+#ifdef CONFIG_AFS_FSCACHE
+	vl->cache = fscache_acquire_cookie(vl->cell->cache,
+					   &afs_vlocation_cache_index_def, vl,
+					   true);
+#endif
+
+	if (vl->valid) {
+		/* try to update a known volume in the cell VL databases by
+		 * ID as the name may have changed */
+		_debug("found in cache");
+		ret = afs_vlocation_update_record(vl, key, &vldb);
+	} else {
+		/* try to look up an unknown volume in the cell VL databases by
+		 * name */
+		ret = afs_vlocation_access_vl_by_name(vl, key, &vldb);
+		if (ret < 0) {
+			printk("kAFS: failed to locate '%s' in cell '%s'\n",
+			       vl->vldb.name, vl->cell->name);
+			return ret;
+		}
+	}
+
+	afs_vlocation_apply_update(vl, &vldb);
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * queue a vlocation record for updates
+ */
+static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
+{
+	struct afs_vlocation *xvl;
+
+	/* wait at least 10 minutes before updating... */
+	vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+
+	spin_lock(&afs_vlocation_updates_lock);
+
+	if (!list_empty(&afs_vlocation_updates)) {
+		/* ... but wait at least 1 second more than the newest record
+		 * already queued so that we don't spam the VL server suddenly
+		 * with lots of requests
+		 */
+		xvl = list_entry(afs_vlocation_updates.prev,
+				 struct afs_vlocation, update);
+		if (vl->update_at <= xvl->update_at)
+			vl->update_at = xvl->update_at + 1;
+	} else {
+		queue_delayed_work(afs_vlocation_update_worker,
+				   &afs_vlocation_update,
+				   afs_vlocation_update_timeout * HZ);
+	}
+
+	list_add_tail(&vl->update, &afs_vlocation_updates);
+	spin_unlock(&afs_vlocation_updates_lock);
+}
+
+/*
+ * lookup volume location
+ * - iterate through the VL servers in a cell until one of them admits knowing
+ *   about the volume in question
+ * - lookup in the local cache if not able to find on the VL server
+ * - insert/update in the local cache if did get a VL response
+ */
+struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell,
+					   struct key *key,
+					   const char *name,
+					   size_t namesz)
+{
+	struct afs_vlocation *vl;
+	int ret;
+
+	_enter("{%s},{%x},%*.*s,%zu",
+	       cell->name, key_serial(key),
+	       (int) namesz, (int) namesz, name, namesz);
+
+	if (namesz >= sizeof(vl->vldb.name)) {
+		_leave(" = -ENAMETOOLONG");
+		return ERR_PTR(-ENAMETOOLONG);
+	}
+
+	/* see if we have an in-memory copy first */
+	down_write(&cell->vl_sem);
+	spin_lock(&cell->vl_lock);
+	list_for_each_entry(vl, &cell->vl_list, link) {
+		if (vl->vldb.name[namesz] != '\0')
+			continue;
+		if (memcmp(vl->vldb.name, name, namesz) == 0)
+			goto found_in_memory;
+	}
+	spin_unlock(&cell->vl_lock);
+
+	/* not in the cell's in-memory lists - create a new record */
+	vl = afs_vlocation_alloc(cell, name, namesz);
+	if (!vl) {
+		up_write(&cell->vl_sem);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	afs_get_cell(cell);
+
+	list_add_tail(&vl->link, &cell->vl_list);
+	vl->state = AFS_VL_CREATING;
+	up_write(&cell->vl_sem);
+
+fill_in_record:
+	ret = afs_vlocation_fill_in_record(vl, key);
+	if (ret < 0)
+		goto error_abandon;
+	spin_lock(&vl->lock);
+	vl->state = AFS_VL_VALID;
+	spin_unlock(&vl->lock);
+	wake_up(&vl->waitq);
+
+	/* update volume entry in local cache */
+#ifdef CONFIG_AFS_FSCACHE
+	fscache_update_cookie(vl->cache);
+#endif
+
+	/* schedule for regular updates */
+	afs_vlocation_queue_for_updates(vl);
+	goto success;
+
+found_in_memory:
+	/* found in memory */
+	_debug("found in memory");
+	atomic_inc(&vl->usage);
+	spin_unlock(&cell->vl_lock);
+	if (!list_empty(&vl->grave)) {
+		spin_lock(&afs_vlocation_graveyard_lock);
+		list_del_init(&vl->grave);
+		spin_unlock(&afs_vlocation_graveyard_lock);
+	}
+	up_write(&cell->vl_sem);
+
+	/* see if it was an abandoned record that we might try filling in */
+	spin_lock(&vl->lock);
+	while (vl->state != AFS_VL_VALID) {
+		afs_vlocation_state_t state = vl->state;
+
+		_debug("invalid [state %d]", state);
+
+		if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) {
+			vl->state = AFS_VL_CREATING;
+			spin_unlock(&vl->lock);
+			goto fill_in_record;
+		}
+
+		/* must now wait for creation or update by someone else to
+		 * complete */
+		_debug("wait");
+
+		spin_unlock(&vl->lock);
+		ret = wait_event_interruptible(vl->waitq,
+					       vl->state == AFS_VL_NEW ||
+					       vl->state == AFS_VL_VALID ||
+					       vl->state == AFS_VL_NO_VOLUME);
+		if (ret < 0)
+			goto error;
+		spin_lock(&vl->lock);
+	}
+	spin_unlock(&vl->lock);
+
+success:
+	_leave(" = %p", vl);
+	return vl;
+
+error_abandon:
+	spin_lock(&vl->lock);
+	vl->state = AFS_VL_NEW;
+	spin_unlock(&vl->lock);
+	wake_up(&vl->waitq);
+error:
+	ASSERT(vl != NULL);
+	afs_put_vlocation(vl);
+	_leave(" = %d", ret);
+	return ERR_PTR(ret);
+}
+
+/*
+ * finish using a volume location record
+ */
+void afs_put_vlocation(struct afs_vlocation *vl)
+{
+	if (!vl)
+		return;
+
+	_enter("%s", vl->vldb.name);
+
+	ASSERTCMP(atomic_read(&vl->usage), >, 0);
+
+	if (likely(!atomic_dec_and_test(&vl->usage))) {
+		_leave("");
+		return;
+	}
+
+	spin_lock(&afs_vlocation_graveyard_lock);
+	if (atomic_read(&vl->usage) == 0) {
+		_debug("buried");
+		list_move_tail(&vl->grave, &afs_vlocation_graveyard);
+		vl->time_of_death = get_seconds();
+		queue_delayed_work(afs_wq, &afs_vlocation_reap,
+				   afs_vlocation_timeout * HZ);
+
+		/* suspend updates on this record */
+		if (!list_empty(&vl->update)) {
+			spin_lock(&afs_vlocation_updates_lock);
+			list_del_init(&vl->update);
+			spin_unlock(&afs_vlocation_updates_lock);
+		}
+	}
+	spin_unlock(&afs_vlocation_graveyard_lock);
+	_leave(" [killed?]");
+}
+
+/*
+ * destroy a dead volume location record
+ */
+static void afs_vlocation_destroy(struct afs_vlocation *vl)
+{
+	_enter("%p", vl);
+
+#ifdef CONFIG_AFS_FSCACHE
+	fscache_relinquish_cookie(vl->cache, 0);
+#endif
+	afs_put_cell(vl->cell);
+	kfree(vl);
+}
+
+/*
+ * reap dead volume location records
+ */
+static void afs_vlocation_reaper(struct work_struct *work)
+{
+	LIST_HEAD(corpses);
+	struct afs_vlocation *vl;
+	unsigned long delay, expiry;
+	time_t now;
+
+	_enter("");
+
+	now = get_seconds();
+	spin_lock(&afs_vlocation_graveyard_lock);
+
+	while (!list_empty(&afs_vlocation_graveyard)) {
+		vl = list_entry(afs_vlocation_graveyard.next,
+				struct afs_vlocation, grave);
+
+		_debug("check %p", vl);
+
+		/* the queue is ordered most dead first */
+		expiry = vl->time_of_death + afs_vlocation_timeout;
+		if (expiry > now) {
+			delay = (expiry - now) * HZ;
+			_debug("delay %lu", delay);
+			mod_delayed_work(afs_wq, &afs_vlocation_reap, delay);
+			break;
+		}
+
+		spin_lock(&vl->cell->vl_lock);
+		if (atomic_read(&vl->usage) > 0) {
+			_debug("no reap");
+			list_del_init(&vl->grave);
+		} else {
+			_debug("reap");
+			list_move_tail(&vl->grave, &corpses);
+			list_del_init(&vl->link);
+		}
+		spin_unlock(&vl->cell->vl_lock);
+	}
+
+	spin_unlock(&afs_vlocation_graveyard_lock);
+
+	/* now reap the corpses we've extracted */
+	while (!list_empty(&corpses)) {
+		vl = list_entry(corpses.next, struct afs_vlocation, grave);
+		list_del(&vl->grave);
+		afs_vlocation_destroy(vl);
+	}
+
+	_leave("");
+}
+
+/*
+ * initialise the VL update process
+ */
+int __init afs_vlocation_update_init(void)
+{
+	afs_vlocation_update_worker =
+		create_singlethread_workqueue("kafs_vlupdated");
+	return afs_vlocation_update_worker ? 0 : -ENOMEM;
+}
+
+/*
+ * discard all the volume location records for rmmod
+ */
+void afs_vlocation_purge(void)
+{
+	afs_vlocation_timeout = 0;
+
+	spin_lock(&afs_vlocation_updates_lock);
+	list_del_init(&afs_vlocation_updates);
+	spin_unlock(&afs_vlocation_updates_lock);
+	mod_delayed_work(afs_vlocation_update_worker, &afs_vlocation_update, 0);
+	destroy_workqueue(afs_vlocation_update_worker);
+
+	mod_delayed_work(afs_wq, &afs_vlocation_reap, 0);
+}
+
+/*
+ * update a volume location
+ */
+static void afs_vlocation_updater(struct work_struct *work)
+{
+	struct afs_cache_vlocation vldb;
+	struct afs_vlocation *vl, *xvl;
+	time_t now;
+	long timeout;
+	int ret;
+
+	_enter("");
+
+	now = get_seconds();
+
+	/* find a record to update */
+	spin_lock(&afs_vlocation_updates_lock);
+	for (;;) {
+		if (list_empty(&afs_vlocation_updates)) {
+			spin_unlock(&afs_vlocation_updates_lock);
+			_leave(" [nothing]");
+			return;
+		}
+
+		vl = list_entry(afs_vlocation_updates.next,
+				struct afs_vlocation, update);
+		if (atomic_read(&vl->usage) > 0)
+			break;
+		list_del_init(&vl->update);
+	}
+
+	timeout = vl->update_at - now;
+	if (timeout > 0) {
+		queue_delayed_work(afs_vlocation_update_worker,
+				   &afs_vlocation_update, timeout * HZ);
+		spin_unlock(&afs_vlocation_updates_lock);
+		_leave(" [nothing]");
+		return;
+	}
+
+	list_del_init(&vl->update);
+	atomic_inc(&vl->usage);
+	spin_unlock(&afs_vlocation_updates_lock);
+
+	/* we can now perform the update */
+	_debug("update %s", vl->vldb.name);
+	vl->state = AFS_VL_UPDATING;
+	vl->upd_rej_cnt = 0;
+	vl->upd_busy_cnt = 0;
+
+	ret = afs_vlocation_update_record(vl, NULL, &vldb);
+	spin_lock(&vl->lock);
+	switch (ret) {
+	case 0:
+		afs_vlocation_apply_update(vl, &vldb);
+		vl->state = AFS_VL_VALID;
+		break;
+	case -ENOMEDIUM:
+		vl->state = AFS_VL_VOLUME_DELETED;
+		break;
+	default:
+		vl->state = AFS_VL_UNCERTAIN;
+		break;
+	}
+	spin_unlock(&vl->lock);
+	wake_up(&vl->waitq);
+
+	/* and then reschedule */
+	_debug("reschedule");
+	vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+
+	spin_lock(&afs_vlocation_updates_lock);
+
+	if (!list_empty(&afs_vlocation_updates)) {
+		/* next update in 10 minutes, but wait at least 1 second more
+		 * than the newest record already queued so that we don't spam
+		 * the VL server suddenly with lots of requests
+		 */
+		xvl = list_entry(afs_vlocation_updates.prev,
+				 struct afs_vlocation, update);
+		if (vl->update_at <= xvl->update_at)
+			vl->update_at = xvl->update_at + 1;
+		xvl = list_entry(afs_vlocation_updates.next,
+				 struct afs_vlocation, update);
+		timeout = xvl->update_at - now;
+		if (timeout < 0)
+			timeout = 0;
+	} else {
+		timeout = afs_vlocation_update_timeout;
+	}
+
+	ASSERT(list_empty(&vl->update));
+
+	list_add_tail(&vl->update, &afs_vlocation_updates);
+
+	_debug("timeout %ld", timeout);
+	queue_delayed_work(afs_vlocation_update_worker,
+			   &afs_vlocation_update, timeout * HZ);
+	spin_unlock(&afs_vlocation_updates_lock);
+	afs_put_vlocation(vl);
+}