diff options
author | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2015-08-05 17:04:01 -0300 |
---|---|---|
committer | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2015-08-05 17:04:01 -0300 |
commit | 57f0f512b273f60d52568b8c6b77e17f5636edc0 (patch) | |
tree | 5e910f0e82173f4ef4f51111366a3f1299037a7b /drivers/staging/lustre/lustre/fld |
Initial import
Diffstat (limited to 'drivers/staging/lustre/lustre/fld')
-rw-r--r-- | drivers/staging/lustre/lustre/fld/Makefile | 3 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/fld/fld_cache.c | 546 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/fld/fld_internal.h | 193 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/fld/fld_request.c | 526 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/fld/lproc_fld.c | 172 |
5 files changed, 1440 insertions, 0 deletions
diff --git a/drivers/staging/lustre/lustre/fld/Makefile b/drivers/staging/lustre/lustre/fld/Makefile new file mode 100644 index 000000000..2bbf08433 --- /dev/null +++ b/drivers/staging/lustre/lustre/fld/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_LUSTRE_FS) += fld.o +fld-y := fld_request.o fld_cache.o +fld-$(CONFIG_PROC_FS) += lproc_fld.o diff --git a/drivers/staging/lustre/lustre/fld/fld_cache.c b/drivers/staging/lustre/lustre/fld/fld_cache.c new file mode 100644 index 000000000..0d0a73745 --- /dev/null +++ b/drivers/staging/lustre/lustre/fld/fld_cache.c @@ -0,0 +1,546 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, 2013, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ * + * lustre/fld/fld_cache.c + * + * FLD (Fids Location Database) + * + * Author: Pravin Shelar <pravin.shelar@sun.com> + * Author: Yury Umanets <umka@clusterfs.com> + */ + +#define DEBUG_SUBSYSTEM S_FLD + +#include "../../include/linux/libcfs/libcfs.h" +#include <linux/module.h> +#include <asm/div64.h> + +#include "../include/obd.h" +#include "../include/obd_class.h" +#include "../include/lustre_ver.h" +#include "../include/obd_support.h" +#include "../include/lprocfs_status.h" + +#include "../include/dt_object.h" +#include "../include/lustre_req_layout.h" +#include "../include/lustre_fld.h" +#include "fld_internal.h" + +/** + * create fld cache. + */ +struct fld_cache *fld_cache_init(const char *name, + int cache_size, int cache_threshold) +{ + struct fld_cache *cache; + + LASSERT(name != NULL); + LASSERT(cache_threshold < cache_size); + + OBD_ALLOC_PTR(cache); + if (cache == NULL) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&cache->fci_entries_head); + INIT_LIST_HEAD(&cache->fci_lru); + + cache->fci_cache_count = 0; + rwlock_init(&cache->fci_lock); + + strlcpy(cache->fci_name, name, + sizeof(cache->fci_name)); + + cache->fci_cache_size = cache_size; + cache->fci_threshold = cache_threshold; + + /* Init fld cache info. */ + memset(&cache->fci_stat, 0, sizeof(cache->fci_stat)); + + CDEBUG(D_INFO, "%s: FLD cache - Size: %d, Threshold: %d\n", + cache->fci_name, cache_size, cache_threshold); + + return cache; +} + +/** + * destroy fld cache. 
+ */ +void fld_cache_fini(struct fld_cache *cache) +{ + __u64 pct; + + LASSERT(cache != NULL); + fld_cache_flush(cache); + + if (cache->fci_stat.fst_count > 0) { + pct = cache->fci_stat.fst_cache * 100; + do_div(pct, cache->fci_stat.fst_count); + } else { + pct = 0; + } + + CDEBUG(D_INFO, "FLD cache statistics (%s):\n", cache->fci_name); + CDEBUG(D_INFO, " Total reqs: %llu\n", cache->fci_stat.fst_count); + CDEBUG(D_INFO, " Cache reqs: %llu\n", cache->fci_stat.fst_cache); + CDEBUG(D_INFO, " Cache hits: %llu%%\n", pct); + + OBD_FREE_PTR(cache); +} + +/** + * delete given node from list. + */ +void fld_cache_entry_delete(struct fld_cache *cache, + struct fld_cache_entry *node) +{ + list_del(&node->fce_list); + list_del(&node->fce_lru); + cache->fci_cache_count--; + OBD_FREE_PTR(node); +} + +/** + * fix list by checking new entry with NEXT entry in order. + */ +static void fld_fix_new_list(struct fld_cache *cache) +{ + struct fld_cache_entry *f_curr; + struct fld_cache_entry *f_next; + struct lu_seq_range *c_range; + struct lu_seq_range *n_range; + struct list_head *head = &cache->fci_entries_head; + +restart_fixup: + + list_for_each_entry_safe(f_curr, f_next, head, fce_list) { + c_range = &f_curr->fce_range; + n_range = &f_next->fce_range; + + LASSERT(range_is_sane(c_range)); + if (&f_next->fce_list == head) + break; + + if (c_range->lsr_flags != n_range->lsr_flags) + continue; + + LASSERTF(c_range->lsr_start <= n_range->lsr_start, + "cur lsr_start "DRANGE" next lsr_start "DRANGE"\n", + PRANGE(c_range), PRANGE(n_range)); + + /* check merge possibility with next range */ + if (c_range->lsr_end == n_range->lsr_start) { + if (c_range->lsr_index != n_range->lsr_index) + continue; + n_range->lsr_start = c_range->lsr_start; + fld_cache_entry_delete(cache, f_curr); + continue; + } + + /* check if current range overlaps with next range. 
*/ + if (n_range->lsr_start < c_range->lsr_end) { + if (c_range->lsr_index == n_range->lsr_index) { + n_range->lsr_start = c_range->lsr_start; + n_range->lsr_end = max(c_range->lsr_end, + n_range->lsr_end); + fld_cache_entry_delete(cache, f_curr); + } else { + if (n_range->lsr_end <= c_range->lsr_end) { + *n_range = *c_range; + fld_cache_entry_delete(cache, f_curr); + } else + n_range->lsr_start = c_range->lsr_end; + } + + /* we could have overlap over next + * range too. better restart. */ + goto restart_fixup; + } + + /* kill duplicates */ + if (c_range->lsr_start == n_range->lsr_start && + c_range->lsr_end == n_range->lsr_end) + fld_cache_entry_delete(cache, f_curr); + } +} + +/** + * add node to fld cache + */ +static inline void fld_cache_entry_add(struct fld_cache *cache, + struct fld_cache_entry *f_new, + struct list_head *pos) +{ + list_add(&f_new->fce_list, pos); + list_add(&f_new->fce_lru, &cache->fci_lru); + + cache->fci_cache_count++; + fld_fix_new_list(cache); +} + +/** + * Check if cache needs to be shrunk. If so - do it. + * Remove one entry in list and so on until cache is shrunk enough. + */ +static int fld_cache_shrink(struct fld_cache *cache) +{ + struct fld_cache_entry *flde; + struct list_head *curr; + int num = 0; + + LASSERT(cache != NULL); + + if (cache->fci_cache_count < cache->fci_cache_size) + return 0; + + curr = cache->fci_lru.prev; + + while (cache->fci_cache_count + cache->fci_threshold > + cache->fci_cache_size && curr != &cache->fci_lru) { + + flde = list_entry(curr, struct fld_cache_entry, fce_lru); + curr = curr->prev; + fld_cache_entry_delete(cache, flde); + num++; + } + + CDEBUG(D_INFO, "%s: FLD cache - Shrunk by %d entries\n", + cache->fci_name, num); + + return 0; +} + +/** + * kill all fld cache entries. + */ +void fld_cache_flush(struct fld_cache *cache) +{ + write_lock(&cache->fci_lock); + cache->fci_cache_size = 0; + fld_cache_shrink(cache); + write_unlock(&cache->fci_lock); +} + +/** + * punch hole in existing range. 
divide this range and add new + * entry accordingly. + */ + +static void fld_cache_punch_hole(struct fld_cache *cache, + struct fld_cache_entry *f_curr, + struct fld_cache_entry *f_new) +{ + const struct lu_seq_range *range = &f_new->fce_range; + const u64 new_start = range->lsr_start; + const u64 new_end = range->lsr_end; + struct fld_cache_entry *fldt; + + OBD_ALLOC_GFP(fldt, sizeof(*fldt), GFP_ATOMIC); + if (!fldt) { + OBD_FREE_PTR(f_new); + /* overlap is not allowed, so dont mess up list. */ + return; + } + /* break f_curr RANGE into three RANGES: + * f_curr, f_new , fldt + */ + + /* f_new = *range */ + + /* fldt */ + fldt->fce_range.lsr_start = new_end; + fldt->fce_range.lsr_end = f_curr->fce_range.lsr_end; + fldt->fce_range.lsr_index = f_curr->fce_range.lsr_index; + + /* f_curr */ + f_curr->fce_range.lsr_end = new_start; + + /* add these two entries to list */ + fld_cache_entry_add(cache, f_new, &f_curr->fce_list); + fld_cache_entry_add(cache, fldt, &f_new->fce_list); + + /* no need to fixup */ +} + +/** + * handle range overlap in fld cache. + */ +static void fld_cache_overlap_handle(struct fld_cache *cache, + struct fld_cache_entry *f_curr, + struct fld_cache_entry *f_new) +{ + const struct lu_seq_range *range = &f_new->fce_range; + const u64 new_start = range->lsr_start; + const u64 new_end = range->lsr_end; + const u32 mdt = range->lsr_index; + + /* this is overlap case, these case are checking overlapping with + * prev range only. fixup will handle overlapping with next range. */ + + if (f_curr->fce_range.lsr_index == mdt) { + f_curr->fce_range.lsr_start = min(f_curr->fce_range.lsr_start, + new_start); + + f_curr->fce_range.lsr_end = max(f_curr->fce_range.lsr_end, + new_end); + + OBD_FREE_PTR(f_new); + fld_fix_new_list(cache); + + } else if (new_start <= f_curr->fce_range.lsr_start && + f_curr->fce_range.lsr_end <= new_end) { + /* case 1: new range completely overshadowed existing range. + * e.g. whole range migrated. 
update fld cache entry */ + + f_curr->fce_range = *range; + OBD_FREE_PTR(f_new); + fld_fix_new_list(cache); + + } else if (f_curr->fce_range.lsr_start < new_start && + new_end < f_curr->fce_range.lsr_end) { + /* case 2: new range fit within existing range. */ + + fld_cache_punch_hole(cache, f_curr, f_new); + + } else if (new_end <= f_curr->fce_range.lsr_end) { + /* case 3: overlap: + * [new_start [c_start new_end) c_end) + */ + + LASSERT(new_start <= f_curr->fce_range.lsr_start); + + f_curr->fce_range.lsr_start = new_end; + fld_cache_entry_add(cache, f_new, f_curr->fce_list.prev); + + } else if (f_curr->fce_range.lsr_start <= new_start) { + /* case 4: overlap: + * [c_start [new_start c_end) new_end) + */ + + LASSERT(f_curr->fce_range.lsr_end <= new_end); + + f_curr->fce_range.lsr_end = new_start; + fld_cache_entry_add(cache, f_new, &f_curr->fce_list); + } else + CERROR("NEW range ="DRANGE" curr = "DRANGE"\n", + PRANGE(range), PRANGE(&f_curr->fce_range)); +} + +struct fld_cache_entry +*fld_cache_entry_create(const struct lu_seq_range *range) +{ + struct fld_cache_entry *f_new; + + LASSERT(range_is_sane(range)); + + OBD_ALLOC_PTR(f_new); + if (!f_new) + return ERR_PTR(-ENOMEM); + + f_new->fce_range = *range; + return f_new; +} + +/** + * Insert FLD entry in FLD cache. + * + * This function handles all cases of merging and breaking up of + * ranges. + */ +int fld_cache_insert_nolock(struct fld_cache *cache, + struct fld_cache_entry *f_new) +{ + struct fld_cache_entry *f_curr; + struct fld_cache_entry *n; + struct list_head *head; + struct list_head *prev = NULL; + const u64 new_start = f_new->fce_range.lsr_start; + const u64 new_end = f_new->fce_range.lsr_end; + __u32 new_flags = f_new->fce_range.lsr_flags; + + /* + * Duplicate entries are eliminated in insert op. + * So we don't need to search new entry before starting + * insertion loop. 
+ */ + + if (!cache->fci_no_shrink) + fld_cache_shrink(cache); + + head = &cache->fci_entries_head; + + list_for_each_entry_safe(f_curr, n, head, fce_list) { + /* add list if next is end of list */ + if (new_end < f_curr->fce_range.lsr_start || + (new_end == f_curr->fce_range.lsr_start && + new_flags != f_curr->fce_range.lsr_flags)) + break; + + prev = &f_curr->fce_list; + /* check if this range is to left of new range. */ + if (new_start < f_curr->fce_range.lsr_end && + new_flags == f_curr->fce_range.lsr_flags) { + fld_cache_overlap_handle(cache, f_curr, f_new); + goto out; + } + } + + if (prev == NULL) + prev = head; + + CDEBUG(D_INFO, "insert range "DRANGE"\n", PRANGE(&f_new->fce_range)); + /* Add new entry to cache and lru list. */ + fld_cache_entry_add(cache, f_new, prev); +out: + return 0; +} + +int fld_cache_insert(struct fld_cache *cache, + const struct lu_seq_range *range) +{ + struct fld_cache_entry *flde; + int rc; + + flde = fld_cache_entry_create(range); + if (IS_ERR(flde)) + return PTR_ERR(flde); + + write_lock(&cache->fci_lock); + rc = fld_cache_insert_nolock(cache, flde); + write_unlock(&cache->fci_lock); + if (rc) + OBD_FREE_PTR(flde); + + return rc; +} + +void fld_cache_delete_nolock(struct fld_cache *cache, + const struct lu_seq_range *range) +{ + struct fld_cache_entry *flde; + struct fld_cache_entry *tmp; + struct list_head *head; + + head = &cache->fci_entries_head; + list_for_each_entry_safe(flde, tmp, head, fce_list) { + /* add list if next is end of list */ + if (range->lsr_start == flde->fce_range.lsr_start || + (range->lsr_end == flde->fce_range.lsr_end && + range->lsr_flags == flde->fce_range.lsr_flags)) { + fld_cache_entry_delete(cache, flde); + break; + } + } +} + +/** + * Delete FLD entry in FLD cache. 
+ *
+ * Write-locked wrapper around fld_cache_delete_nolock().
+ */
+void fld_cache_delete(struct fld_cache *cache,
+		      const struct lu_seq_range *range)
+{
+	write_lock(&cache->fci_lock);
+	fld_cache_delete_nolock(cache, range);
+	write_unlock(&cache->fci_lock);
+}
+
+/**
+ * Find the first cached entry matching \a range; no lock is taken here.
+ *
+ * An entry matches when its lsr_start equals range->lsr_start, or when
+ * both its lsr_end and its lsr_flags equal those of \a range.
+ *
+ * \retval matching entry, or NULL when there is none
+ */
+struct fld_cache_entry
+*fld_cache_entry_lookup_nolock(struct fld_cache *cache,
+			      struct lu_seq_range *range)
+{
+	struct fld_cache_entry *pos;
+
+	list_for_each_entry(pos, &cache->fci_entries_head, fce_list) {
+		const struct lu_seq_range *cur = &pos->fce_range;
+
+		if (range->lsr_start == cur->lsr_start)
+			return pos;
+
+		if (range->lsr_end == cur->lsr_end &&
+		    range->lsr_flags == cur->lsr_flags)
+			return pos;
+	}
+
+	return NULL;
+}
+
+/**
+ * Read-locked wrapper around fld_cache_entry_lookup_nolock().
+ * The lock is released before the entry pointer is returned.
+ */
+struct fld_cache_entry
+*fld_cache_entry_lookup(struct fld_cache *cache, struct lu_seq_range *range)
+{
+	struct fld_cache_entry *found;
+
+	read_lock(&cache->fci_lock);
+	found = fld_cache_entry_lookup_nolock(cache, range);
+	read_unlock(&cache->fci_lock);
+
+	return found;
+}
+
+/**
+ * lookup \a seq sequence for range in fld cache.
+ */ +int fld_cache_lookup(struct fld_cache *cache, + const u64 seq, struct lu_seq_range *range) +{ + struct fld_cache_entry *flde; + struct fld_cache_entry *prev = NULL; + struct list_head *head; + + read_lock(&cache->fci_lock); + head = &cache->fci_entries_head; + + cache->fci_stat.fst_count++; + list_for_each_entry(flde, head, fce_list) { + if (flde->fce_range.lsr_start > seq) { + if (prev != NULL) + *range = prev->fce_range; + break; + } + + prev = flde; + if (range_within(&flde->fce_range, seq)) { + *range = flde->fce_range; + + cache->fci_stat.fst_cache++; + read_unlock(&cache->fci_lock); + return 0; + } + } + read_unlock(&cache->fci_lock); + return -ENOENT; +} diff --git a/drivers/staging/lustre/lustre/fld/fld_internal.h b/drivers/staging/lustre/lustre/fld/fld_internal.h new file mode 100644 index 000000000..68bec7658 --- /dev/null +++ b/drivers/staging/lustre/lustre/fld/fld_internal.h @@ -0,0 +1,193 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, 2013, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/fld/fld_internal.h
+ *
+ * Declarations shared by the FLD cache (fld_cache.c) and the FLD client
+ * request code (fld_request.c).
+ *
+ * Author: Yury Umanets <umka@clusterfs.com>
+ * Author: Tom WangDi <wangdi@clusterfs.com>
+ */
+#ifndef __FLD_INTERNAL_H
+#define __FLD_INTERNAL_H
+
+#include "../include/lustre/lustre_idl.h"
+#include "../include/dt_object.h"
+
+#include "../../include/linux/libcfs/libcfs.h"
+#include "../include/lustre_req_layout.h"
+#include "../include/lustre_fld.h"
+
+enum {
+	LUSTRE_FLD_INIT = 1 << 0, /* set by fld_client_init() */
+	LUSTRE_FLD_RUN = 1 << 1 /* or-ed in by fld_client_lookup() */
+};
+
+struct fld_stats {
+	__u64 fst_count; /* bumped on every fld_cache_lookup() call */
+	__u64 fst_cache; /* bumped on every fld_cache_lookup() hit */
+	__u64 fst_inflight;
+};
+
+typedef int (*fld_hash_func_t) (struct lu_client_fld *, __u64);
+
+typedef struct lu_fld_target *
+(*fld_scan_func_t) (struct lu_client_fld *, __u64);
+
+struct lu_fld_hash {
+	const char *fh_name; /* printed by fld_client_init() */
+	fld_hash_func_t fh_hash_func;
+	fld_scan_func_t fh_scan_func; /* maps a sequence to a target */
+};
+
+struct fld_cache_entry {
+	struct list_head fce_lru; /* link in fld_cache::fci_lru */
+	struct list_head fce_list; /* link in fld_cache::fci_entries_head */
+	/**
+	 * fld cache entries are sorted on range->lsr_start field. */
+	struct lu_seq_range fce_range;
+};
+
+struct fld_cache {
+	/**
+	 * Cache guard. fld_cache.c takes it for writing around insert,
+	 * delete and flush, and for reading around lookups.
+	 */
+	rwlock_t fci_lock;
+
+	/**
+	 * Cache shrink threshold: fld_cache_shrink() evicts entries until
+	 * fci_cache_count + fci_threshold no longer exceeds fci_cache_size. */
+	int fci_threshold;
+
+	/**
+	 * Preferred number of cached entries */
+	int fci_cache_size;
+
+	/**
+	 * Current number of cached entries. Protected by \a fci_lock */
+	int fci_cache_count;
+
+	/**
+	 * LRU list fld entries. New entries are added at the head,
+	 * fld_cache_shrink() evicts from the tail. */
+	struct list_head fci_lru;
+
+	/**
+	 * sorted fld entries. */
+	struct list_head fci_entries_head;
+
+	/**
+	 * Cache statistics. */
+	struct fld_stats fci_stat;
+
+	/**
+	 * Cache name used for debug and messages. */
+	char fci_name[LUSTRE_MDT_MAXNAMELEN];
+	unsigned int fci_no_shrink:1; /* set: inserts skip fld_cache_shrink() */
+};
+
+enum fld_op {
+	FLD_CREATE = 0,
+	FLD_DELETE = 1,
+	FLD_LOOKUP = 2
+};
+
+enum {
+	/* 4M of FLD cache for servers (presumably bytes, like the client
+	 * value below - confirm against the server code). */
+	FLD_SERVER_CACHE_SIZE = (4 * 0x100000),
+
+	/* 1M of FLD cache will not hurt client a lot. This is a byte
+	 * budget: fld_client_init() divides it by
+	 * sizeof(struct fld_cache_entry) to get an entry count. */
+	FLD_CLIENT_CACHE_SIZE = (1 * 0x100000)
+};
+
+enum {
+	/* Cache threshold is 10 percent of size. */
+	FLD_SERVER_CACHE_THRESHOLD = 10,
+
+	/* Cache threshold is 10 percent of size: fld_client_init() computes
+	 * cache_size * FLD_CLIENT_CACHE_THRESHOLD / 100. */
+	FLD_CLIENT_CACHE_THRESHOLD = 10
+};
+
+extern struct lu_fld_hash fld_hash[];
+
+int fld_client_rpc(struct obd_export *exp,
+		   struct lu_seq_range *range, __u32 fld_op);
+
+#if defined(CONFIG_PROC_FS)
+extern struct lprocfs_vars fld_client_proc_list[];
+#endif
+
+
+struct fld_cache *fld_cache_init(const char *name,
+				 int cache_size, int cache_threshold);
+
+void fld_cache_fini(struct fld_cache *cache);
+
+void fld_cache_flush(struct fld_cache *cache);
+
+int fld_cache_insert(struct fld_cache *cache,
+		     const struct lu_seq_range *range);
+
+struct fld_cache_entry
+*fld_cache_entry_create(const struct lu_seq_range *range);
+
+int fld_cache_insert_nolock(struct fld_cache *cache,
+			    struct fld_cache_entry *f_new);
+void fld_cache_delete(struct fld_cache *cache,
+		      const struct lu_seq_range *range);
+void fld_cache_delete_nolock(struct fld_cache *cache,
+			     const struct lu_seq_range *range);
+int fld_cache_lookup(struct fld_cache *cache,
+		     const u64 seq, struct lu_seq_range *range);
+
+struct fld_cache_entry*
+fld_cache_entry_lookup(struct fld_cache *cache, struct lu_seq_range *range);
+void fld_cache_entry_delete(struct fld_cache *cache,
+			    struct fld_cache_entry *node);
+void fld_dump_cache_entries(struct fld_cache *cache);
+
+struct fld_cache_entry
+*fld_cache_entry_lookup_nolock(struct fld_cache *cache,
+			      struct lu_seq_range *range);
+int fld_write_range(const struct lu_env *env, struct dt_object *dt,
+		    const struct lu_seq_range *range, struct thandle *th);
+
+static inline const char *
+fld_target_name(struct lu_fld_target *tar)
+{
+	if (tar->ft_srv != NULL) /* server name wins when both are set */
+		return tar->ft_srv->lsf_name;
+
+	return (const char *)tar->ft_exp->exp_obd->obd_name; /* ft_exp is dereferenced unchecked */
+}
+
+#endif /* __FLD_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/fld/fld_request.c b/drivers/staging/lustre/lustre/fld/fld_request.c
new file mode 100644
index 000000000..6ac225e90
--- /dev/null
+++ b/drivers/staging/lustre/lustre/fld/fld_request.c
@@ -0,0 +1,526 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2013, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ * + * lustre/fld/fld_request.c + * + * FLD (Fids Location Database) + * + * Author: Yury Umanets <umka@clusterfs.com> + */ + +#define DEBUG_SUBSYSTEM S_FLD + +#include "../../include/linux/libcfs/libcfs.h" +#include <linux/module.h> +#include <asm/div64.h> + +#include "../include/obd.h" +#include "../include/obd_class.h" +#include "../include/lustre_ver.h" +#include "../include/obd_support.h" +#include "../include/lprocfs_status.h" + +#include "../include/dt_object.h" +#include "../include/lustre_req_layout.h" +#include "../include/lustre_fld.h" +#include "../include/lustre_mdc.h" +#include "fld_internal.h" + +/* TODO: these 3 functions are copies of flow-control code from mdc_lib.c + * It should be common thing. The same about mdc RPC lock */ +static int fld_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw) +{ + int rc; + + client_obd_list_lock(&cli->cl_loi_list_lock); + rc = list_empty(&mcw->mcw_entry); + client_obd_list_unlock(&cli->cl_loi_list_lock); + return rc; +}; + +static void fld_enter_request(struct client_obd *cli) +{ + struct mdc_cache_waiter mcw; + struct l_wait_info lwi = { 0 }; + + client_obd_list_lock(&cli->cl_loi_list_lock); + if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) { + list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters); + init_waitqueue_head(&mcw.mcw_waitq); + client_obd_list_unlock(&cli->cl_loi_list_lock); + l_wait_event(mcw.mcw_waitq, fld_req_avail(cli, &mcw), &lwi); + } else { + cli->cl_r_in_flight++; + client_obd_list_unlock(&cli->cl_loi_list_lock); + } +} + +static void fld_exit_request(struct client_obd *cli) +{ + struct list_head *l, *tmp; + struct mdc_cache_waiter *mcw; + + client_obd_list_lock(&cli->cl_loi_list_lock); + cli->cl_r_in_flight--; + list_for_each_safe(l, tmp, &cli->cl_cache_waiters) { + + if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) { + /* No free request slots anymore */ + break; + } + + mcw = list_entry(l, struct mdc_cache_waiter, mcw_entry); + list_del_init(&mcw->mcw_entry); + 
cli->cl_r_in_flight++; + wake_up(&mcw->mcw_waitq); + } + client_obd_list_unlock(&cli->cl_loi_list_lock); +} + +static int fld_rrb_hash(struct lu_client_fld *fld, u64 seq) +{ + LASSERT(fld->lcf_count > 0); + return do_div(seq, fld->lcf_count); +} + +static struct lu_fld_target * +fld_rrb_scan(struct lu_client_fld *fld, u64 seq) +{ + struct lu_fld_target *target; + int hash; + + /* Because almost all of special sequence located in MDT0, + * it should go to index 0 directly, instead of calculating + * hash again, and also if other MDTs is not being connected, + * the fld lookup requests(for seq on MDT0) should not be + * blocked because of other MDTs */ + if (fid_seq_is_norm(seq)) + hash = fld_rrb_hash(fld, seq); + else + hash = 0; + +again: + list_for_each_entry(target, &fld->lcf_targets, ft_chain) { + if (target->ft_idx == hash) + return target; + } + + if (hash != 0) { + /* It is possible the remote target(MDT) are not connected to + * with client yet, so we will refer this to MDT0, which should + * be connected during mount */ + hash = 0; + goto again; + } + + CERROR("%s: Can't find target by hash %d (seq %#llx). Targets (%d):\n", + fld->lcf_name, hash, seq, fld->lcf_count); + + list_for_each_entry(target, &fld->lcf_targets, ft_chain) { + const char *srv_name = target->ft_srv != NULL ? + target->ft_srv->lsf_name : "<null>"; + const char *exp_name = target->ft_exp != NULL ? + (char *)target->ft_exp->exp_obd->obd_uuid.uuid : + "<null>"; + + CERROR(" exp: 0x%p (%s), srv: 0x%p (%s), idx: %llu\n", + target->ft_exp, exp_name, target->ft_srv, + srv_name, target->ft_idx); + } + + /* + * If target is not found, there is logical error anyway, so here is + * LBUG() to catch this situation. 
+ */ + LBUG(); + return NULL; +} + +struct lu_fld_hash fld_hash[] = { + { + .fh_name = "RRB", + .fh_hash_func = fld_rrb_hash, + .fh_scan_func = fld_rrb_scan + }, + { + NULL, + } +}; + +static struct lu_fld_target * +fld_client_get_target(struct lu_client_fld *fld, u64 seq) +{ + struct lu_fld_target *target; + + LASSERT(fld->lcf_hash != NULL); + + spin_lock(&fld->lcf_lock); + target = fld->lcf_hash->fh_scan_func(fld, seq); + spin_unlock(&fld->lcf_lock); + + if (target != NULL) { + CDEBUG(D_INFO, "%s: Found target (idx %llu) by seq %#llx\n", + fld->lcf_name, target->ft_idx, seq); + } + + return target; +} + +/* + * Add export to FLD. This is usually done by CMM and LMV as they are main users + * of FLD module. + */ +int fld_client_add_target(struct lu_client_fld *fld, + struct lu_fld_target *tar) +{ + const char *name; + struct lu_fld_target *target, *tmp; + + LASSERT(tar != NULL); + name = fld_target_name(tar); + LASSERT(name != NULL); + LASSERT(tar->ft_srv != NULL || tar->ft_exp != NULL); + + if (fld->lcf_flags != LUSTRE_FLD_INIT) { + CERROR("%s: Attempt to add target %s (idx %llu) on fly - skip it\n", + fld->lcf_name, name, tar->ft_idx); + return 0; + } + CDEBUG(D_INFO, "%s: Adding target %s (idx %llu)\n", + fld->lcf_name, name, tar->ft_idx); + + OBD_ALLOC_PTR(target); + if (target == NULL) + return -ENOMEM; + + spin_lock(&fld->lcf_lock); + list_for_each_entry(tmp, &fld->lcf_targets, ft_chain) { + if (tmp->ft_idx == tar->ft_idx) { + spin_unlock(&fld->lcf_lock); + OBD_FREE_PTR(target); + CERROR("Target %s exists in FLD and known as %s:#%llu\n", + name, fld_target_name(tmp), tmp->ft_idx); + return -EEXIST; + } + } + + target->ft_exp = tar->ft_exp; + if (target->ft_exp != NULL) + class_export_get(target->ft_exp); + target->ft_srv = tar->ft_srv; + target->ft_idx = tar->ft_idx; + + list_add_tail(&target->ft_chain, + &fld->lcf_targets); + + fld->lcf_count++; + spin_unlock(&fld->lcf_lock); + + return 0; +} +EXPORT_SYMBOL(fld_client_add_target); + +/* Remove export from 
FLD */ +int fld_client_del_target(struct lu_client_fld *fld, __u64 idx) +{ + struct lu_fld_target *target, *tmp; + + spin_lock(&fld->lcf_lock); + list_for_each_entry_safe(target, tmp, + &fld->lcf_targets, ft_chain) { + if (target->ft_idx == idx) { + fld->lcf_count--; + list_del(&target->ft_chain); + spin_unlock(&fld->lcf_lock); + + if (target->ft_exp != NULL) + class_export_put(target->ft_exp); + + OBD_FREE_PTR(target); + return 0; + } + } + spin_unlock(&fld->lcf_lock); + return -ENOENT; +} +EXPORT_SYMBOL(fld_client_del_target); + +static struct proc_dir_entry *fld_type_proc_dir; + +#if defined(CONFIG_PROC_FS) +static int fld_client_proc_init(struct lu_client_fld *fld) +{ + int rc; + + fld->lcf_proc_dir = lprocfs_register(fld->lcf_name, + fld_type_proc_dir, + NULL, NULL); + + if (IS_ERR(fld->lcf_proc_dir)) { + CERROR("%s: LProcFS failed in fld-init\n", + fld->lcf_name); + rc = PTR_ERR(fld->lcf_proc_dir); + return rc; + } + + rc = lprocfs_add_vars(fld->lcf_proc_dir, + fld_client_proc_list, fld); + if (rc) { + CERROR("%s: Can't init FLD proc, rc %d\n", + fld->lcf_name, rc); + goto out_cleanup; + } + + return 0; + +out_cleanup: + fld_client_proc_fini(fld); + return rc; +} + +void fld_client_proc_fini(struct lu_client_fld *fld) +{ + if (fld->lcf_proc_dir) { + if (!IS_ERR(fld->lcf_proc_dir)) + lprocfs_remove(&fld->lcf_proc_dir); + fld->lcf_proc_dir = NULL; + } +} +#else +static int fld_client_proc_init(struct lu_client_fld *fld) +{ + return 0; +} + +void fld_client_proc_fini(struct lu_client_fld *fld) +{ +} +#endif +EXPORT_SYMBOL(fld_client_proc_fini); + +static inline int hash_is_sane(int hash) +{ + return (hash >= 0 && hash < ARRAY_SIZE(fld_hash)); +} + +int fld_client_init(struct lu_client_fld *fld, + const char *prefix, int hash) +{ + int cache_size, cache_threshold; + int rc; + + LASSERT(fld != NULL); + + snprintf(fld->lcf_name, sizeof(fld->lcf_name), + "cli-%s", prefix); + + if (!hash_is_sane(hash)) { + CERROR("%s: Wrong hash function %#x\n", + fld->lcf_name, 
hash); + return -EINVAL; + } + + fld->lcf_count = 0; + spin_lock_init(&fld->lcf_lock); + fld->lcf_hash = &fld_hash[hash]; + fld->lcf_flags = LUSTRE_FLD_INIT; + INIT_LIST_HEAD(&fld->lcf_targets); + + cache_size = FLD_CLIENT_CACHE_SIZE / + sizeof(struct fld_cache_entry); + + cache_threshold = cache_size * + FLD_CLIENT_CACHE_THRESHOLD / 100; + + fld->lcf_cache = fld_cache_init(fld->lcf_name, + cache_size, cache_threshold); + if (IS_ERR(fld->lcf_cache)) { + rc = PTR_ERR(fld->lcf_cache); + fld->lcf_cache = NULL; + goto out; + } + + rc = fld_client_proc_init(fld); + if (rc) + goto out; +out: + if (rc) + fld_client_fini(fld); + else + CDEBUG(D_INFO, "%s: Using \"%s\" hash\n", + fld->lcf_name, fld->lcf_hash->fh_name); + return rc; +} +EXPORT_SYMBOL(fld_client_init); + +void fld_client_fini(struct lu_client_fld *fld) +{ + struct lu_fld_target *target, *tmp; + + spin_lock(&fld->lcf_lock); + list_for_each_entry_safe(target, tmp, + &fld->lcf_targets, ft_chain) { + fld->lcf_count--; + list_del(&target->ft_chain); + if (target->ft_exp != NULL) + class_export_put(target->ft_exp); + OBD_FREE_PTR(target); + } + spin_unlock(&fld->lcf_lock); + + if (fld->lcf_cache != NULL) { + if (!IS_ERR(fld->lcf_cache)) + fld_cache_fini(fld->lcf_cache); + fld->lcf_cache = NULL; + } +} +EXPORT_SYMBOL(fld_client_fini); + +int fld_client_rpc(struct obd_export *exp, + struct lu_seq_range *range, __u32 fld_op) +{ + struct ptlrpc_request *req; + struct lu_seq_range *prange; + __u32 *op; + int rc; + struct obd_import *imp; + + LASSERT(exp != NULL); + + imp = class_exp2cliimp(exp); + req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_QUERY, LUSTRE_MDS_VERSION, + FLD_QUERY); + if (req == NULL) + return -ENOMEM; + + op = req_capsule_client_get(&req->rq_pill, &RMF_FLD_OPC); + *op = fld_op; + + prange = req_capsule_client_get(&req->rq_pill, &RMF_FLD_MDFLD); + *prange = *range; + + ptlrpc_request_set_replen(req); + req->rq_request_portal = FLD_REQUEST_PORTAL; + req->rq_reply_portal = MDC_REPLY_PORTAL; + 
ptlrpc_at_set_req_timeout(req); + + if (fld_op == FLD_LOOKUP && + imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS) + req->rq_allow_replay = 1; + + if (fld_op != FLD_LOOKUP) + mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); + fld_enter_request(&exp->exp_obd->u.cli); + rc = ptlrpc_queue_wait(req); + fld_exit_request(&exp->exp_obd->u.cli); + if (fld_op != FLD_LOOKUP) + mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); + if (rc) + goto out_req; + + prange = req_capsule_server_get(&req->rq_pill, &RMF_FLD_MDFLD); + if (prange == NULL) { + rc = -EFAULT; + goto out_req; + } + *range = *prange; +out_req: + ptlrpc_req_finished(req); + return rc; +} + +int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds, + __u32 flags, const struct lu_env *env) +{ + struct lu_seq_range res = { 0 }; + struct lu_fld_target *target; + int rc; + + fld->lcf_flags |= LUSTRE_FLD_RUN; + + rc = fld_cache_lookup(fld->lcf_cache, seq, &res); + if (rc == 0) { + *mds = res.lsr_index; + return 0; + } + + /* Can not find it in the cache */ + target = fld_client_get_target(fld, seq); + LASSERT(target != NULL); + + CDEBUG(D_INFO, "%s: Lookup fld entry (seq: %#llx) on target %s (idx %llu)\n", + fld->lcf_name, seq, fld_target_name(target), target->ft_idx); + + res.lsr_start = seq; + fld_range_set_type(&res, flags); + rc = fld_client_rpc(target->ft_exp, &res, FLD_LOOKUP); + + if (rc == 0) { + *mds = res.lsr_index; + + fld_cache_insert(fld->lcf_cache, &res); + } + return rc; +} +EXPORT_SYMBOL(fld_client_lookup); + +void fld_client_flush(struct lu_client_fld *fld) +{ + fld_cache_flush(fld->lcf_cache); +} +EXPORT_SYMBOL(fld_client_flush); + +static int __init fld_mod_init(void) +{ + fld_type_proc_dir = lprocfs_register(LUSTRE_FLD_NAME, + proc_lustre_root, + NULL, NULL); + return PTR_ERR_OR_ZERO(fld_type_proc_dir); +} + +static void __exit fld_mod_exit(void) +{ + if (fld_type_proc_dir != NULL && !IS_ERR(fld_type_proc_dir)) { + lprocfs_remove(&fld_type_proc_dir); + fld_type_proc_dir = 
NULL; + } +} + +MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>"); +MODULE_DESCRIPTION("Lustre FLD"); +MODULE_LICENSE("GPL"); + +module_init(fld_mod_init) +module_exit(fld_mod_exit) diff --git a/drivers/staging/lustre/lustre/fld/lproc_fld.c b/drivers/staging/lustre/lustre/fld/lproc_fld.c new file mode 100644 index 000000000..f53fdcfae --- /dev/null +++ b/drivers/staging/lustre/lustre/fld/lproc_fld.c @@ -0,0 +1,172 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, 2013, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ * + * lustre/fld/lproc_fld.c + * + * FLD (FIDs Location Database) + * + * Author: Yury Umanets <umka@clusterfs.com> + * Di Wang <di.wang@whamcloud.com> + */ + +#define DEBUG_SUBSYSTEM S_FLD + +#include "../../include/linux/libcfs/libcfs.h" +#include <linux/module.h> + +#include "../include/obd.h" +#include "../include/obd_class.h" +#include "../include/dt_object.h" +#include "../include/obd_support.h" +#include "../include/lustre_req_layout.h" +#include "../include/lustre_fld.h" +#include "../include/lustre_fid.h" +#include "fld_internal.h" + +static int +fld_proc_targets_seq_show(struct seq_file *m, void *unused) +{ + struct lu_client_fld *fld = (struct lu_client_fld *)m->private; + struct lu_fld_target *target; + + LASSERT(fld != NULL); + + spin_lock(&fld->lcf_lock); + list_for_each_entry(target, + &fld->lcf_targets, ft_chain) + seq_printf(m, "%s\n", fld_target_name(target)); + spin_unlock(&fld->lcf_lock); + + return 0; +} + +static int +fld_proc_hash_seq_show(struct seq_file *m, void *unused) +{ + struct lu_client_fld *fld = (struct lu_client_fld *)m->private; + + LASSERT(fld != NULL); + + spin_lock(&fld->lcf_lock); + seq_printf(m, "%s\n", fld->lcf_hash->fh_name); + spin_unlock(&fld->lcf_lock); + + return 0; +} + +static ssize_t +fld_proc_hash_seq_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *off) +{ + struct lu_client_fld *fld; + struct lu_fld_hash *hash = NULL; + char fh_name[8]; + int i; + + if (count > sizeof(fh_name)) + return -ENAMETOOLONG; + + if (copy_from_user(fh_name, buffer, count) != 0) + return -EFAULT; + + fld = ((struct seq_file *)file->private_data)->private; + LASSERT(fld != NULL); + + for (i = 0; fld_hash[i].fh_name != NULL; i++) { + if (count != strlen(fld_hash[i].fh_name)) + continue; + + if (!strncmp(fld_hash[i].fh_name, fh_name, count)) { + hash = &fld_hash[i]; + break; + } + } + + if (hash != NULL) { + spin_lock(&fld->lcf_lock); + fld->lcf_hash = hash; + spin_unlock(&fld->lcf_lock); + + CDEBUG(D_INFO, "%s: 
Changed hash to \"%s\"\n", + fld->lcf_name, hash->fh_name); + } + + return count; +} + +static ssize_t +fld_proc_cache_flush_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) +{ + struct lu_client_fld *fld = file->private_data; + + LASSERT(fld != NULL); + + fld_cache_flush(fld->lcf_cache); + + CDEBUG(D_INFO, "%s: Lookup cache is flushed\n", fld->lcf_name); + + return count; +} + +static int fld_proc_cache_flush_open(struct inode *inode, struct file *file) +{ + file->private_data = PDE_DATA(inode); + return 0; +} + +static int fld_proc_cache_flush_release(struct inode *inode, struct file *file) +{ + file->private_data = NULL; + return 0; +} + +static struct file_operations fld_proc_cache_flush_fops = { + .owner = THIS_MODULE, + .open = fld_proc_cache_flush_open, + .write = fld_proc_cache_flush_write, + .release = fld_proc_cache_flush_release, +}; + +LPROC_SEQ_FOPS_RO(fld_proc_targets); +LPROC_SEQ_FOPS(fld_proc_hash); + +struct lprocfs_vars fld_client_proc_list[] = { + { "targets", &fld_proc_targets_fops }, + { "hash", &fld_proc_hash_fops }, + { "cache_flush", &fld_proc_cache_flush_fops }, + { NULL } +}; |