diff options
Diffstat (limited to 'include/linux/sunrpc')
30 files changed, 4996 insertions, 0 deletions
diff --git a/include/linux/sunrpc/addr.h b/include/linux/sunrpc/addr.h new file mode 100644 index 000000000..07d8e53be --- /dev/null +++ b/include/linux/sunrpc/addr.h @@ -0,0 +1,170 @@ +/* + * linux/include/linux/sunrpc/addr.h + * + * Various routines for copying and comparing sockaddrs and for + * converting them to and from presentation format. + */ +#ifndef _LINUX_SUNRPC_ADDR_H +#define _LINUX_SUNRPC_ADDR_H + +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <net/ipv6.h> + +size_t rpc_ntop(const struct sockaddr *, char *, const size_t); +size_t rpc_pton(struct net *, const char *, const size_t, + struct sockaddr *, const size_t); +char * rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t); +size_t rpc_uaddr2sockaddr(struct net *, const char *, const size_t, + struct sockaddr *, const size_t); + +static inline unsigned short rpc_get_port(const struct sockaddr *sap) +{ + switch (sap->sa_family) { + case AF_INET: + return ntohs(((const struct sockaddr_in *)sap)->sin_port); + case AF_INET6: + return ntohs(((const struct sockaddr_in6 *)sap)->sin6_port); + } + return 0; /* neither AF_INET nor AF_INET6 */ +} + +static inline void rpc_set_port(struct sockaddr *sap, + const unsigned short port) +{ + switch (sap->sa_family) { /* other families are left unmodified */ + case AF_INET: + ((struct sockaddr_in *)sap)->sin_port = htons(port); + break; + case AF_INET6: + ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); + break; + } +} + +#define IPV6_SCOPE_DELIMITER '%' +#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") + +static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; + const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; + + return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; /* port is not compared */ +} + +static inline bool __rpc_copy_addr4(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in *ssin = (const struct sockaddr_in *) src; + struct sockaddr_in *dsin = (struct sockaddr_in *) dst; + +
dsin->sin_family = ssin->sin_family; + dsin->sin_addr.s_addr = ssin->sin_addr.s_addr; /* sin_port is left untouched */ + return true; +} + +#if IS_ENABLED(CONFIG_IPV6) +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; + const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; + + if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr)) + return false; + else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL) /* link-local: scope must match too */ + return sin1->sin6_scope_id == sin2->sin6_scope_id; + + return true; +} + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src; + struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst; + + dsin6->sin6_family = ssin6->sin6_family; + dsin6->sin6_addr = ssin6->sin6_addr; + dsin6->sin6_scope_id = ssin6->sin6_scope_id; + return true; /* sin6_port and sin6_flowinfo are not copied */ +} +#else /* !IS_ENABLED(CONFIG_IPV6) */ +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + return false; /* IPv6 not configured: never reports a match */ +} + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + return false; /* IPv6 not configured: dst is not written */ +} +#endif /* !IS_ENABLED(CONFIG_IPV6) */ + +/** + * rpc_cmp_addr - compare the address portion of two sockaddrs. + * @sap1: first sockaddr + * @sap2: second sockaddr + * + * Just compares the family and address portion. Ignores port, but + * compares the scope if it's a link-local address. + * + * Returns true if the addrs are equal, false if they aren't.
+ */ +static inline bool rpc_cmp_addr(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + if (sap1->sa_family == sap2->sa_family) { + switch (sap1->sa_family) { + case AF_INET: + return __rpc_cmp_addr4(sap1, sap2); + case AF_INET6: + return __rpc_cmp_addr6(sap1, sap2); + } + } + return false; /* families differ, or family is neither AF_INET nor AF_INET6 */ +} + +/** + * rpc_copy_addr - copy the address portion of one sockaddr to another + * @dst: destination sockaddr + * @src: source sockaddr + * + * Just copies the address portion and family, plus the scope ID for + * AF_INET6 addresses. Ignores the port. Caller is responsible for making + * certain that dst is large enough to hold the address in src. Returns + * true if the address family is supported, false otherwise. + */ +static inline bool rpc_copy_addr(struct sockaddr *dst, + const struct sockaddr *src) +{ + switch (src->sa_family) { + case AF_INET: + return __rpc_copy_addr4(dst, src); + case AF_INET6: + return __rpc_copy_addr6(dst, src); + } + return false; +} + +/** + * rpc_get_scope_id - return scopeid for a given sockaddr + * @sa: sockaddr to get scopeid from + * + * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if + * not an AF_INET6 address. + */ +static inline u32 rpc_get_scope_id(const struct sockaddr *sa) +{ + if (sa->sa_family != AF_INET6) + return 0; + + return ((const struct sockaddr_in6 *) sa)->sin6_scope_id; +} + +#endif /* _LINUX_SUNRPC_ADDR_H */ diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h new file mode 100644 index 000000000..a7cbb570c --- /dev/null +++ b/include/linux/sunrpc/auth.h @@ -0,0 +1,198 @@ +/* + * linux/include/linux/sunrpc/auth.h + * + * Declarations for the RPC client authentication machinery.
+ * + * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> + */ + +#ifndef _LINUX_SUNRPC_AUTH_H +#define _LINUX_SUNRPC_AUTH_H + +#ifdef __KERNEL__ + +#include <linux/sunrpc/sched.h> +#include <linux/sunrpc/msg_prot.h> +#include <linux/sunrpc/xdr.h> + +#include <linux/atomic.h> +#include <linux/rcupdate.h> +#include <linux/uidgid.h> + +/* size of the nodename buffer */ +#define UNX_MAXNODENAME 32 + +struct rpcsec_gss_info; + +/* auth_cred ac_flags bits */ +enum { + RPC_CRED_NO_CRKEY_TIMEOUT = 0, /* underlying cred has no key timeout */ + RPC_CRED_KEY_EXPIRE_SOON = 1, /* underlying cred key will expire soon */ + RPC_CRED_NOTIFY_TIMEOUT = 2, /* notify generic cred when underlying + key will expire soon */ +}; + +/* Work around the lack of a VFS credential */ +struct auth_cred { + kuid_t uid; + kgid_t gid; + struct group_info *group_info; + const char *principal; + unsigned long ac_flags; + unsigned char machine_cred : 1; +}; + +/* + * Client user credentials + */ +struct rpc_auth; +struct rpc_credops; +struct rpc_cred { + struct hlist_node cr_hash; /* hash chain */ + struct list_head cr_lru; /* lru garbage collection */ + struct rcu_head cr_rcu; + struct rpc_auth * cr_auth; + const struct rpc_credops *cr_ops; +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) + unsigned long cr_magic; /* 0x0f4aa4f0 */ +#endif + unsigned long cr_expire; /* when to gc */ + unsigned long cr_flags; /* various flags */ + atomic_t cr_count; /* ref count */ + + kuid_t cr_uid; + + /* per-flavor data */ +}; +#define RPCAUTH_CRED_NEW 0 +#define RPCAUTH_CRED_UPTODATE 1 +#define RPCAUTH_CRED_HASHED 2 +#define RPCAUTH_CRED_NEGATIVE 3 + +#define RPCAUTH_CRED_MAGIC 0x0f4aa4f0 + +/* + * Client authentication handle + */ +struct rpc_cred_cache; +struct rpc_authops; +struct rpc_auth { + unsigned int au_cslack; /* call cred size estimate */ + /* guess at number of u32's auth adds before + * reply data; normally the verifier size: */ + unsigned int au_rslack; + /* for gss, used to calculate au_rslack: */ + unsigned
int au_verfsize; + + unsigned int au_flags; /* various flags */ + const struct rpc_authops *au_ops; /* operations */ + rpc_authflavor_t au_flavor; /* pseudoflavor (note may + * differ from the flavor in + * au_ops->au_flavor in gss + * case) */ + atomic_t au_count; /* Reference counter */ + + struct rpc_cred_cache * au_credcache; + /* per-flavor data */ +}; + +struct rpc_auth_create_args { + rpc_authflavor_t pseudoflavor; + const char *target_name; +}; + +/* Flags for rpcauth_lookupcred() */ +#define RPCAUTH_LOOKUP_NEW 0x01 /* Accept an uninitialised cred */ +#define RPCAUTH_LOOKUP_RCU 0x02 /* lock-less lookup */ + +/* + * Client authentication ops + */ +struct rpc_authops { + struct module *owner; + rpc_authflavor_t au_flavor; /* flavor (RPC_AUTH_*) */ + char * au_name; + struct rpc_auth * (*create)(struct rpc_auth_create_args *, struct rpc_clnt *); + void (*destroy)(struct rpc_auth *); + + struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int); + struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int); + int (*list_pseudoflavors)(rpc_authflavor_t *, int); + rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *); + int (*flavor2info)(rpc_authflavor_t, + struct rpcsec_gss_info *); + int (*key_timeout)(struct rpc_auth *, + struct rpc_cred *); +}; + +struct rpc_credops { + const char * cr_name; /* Name of the auth flavour */ + int (*cr_init)(struct rpc_auth *, struct rpc_cred *); + void (*crdestroy)(struct rpc_cred *); + + int (*crmatch)(struct auth_cred *, struct rpc_cred *, int); + struct rpc_cred * (*crbind)(struct rpc_task *, struct rpc_cred *, int); + __be32 * (*crmarshal)(struct rpc_task *, __be32 *); + int (*crrefresh)(struct rpc_task *); + __be32 * (*crvalidate)(struct rpc_task *, __be32 *); + int (*crwrap_req)(struct rpc_task *, kxdreproc_t, + void *, __be32 *, void *); + int (*crunwrap_resp)(struct rpc_task *, kxdrdproc_t, + void *, __be32 *, void *); + int (*crkey_timeout)(struct rpc_cred *); + bool 
(*crkey_to_expire)(struct rpc_cred *); + char * (*crstringify_acceptor)(struct rpc_cred *); +}; + +extern const struct rpc_authops authunix_ops; +extern const struct rpc_authops authnull_ops; + +int __init rpc_init_authunix(void); +int __init rpc_init_generic_auth(void); +int __init rpcauth_init_module(void); +void rpcauth_remove_module(void); +void rpc_destroy_generic_auth(void); +void rpc_destroy_authunix(void); + +struct rpc_cred * rpc_lookup_cred(void); +struct rpc_cred * rpc_lookup_cred_nonblock(void); +struct rpc_cred * rpc_lookup_machine_cred(const char *service_name); +int rpcauth_register(const struct rpc_authops *); +int rpcauth_unregister(const struct rpc_authops *); +struct rpc_auth * rpcauth_create(struct rpc_auth_create_args *, + struct rpc_clnt *); +void rpcauth_release(struct rpc_auth *); +rpc_authflavor_t rpcauth_get_pseudoflavor(rpc_authflavor_t, + struct rpcsec_gss_info *); +int rpcauth_get_gssinfo(rpc_authflavor_t, + struct rpcsec_gss_info *); +int rpcauth_list_flavors(rpc_authflavor_t *, int); +struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); +void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); +struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); +struct rpc_cred * rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int); +void put_rpccred(struct rpc_cred *); +__be32 * rpcauth_marshcred(struct rpc_task *, __be32 *); +__be32 * rpcauth_checkverf(struct rpc_task *, __be32 *); +int rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj); +int rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj); +int rpcauth_refreshcred(struct rpc_task *); +void rpcauth_invalcred(struct rpc_task *); +int rpcauth_uptodatecred(struct rpc_task *); +int rpcauth_init_credcache(struct rpc_auth *); +void rpcauth_destroy_credcache(struct rpc_auth *); +void 
rpcauth_clear_credcache(struct rpc_cred_cache *); +int rpcauth_key_timeout_notify(struct rpc_auth *, + struct rpc_cred *); +bool rpcauth_cred_key_to_expire(struct rpc_cred *); +char * rpcauth_stringify_acceptor(struct rpc_cred *); + +static inline +struct rpc_cred * get_rpccred(struct rpc_cred *cred) +{ + atomic_inc(&cred->cr_count); + return cred; +} + +#endif /* __KERNEL__ */ +#endif /* _LINUX_SUNRPC_AUTH_H */ diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h new file mode 100644 index 000000000..36eebc451 --- /dev/null +++ b/include/linux/sunrpc/auth_gss.h @@ -0,0 +1,92 @@ +/* + * linux/include/linux/sunrpc/auth_gss.h + * + * Declarations for RPCSEC_GSS + * + * Dug Song <dugsong@monkey.org> + * Andy Adamson <andros@umich.edu> + * Bruce Fields <bfields@umich.edu> + * Copyright (c) 2000 The Regents of the University of Michigan + */ + +#ifndef _LINUX_SUNRPC_AUTH_GSS_H +#define _LINUX_SUNRPC_AUTH_GSS_H + +#ifdef __KERNEL__ +#include <linux/sunrpc/auth.h> +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/gss_api.h> + +#define RPC_GSS_VERSION 1 + +#define MAXSEQ 0x80000000 /* maximum legal sequence number, from rfc 2203 */ + +enum rpc_gss_proc { + RPC_GSS_PROC_DATA = 0, + RPC_GSS_PROC_INIT = 1, + RPC_GSS_PROC_CONTINUE_INIT = 2, + RPC_GSS_PROC_DESTROY = 3 +}; + +enum rpc_gss_svc { + RPC_GSS_SVC_NONE = 1, + RPC_GSS_SVC_INTEGRITY = 2, + RPC_GSS_SVC_PRIVACY = 3 +}; + +/* on-the-wire gss cred: */ +struct rpc_gss_wire_cred { + u32 gc_v; /* version */ + u32 gc_proc; /* control procedure */ + u32 gc_seq; /* sequence number */ + u32 gc_svc; /* service */ + struct xdr_netobj gc_ctx; /* context handle */ +}; + +/* on-the-wire gss verifier: */ +struct rpc_gss_wire_verf { + u32 gv_flavor; + struct xdr_netobj gv_verf; +}; + +/* return from gss NULL PROC init sec context */ +struct rpc_gss_init_res { + struct xdr_netobj gr_ctx; /* context handle */ + u32 gr_major; /* major status */ + u32 gr_minor; /* minor status */ + u32 gr_win; /* sequence 
window */ + struct xdr_netobj gr_token; /* token */ +}; + +/* The gss_cl_ctx struct holds all the information the rpcsec_gss client + * code needs to know about a single security context. In particular, + * gc_gss_ctx is the context handle that is used to do gss-api calls, while + * gc_wire_ctx is the context handle that is used to identify the context on + * the wire when communicating with a server. */ + +struct gss_cl_ctx { + atomic_t count; + enum rpc_gss_proc gc_proc; + u32 gc_seq; + spinlock_t gc_seq_lock; + struct gss_ctx *gc_gss_ctx; + struct xdr_netobj gc_wire_ctx; + struct xdr_netobj gc_acceptor; + u32 gc_win; + unsigned long gc_expiry; + struct rcu_head gc_rcu; +}; + +struct gss_upcall_msg; +struct gss_cred { + struct rpc_cred gc_base; + enum rpc_gss_svc gc_service; + struct gss_cl_ctx __rcu *gc_ctx; + struct gss_upcall_msg *gc_upcall; + const char *gc_principal; + unsigned long gc_upcall_timestamp; +}; + +#endif /* __KERNEL__ */ +#endif /* _LINUX_SUNRPC_AUTH_GSS_H */ + diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h new file mode 100644 index 000000000..2ca67b55e --- /dev/null +++ b/include/linux/sunrpc/bc_xprt.h @@ -0,0 +1,68 @@ +/****************************************************************************** + +(c) 2008 NetApp. All Rights Reserved. + +NetApp provides this source code under the GPL v2 License. +The GPL v2 license is available at +http://opensource.org/licenses/gpl-license.php. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ + +/* + * Functions to create and manage the backchannel + */ + +#ifndef _LINUX_SUNRPC_BC_XPRT_H +#define _LINUX_SUNRPC_BC_XPRT_H + +#include <linux/sunrpc/svcsock.h> +#include <linux/sunrpc/xprt.h> +#include <linux/sunrpc/sched.h> + +#ifdef CONFIG_SUNRPC_BACKCHANNEL +struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid); +void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied); +void xprt_free_bc_request(struct rpc_rqst *req); +int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); +void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); +int bc_send(struct rpc_rqst *req); + +/* + * Determine if a shared backchannel is in use + */ +static inline int svc_is_backchannel(const struct svc_rqst *rqstp) +{ + if (rqstp->rq_server->sv_bc_xprt) + return 1; + return 0; +} +#else /* CONFIG_SUNRPC_BACKCHANNEL */ +static inline int xprt_setup_backchannel(struct rpc_xprt *xprt, + unsigned int min_reqs) +{ + return 0; +} + +static inline int svc_is_backchannel(const struct svc_rqst *rqstp) +{ + return 0; +} + +static inline void xprt_free_bc_request(struct rpc_rqst *req) +{ +} +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ +#endif /* _LINUX_SUNRPC_BC_XPRT_H */ + diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h new file mode 100644 index 000000000..437ddb6c4 --- /dev/null +++ b/include/linux/sunrpc/cache.h 
@@ -0,0 +1,298 @@ +/* + * include/linux/sunrpc/cache.h + * + * Generic code for various authentication-related caches + * used by sunrpc clients and servers. + * + * Copyright (C) 2002 Neil Brown <neilb@cse.unsw.edu.au> + * + * Released under terms in GPL version 2. See COPYING. + * + */ + +#ifndef _LINUX_SUNRPC_CACHE_H_ +#define _LINUX_SUNRPC_CACHE_H_ + +#include <linux/kref.h> +#include <linux/slab.h> +#include <linux/atomic.h> +#include <linux/proc_fs.h> + +/* + * Each cache requires: + * - A 'struct cache_detail' which contains information specific to the cache + * for common code to use. + * - An item structure that must contain a "struct cache_head" + * - A lookup function defined using DefineCacheLookup + * - A 'put' function that can release a cache item. It will only + * be called after cache_put has succeeded, so there are guaranteed + * to be no references. + * - A function to calculate a hash of an item's key. + * + * as well as assorted code fragments (e.g. compare keys) and numbers + * (e.g. hash size, goal_age, etc). + * + * Each cache must be registered so that it can be cleaned regularly. + * When the cache is unregistered, it is flushed completely. + * + * Entries have a ref count and a 'hashed' flag which counts the existence + * in the hash table. + * We only expire entries when refcount is zero. + * Existence in the cache is counted in the refcount. + */ + +/* Every cache item has a common header that is used + * for expiring and refreshing entries.
+ * + */ +struct cache_head { + struct cache_head * next; + time_t expiry_time; /* After this time, don't use the data */ + time_t last_refresh; /* If CACHE_PENDING, this is when upcall + * was sent, else this is when update was received + */ + struct kref ref; + unsigned long flags; +}; +#define CACHE_VALID 0 /* Entry contains valid data */ +#define CACHE_NEGATIVE 1 /* Negative entry - there is no match for the key */ +#define CACHE_PENDING 2 /* An upcall has been sent but no reply received yet */ +#define CACHE_CLEANED 3 /* Entry has been cleaned from cache */ + +#define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ + +struct cache_detail_procfs { + struct proc_dir_entry *proc_ent; + struct proc_dir_entry *flush_ent, *channel_ent, *content_ent; +}; + +struct cache_detail_pipefs { + struct dentry *dir; +}; + +struct cache_detail { + struct module * owner; + int hash_size; + struct cache_head ** hash_table; + rwlock_t hash_lock; + + atomic_t inuse; /* active user-space update or lookup */ + + char *name; + void (*cache_put)(struct kref *); + + int (*cache_upcall)(struct cache_detail *, + struct cache_head *); + + void (*cache_request)(struct cache_detail *cd, + struct cache_head *ch, + char **bpp, int *blen); + + int (*cache_parse)(struct cache_detail *, + char *buf, int len); + + int (*cache_show)(struct seq_file *m, + struct cache_detail *cd, + struct cache_head *h); + void (*warn_no_listener)(struct cache_detail *cd, + int has_died); + + struct cache_head * (*alloc)(void); + int (*match)(struct cache_head *orig, struct cache_head *new); + void (*init)(struct cache_head *orig, struct cache_head *new); + void (*update)(struct cache_head *orig, struct cache_head *new); + + /* fields below this comment are for internal use + * and should not be touched by cache owners + */ + time_t flush_time; /* flush all cache items with last_refresh + * earlier than this */ + struct list_head others; + time_t nextcheck; + int entries; + + /*
fields for communication over channel */ + struct list_head queue; + + atomic_t readers; /* how many times /channel is open */ + time_t last_close; /* if no readers, when did last close */ + time_t last_warn; /* when we last warned about no readers */ + + union { + struct cache_detail_procfs procfs; + struct cache_detail_pipefs pipefs; + } u; + struct net *net; +}; + + +/* this must be embedded in any request structure that + * identifies an object that will want a callback on + * a cache fill + */ +struct cache_req { + struct cache_deferred_req *(*defer)(struct cache_req *req); + int thread_wait; /* How long (jiffies) we can block the + * current thread to wait for updates. + */ +}; +/* this must be embedded in a deferred_request that is being + * delayed awaiting cache-fill + */ +struct cache_deferred_req { + struct hlist_node hash; /* on hash chain */ + struct list_head recent; /* on fifo */ + struct cache_head *item; /* cache item we wait on */ + void *owner; /* we might need to discard all deferred requests + * owned by someone */ + void (*revisit)(struct cache_deferred_req *req, + int too_many); +}; + +/* + * timestamps kept in the cache are expressed in seconds + * since boot. This is the best for measuring differences in + * real time.
+ */ +static inline time_t seconds_since_boot(void) +{ + struct timespec boot; + getboottime(&boot); + return get_seconds() - boot.tv_sec; +} + +static inline time_t convert_to_wallclock(time_t sinceboot) +{ + struct timespec boot; + getboottime(&boot); + return boot.tv_sec + sinceboot; +} + +extern const struct file_operations cache_file_operations_pipefs; +extern const struct file_operations content_file_operations_pipefs; +extern const struct file_operations cache_flush_operations_pipefs; + +extern struct cache_head * +sunrpc_cache_lookup(struct cache_detail *detail, + struct cache_head *key, int hash); +extern struct cache_head * +sunrpc_cache_update(struct cache_detail *detail, + struct cache_head *new, struct cache_head *old, int hash); + +extern int +sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h); + + +extern void cache_clean_deferred(void *owner); + +static inline struct cache_head *cache_get(struct cache_head *h) +{ + kref_get(&h->ref); + return h; +} + + +static inline void cache_put(struct cache_head *h, struct cache_detail *cd) +{ + if (atomic_read(&h->ref.refcount) <= 2 && + h->expiry_time < cd->nextcheck) + cd->nextcheck = h->expiry_time; + kref_put(&h->ref, cd->cache_put); +} + +static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) +{ + return (h->expiry_time < seconds_since_boot()) || + (detail->flush_time > h->last_refresh); +} + +extern int cache_check(struct cache_detail *detail, + struct cache_head *h, struct cache_req *rqstp); +extern void cache_flush(void); +extern void cache_purge(struct cache_detail *detail); +#define NEVER (0x7FFFFFFF) +extern void __init cache_initialize(void); +extern int cache_register_net(struct cache_detail *cd, struct net *net); +extern void cache_unregister_net(struct cache_detail *cd, struct net *net); + +extern struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net); +extern void cache_destroy_net(struct cache_detail *cd, struct 
net *net); + +extern void sunrpc_init_cache_detail(struct cache_detail *cd); +extern void sunrpc_destroy_cache_detail(struct cache_detail *cd); +extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, + umode_t, struct cache_detail *); +extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); + +extern void qword_add(char **bpp, int *lp, char *str); +extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); +extern int qword_get(char **bpp, char *dest, int bufsize); + +static inline int get_int(char **bpp, int *anint) +{ + char buf[50]; + char *ep; + int rv; + int len = qword_get(bpp, buf, sizeof(buf)); + + if (len < 0) + return -EINVAL; + if (len == 0) + return -ENOENT; + + rv = simple_strtol(buf, &ep, 0); + if (*ep) + return -EINVAL; + + *anint = rv; + return 0; +} + +static inline int get_uint(char **bpp, unsigned int *anint) +{ + char buf[50]; + int len = qword_get(bpp, buf, sizeof(buf)); + + if (len < 0) + return -EINVAL; + if (len == 0) + return -ENOENT; + + if (kstrtouint(buf, 0, anint)) + return -EINVAL; + + return 0; +} + +static inline int get_time(char **bpp, time_t *time) +{ + char buf[50]; + long long ll; + int len = qword_get(bpp, buf, sizeof(buf)); + + if (len < 0) + return -EINVAL; + if (len == 0) + return -ENOENT; + + if (kstrtoll(buf, 0, &ll)) + return -EINVAL; + + *time = (time_t)ll; + return 0; +} + +static inline time_t get_expiry(char **bpp) +{ + time_t rv; + struct timespec boot; + + if (get_time(bpp, &rv)) + return 0; + if (rv < 0) + return 0; + getboottime(&boot); + return rv - boot.tv_sec; +} + +#endif /* _LINUX_SUNRPC_CACHE_H_ */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h new file mode 100644 index 000000000..598ba80ec --- /dev/null +++ b/include/linux/sunrpc/clnt.h @@ -0,0 +1,185 @@ +/* + * linux/include/linux/sunrpc/clnt.h + * + * Declarations for the high-level RPC client interface + * + * Copyright (C) 1995, 1996, Olaf Kirch <okir@monad.swb.de> + */ + +#ifndef 
_LINUX_SUNRPC_CLNT_H +#define _LINUX_SUNRPC_CLNT_H + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/in6.h> + +#include <linux/sunrpc/msg_prot.h> +#include <linux/sunrpc/sched.h> +#include <linux/sunrpc/xprt.h> +#include <linux/sunrpc/auth.h> +#include <linux/sunrpc/stats.h> +#include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/timer.h> +#include <linux/sunrpc/rpc_pipe_fs.h> +#include <asm/signal.h> +#include <linux/path.h> +#include <net/ipv6.h> + +struct rpc_inode; + +/* + * The high-level client handle + */ +struct rpc_clnt { + atomic_t cl_count; /* Number of references */ + unsigned int cl_clid; /* client id */ + struct list_head cl_clients; /* Global list of clients */ + struct list_head cl_tasks; /* List of tasks */ + spinlock_t cl_lock; /* spinlock */ + struct rpc_xprt __rcu * cl_xprt; /* transport */ + struct rpc_procinfo * cl_procinfo; /* procedure info */ + u32 cl_prog, /* RPC program number */ + cl_vers, /* RPC version number */ + cl_maxproc; /* max procedure number */ + + struct rpc_auth * cl_auth; /* authenticator */ + struct rpc_stat * cl_stats; /* per-program statistics */ + struct rpc_iostats * cl_metrics; /* per-client statistics */ + + unsigned int cl_softrtry : 1,/* soft timeouts */ + cl_discrtry : 1,/* disconnect before retry */ + cl_noretranstimeo: 1,/* No retransmit timeouts */ + cl_autobind : 1,/* use getport() */ + cl_chatty : 1;/* be verbose */ + + struct rpc_rtt * cl_rtt; /* RTO estimator data */ + const struct rpc_timeout *cl_timeout; /* Timeout strategy */ + + int cl_nodelen; /* nodename length */ + char cl_nodename[UNX_MAXNODENAME+1]; + struct rpc_pipe_dir_head cl_pipedir_objects; + struct rpc_clnt * cl_parent; /* Points to parent of clones */ + struct rpc_rtt cl_rtt_default; + struct rpc_timeout cl_timeout_default; + const struct rpc_program *cl_program; +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) + struct dentry *cl_debugfs; /* debugfs directory */ +#endif +}; + +/* + * General RPC program info + 
*/ +#define RPC_MAXVERSION 4 +struct rpc_program { + const char * name; /* protocol name */ + u32 number; /* program number */ + unsigned int nrvers; /* number of versions */ + const struct rpc_version ** version; /* version array */ + struct rpc_stat * stats; /* statistics */ + const char * pipe_dir_name; /* path to rpc_pipefs dir */ +}; + +struct rpc_version { + u32 number; /* version number */ + unsigned int nrprocs; /* number of procs */ + struct rpc_procinfo * procs; /* procedure array */ +}; + +/* + * Procedure information + */ +struct rpc_procinfo { + u32 p_proc; /* RPC procedure number */ + kxdreproc_t p_encode; /* XDR encode function */ + kxdrdproc_t p_decode; /* XDR decode function */ + unsigned int p_arglen; /* argument hdr length (u32) */ + unsigned int p_replen; /* reply hdr length (u32) */ + unsigned int p_count; /* call count */ + unsigned int p_timer; /* Which RTT timer to use */ + u32 p_statidx; /* Which procedure to account */ + const char * p_name; /* name of procedure */ +}; + +#ifdef __KERNEL__ + +struct rpc_create_args { + struct net *net; + int protocol; + struct sockaddr *address; + size_t addrsize; + struct sockaddr *saddress; + const struct rpc_timeout *timeout; + const char *servername; + const char *nodename; + const struct rpc_program *program; + u32 prognumber; /* overrides program->number */ + u32 version; + rpc_authflavor_t authflavor; + unsigned long flags; + char *client_name; + struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ +}; + +/* Values for "flags" field */ +#define RPC_CLNT_CREATE_HARDRTRY (1UL << 0) +#define RPC_CLNT_CREATE_AUTOBIND (1UL << 2) +#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 3) +#define RPC_CLNT_CREATE_NOPING (1UL << 4) +#define RPC_CLNT_CREATE_DISCRTRY (1UL << 5) +#define RPC_CLNT_CREATE_QUIET (1UL << 6) +#define RPC_CLNT_CREATE_INFINITE_SLOTS (1UL << 7) +#define RPC_CLNT_CREATE_NO_IDLE_TIMEOUT (1UL << 8) +#define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) + +struct rpc_clnt *rpc_create(struct 
rpc_create_args *args); +struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, + struct rpc_xprt *xprt); +struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, + const struct rpc_program *, u32); +void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt); +struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); +struct rpc_clnt *rpc_clone_client_set_auth(struct rpc_clnt *, + rpc_authflavor_t); +int rpc_switch_client_transport(struct rpc_clnt *, + struct xprt_create *, + const struct rpc_timeout *); + +void rpc_shutdown_client(struct rpc_clnt *); +void rpc_release_client(struct rpc_clnt *); +void rpc_task_release_client(struct rpc_task *); + +int rpcb_create_local(struct net *); +void rpcb_put_local(struct net *); +int rpcb_register(struct net *, u32, u32, int, unsigned short); +int rpcb_v4_register(struct net *net, const u32 program, + const u32 version, + const struct sockaddr *address, + const char *netid); +void rpcb_getport_async(struct rpc_task *); + +void rpc_call_start(struct rpc_task *); +int rpc_call_async(struct rpc_clnt *clnt, + const struct rpc_message *msg, int flags, + const struct rpc_call_ops *tk_ops, + void *calldata); +int rpc_call_sync(struct rpc_clnt *clnt, + const struct rpc_message *msg, int flags); +struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, + int flags); +int rpc_restart_call_prepare(struct rpc_task *); +int rpc_restart_call(struct rpc_task *); +void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); +int rpc_protocol(struct rpc_clnt *); +struct net * rpc_net_ns(struct rpc_clnt *); +size_t rpc_max_payload(struct rpc_clnt *); +unsigned long rpc_get_timeout(struct rpc_clnt *clnt); +void rpc_force_rebind(struct rpc_clnt *); +size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); +const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); +int rpc_localaddr(struct rpc_clnt *, struct sockaddr *, size_t); + +const char *rpc_proc_name(const struct 
rpc_task *task); +#endif /* __KERNEL__ */ +#endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h new file mode 100644 index 000000000..59a7889e1 --- /dev/null +++ b/include/linux/sunrpc/debug.h @@ -0,0 +1,107 @@ +/* + * linux/include/linux/sunrpc/debug.h + * + * Debugging support for sunrpc module + * + * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> + */ +#ifndef _LINUX_SUNRPC_DEBUG_H_ +#define _LINUX_SUNRPC_DEBUG_H_ + +#include <uapi/linux/sunrpc/debug.h> + +/* + * Debugging macros etc + * + * dprintk() and dprintk_rcu() pass the FACILITY macro on to dfprintk() and + * dfprintk_rcu(); FACILITY is not defined in this header, so the including + * file has to supply it. + * + * With CONFIG_SUNRPC_DEBUG enabled, ifdebug(fac) expands to an if-condition + * testing rpc_debug against RPCDBG_<fac>, dfprintk() calls printk() under + * that test, and dfprintk_rcu() does the same with the printk() bracketed + * by rcu_read_lock()/rcu_read_unlock(). Without it, ifdebug() becomes + * if (0), both print macros expand to empty statements and RPC_IFDEBUG(x) + * expands to nothing. + */ +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +extern unsigned int rpc_debug; +extern unsigned int nfs_debug; +extern unsigned int nfsd_debug; +extern unsigned int nlm_debug; +#endif + +#define dprintk(args...) dfprintk(FACILITY, ## args) +#define dprintk_rcu(args...) dfprintk_rcu(FACILITY, ## args) + +#undef ifdebug +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +# define ifdebug(fac) if (unlikely(rpc_debug & RPCDBG_##fac)) + +# define dfprintk(fac, args...) \ + do { \ + ifdebug(fac) \ + printk(KERN_DEFAULT args); \ + } while (0) + +# define dfprintk_rcu(fac, args...) \ + do { \ + ifdebug(fac) { \ + rcu_read_lock(); \ + printk(KERN_DEFAULT args); \ + rcu_read_unlock(); \ + } \ + } while (0) + +# define RPC_IFDEBUG(x) x +#else +# define ifdebug(fac) if (0) +# define dfprintk(fac, args...) do {} while (0) +# define dfprintk_rcu(fac, args...) 
do {} while (0) +# define RPC_IFDEBUG(x) +#endif + +/* + * Sysctl interface for RPC debugging + * + * The debugfs hooks declared below are replaced by empty inline stubs + * when CONFIG_SUNRPC_DEBUG is disabled. + * NOTE(review): rpc_register_sysctl() and rpc_unregister_sysctl() get no + * stub in that case; presumably their callers are guarded -- confirm. + */ + +struct rpc_clnt; +struct rpc_xprt; + +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +void rpc_register_sysctl(void); +void rpc_unregister_sysctl(void); +void sunrpc_debugfs_init(void); +void sunrpc_debugfs_exit(void); +void rpc_clnt_debugfs_register(struct rpc_clnt *); +void rpc_clnt_debugfs_unregister(struct rpc_clnt *); +void rpc_xprt_debugfs_register(struct rpc_xprt *); +void rpc_xprt_debugfs_unregister(struct rpc_xprt *); +#else +static inline void +sunrpc_debugfs_init(void) +{ + return; +} + +static inline void +sunrpc_debugfs_exit(void) +{ + return; +} + +static inline void +rpc_clnt_debugfs_register(struct rpc_clnt *clnt) +{ + return; +} + +static inline void +rpc_clnt_debugfs_unregister(struct rpc_clnt *clnt) +{ + return; +} + +static inline void +rpc_xprt_debugfs_register(struct rpc_xprt *xprt) +{ + return; +} + +static inline void +rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt) +{ + return; +} +#endif + +#endif /* _LINUX_SUNRPC_DEBUG_H_ */ diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h new file mode 100644 index 000000000..1f911ccb2 --- /dev/null +++ b/include/linux/sunrpc/gss_api.h @@ -0,0 +1,162 @@ +/* + * linux/include/linux/sunrpc/gss_api.h + * + * Somewhat simplified version of the gss api. 
+ * + * Dug Song <dugsong@monkey.org> + * Andy Adamson <andros@umich.edu> + * Bruce Fields <bfields@umich.edu> + * Copyright (c) 2000 The Regents of the University of Michigan + */ + +#ifndef _LINUX_SUNRPC_GSS_API_H +#define _LINUX_SUNRPC_GSS_API_H + +#ifdef __KERNEL__ +#include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/msg_prot.h> +#include <linux/uio.h> + +/* The mechanism-independent gss-api context: */ +struct gss_ctx { + struct gss_api_mech *mech_type; + void *internal_ctx_id; +}; + +#define GSS_C_NO_BUFFER ((struct xdr_netobj) 0) +#define GSS_C_NO_CONTEXT ((struct gss_ctx *) 0) +#define GSS_C_QOP_DEFAULT (0) + +/*XXX arbitrary length - is this set somewhere? */ +#define GSS_OID_MAX_LEN 32 +struct rpcsec_gss_oid { + unsigned int len; + u8 data[GSS_OID_MAX_LEN]; +}; + +/* From RFC 3530 */ +struct rpcsec_gss_info { + struct rpcsec_gss_oid oid; + u32 qop; + u32 service; +}; + +/* gss-api prototypes; note that these are somewhat simplified versions of + * the prototypes specified in RFC 2744. 
*/ +int gss_import_sec_context( + const void* input_token, + size_t bufsize, + struct gss_api_mech *mech, + struct gss_ctx **ctx_id, + time_t *endtime, + gfp_t gfp_mask); +u32 gss_get_mic( + struct gss_ctx *ctx_id, + struct xdr_buf *message, + struct xdr_netobj *mic_token); +u32 gss_verify_mic( + struct gss_ctx *ctx_id, + struct xdr_buf *message, + struct xdr_netobj *mic_token); +u32 gss_wrap( + struct gss_ctx *ctx_id, + int offset, + struct xdr_buf *outbuf, + struct page **inpages); +u32 gss_unwrap( + struct gss_ctx *ctx_id, + int offset, + struct xdr_buf *inbuf); +u32 gss_delete_sec_context( + struct gss_ctx **ctx_id); + +rpc_authflavor_t gss_svc_to_pseudoflavor(struct gss_api_mech *, u32 qop, + u32 service); +u32 gss_pseudoflavor_to_service(struct gss_api_mech *, u32 pseudoflavor); +char *gss_service_to_auth_domain_name(struct gss_api_mech *, u32 service); + +struct pf_desc { + u32 pseudoflavor; + u32 qop; + u32 service; + char *name; + char *auth_domain_name; +}; + +/* Different mechanisms (e.g., krb5 or spkm3) may implement gss-api, and + * mechanisms may be dynamically registered or unregistered by modules. */ + +/* Each mechanism is described by the following struct: */ +struct gss_api_mech { + struct list_head gm_list; + struct module *gm_owner; + struct rpcsec_gss_oid gm_oid; + char *gm_name; + const struct gss_api_ops *gm_ops; + /* pseudoflavors supported by this mechanism: */ + int gm_pf_num; + struct pf_desc * gm_pfs; + /* Should the following be a callback operation instead? 
*/ + const char *gm_upcall_enctypes; +}; + +/* and must provide the following operations: */ +struct gss_api_ops { + int (*gss_import_sec_context)( + const void *input_token, + size_t bufsize, + struct gss_ctx *ctx_id, + time_t *endtime, + gfp_t gfp_mask); + u32 (*gss_get_mic)( + struct gss_ctx *ctx_id, + struct xdr_buf *message, + struct xdr_netobj *mic_token); + u32 (*gss_verify_mic)( + struct gss_ctx *ctx_id, + struct xdr_buf *message, + struct xdr_netobj *mic_token); + u32 (*gss_wrap)( + struct gss_ctx *ctx_id, + int offset, + struct xdr_buf *outbuf, + struct page **inpages); + u32 (*gss_unwrap)( + struct gss_ctx *ctx_id, + int offset, + struct xdr_buf *buf); + void (*gss_delete_sec_context)( + void *internal_ctx_id); +}; + +int gss_mech_register(struct gss_api_mech *); +void gss_mech_unregister(struct gss_api_mech *); + +/* returns a mechanism descriptor given an OID, and increments the mechanism's + * reference count. */ +struct gss_api_mech * gss_mech_get_by_OID(struct rpcsec_gss_oid *); + +/* Given a GSS security tuple, look up a pseudoflavor */ +rpc_authflavor_t gss_mech_info2flavor(struct rpcsec_gss_info *); + +/* Given a pseudoflavor, look up a GSS security tuple */ +int gss_mech_flavor2info(rpc_authflavor_t, struct rpcsec_gss_info *); + +/* Returns a reference to a mechanism, given a name like "krb5" etc. */ +struct gss_api_mech *gss_mech_get_by_name(const char *); + +/* Similar, but get by pseudoflavor. */ +struct gss_api_mech *gss_mech_get_by_pseudoflavor(u32); + +/* Fill in an array with a list of supported pseudoflavors */ +int gss_mech_list_pseudoflavors(rpc_authflavor_t *, int); + +struct gss_api_mech * gss_mech_get(struct gss_api_mech *); + +/* For every successful gss_mech_get or gss_mech_get_by_* call there must be a + * corresponding call to gss_mech_put. 
*/ +void gss_mech_put(struct gss_api_mech *); + +#endif /* __KERNEL__ */ +#endif /* _LINUX_SUNRPC_GSS_API_H */ + diff --git a/include/linux/sunrpc/gss_asn1.h b/include/linux/sunrpc/gss_asn1.h new file mode 100644 index 000000000..3ccecd0ad --- /dev/null +++ b/include/linux/sunrpc/gss_asn1.h @@ -0,0 +1,81 @@ +/* + * linux/include/linux/sunrpc/gss_asn1.h + * + * minimal asn1 for generic encoding/decoding of gss tokens + * + * Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h, + * lib/gssapi/krb5/gssapiP_krb5.h, and others + * + * Copyright (c) 2000 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson <andros@umich.edu> + */ + +/* + * Copyright 1995 by the Massachusetts Institute of Technology. + * All Rights Reserved. + * + * Export of this software from the United States of America may + * require a specific license from the United States Government. + * It is the responsibility of any person or organization contemplating + * export to obtain such a license before exporting. + * + * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and + * distribute this software and its documentation for any purpose and + * without fee is hereby granted, provided that the above copyright + * notice appear in all copies and that both that copyright notice and + * this permission notice appear in supporting documentation, and that + * the name of M.I.T. not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. Furthermore if you modify this software you must label + * your software as modified software and not distribute it in such a + * fashion that it might be confused with the original M.I.T. software. + * M.I.T. makes no representations about the suitability of + * this software for any purpose. It is provided "as is" without express + * or implied warranty. 
+ * + */ +/* NOTE(review): this header has no include guard. */ + +#include <linux/sunrpc/gss_api.h> + +#define SIZEOF_INT 4 + +/* from gssapi_err_generic.h */ +#define G_BAD_SERVICE_NAME (-2045022976L) +#define G_BAD_STRING_UID (-2045022975L) +#define G_NOUSER (-2045022974L) +#define G_VALIDATE_FAILED (-2045022973L) +#define G_BUFFER_ALLOC (-2045022972L) +#define G_BAD_MSG_CTX (-2045022971L) +#define G_WRONG_SIZE (-2045022970L) +#define G_BAD_USAGE (-2045022969L) +#define G_UNKNOWN_QOP (-2045022968L) +#define G_NO_HOSTNAME (-2045022967L) +#define G_BAD_HOSTNAME (-2045022966L) +#define G_WRONG_MECH (-2045022965L) +#define G_BAD_TOK_HEADER (-2045022964L) +#define G_BAD_DIRECTION (-2045022963L) +#define G_TOK_TRUNC (-2045022962L) +#define G_REFLECT (-2045022961L) +#define G_WRONG_TOKID (-2045022960L) +/* Nonzero when o1 and o2 have equal len and their first len data bytes match; each argument is evaluated more than once. */ +#define g_OID_equal(o1,o2) \ + (((o1)->len == (o2)->len) && \ + (memcmp((o1)->data,(o2)->data,(int) (o1)->len) == 0)) + +u32 g_verify_token_header( + struct xdr_netobj *mech, + int *body_size, + unsigned char **buf_in, + int toksize); + +int g_token_size( + struct xdr_netobj *mech, + unsigned int body_size); + +void g_make_token_header( + struct xdr_netobj *mech, + int body_size, + unsigned char **buf); diff --git a/include/linux/sunrpc/gss_err.h b/include/linux/sunrpc/gss_err.h new file mode 100644 index 000000000..a6807867b --- /dev/null +++ b/include/linux/sunrpc/gss_err.h @@ -0,0 +1,167 @@ +/* + * linux/include/linux/sunrpc/gss_err.h + * + * Adapted from MIT Kerberos 5-1.2.1 include/gssapi/gssapi.h + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson <andros@umich.edu> + */ + +/* + * Copyright 1993 by OpenVision Technologies, Inc. 
+ * + * Permission to use, copy, modify, distribute, and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appears in all copies and + * that both that copyright notice and this permission notice appear in + * supporting documentation, and that the name of OpenVision not be used + * in advertising or publicity pertaining to distribution of the software + * without specific, written prior permission. OpenVision makes no + * representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied warranty. + * + * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF + * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _LINUX_SUNRPC_GSS_ERR_H +#define _LINUX_SUNRPC_GSS_ERR_H + +#ifdef __KERNEL__ + +typedef unsigned int OM_uint32; + +/* + * Flag bits for context-level services. 
+ */ +#define GSS_C_DELEG_FLAG 1 +#define GSS_C_MUTUAL_FLAG 2 +#define GSS_C_REPLAY_FLAG 4 +#define GSS_C_SEQUENCE_FLAG 8 +#define GSS_C_CONF_FLAG 16 +#define GSS_C_INTEG_FLAG 32 +#define GSS_C_ANON_FLAG 64 +#define GSS_C_PROT_READY_FLAG 128 +#define GSS_C_TRANS_FLAG 256 + +/* + * Credential usage options + */ +#define GSS_C_BOTH 0 +#define GSS_C_INITIATE 1 +#define GSS_C_ACCEPT 2 + +/* + * Status code types for gss_display_status + */ +#define GSS_C_GSS_CODE 1 +#define GSS_C_MECH_CODE 2 + + +/* + * Expiration time of 2^32-1 seconds means infinite lifetime for a + * credential or security context + */ +#define GSS_C_INDEFINITE ((OM_uint32) 0xfffffffful) + + +/* Major status codes */ + +#define GSS_S_COMPLETE 0 + +/* + * Some "helper" definitions to make the status code macros obvious. + */ +#define GSS_C_CALLING_ERROR_OFFSET 24 +#define GSS_C_ROUTINE_ERROR_OFFSET 16 +#define GSS_C_SUPPLEMENTARY_OFFSET 0 +#define GSS_C_CALLING_ERROR_MASK ((OM_uint32) 0377ul) +#define GSS_C_ROUTINE_ERROR_MASK ((OM_uint32) 0377ul) +#define GSS_C_SUPPLEMENTARY_MASK ((OM_uint32) 0177777ul) + +/* + * The macros that test status codes for error conditions. Note that the + * GSS_ERROR() macro has changed slightly from the V1 GSSAPI so that it now + * evaluates its argument only once. 
+ */ +#define GSS_CALLING_ERROR(x) \ + ((x) & (GSS_C_CALLING_ERROR_MASK << GSS_C_CALLING_ERROR_OFFSET)) +#define GSS_ROUTINE_ERROR(x) \ + ((x) & (GSS_C_ROUTINE_ERROR_MASK << GSS_C_ROUTINE_ERROR_OFFSET)) +#define GSS_SUPPLEMENTARY_INFO(x) \ + ((x) & (GSS_C_SUPPLEMENTARY_MASK << GSS_C_SUPPLEMENTARY_OFFSET)) +#define GSS_ERROR(x) \ + ((x) & ((GSS_C_CALLING_ERROR_MASK << GSS_C_CALLING_ERROR_OFFSET) | \ + (GSS_C_ROUTINE_ERROR_MASK << GSS_C_ROUTINE_ERROR_OFFSET))) + +/* + * Now the actual status code definitions + */ + +/* + * Calling errors: + * small integers shifted up by GSS_C_CALLING_ERROR_OFFSET + */ +#define GSS_S_CALL_INACCESSIBLE_READ \ + (((OM_uint32) 1ul) << GSS_C_CALLING_ERROR_OFFSET) +#define GSS_S_CALL_INACCESSIBLE_WRITE \ + (((OM_uint32) 2ul) << GSS_C_CALLING_ERROR_OFFSET) +#define GSS_S_CALL_BAD_STRUCTURE \ + (((OM_uint32) 3ul) << GSS_C_CALLING_ERROR_OFFSET) + +/* + * Routine errors: + * small integers shifted up by GSS_C_ROUTINE_ERROR_OFFSET + */ +#define GSS_S_BAD_MECH (((OM_uint32) 1ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_BAD_NAME (((OM_uint32) 2ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_BAD_NAMETYPE (((OM_uint32) 3ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_BAD_BINDINGS (((OM_uint32) 4ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_BAD_STATUS (((OM_uint32) 5ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_BAD_SIG (((OM_uint32) 6ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_NO_CRED (((OM_uint32) 7ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_NO_CONTEXT (((OM_uint32) 8ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_DEFECTIVE_TOKEN (((OM_uint32) 9ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_DEFECTIVE_CREDENTIAL \ + (((OM_uint32) 10ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_CREDENTIALS_EXPIRED \ + (((OM_uint32) 11ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_CONTEXT_EXPIRED \ + (((OM_uint32) 12ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_FAILURE (((OM_uint32) 13ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_BAD_QOP (((OM_uint32) 14ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_UNAUTHORIZED (((OM_uint32) 15ul) 
<< GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_UNAVAILABLE (((OM_uint32) 16ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_DUPLICATE_ELEMENT \ + (((OM_uint32) 17ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_NAME_NOT_MN \ + (((OM_uint32) 18ul) << GSS_C_ROUTINE_ERROR_OFFSET) + +/* + * Supplementary info bits: + * one flag bit each, counted up from GSS_C_SUPPLEMENTARY_OFFSET + */ +#define GSS_S_CONTINUE_NEEDED (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 0)) +#define GSS_S_DUPLICATE_TOKEN (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 1)) +#define GSS_S_OLD_TOKEN (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 2)) +#define GSS_S_UNSEQ_TOKEN (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 3)) +#define GSS_S_GAP_TOKEN (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 4)) + +/* XXXX these are not part of the GSSAPI C bindings! (but should be) + * + * Each *_FIELD(x) macro shifts x down by the field offset and then masks + * it, so the result is the field value itself, not the in-place bits. + */ + +#define GSS_CALLING_ERROR_FIELD(x) \ + (((x) >> GSS_C_CALLING_ERROR_OFFSET) & GSS_C_CALLING_ERROR_MASK) +#define GSS_ROUTINE_ERROR_FIELD(x) \ + (((x) >> GSS_C_ROUTINE_ERROR_OFFSET) & GSS_C_ROUTINE_ERROR_MASK) +#define GSS_SUPPLEMENTARY_INFO_FIELD(x) \ + (((x) >> GSS_C_SUPPLEMENTARY_OFFSET) & GSS_C_SUPPLEMENTARY_MASK) + +/* XXXX This is a necessary evil until the spec is fixed */ +#define GSS_S_CRED_UNAVAIL GSS_S_FAILURE + +#endif /* __KERNEL__ */ +#endif /* _LINUX_SUNRPC_GSS_ERR_H */ diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h new file mode 100644 index 000000000..df02a4188 --- /dev/null +++ b/include/linux/sunrpc/gss_krb5.h @@ -0,0 +1,331 @@ +/* + * linux/include/linux/sunrpc/gss_krb5.h + * + * Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h, + * lib/gssapi/krb5/gssapiP_krb5.h, and others + * + * Copyright (c) 2000-2008 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson <andros@umich.edu> + * Bruce Fields <bfields@umich.edu> + */ + +/* + * Copyright 1995 by the Massachusetts Institute of Technology. + * All Rights Reserved. + * + * Export of this software from the United States of America may + * require a specific license from the United States Government. 
+ * It is the responsibility of any person or organization contemplating + * export to obtain such a license before exporting. + * + * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and + * distribute this software and its documentation for any purpose and + * without fee is hereby granted, provided that the above copyright + * notice appear in all copies and that both that copyright notice and + * this permission notice appear in supporting documentation, and that + * the name of M.I.T. not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. Furthermore if you modify this software you must label + * your software as modified software and not distribute it in such a + * fashion that it might be confused with the original M.I.T. software. + * M.I.T. makes no representations about the suitability of + * this software for any purpose. It is provided "as is" without express + * or implied warranty. + * + */ + +#include <linux/crypto.h> +#include <linux/sunrpc/auth_gss.h> +#include <linux/sunrpc/gss_err.h> +#include <linux/sunrpc/gss_asn1.h> + +/* Length of constant used in key derivation */ +#define GSS_KRB5_K5CLENGTH (5) + +/* Maximum key length (in bytes) for the supported crypto algorithms*/ +#define GSS_KRB5_MAX_KEYLEN (32) + +/* Maximum checksum function output for the supported crypto algorithms */ +#define GSS_KRB5_MAX_CKSUM_LEN (20) + +/* Maximum blocksize for the supported crypto algorithms */ +#define GSS_KRB5_MAX_BLOCKSIZE (16) + +struct krb5_ctx; + +struct gss_krb5_enctype { + const u32 etype; /* encryption (key) type */ + const u32 ctype; /* checksum type */ + const char *name; /* "friendly" name */ + const char *encrypt_name; /* crypto encrypt name */ + const char *cksum_name; /* crypto checksum name */ + const u16 signalg; /* signing algorithm */ + const u16 sealalg; /* sealing algorithm */ + const u32 blocksize; /* encryption blocksize */ + const u32 conflen; /* 
confounder length + (normally the same as + the blocksize) */ + const u32 cksumlength; /* checksum length */ + const u32 keyed_cksum; /* is it a keyed cksum? */ + const u32 keybytes; /* raw key len, in bytes */ + const u32 keylength; /* final key len, in bytes */ + u32 (*encrypt) (struct crypto_blkcipher *tfm, + void *iv, void *in, void *out, + int length); /* encryption function */ + u32 (*decrypt) (struct crypto_blkcipher *tfm, + void *iv, void *in, void *out, + int length); /* decryption function */ + u32 (*mk_key) (const struct gss_krb5_enctype *gk5e, + struct xdr_netobj *in, + struct xdr_netobj *out); /* complete key generation */ + u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset, + struct xdr_buf *buf, + struct page **pages); /* v2 encryption function */ + u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset, + struct xdr_buf *buf, u32 *headskip, + u32 *tailskip); /* v2 decryption function */ +}; + +/* krb5_ctx flags definitions */ +#define KRB5_CTX_FLAG_INITIATOR 0x00000001 +#define KRB5_CTX_FLAG_CFX 0x00000002 +#define KRB5_CTX_FLAG_ACCEPTOR_SUBKEY 0x00000004 + +struct krb5_ctx { + int initiate; /* 1 = initiating, 0 = accepting */ + u32 enctype; + u32 flags; + const struct gss_krb5_enctype *gk5e; /* enctype-specific info */ + struct crypto_blkcipher *enc; + struct crypto_blkcipher *seq; + struct crypto_blkcipher *acceptor_enc; + struct crypto_blkcipher *initiator_enc; + struct crypto_blkcipher *acceptor_enc_aux; + struct crypto_blkcipher *initiator_enc_aux; + u8 Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */ + u8 cksum[GSS_KRB5_MAX_KEYLEN]; + s32 endtime; + u32 seq_send; + u64 seq_send64; + struct xdr_netobj mech_used; + u8 initiator_sign[GSS_KRB5_MAX_KEYLEN]; + u8 acceptor_sign[GSS_KRB5_MAX_KEYLEN]; + u8 initiator_seal[GSS_KRB5_MAX_KEYLEN]; + u8 acceptor_seal[GSS_KRB5_MAX_KEYLEN]; + u8 initiator_integ[GSS_KRB5_MAX_KEYLEN]; + u8 acceptor_integ[GSS_KRB5_MAX_KEYLEN]; +}; + +extern spinlock_t krb5_seq_lock; + +/* The length of the Kerberos GSS token header 
*/ +#define GSS_KRB5_TOK_HDR_LEN (16) + +#define KG_TOK_MIC_MSG 0x0101 +#define KG_TOK_WRAP_MSG 0x0201 + +#define KG2_TOK_INITIAL 0x0101 +#define KG2_TOK_RESPONSE 0x0202 +#define KG2_TOK_MIC 0x0404 +#define KG2_TOK_WRAP 0x0504 + +#define KG2_TOKEN_FLAG_SENTBYACCEPTOR 0x01 +#define KG2_TOKEN_FLAG_SEALED 0x02 +#define KG2_TOKEN_FLAG_ACCEPTORSUBKEY 0x04 + +#define KG2_RESP_FLAG_ERROR 0x0001 +#define KG2_RESP_FLAG_DELEG_OK 0x0002 + +enum sgn_alg { + SGN_ALG_DES_MAC_MD5 = 0x0000, + SGN_ALG_MD2_5 = 0x0001, + SGN_ALG_DES_MAC = 0x0002, + SGN_ALG_3 = 0x0003, /* not published */ + SGN_ALG_HMAC_MD5 = 0x0011, /* microsoft w2k; no support */ + SGN_ALG_HMAC_SHA1_DES3_KD = 0x0004 +}; +enum seal_alg { + SEAL_ALG_NONE = 0xffff, + SEAL_ALG_DES = 0x0000, + SEAL_ALG_1 = 0x0001, /* not published */ + SEAL_ALG_MICROSOFT_RC4 = 0x0010,/* microsoft w2k; no support */ + SEAL_ALG_DES3KD = 0x0002 +}; + +#define CKSUMTYPE_CRC32 0x0001 +#define CKSUMTYPE_RSA_MD4 0x0002 +#define CKSUMTYPE_RSA_MD4_DES 0x0003 +#define CKSUMTYPE_DESCBC 0x0004 +#define CKSUMTYPE_RSA_MD5 0x0007 +#define CKSUMTYPE_RSA_MD5_DES 0x0008 +#define CKSUMTYPE_NIST_SHA 0x0009 +#define CKSUMTYPE_HMAC_SHA1_DES3 0x000c +#define CKSUMTYPE_HMAC_SHA1_96_AES128 0x000f +#define CKSUMTYPE_HMAC_SHA1_96_AES256 0x0010 +#define CKSUMTYPE_HMAC_MD5_ARCFOUR (-138) /* Microsoft md5 hmac cksumtype; negative, so parenthesized */ + +/* from gssapi_err_krb5.h */ +#define KG_CCACHE_NOMATCH (39756032L) +#define KG_KEYTAB_NOMATCH (39756033L) +#define KG_TGT_MISSING (39756034L) +#define KG_NO_SUBKEY (39756035L) +#define KG_CONTEXT_ESTABLISHED (39756036L) +#define KG_BAD_SIGN_TYPE (39756037L) +#define KG_BAD_LENGTH (39756038L) +#define KG_CTX_INCOMPLETE (39756039L) +#define KG_CONTEXT (39756040L) +#define KG_CRED (39756041L) +#define KG_ENC_DESC (39756042L) +#define KG_BAD_SEQ (39756043L) +#define KG_EMPTY_CCACHE (39756044L) +#define KG_NO_CTYPES (39756045L) + +/* per Kerberos v5 protocol spec crypto types from the wire. + * these get mapped to linux kernel crypto routines. 
+ */ +#define ENCTYPE_NULL 0x0000 +#define ENCTYPE_DES_CBC_CRC 0x0001 /* DES cbc mode with CRC-32 */ +#define ENCTYPE_DES_CBC_MD4 0x0002 /* DES cbc mode with RSA-MD4 */ +#define ENCTYPE_DES_CBC_MD5 0x0003 /* DES cbc mode with RSA-MD5 */ +#define ENCTYPE_DES_CBC_RAW 0x0004 /* DES cbc mode raw */ +/* XXX deprecated? */ +#define ENCTYPE_DES3_CBC_SHA 0x0005 /* DES-3 cbc mode with NIST-SHA */ +#define ENCTYPE_DES3_CBC_RAW 0x0006 /* DES-3 cbc mode raw */ +#define ENCTYPE_DES_HMAC_SHA1 0x0008 +#define ENCTYPE_DES3_CBC_SHA1 0x0010 +#define ENCTYPE_AES128_CTS_HMAC_SHA1_96 0x0011 +#define ENCTYPE_AES256_CTS_HMAC_SHA1_96 0x0012 +#define ENCTYPE_ARCFOUR_HMAC 0x0017 +#define ENCTYPE_ARCFOUR_HMAC_EXP 0x0018 +#define ENCTYPE_UNKNOWN 0x01ff + +/* + * Constants used for key derivation + */ +/* for 3DES */ +#define KG_USAGE_SEAL (22) +#define KG_USAGE_SIGN (23) +#define KG_USAGE_SEQ (24) + +/* from rfc3961 */ +#define KEY_USAGE_SEED_CHECKSUM (0x99) +#define KEY_USAGE_SEED_ENCRYPTION (0xAA) +#define KEY_USAGE_SEED_INTEGRITY (0x55) + +/* from rfc4121 */ +#define KG_USAGE_ACCEPTOR_SEAL (22) +#define KG_USAGE_ACCEPTOR_SIGN (23) +#define KG_USAGE_INITIATOR_SEAL (24) +#define KG_USAGE_INITIATOR_SIGN (25) + +/* + * This compile-time check verifies that we will not exceed the + * slack space allotted by the client and server auth_gss code + * before they call gss_wrap(). 
+ */ +#define GSS_KRB5_MAX_SLACK_NEEDED \ + (GSS_KRB5_TOK_HDR_LEN /* gss token header */ \ + + GSS_KRB5_MAX_CKSUM_LEN /* gss token checksum */ \ + + GSS_KRB5_MAX_BLOCKSIZE /* confounder */ \ + + GSS_KRB5_MAX_BLOCKSIZE /* possible padding */ \ + + GSS_KRB5_TOK_HDR_LEN /* encrypted hdr in v2 token */\ + + GSS_KRB5_MAX_CKSUM_LEN /* encryption hmac */ \ + + 4 + 4 /* RPC verifier */ \ + + GSS_KRB5_TOK_HDR_LEN \ + + GSS_KRB5_MAX_CKSUM_LEN) + +u32 +make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, + struct xdr_buf *body, int body_offset, u8 *cksumkey, + unsigned int usage, struct xdr_netobj *cksumout); + +u32 +make_checksum_v2(struct krb5_ctx *, char *header, int hdrlen, + struct xdr_buf *body, int body_offset, u8 *key, + unsigned int usage, struct xdr_netobj *cksum); + +u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *, + struct xdr_netobj *); + +u32 gss_verify_mic_kerberos(struct gss_ctx *, struct xdr_buf *, + struct xdr_netobj *); + +u32 +gss_wrap_kerberos(struct gss_ctx *ctx_id, int offset, + struct xdr_buf *outbuf, struct page **pages); + +u32 +gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, + struct xdr_buf *buf); + + +u32 +krb5_encrypt(struct crypto_blkcipher *key, + void *iv, void *in, void *out, int length); + +u32 +krb5_decrypt(struct crypto_blkcipher *key, + void *iv, void *in, void *out, int length); + +int +gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *outbuf, + int offset, struct page **pages); + +int +gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *inbuf, + int offset); + +s32 +krb5_make_seq_num(struct krb5_ctx *kctx, + struct crypto_blkcipher *key, + int direction, + u32 seqnum, unsigned char *cksum, unsigned char *buf); + +s32 +krb5_get_seq_num(struct krb5_ctx *kctx, + unsigned char *cksum, + unsigned char *buf, int *direction, u32 *seqnum); + +int +xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen); + +u32 +krb5_derive_key(const struct gss_krb5_enctype *gk5e, 
+ const struct xdr_netobj *inkey, + struct xdr_netobj *outkey, + const struct xdr_netobj *in_constant, + gfp_t gfp_mask); + +u32 +gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e, + struct xdr_netobj *randombits, + struct xdr_netobj *key); + +u32 +gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e, + struct xdr_netobj *randombits, + struct xdr_netobj *key); + +u32 +gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, + struct xdr_buf *buf, + struct page **pages); + +u32 +gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, + struct xdr_buf *buf, u32 *plainoffset, + u32 *plainlen); + +int +krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, + struct crypto_blkcipher *cipher, + unsigned char *cksum); + +int +krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, + struct crypto_blkcipher *cipher, + s32 seqnum); +void +gss_krb5_make_confounder(char *p, u32 conflen); diff --git a/include/linux/sunrpc/gss_krb5_enctypes.h b/include/linux/sunrpc/gss_krb5_enctypes.h new file mode 100644 index 000000000..ec6234eee --- /dev/null +++ b/include/linux/sunrpc/gss_krb5_enctypes.h @@ -0,0 +1,4 @@ +/* + * Dumb way to share this static piece of information with nfsd + */ +#define KRB5_SUPPORTED_ENCTYPES "18,17,16,23,3,1,2" diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h new file mode 100644 index 000000000..694eecb2f --- /dev/null +++ b/include/linux/sunrpc/metrics.h @@ -0,0 +1,102 @@ +/* + * linux/include/linux/sunrpc/metrics.h + * + * Declarations for RPC client per-operation metrics + * + * Copyright (C) 2005 Chuck Lever <cel@netapp.com> + * + * RPC client per-operation statistics provide latency and retry + * information about each type of RPC procedure in a given RPC program. + * These statistics are not for detailed problem diagnosis, but simply + * to indicate whether the problem is local or remote. 
+ * + * These counters are not meant to be human-readable, but are meant to be + * integrated into system monitoring tools such as "sar" and "iostat". As + * such, the counters are sampled by the tools over time, and are never + * zeroed after a file system is mounted. Moving averages can be computed + * by the tools by taking the difference between two instantaneous samples + * and dividing that by the time between the samples. + * + * The counters are maintained in a single array per RPC client, indexed + * by procedure number. There is no need to maintain separate counter + * arrays per-CPU because these counters are always modified behind locks. + */ + +#ifndef _LINUX_SUNRPC_METRICS_H +#define _LINUX_SUNRPC_METRICS_H + +#include <linux/seq_file.h> +#include <linux/ktime.h> +#include <linux/spinlock.h> + +#define RPC_IOSTATS_VERS "1.0" + +struct rpc_iostats { + spinlock_t om_lock; + + /* + * These counters give an idea about how many request + * transmissions are required, on average, to complete that + * particular procedure. Some procedures may require more + * than one transmission because the server is unresponsive, + * the client is retransmitting too aggressively, or the + * requests are large and the network is congested. + */ + unsigned long om_ops, /* count of operations */ + om_ntrans, /* count of RPC transmissions */ + om_timeouts; /* count of major timeouts */ + + /* + * These count how many bytes are sent and received for a + * given RPC procedure type. This indicates how much load a + * particular procedure is putting on the network. These + * counts include the RPC and ULP headers, and the request + * payload. + */ + unsigned long long om_bytes_sent, /* count of bytes out */ + om_bytes_recv; /* count of bytes in */ + + /* + * The length of time an RPC request waits in queue before + * transmission, the network + server latency of the request, + * and the total time the request spent from init to release + * are measured. 
+ */ + ktime_t om_queue, /* queued for xmit */ + om_rtt, /* RPC RTT */ + om_execute; /* RPC execution */ +} ____cacheline_aligned; + +struct rpc_task; +struct rpc_clnt; + +/* + * EXPORTed functions for managing rpc_iostats structures + */ + +#ifdef CONFIG_PROC_FS + +struct rpc_iostats * rpc_alloc_iostats(struct rpc_clnt *); +void rpc_count_iostats(const struct rpc_task *, + struct rpc_iostats *); +void rpc_count_iostats_metrics(const struct rpc_task *, + struct rpc_iostats *); +void rpc_print_iostats(struct seq_file *, struct rpc_clnt *); +void rpc_free_iostats(struct rpc_iostats *); + +#else /* CONFIG_PROC_FS */ + +static inline struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { return NULL; } +static inline void rpc_count_iostats(const struct rpc_task *task, + struct rpc_iostats *stats) {} +static inline void rpc_count_iostats_metrics(const struct rpc_task *task, + struct rpc_iostats *stats) +{ +} + +static inline void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) {} +static inline void rpc_free_iostats(struct rpc_iostats *stats) {} + +#endif /* CONFIG_PROC_FS */ + +#endif /* _LINUX_SUNRPC_METRICS_H */ diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h new file mode 100644 index 000000000..807371357 --- /dev/null +++ b/include/linux/sunrpc/msg_prot.h @@ -0,0 +1,220 @@ +/* + * linux/include/linux/sunrpc/msg_prot.h + * + * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> + */ + +#ifndef _LINUX_SUNRPC_MSGPROT_H_ +#define _LINUX_SUNRPC_MSGPROT_H_ + +#ifdef __KERNEL__ /* user programs should get these from the rpc header files */ + +#define RPC_VERSION 2 + +/* size of an XDR encoding unit in bytes, i.e. 
32bit */ +#define XDR_UNIT (4) + +/* spec defines authentication flavor as an unsigned 32 bit integer */ +typedef u32 rpc_authflavor_t; + +enum rpc_auth_flavors { + RPC_AUTH_NULL = 0, + RPC_AUTH_UNIX = 1, + RPC_AUTH_SHORT = 2, + RPC_AUTH_DES = 3, + RPC_AUTH_KRB = 4, + RPC_AUTH_GSS = 6, + RPC_AUTH_MAXFLAVOR = 8, + /* pseudoflavors: */ + RPC_AUTH_GSS_KRB5 = 390003, + RPC_AUTH_GSS_KRB5I = 390004, + RPC_AUTH_GSS_KRB5P = 390005, + RPC_AUTH_GSS_LKEY = 390006, + RPC_AUTH_GSS_LKEYI = 390007, + RPC_AUTH_GSS_LKEYP = 390008, + RPC_AUTH_GSS_SPKM = 390009, + RPC_AUTH_GSS_SPKMI = 390010, + RPC_AUTH_GSS_SPKMP = 390011, +}; + +/* Maximum size (in bytes) of an rpc credential or verifier */ +#define RPC_MAX_AUTH_SIZE (400) + +enum rpc_msg_type { + RPC_CALL = 0, + RPC_REPLY = 1 +}; + +enum rpc_reply_stat { + RPC_MSG_ACCEPTED = 0, + RPC_MSG_DENIED = 1 +}; + +enum rpc_accept_stat { + RPC_SUCCESS = 0, + RPC_PROG_UNAVAIL = 1, + RPC_PROG_MISMATCH = 2, + RPC_PROC_UNAVAIL = 3, + RPC_GARBAGE_ARGS = 4, + RPC_SYSTEM_ERR = 5, + /* internal use only */ + RPC_DROP_REPLY = 60000, +}; + +enum rpc_reject_stat { + RPC_MISMATCH = 0, + RPC_AUTH_ERROR = 1 +}; + +enum rpc_auth_stat { + RPC_AUTH_OK = 0, + RPC_AUTH_BADCRED = 1, + RPC_AUTH_REJECTEDCRED = 2, + RPC_AUTH_BADVERF = 3, + RPC_AUTH_REJECTEDVERF = 4, + RPC_AUTH_TOOWEAK = 5, + /* RPCSEC_GSS errors */ + RPCSEC_GSS_CREDPROBLEM = 13, + RPCSEC_GSS_CTXPROBLEM = 14 +}; + +#define RPC_MAXNETNAMELEN 256 + +/* + * From RFC 1831: + * + * "A record is composed of one or more record fragments. A record + * fragment is a four-byte header followed by 0 to (2**31) - 1 bytes of + * fragment data. The bytes encode an unsigned binary number; as with + * XDR integers, the byte order is from highest to lowest. 
The number + * encodes two values -- a boolean which indicates whether the fragment + * is the last fragment of the record (bit value 1 implies the fragment + * is the last fragment) and a 31-bit unsigned binary value which is the + * length in bytes of the fragment's data. The boolean value is the + * highest-order bit of the header; the length is the 31 low-order bits. + * (Note that this record specification is NOT in XDR standard form!)" + * + * The Linux RPC client always sends its requests in a single record + * fragment, limiting the maximum payload size for stream transports to + * 2GB. + */ + +typedef __be32 rpc_fraghdr; + +#define RPC_LAST_STREAM_FRAGMENT (1U << 31) +#define RPC_FRAGMENT_SIZE_MASK (~RPC_LAST_STREAM_FRAGMENT) +#define RPC_MAX_FRAGMENT_SIZE ((1U << 31) - 1) + +/* + * RPC call and reply header size as number of 32bit words (verifier + * size computed separately, see below) + */ +#define RPC_CALLHDRSIZE (6) +#define RPC_REPHDRSIZE (4) + + +/* + * Maximum RPC header size, including authentication, + * as number of 32bit words (see RFCs 1831, 1832). + * + * xid 1 xdr unit = 4 bytes + * mtype 1 + * rpc_version 1 + * program 1 + * prog_version 1 + * procedure 1 + * cred { + * flavor 1 + * length 1 + * body<RPC_MAX_AUTH_SIZE> 100 xdr units = 400 bytes + * } + * verf { + * flavor 1 + * length 1 + * body<RPC_MAX_AUTH_SIZE> 100 xdr units = 400 bytes + * } + * TOTAL 210 xdr units = 840 bytes + */ +#define RPC_MAX_HEADER_WITH_AUTH \ + (RPC_CALLHDRSIZE + 2*(2+RPC_MAX_AUTH_SIZE/4)) + +#define RPC_MAX_REPHEADER_WITH_AUTH \ + (RPC_REPHDRSIZE + (2 + RPC_MAX_AUTH_SIZE/4)) + +/* + * Well-known netids. 
See: + * + * http://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml + */ +#define RPCBIND_NETID_UDP "udp" +#define RPCBIND_NETID_TCP "tcp" +#define RPCBIND_NETID_RDMA "rdma" +#define RPCBIND_NETID_SCTP "sctp" +#define RPCBIND_NETID_UDP6 "udp6" +#define RPCBIND_NETID_TCP6 "tcp6" +#define RPCBIND_NETID_RDMA6 "rdma6" +#define RPCBIND_NETID_SCTP6 "sctp6" +#define RPCBIND_NETID_LOCAL "local" + +/* + * Note that RFC 1833 does not put any size restrictions on the + * netid string, but all currently defined netid's fit in 5 bytes. + * The longest ones defined above ("rdma6", "sctp6" and "local") are + * five characters long, so a limit of 4 would not cover them. + */ +#define RPCBIND_MAXNETIDLEN (5u) + +/* + * Universal addresses are introduced in RFC 1833 and further spelled + * out in RFC 3530. RPCBIND_MAXUADDRLEN defines a maximum byte length + * of a universal address for use in allocating buffers and character + * arrays. + * + * Quoting RFC 3530, section 2.2: + * + * For TCP over IPv4 and for UDP over IPv4, the format of r_addr is the + * US-ASCII string: + * + * h1.h2.h3.h4.p1.p2 + * + * The prefix, "h1.h2.h3.h4", is the standard textual form for + * representing an IPv4 address, which is always four octets long. + * Assuming big-endian ordering, h1, h2, h3, and h4, are respectively, + * the first through fourth octets each converted to ASCII-decimal. + * Assuming big-endian ordering, p1 and p2 are, respectively, the first + * and second octets each converted to ASCII-decimal. For example, if a + * host, in big-endian order, has an address of 0x0A010307 and there is + * a service listening on, in big endian order, port 0x020F (decimal + * 527), then the complete universal address is "10.1.3.7.2.15". + * + * ... + * + * For TCP over IPv6 and for UDP over IPv6, the format of r_addr is the + * US-ASCII string: + * + * x1:x2:x3:x4:x5:x6:x7:x8.p1.p2 + * + * The suffix "p1.p2" is the service port, and is computed the same way + * as with universal addresses for TCP and UDP over IPv4.
The prefix, + * "x1:x2:x3:x4:x5:x6:x7:x8", is the standard textual form for + * representing an IPv6 address as defined in Section 2.2 of [RFC2373]. + * Additionally, the two alternative forms specified in Section 2.2 of + * [RFC2373] are also acceptable. + */ + +#include <linux/inet.h> + +/* Maximum size of the port number part of a universal address */ +#define RPCBIND_MAXUADDRPLEN sizeof(".255.255") + +/* Maximum size of an IPv4 universal address */ +#define RPCBIND_MAXUADDR4LEN \ + (INET_ADDRSTRLEN + RPCBIND_MAXUADDRPLEN) + +/* Maximum size of an IPv6 universal address */ +#define RPCBIND_MAXUADDR6LEN \ + (INET6_ADDRSTRLEN + RPCBIND_MAXUADDRPLEN) + +/* Assume INET6_ADDRSTRLEN will always be larger than INET_ADDRSTRLEN... */ +#define RPCBIND_MAXUADDRLEN RPCBIND_MAXUADDR6LEN + +#endif /* __KERNEL__ */ +#endif /* _LINUX_SUNRPC_MSGPROT_H_ */ diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h new file mode 100644 index 000000000..7f490bef9 --- /dev/null +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -0,0 +1,137 @@ +#ifndef _LINUX_SUNRPC_RPC_PIPE_FS_H +#define _LINUX_SUNRPC_RPC_PIPE_FS_H + +#ifdef __KERNEL__ + +#include <linux/workqueue.h> + +struct rpc_pipe_dir_head { + struct list_head pdh_entries; + struct dentry *pdh_dentry; +}; + +struct rpc_pipe_dir_object_ops; +struct rpc_pipe_dir_object { + struct list_head pdo_head; + const struct rpc_pipe_dir_object_ops *pdo_ops; + + void *pdo_data; +}; + +struct rpc_pipe_dir_object_ops { + int (*create)(struct dentry *dir, + struct rpc_pipe_dir_object *pdo); + void (*destroy)(struct dentry *dir, + struct rpc_pipe_dir_object *pdo); +}; + +struct rpc_pipe_msg { + struct list_head list; + void *data; + size_t len; + size_t copied; + int errno; +}; + +struct rpc_pipe_ops { + ssize_t (*upcall)(struct file *, struct rpc_pipe_msg *, char __user *, size_t); + ssize_t (*downcall)(struct file *, const char __user *, size_t); + void (*release_pipe)(struct inode *); + int (*open_pipe)(struct inode *); + 
void (*destroy_msg)(struct rpc_pipe_msg *); +}; + +struct rpc_pipe { + struct list_head pipe; + struct list_head in_upcall; + struct list_head in_downcall; + int pipelen; + int nreaders; + int nwriters; +#define RPC_PIPE_WAIT_FOR_OPEN 1 + int flags; + struct delayed_work queue_timeout; + const struct rpc_pipe_ops *ops; + spinlock_t lock; + struct dentry *dentry; +}; + +struct rpc_inode { + struct inode vfs_inode; + void *private; + struct rpc_pipe *pipe; + wait_queue_head_t waitq; +}; + +static inline struct rpc_inode * +RPC_I(struct inode *inode) +{ + return container_of(inode, struct rpc_inode, vfs_inode); +} + +enum { + SUNRPC_PIPEFS_NFS_PRIO, + SUNRPC_PIPEFS_RPC_PRIO, +}; + +extern int rpc_pipefs_notifier_register(struct notifier_block *); +extern void rpc_pipefs_notifier_unregister(struct notifier_block *); + +enum { + RPC_PIPEFS_MOUNT, + RPC_PIPEFS_UMOUNT, +}; + +extern struct dentry *rpc_d_lookup_sb(const struct super_block *sb, + const unsigned char *dir_name); +extern int rpc_pipefs_init_net(struct net *net); +extern void rpc_pipefs_exit_net(struct net *net); +extern struct super_block *rpc_get_sb_net(const struct net *net); +extern void rpc_put_sb_net(const struct net *net); + +extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *, + char __user *, size_t); +extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *); + +struct rpc_clnt; +extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *); +extern int rpc_remove_client_dir(struct rpc_clnt *); + +extern void rpc_init_pipe_dir_head(struct rpc_pipe_dir_head *pdh); +extern void rpc_init_pipe_dir_object(struct rpc_pipe_dir_object *pdo, + const struct rpc_pipe_dir_object_ops *pdo_ops, + void *pdo_data); +extern int rpc_add_pipe_dir_object(struct net *net, + struct rpc_pipe_dir_head *pdh, + struct rpc_pipe_dir_object *pdo); +extern void rpc_remove_pipe_dir_object(struct net *net, + struct rpc_pipe_dir_head *pdh, + struct 
rpc_pipe_dir_object *pdo); +extern struct rpc_pipe_dir_object *rpc_find_or_alloc_pipe_dir_object( + struct net *net, + struct rpc_pipe_dir_head *pdh, + int (*match)(struct rpc_pipe_dir_object *, void *), + struct rpc_pipe_dir_object *(*alloc)(void *), + void *data); + +struct cache_detail; +extern struct dentry *rpc_create_cache_dir(struct dentry *, + const char *, + umode_t umode, + struct cache_detail *); +extern void rpc_remove_cache_dir(struct dentry *); + +extern int rpc_rmdir(struct dentry *dentry); + +struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags); +void rpc_destroy_pipe_data(struct rpc_pipe *pipe); +extern struct dentry *rpc_mkpipe_dentry(struct dentry *, const char *, void *, + struct rpc_pipe *); +extern int rpc_unlink(struct dentry *); +extern int register_rpc_pipefs(void); +extern void unregister_rpc_pipefs(void); + +extern bool gssd_running(struct net *net); + +#endif +#endif diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h new file mode 100644 index 000000000..f33c5a4d6 --- /dev/null +++ b/include/linux/sunrpc/rpc_rdma.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _LINUX_SUNRPC_RPC_RDMA_H +#define _LINUX_SUNRPC_RPC_RDMA_H + +#include <linux/types.h> + +#define RPCRDMA_VERSION 1 +#define rpcrdma_version cpu_to_be32(RPCRDMA_VERSION) + +struct rpcrdma_segment { + __be32 rs_handle; /* Registered memory handle */ + __be32 rs_length; /* Length of the chunk in bytes */ + __be64 rs_offset; /* Chunk virtual address or offset */ +}; + +/* + * read chunk(s), encoded as a linked list. + */ +struct rpcrdma_read_chunk { + __be32 rc_discrim; /* 1 indicates presence */ + __be32 rc_position; /* Position in XDR stream */ + struct rpcrdma_segment rc_target; +}; + +/* + * write chunk, and reply chunk. 
+ */ +struct rpcrdma_write_chunk { + struct rpcrdma_segment wc_target; +}; + +/* + * write chunk(s), encoded as a counted array. + */ +struct rpcrdma_write_array { + __be32 wc_discrim; /* 1 indicates presence */ + __be32 wc_nchunks; /* Array count */ + struct rpcrdma_write_chunk wc_array[0]; +}; + +struct rpcrdma_msg { + __be32 rm_xid; /* Mirrors the RPC header xid */ + __be32 rm_vers; /* Version of this protocol */ + __be32 rm_credit; /* Buffers requested/granted */ + __be32 rm_type; /* Type of message (enum rpcrdma_proc) */ + union { + + struct { /* no chunks */ + __be32 rm_empty[3]; /* 3 empty chunk lists */ + } rm_nochunks; + + struct { /* no chunks and padded */ + __be32 rm_align; /* Padding alignment */ + __be32 rm_thresh; /* Padding threshold */ + __be32 rm_pempty[3]; /* 3 empty chunk lists */ + } rm_padded; + + __be32 rm_chunks[0]; /* read, write and reply chunks */ + + } rm_body; +}; + +/* + * Smallest RPC/RDMA header: rm_xid through rm_type, then rm_nochunks + */ +#define RPCRDMA_HDRLEN_MIN (sizeof(__be32) * 7) + +enum rpcrdma_errcode { + ERR_VERS = 1, + ERR_CHUNK = 2 +}; + +struct rpcrdma_err_vers { + uint32_t rdma_vers_low; /* Version range supported by peer */ + uint32_t rdma_vers_high; +}; + +enum rpcrdma_proc { + RDMA_MSG = 0, /* An RPC call or reply msg */ + RDMA_NOMSG = 1, /* An RPC call or reply msg - separate body */ + RDMA_MSGP = 2, /* An RPC call or reply msg with padding */ + RDMA_DONE = 3, /* Client signals reply completion */ + RDMA_ERROR = 4 /* An RPC RDMA encoding error */ +}; + +#define rdma_msg cpu_to_be32(RDMA_MSG) +#define rdma_nomsg cpu_to_be32(RDMA_NOMSG) +#define rdma_msgp cpu_to_be32(RDMA_MSGP) +#define rdma_done cpu_to_be32(RDMA_DONE) +#define rdma_error cpu_to_be32(RDMA_ERROR) + +#endif /* _LINUX_SUNRPC_RPC_RDMA_H */ diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h new file mode 100644 index 000000000..5f1e6bd4c --- /dev/null +++ b/include/linux/sunrpc/sched.h @@ -0,0 +1,272 @@ +/* + * 
linux/include/linux/sunrpc/sched.h + * + * Scheduling primitives for kernel Sun RPC. + * + * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> + */ + +#ifndef _LINUX_SUNRPC_SCHED_H_ +#define _LINUX_SUNRPC_SCHED_H_ + +#include <linux/timer.h> +#include <linux/ktime.h> +#include <linux/sunrpc/types.h> +#include <linux/spinlock.h> +#include <linux/wait.h> +#include <linux/workqueue.h> +#include <linux/sunrpc/xdr.h> + +/* + * This is the actual RPC procedure call info. + */ +struct rpc_procinfo; +struct rpc_message { + struct rpc_procinfo * rpc_proc; /* Procedure information */ + void * rpc_argp; /* Arguments */ + void * rpc_resp; /* Result */ + struct rpc_cred * rpc_cred; /* Credentials */ +}; + +struct rpc_call_ops; +struct rpc_wait_queue; +struct rpc_wait { + struct list_head list; /* wait queue links */ + struct list_head links; /* Links to related tasks */ + struct list_head timer_list; /* Timer list */ + unsigned long expires; +}; + +/* + * This is the RPC task struct + */ +struct rpc_task { + atomic_t tk_count; /* Reference count */ + struct list_head tk_task; /* global list of tasks */ + struct rpc_clnt * tk_client; /* RPC client */ + struct rpc_rqst * tk_rqstp; /* RPC request */ + + /* + * RPC call state + */ + struct rpc_message tk_msg; /* RPC call info */ + + /* + * callback to be executed after waking up + * action next procedure for async tasks + * tk_ops caller callbacks + */ + void (*tk_callback)(struct rpc_task *); + void (*tk_action)(struct rpc_task *); + const struct rpc_call_ops *tk_ops; + void * tk_calldata; + + unsigned long tk_timeout; /* timeout for rpc_sleep() */ + unsigned long tk_runstate; /* Task run status */ + struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could + * be any workqueue + */ + struct rpc_wait_queue *tk_waitqueue; /* RPC wait queue we're on */ + union { + struct work_struct tk_work; /* Async task work queue */ + struct rpc_wait tk_wait; /* RPC wait */ + } u; + + ktime_t tk_start; /* RPC task init timestamp */ + 
+ pid_t tk_owner; /* Process id for batching tasks */ + int tk_status; /* result of last operation */ + unsigned short tk_flags; /* misc flags */ + unsigned short tk_timeouts; /* maj timeouts */ + +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) + unsigned short tk_pid; /* debugging aid */ +#endif + unsigned char tk_priority : 2,/* Task priority */ + tk_garb_retry : 2, + tk_cred_retry : 2, + tk_rebind_retry : 2; +}; + +typedef void (*rpc_action)(struct rpc_task *); + +struct rpc_call_ops { + void (*rpc_call_prepare)(struct rpc_task *, void *); + void (*rpc_call_done)(struct rpc_task *, void *); + void (*rpc_count_stats)(struct rpc_task *, void *); + void (*rpc_release)(void *); +}; + +struct rpc_task_setup { + struct rpc_task *task; + struct rpc_clnt *rpc_client; + const struct rpc_message *rpc_message; + const struct rpc_call_ops *callback_ops; + void *callback_data; + struct workqueue_struct *workqueue; + unsigned short flags; + signed char priority; +}; + +/* + * RPC task flags + */ +#define RPC_TASK_ASYNC 0x0001 /* is an async task */ +#define RPC_TASK_SWAPPER 0x0002 /* is swapping in/out */ +#define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ +#define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ +#define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ +#define RPC_TASK_KILLED 0x0100 /* task was killed */ +#define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ +#define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ +#define RPC_TASK_SENT 0x0800 /* message was sent */ +#define RPC_TASK_TIMEOUT 0x1000 /* fail with ETIMEDOUT on timeout */ +#define RPC_TASK_NOCONNECT 0x2000 /* return ENOTCONN if not connected */ +#define RPC_TASK_NO_RETRANS_TIMEOUT 0x4000 /* wait forever for a reply */ + +#define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) +#define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) +#define RPC_DO_ROOTOVERRIDE(t) ((t)->tk_flags & RPC_TASK_ROOTCREDS) +#define RPC_ASSASSINATED(t) ((t)->tk_flags & 
RPC_TASK_KILLED) +#define RPC_IS_SOFT(t) ((t)->tk_flags & (RPC_TASK_SOFT|RPC_TASK_TIMEOUT)) +#define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN) +#define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT) + +#define RPC_TASK_RUNNING 0 +#define RPC_TASK_QUEUED 1 +#define RPC_TASK_ACTIVE 2 + +#define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) +#define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) +#define rpc_test_and_set_running(t) \ + test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) +#define rpc_clear_running(t) \ + do { \ + smp_mb__before_atomic(); \ + clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate); \ + smp_mb__after_atomic(); \ + } while (0) + +#define RPC_IS_QUEUED(t) test_bit(RPC_TASK_QUEUED, &(t)->tk_runstate) +#define rpc_set_queued(t) set_bit(RPC_TASK_QUEUED, &(t)->tk_runstate) +#define rpc_clear_queued(t) \ + do { \ + smp_mb__before_atomic(); \ + clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate); \ + smp_mb__after_atomic(); \ + } while (0) + +#define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate) + +/* + * Task priorities. + * Note: if you change these, you must also change + * the task initialization definitions below. 
+ */ +#define RPC_PRIORITY_LOW (-1) +#define RPC_PRIORITY_NORMAL (0) +#define RPC_PRIORITY_HIGH (1) +#define RPC_PRIORITY_PRIVILEGED (2) +#define RPC_NR_PRIORITY (1 + RPC_PRIORITY_PRIVILEGED - RPC_PRIORITY_LOW) + +struct rpc_timer { + struct timer_list timer; + struct list_head list; + unsigned long expires; +}; + +/* + * RPC synchronization objects + */ +struct rpc_wait_queue { + spinlock_t lock; + struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */ + pid_t owner; /* process id of last task serviced */ + unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ + unsigned char priority; /* current priority */ + unsigned char nr; /* # tasks remaining for cookie */ + unsigned short qlen; /* total # tasks waiting in queue */ + struct rpc_timer timer_list; +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) + const char * name; +#endif +}; + +/* + * This is the # requests to send consecutively + * from a single cookie. The aim is to improve + * performance of NFS operations such as read/write. 
+ */ +#define RPC_BATCH_COUNT 16 +#define RPC_IS_PRIORITY(q) ((q)->maxpriority > 0) + +/* + * Function prototypes + */ +struct rpc_task *rpc_new_task(const struct rpc_task_setup *); +struct rpc_task *rpc_run_task(const struct rpc_task_setup *); +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, + const struct rpc_call_ops *ops); +void rpc_put_task(struct rpc_task *); +void rpc_put_task_async(struct rpc_task *); +void rpc_exit_task(struct rpc_task *); +void rpc_exit(struct rpc_task *, int); +void rpc_release_calldata(const struct rpc_call_ops *, void *); +void rpc_killall_tasks(struct rpc_clnt *); +void rpc_execute(struct rpc_task *); +void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); +void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); +void rpc_destroy_wait_queue(struct rpc_wait_queue *); +void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, + rpc_action action); +void rpc_sleep_on_priority(struct rpc_wait_queue *, + struct rpc_task *, + rpc_action action, + int priority); +void rpc_wake_up_queued_task(struct rpc_wait_queue *, + struct rpc_task *); +void rpc_wake_up(struct rpc_wait_queue *); +struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); +struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *, + bool (*)(struct rpc_task *, void *), + void *); +void rpc_wake_up_status(struct rpc_wait_queue *, int); +void rpc_delay(struct rpc_task *, unsigned long); +void * rpc_malloc(struct rpc_task *, size_t); +void rpc_free(void *); +int rpciod_up(void); +void rpciod_down(void); +int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *); +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +struct net; +void rpc_show_tasks(struct net *); +#endif +int rpc_init_mempool(void); +void rpc_destroy_mempool(void); +extern struct workqueue_struct *rpciod_workqueue; +void rpc_prepare_task(struct rpc_task *task); + +static inline int rpc_wait_for_completion_task(struct rpc_task *task) +{ + return 
__rpc_wait_for_completion_task(task, NULL); +} + +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) +static inline const char * rpc_qname(const struct rpc_wait_queue *q) +{ + return ((q && q->name) ? q->name : "unknown"); +} + +static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q, + const char *name) +{ + q->name = name; +} +#else +static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q, + const char *name) +{ +} +#endif + +#endif /* _LINUX_SUNRPC_SCHED_H_ */ diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h new file mode 100644 index 000000000..edc64219f --- /dev/null +++ b/include/linux/sunrpc/stats.h @@ -0,0 +1,84 @@ +/* + * linux/include/linux/sunrpc/stats.h + * + * Client statistics collection for SUN RPC + * + * Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#ifndef _LINUX_SUNRPC_STATS_H +#define _LINUX_SUNRPC_STATS_H + +#include <linux/proc_fs.h> + +struct rpc_stat { + const struct rpc_program *program; + + unsigned int netcnt, + netudpcnt, + nettcpcnt, + nettcpconn, + netreconn; + unsigned int rpccnt, + rpcretrans, + rpcauthrefresh, + rpcgarbage; +}; + +struct svc_stat { + struct svc_program * program; + + unsigned int netcnt, + netudpcnt, + nettcpcnt, + nettcpconn; + unsigned int rpccnt, + rpcbadfmt, + rpcbadauth, + rpcbadclnt; +}; + +struct net; +#ifdef CONFIG_PROC_FS +int rpc_proc_init(struct net *); +void rpc_proc_exit(struct net *); +#else +static inline int rpc_proc_init(struct net *net) +{ + return 0; +} + +static inline void rpc_proc_exit(struct net *net) +{ +} +#endif + +#ifdef MODULE +void rpc_modcount(struct inode *, int); +#endif + +#ifdef CONFIG_PROC_FS +struct proc_dir_entry * rpc_proc_register(struct net *,struct rpc_stat *); +void rpc_proc_unregister(struct net *,const char *); +void rpc_proc_zero(const struct rpc_program *); +struct proc_dir_entry * svc_proc_register(struct net *, struct svc_stat *, + const struct file_operations *); +void 
svc_proc_unregister(struct net *, const char *); + +void svc_seq_show(struct seq_file *, + const struct svc_stat *); +#else + +static inline struct proc_dir_entry *rpc_proc_register(struct net *net, struct rpc_stat *s) { return NULL; } +static inline void rpc_proc_unregister(struct net *net, const char *p) {} +static inline void rpc_proc_zero(const struct rpc_program *p) {} + +static inline struct proc_dir_entry *svc_proc_register(struct net *net, struct svc_stat *s, + const struct file_operations *f) { return NULL; } +static inline void svc_proc_unregister(struct net *net, const char *p) {} + +static inline void svc_seq_show(struct seq_file *seq, + const struct svc_stat *st) {} +#endif + +#endif /* _LINUX_SUNRPC_STATS_H */ diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h new file mode 100644 index 000000000..fae6fb947 --- /dev/null +++ b/include/linux/sunrpc/svc.h @@ -0,0 +1,468 @@ +/* + * linux/include/linux/sunrpc/svc.h + * + * RPC server declarations. + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + + +#ifndef SUNRPC_SVC_H +#define SUNRPC_SVC_H + +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/sunrpc/types.h> +#include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/auth.h> +#include <linux/sunrpc/svcauth.h> +#include <linux/wait.h> +#include <linux/mm.h> + +/* + * This is the RPC server thread function prototype + */ +typedef int (*svc_thread_fn)(void *); + +/* statistics for svc_pool structures */ +struct svc_pool_stats { + atomic_long_t packets; + unsigned long sockets_queued; + atomic_long_t threads_woken; + atomic_long_t threads_timedout; +}; + +/* + * + * RPC service thread pool. + * + * Pool of threads and temporary sockets. Generally there is only + * a single one of these per RPC service, but on NUMA machines those + * services that can benefit from it (i.e. nfs but not lockd) will + * have one pool per NUMA node. This optimisation reduces cross- + * node traffic on multi-node NUMA NFS servers. 
+ */ +struct svc_pool { + unsigned int sp_id; /* pool id; also node id on NUMA */ + spinlock_t sp_lock; /* protects all fields */ + struct list_head sp_sockets; /* pending sockets */ + unsigned int sp_nrthreads; /* # of threads in pool */ + struct list_head sp_all_threads; /* all server threads */ + struct svc_pool_stats sp_stats; /* statistics on pool operation */ +#define SP_TASK_PENDING (0) /* still work to do even if no + * xprt is queued. */ + unsigned long sp_flags; +} ____cacheline_aligned_in_smp; + +/* + * RPC service. + * + * An RPC service is a ``daemon,'' possibly multithreaded, which + * receives and processes incoming RPC messages. + * It has one or more transport sockets associated with it, and maintains + * a list of idle threads waiting for input. + * + * We currently do not support more than one RPC program per daemon. + */ +struct svc_serv { + struct svc_program * sv_program; /* RPC program */ + struct svc_stat * sv_stats; /* RPC statistics */ + spinlock_t sv_lock; + unsigned int sv_nrthreads; /* # of server threads */ + unsigned int sv_maxconn; /* max connections allowed or + * '0' causing max to be based + * on number of threads. */ + + unsigned int sv_max_payload; /* datagram payload size */ + unsigned int sv_max_mesg; /* max_payload + 1 page for overheads */ + unsigned int sv_xdrsize; /* XDR buffer size */ + struct list_head sv_permsocks; /* all permanent sockets */ + struct list_head sv_tempsocks; /* all temporary sockets */ + int sv_tmpcnt; /* count of temporary sockets */ + struct timer_list sv_temptimer; /* timer for aging temporary sockets */ + + char * sv_name; /* service name */ + + unsigned int sv_nrpools; /* number of thread pools */ + struct svc_pool * sv_pools; /* array of thread pools */ + + void (*sv_shutdown)(struct svc_serv *serv, + struct net *net); + /* Callback to use when last thread + * exits. 
+ */ + + struct module * sv_module; /* optional module to count when + * adding threads */ + svc_thread_fn sv_function; /* main function for threads */ +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + struct list_head sv_cb_list; /* queue for callback requests + * that arrive over the same + * connection */ + spinlock_t sv_cb_lock; /* protects the sv_cb_list */ + wait_queue_head_t sv_cb_waitq; /* sleep here if there are no + * entries in the sv_cb_list */ + struct svc_xprt *sv_bc_xprt; /* callback on fore channel */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ +}; + +/* + * We use sv_nrthreads as a reference count. svc_destroy() drops + * this refcount, so we need to bump it up around operations that + * change the number of threads. Horrible, but there it is. + * Should be called with the "service mutex" held. + */ +static inline void svc_get(struct svc_serv *serv) +{ + serv->sv_nrthreads++; +} + +/* + * Maximum payload size supported by a kernel RPC server. + * This is used to determine the max number of pages nfsd is + * willing to return in a single READ operation. + * + * These happen to all be powers of 2, which is not strictly + * necessary but helps enforce the real limitation, which is + * that they should be multiples of PAGE_CACHE_SIZE. + * + * For UDP transports, a block plus NFS, RPC, and UDP headers + * has to fit into the IP datagram limit of 64K. The largest + * feasible number for all known page sizes is probably 48K, + * but we choose 32K here. This is the same as the historical + * Linux limit; someone who cares more about NFS/UDP performance + * can test a larger number. + * + * For TCP transports we have more freedom. A size of 1MB is + * chosen to match the client limit. Other OSes are known to + * have larger limits, but those numbers are probably beyond + * the point of diminishing returns.
+ */ +#define RPCSVC_MAXPAYLOAD (1*1024*1024u) +#define RPCSVC_MAXPAYLOAD_TCP RPCSVC_MAXPAYLOAD +#define RPCSVC_MAXPAYLOAD_UDP (32*1024u) + +extern u32 svc_max_payload(const struct svc_rqst *rqstp); + +/* + * RPC requests and replies are stored in one or more pages. + * We maintain an array of pages for each server thread. + * Requests are copied into these pages as they arrive. Remaining + * pages are available to write the reply into. + * + * Pages are sent using ->sendpage so each server thread needs to + * allocate more to replace those used in sending. To help keep track + * of these pages we have a receive list where all pages initially live, + * and a send list where pages are moved to when they are to be part + * of a reply. + * + * We use xdr_buf for holding responses as it fits well with NFS + * read responses (that have a header, and some data pages, and possibly + * a tail) and means we can share some client side routines. + * + * The xdr_buf.head kvec always points to the first page in the rq_*pages + * list. The xdr_buf.pages pointer points to the second page on that + * list. xdr_buf.tail points to the end of the first page. + * This assumes that the non-page part of an rpc reply will fit + * in a page - NFSd ensures this. lockd also has no trouble. + * + * Each request/reply pair can have at most one "payload", plus two pages, + * one for the request, and one for the reply. + * When using ->sendfile to return read data, we might need one extra page + * if the request is not page-aligned. So add another '1'.
+ */ +#define RPCSVC_MAXPAGES ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE \ + + 2 + 1) + +/* + * Consume one 32-bit word from the front of iov: iov_base is advanced + * past the word, iov_len shrinks by sizeof(__be32), and the word is + * returned after conversion with ntohl(). No bounds check is done here. + */ +static inline u32 svc_getnl(struct kvec *iov) +{ + __be32 val, *vp; + vp = iov->iov_base; + val = *vp++; + iov->iov_base = (void*)vp; + iov->iov_len -= sizeof(__be32); + return ntohl(val); +} + +/* + * Store val, converted with htonl(), at offset iov_len from iov_base and + * grow iov_len by sizeof(__be32). No check for available space is done + * here. + */ +static inline void svc_putnl(struct kvec *iov, u32 val) +{ + __be32 *vp = iov->iov_base + iov->iov_len; + *vp = htonl(val); + iov->iov_len += sizeof(__be32); +} + +/* + * Same as svc_getnl(), but the word is returned as stored, without any + * byte-order conversion. + */ +static inline __be32 svc_getu32(struct kvec *iov) +{ + __be32 val, *vp; + vp = iov->iov_base; + val = *vp++; + iov->iov_base = (void*)vp; + iov->iov_len -= sizeof(__be32); + return val; +} + +/* + * Reverse the bookkeeping of one svc_getu32()/svc_getnl(): iov_base moves + * back by one __be32 and iov_len grows by the same amount. + */ +static inline void svc_ungetu32(struct kvec *iov) +{ + __be32 *vp = (__be32 *)iov->iov_base; + iov->iov_base = (void *)(vp - 1); + iov->iov_len += sizeof(*vp); +} + +/* + * Same as svc_putnl(), but val is stored as given, without any byte-order + * conversion. + */ +static inline void svc_putu32(struct kvec *iov, __be32 val) +{ + __be32 *vp = iov->iov_base + iov->iov_len; + *vp = val; + iov->iov_len += sizeof(__be32); +} + +/* + * The context of a single thread, including the request currently being + * processed.
+ */
+struct svc_rqst {
+	struct list_head	rq_all;		/* all threads list */
+	struct rcu_head		rq_rcu_head;	/* for RCU deferred kfree */
+	struct svc_xprt *	rq_xprt;	/* transport ptr */
+
+	struct sockaddr_storage	rq_addr;	/* peer address */
+	size_t			rq_addrlen;
+	struct sockaddr_storage	rq_daddr;	/* dest addr of request
+						 *  - reply from here */
+	size_t			rq_daddrlen;
+
+	struct svc_serv *	rq_server;	/* RPC service definition */
+	struct svc_pool *	rq_pool;	/* thread pool */
+	struct svc_procedure *	rq_procinfo;	/* procedure info */
+	struct auth_ops *	rq_authop;	/* authentication flavour */
+	struct svc_cred		rq_cred;	/* auth info */
+	void *			rq_xprt_ctxt;	/* transport specific context ptr */
+	struct svc_deferred_req*rq_deferred;	/* deferred request we are replaying */
+
+	size_t			rq_xprt_hlen;	/* xprt header len */
+	struct xdr_buf		rq_arg;
+	struct xdr_buf		rq_res;
+	struct page *		rq_pages[RPCSVC_MAXPAGES];
+	struct page *		*rq_respages;	/* points into rq_pages */
+	struct page *		*rq_next_page; /* next reply page to use */
+	struct page *		*rq_page_end;  /* one past the last page */
+
+	struct kvec		rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
+
+	__be32			rq_xid;		/* transmission id */
+	u32			rq_prog;	/* program number */
+	u32			rq_vers;	/* program version */
+	u32			rq_proc;	/* procedure number */
+	u32			rq_prot;	/* IP protocol */
+	int			rq_cachetype;	/* catering to nfsd */
+	/* Values for rq_flags; presumably bit numbers - TODO confirm */
+#define	RQ_SECURE	(0)			/* secure port */
+#define	RQ_LOCAL	(1)			/* local request */
+#define	RQ_USEDEFERRAL	(2)			/* use deferral */
+#define	RQ_DROPME	(3)			/* drop current reply */
+#define	RQ_SPLICE_OK	(4)			/* turned off in gss privacy
+						 * to prevent encrypting page
+						 * cache pages */
+#define	RQ_VICTIM	(5)			/* about to be shut down */
+#define	RQ_BUSY		(6)			/* request is busy */
+	unsigned long		rq_flags;	/* flags field */
+
+	void *			rq_argp;	/* decoded arguments */
+	void *			rq_resp;	/* xdr'd results */
+	void *			rq_auth_data;	/* flavor-specific data */
+	int			rq_auth_slack;	/* extra space xdr code
+						 * should leave in head
+						 * for krb5i, krb5p.
+						 */
+	int			rq_reserved;	/* space on socket outq
+						 * reserved for this request
+						 */
+
+	struct cache_req	rq_chandle;	/* handle passed to caches for
+						 * request delaying
+						 */
+	/* Catering to nfsd */
+	struct auth_domain *	rq_client;	/* RPC peer info */
+	struct auth_domain *	rq_gssclient;	/* "gss/"-style peer info */
+	struct svc_cacherep *	rq_cacherep;	/* cache info */
+	struct task_struct	*rq_task;	/* service thread */
+	spinlock_t		rq_lock;	/* per-request lock */
+};
+
+/*
+ * Network namespace of the transport a request arrived on.  The argument
+ * is parenthesized so that any pointer expression (e.g. "p + 1") expands
+ * correctly.  rq_xprt is dereferenced unconditionally.
+ */
+#define SVC_NET(svc_rqst)	((svc_rqst)->rq_xprt->xpt_net)
+
+/*
+ * Rigorous type checking on sockaddr type conversions
+ *
+ * Each helper returns the address stored in rq_addr (peer) or rq_daddr
+ * (destination) viewed as the named sockaddr type.  The address family is
+ * not checked, and the const qualifier of @rqst is cast away.
+ */
+static inline struct sockaddr_in *svc_addr_in(const struct svc_rqst *rqst)
+{
+	return (struct sockaddr_in *) &rqst->rq_addr;
+}
+
+static inline struct sockaddr_in6 *svc_addr_in6(const struct svc_rqst *rqst)
+{
+	return (struct sockaddr_in6 *) &rqst->rq_addr;
+}
+
+static inline struct sockaddr *svc_addr(const struct svc_rqst *rqst)
+{
+	return (struct sockaddr *) &rqst->rq_addr;
+}
+
+static inline struct sockaddr_in *svc_daddr_in(const struct svc_rqst *rqst)
+{
+	return (struct sockaddr_in *) &rqst->rq_daddr;
+}
+
+static inline struct sockaddr_in6 *svc_daddr_in6(const struct svc_rqst *rqst)
+{
+	return (struct sockaddr_in6 *) &rqst->rq_daddr;
+}
+
+static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst)
+{
+	return (struct sockaddr *) &rqst->rq_daddr;
+}
+
+/*
+ * Check buffer bounds after decoding arguments
+ *
+ * Returns non-zero when @p lies inside rq_arg.head[0], i.e. between
+ * iov_base and iov_base + iov_len (both ends inclusive), zero otherwise.
+ */
+static inline int
+xdr_argsize_check(struct svc_rqst *rqstp, __be32 *p)
+{
+	char *cp = (char *)p;
+	struct kvec *vec = &rqstp->rq_arg.head[0];
+	return cp >= (char*)vec->iov_base
+		&& cp <= (char*)vec->iov_base + vec->iov_len;
+}
+
+/*
+ * Record the encoded result size: sets rq_res.head[0].iov_len to the
+ * distance from iov_base to @p.  Returns non-zero when that length is
+ * at most PAGE_SIZE.  Note that iov_len is updated even on failure.
+ */
+static inline int
+xdr_ressize_check(struct svc_rqst *rqstp, __be32 *p)
+{
+	struct kvec *vec = &rqstp->rq_res.head[0];
+	char *cp = (char*)p;
+
+	vec->iov_len = cp - (char*)vec->iov_base;
+
+	return vec->iov_len <= PAGE_SIZE;
+}
+
+/*
+ * Release the reply pages: walks rq_next_page back down to rq_respages,
+ * dropping a reference on every non-NULL page and clearing its slot.
+ */
+static inline void svc_free_res_pages(struct svc_rqst *rqstp)
+{
+	while (rqstp->rq_next_page != rqstp->rq_respages) {
+		struct page **pp = --rqstp->rq_next_page;
+		if (*pp) {
+			put_page(*pp);
+			*pp = NULL;
+		}
+	}
+}
+
+struct svc_deferred_req {
+	u32			prot;	/* protocol (UDP or TCP) */
+	struct svc_xprt		*xprt;
+	struct sockaddr_storage	addr;	/* where reply must go */
+	size_t			addrlen;
+	struct sockaddr_storage	daddr;	/* where reply must come from */
+	size_t			daddrlen;
+	struct cache_deferred_req handle;
+	size_t			xprt_hlen;
+	int			argslen;	/* presumably the size of args[] - TODO confirm units */
+	__be32			args[0];	/* trailing variable-length data */
+};
+
+/*
+ * List of RPC programs on the same transport endpoint
+ */
+struct svc_program {
+	struct svc_program *	pg_next;	/* other programs (same xprt) */
+	u32			pg_prog;	/* program number */
+	unsigned int		pg_lovers;	/* lowest version */
+	unsigned int		pg_hivers;	/* highest version */
+	unsigned int		pg_nvers;	/* number of versions */
+	struct svc_version **	pg_vers;	/* version array */
+	char *			pg_name;	/* service name */
+	char *			pg_class;	/* class name: services sharing authentication */
+	struct svc_stat *	pg_stats;	/* rpc statistics */
+	int			(*pg_authenticate)(struct svc_rqst *);
+};
+
+/*
+ * RPC program version
+ */
+struct svc_version {
+	u32			vs_vers;	/* version number */
+	u32			vs_nproc;	/* number of procedures */
+	struct svc_procedure *	vs_proc;	/* per-procedure info */
+	u32			vs_xdrsize;	/* xdrsize needed for this version */
+
+	unsigned int		vs_hidden : 1,	/* Don't register with portmapper.
+						 * Only used for nfsacl so far. */
+				vs_rpcb_optnl:1;/* Don't care the result of register.
+						 * Only used for nfsv4. */
+
+	/* Override dispatch function (e.g. when caching replies).
+	 * A return value of 0 means drop the request.
+	 * vs_dispatch == NULL means use default dispatcher.
+	 */
+	int			(*vs_dispatch)(struct svc_rqst *, __be32 *);
+};
+
+/*
+ * RPC procedure info
+ */
+typedef __be32	(*svc_procfunc)(struct svc_rqst *, void *argp, void *resp);
+struct svc_procedure {
+	svc_procfunc		pc_func;	/* process the request */
+	kxdrproc_t		pc_decode;	/* XDR decode args */
+	kxdrproc_t		pc_encode;	/* XDR encode result */
+	kxdrproc_t		pc_release;	/* XDR free result */
+	unsigned int		pc_argsize;	/* argument struct size */
+	unsigned int		pc_ressize;	/* result struct size */
+	unsigned int		pc_count;	/* call count */
+	unsigned int		pc_cachetype;	/* cache info (NFS) */
+	unsigned int		pc_xdrressize;	/* maximum size of XDR reply */
+};
+
+/*
+ * Function prototypes.
+ */
+int svc_rpcb_setup(struct svc_serv *serv, struct net *net);
+void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net);
+int svc_bind(struct svc_serv *serv, struct net *net);
+struct svc_serv *svc_create(struct svc_program *, unsigned int,
+			    void (*shutdown)(struct svc_serv *, struct net *net));
+struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
+					struct svc_pool *pool, int node);
+void		   svc_exit_thread(struct svc_rqst *);
+struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
+			void (*shutdown)(struct svc_serv *, struct net *net),
+			svc_thread_fn, struct module *);
+int		   svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
+int		   svc_pool_stats_open(struct svc_serv *serv, struct file *file);
+void		   svc_destroy(struct svc_serv *);
+void		   svc_shutdown_net(struct svc_serv *, struct net *);
+int		   svc_process(struct svc_rqst *);
+int		   bc_svc_process(struct svc_serv *, struct rpc_rqst *,
+			struct svc_rqst *);
+int		   svc_register(const struct svc_serv *, struct net *, const int,
+				const unsigned short, const unsigned short);
+
+void		   svc_wake_up(struct svc_serv *);
+void		   svc_reserve(struct svc_rqst *rqstp, int space);
+struct svc_pool *  svc_pool_for_cpu(struct svc_serv *serv, int cpu);
+char *		   svc_print_addr(struct svc_rqst *, char *, size_t);
+
+/* NOTE(review): presumably the buffer size callers pass to svc_print_addr() - confirm */
+#define	RPC_MAX_ADDRBUFLEN	(63U)
+
+/*
+ * When we want to reduce the size of the reserved space in the response
+ * buffer, we need to take into account the size of any checksum data that
+ * may be at the end of the packet. This is difficult to determine exactly
+ * for all cases without actually generating the checksum, so we just use a
+ * static value.
+ */
+static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space)
+{
+	/* reserve @space plus this request's rq_auth_slack */
+	svc_reserve(rqstp, space + rqstp->rq_auth_slack);
+}
+
+#endif /* SUNRPC_SVC_H */
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
new file mode 100644
index 000000000..df8edf8ec
--- /dev/null
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *      Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *      Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ *      Neither the name of the Network Appliance, Inc. nor the names of
+ *      its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written
+ *      permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Tom Tucker <tom@opengridcomputing.com>
+ */
+
+#ifndef SVC_RDMA_H
+#define SVC_RDMA_H
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/rpc_rdma.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_cm.h>
+#define SVCRDMA_DEBUG
+
+/* RPC/RDMA parameters and stats */
+extern unsigned int svcrdma_ord;
+extern unsigned int svcrdma_max_requests;
+extern unsigned int svcrdma_max_req_size;
+
+extern atomic_t rdma_stat_recv;
+extern atomic_t rdma_stat_read;
+extern atomic_t rdma_stat_write;
+extern atomic_t rdma_stat_sq_starve;
+extern atomic_t rdma_stat_rq_starve;
+extern atomic_t rdma_stat_rq_poll;
+extern atomic_t rdma_stat_rq_prod;
+extern atomic_t rdma_stat_sq_poll;
+extern atomic_t rdma_stat_sq_prod;
+
+/*
+ * Contexts are built when an RDMA request is created and are a
+ * record of the resources that can be recovered when the request
+ * completes.
+ */
+struct svc_rdma_op_ctxt {
+	struct svc_rdma_op_ctxt *read_hdr;
+	struct svc_rdma_fastreg_mr *frmr;
+	int hdr_count;
+	struct xdr_buf arg;
+	struct list_head dto_q;
+	enum ib_wr_opcode wr_op;
+	enum ib_wc_status wc_status;
+	u32 byte_len;
+	u32 position;
+	struct svcxprt_rdma *xprt;
+	unsigned long flags;
+	enum dma_data_direction direction;
+	int count;
+	struct ib_sge sge[RPCSVC_MAXPAGES];
+	struct page *pages[RPCSVC_MAXPAGES];
+};
+
+/*
+ * NFS_ requests are mapped on the client side by the chunk lists in
+ * the RPCRDMA header. During the fetching of the RPC from the client
+ * and the writing of the reply to the client, the memory in the
+ * client and the memory in the server must be mapped as contiguous
+ * vaddr/len for access by the hardware. These data structures keep
+ * these mappings.
+ *
+ * For an RDMA_WRITE, the 'sge' maps the RPC REPLY. For RDMA_READ, the
+ * 'sge' in the svc_rdma_req_map maps the server side RPC reply and the
+ * 'ch' field maps the read-list of the RPCRDMA header to the 'sge'
+ * mapping of the reply.
+ */
+struct svc_rdma_chunk_sge {
+	int start;		/* sge no for this chunk */
+	int count;		/* sge count for this chunk */
+};
+struct svc_rdma_fastreg_mr {
+	struct ib_mr *mr;
+	void *kva;
+	struct ib_fast_reg_page_list *page_list;
+	int page_list_len;
+	unsigned long access_flags;
+	unsigned long map_len;
+	enum dma_data_direction direction;
+	struct list_head frmr_list;
+};
+struct svc_rdma_req_map {
+	unsigned long count;
+	union {			/* the three arrays share storage */
+		struct kvec sge[RPCSVC_MAXPAGES];
+		struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
+		unsigned long lkey[RPCSVC_MAXPAGES];
+	};
+};
+#define RDMACTXT_F_LAST_CTXT	2
+
+#define	SVCRDMA_DEVCAP_FAST_REG		1	/* fast mr registration */
+#define	SVCRDMA_DEVCAP_READ_W_INV	2	/* read w/ invalidate */
+
+struct svcxprt_rdma {
+	struct svc_xprt      sc_xprt;		/* SVC transport structure */
+	struct rdma_cm_id    *sc_cm_id;		/* RDMA connection id */
+	struct list_head     sc_accept_q;	/* Conn. waiting accept */
+	int		     sc_ord;		/* RDMA read limit */
+	int                  sc_max_sge;
+
+	int                  sc_sq_depth;	/* Depth of SQ */
+	atomic_t             sc_sq_count;	/* Number of SQ WR on queue */
+
+	int                  sc_max_requests;	/* Depth of RQ */
+	int                  sc_max_req_size;	/* Size of each RQ WR buf */
+
+	struct ib_pd         *sc_pd;
+
+	atomic_t	     sc_dma_used;
+	atomic_t	     sc_ctxt_used;
+	struct list_head     sc_rq_dto_q;
+	spinlock_t	     sc_rq_dto_lock;
+	struct ib_qp         *sc_qp;
+	struct ib_cq         *sc_rq_cq;
+	struct ib_cq         *sc_sq_cq;
+	struct ib_mr         *sc_phys_mr;	/* MR for server memory */
+	/* same signature as rdma_read_chunk_lcl()/rdma_read_chunk_frmr() */
+	int		     (*sc_reader)(struct svcxprt_rdma *,
+					  struct svc_rqst *,
+					  struct svc_rdma_op_ctxt *,
+					  int *, u32 *, u32, u32, u64, bool);
+	u32		     sc_dev_caps;	/* distilled device caps */
+	u32		     sc_dma_lkey;	/* local dma key */
+	unsigned int	     sc_frmr_pg_list_len;
+	struct list_head     sc_frmr_q;
+	spinlock_t	     sc_frmr_q_lock;
+
+	spinlock_t	     sc_lock;		/* transport lock */
+
+	wait_queue_head_t    sc_send_wait;	/* SQ exhaustion waitlist */
+	unsigned long	     sc_flags;
+	struct list_head     sc_dto_q;		/* DTO tasklet I/O pending Q */
+	struct list_head     sc_read_complete_q;
+	struct work_struct   sc_work;
+};
+/* sc_flags */
+#define RDMAXPRT_RQ_PENDING	1
+#define RDMAXPRT_SQ_PENDING	2
+#define RDMAXPRT_CONN_PENDING	3
+
+#define RPCRDMA_LISTEN_BACKLOG  10
+/* The default ORD value is based on two outstanding full-size writes with a
+ * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ.
*/
+#define RPCRDMA_ORD             (64/4)
+#define RPCRDMA_SQ_DEPTH_MULT   8
+#define RPCRDMA_MAX_REQUESTS    32
+#define RPCRDMA_MAX_REQ_SIZE    4096
+
+/* svc_rdma_marshal.c */
+extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
+extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *);
+extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
+				     struct rpcrdma_msg *,
+				     enum rpcrdma_errcode, u32 *);
+extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int);
+extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
+extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
+					    __be32, __be64, u32);
+extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *,
+					     struct rpcrdma_msg *,
+					     struct rpcrdma_msg *,
+					     enum rpcrdma_proc);
+extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *);
+
+/* svc_rdma_recvfrom.c */
+extern int svc_rdma_recvfrom(struct svc_rqst *);
+extern int rdma_read_chunk_lcl(struct svcxprt_rdma *, struct svc_rqst *,
+			       struct svc_rdma_op_ctxt *, int *, u32 *,
+			       u32, u32, u64, bool);
+extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
+				struct svc_rdma_op_ctxt *, int *, u32 *,
+				u32, u32, u64, bool);
+
+/* svc_rdma_sendto.c */
+extern int svc_rdma_sendto(struct svc_rqst *);
+
+/* svc_rdma_transport.c */
+extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
+extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
+				enum rpcrdma_errcode);
+struct page *svc_rdma_get_page(void);
+extern int svc_rdma_post_recv(struct svcxprt_rdma *);
+extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
+extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
+extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
+extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
+extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
+extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
+extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *);
+extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
+extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
+			      struct svc_rdma_fastreg_mr *);
+extern void svc_sq_reap(struct svcxprt_rdma *);
+extern void svc_rq_reap(struct svcxprt_rdma *);
+extern struct svc_xprt_class svc_rdma_class;
+extern void svc_rdma_prep_reply_hdr(struct svc_rqst *);
+
+/* svc_rdma.c */
+extern int svc_rdma_init(void);
+extern void svc_rdma_cleanup(void);
+
+/*
+ * Returns the address of the first read chunk, or NULL if no read chunk is
+ * present.  The chunk is taken to start at rm_chunks[0]; a zero rc_discrim
+ * there means "no read list".
+ */
+static inline struct rpcrdma_read_chunk *
+svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
+{
+	struct rpcrdma_read_chunk *ch =
+		(struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
+
+	if (ch->rc_discrim == 0)
+		return NULL;
+
+	return ch;
+}
+
+/*
+ * Returns the address of the first write array element, or NULL if no
+ * write array list is present.  Only the "empty read list" layout is
+ * handled: rm_chunks[0] must be zero and rm_chunks[1] non-zero.
+ */
+static inline struct rpcrdma_write_array *
+svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp)
+{
+	if (rmsgp->rm_body.rm_chunks[0] != 0
+	    || rmsgp->rm_body.rm_chunks[1] == 0)
+		return NULL;
+
+	return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1];
+}
+
+/*
+ * Returns the address of the first reply array element, or NULL if no
+ * reply array is present.
+ *
+ * NOTE(review): once the guard below has passed, rm_chunks[0] and
+ * rm_chunks[1] are both zero, so svc_rdma_get_write_array() returns NULL
+ * and the wr_ary branch cannot be taken.  The rch branch looks equally
+ * unreachable if rc_discrim is the first word of a read chunk - confirm
+ * against struct rpcrdma_read_chunk.  In practice only the
+ * "No read list, no write list" path appears to run.
+ */
+static inline struct rpcrdma_write_array *
+svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp)
+{
+	struct rpcrdma_read_chunk *rch;
+	struct rpcrdma_write_array *wr_ary;
+	struct rpcrdma_write_array *rp_ary;
+
+	/* XXX: Need to fix when reply list may occur with read-list and/or
+	 * write list */
+	if (rmsgp->rm_body.rm_chunks[0] != 0 ||
+	    rmsgp->rm_body.rm_chunks[1] != 0)
+		return NULL;
+
+	rch = svc_rdma_get_read_chunk(rmsgp);
+	if (rch) {
+		while (rch->rc_discrim)
+			rch++;
+
+		/* The reply list follows an empty write array located
+		 * at 'rc_position' here. The reply array is at rc_target.
+		 */
+		rp_ary = (struct rpcrdma_write_array *)&rch->rc_target;
+
+		goto found_it;
+	}
+
+	wr_ary = svc_rdma_get_write_array(rmsgp);
+	if (wr_ary) {
+		rp_ary = (struct rpcrdma_write_array *)
+			&wr_ary->
+			wc_array[ntohl(wr_ary->wc_nchunks)].wc_target.rs_length;
+
+		goto found_it;
+	}
+
+	/* No read list, no write list */
+	rp_ary = (struct rpcrdma_write_array *)
+		&rmsgp->rm_body.rm_chunks[2];
+
+ found_it:
+	if (rp_ary->wc_discrim == 0)
+		return NULL;
+
+	return rp_ary;
+}
+#endif
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
new file mode 100644
index 000000000..79f6f8f3d
--- /dev/null
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -0,0 +1,210 @@
+/*
+ * linux/include/linux/sunrpc/svc_xprt.h
+ *
+ * RPC server transport I/O
+ */
+
+#ifndef SUNRPC_SVC_XPRT_H
+#define SUNRPC_SVC_XPRT_H
+
+#include <linux/sunrpc/svc.h>
+
+struct module;
+
+struct svc_xprt_ops {
+	struct svc_xprt	*(*xpo_create)(struct svc_serv *,
+				       struct net *net,
+				       struct sockaddr *, int,
+				       int);
+	struct svc_xprt	*(*xpo_accept)(struct svc_xprt *);
+	int		(*xpo_has_wspace)(struct svc_xprt *);
+	int		(*xpo_recvfrom)(struct svc_rqst *);
+	void		(*xpo_prep_reply_hdr)(struct svc_rqst *);
+	int		(*xpo_sendto)(struct svc_rqst *);
+	void		(*xpo_release_rqst)(struct svc_rqst *);
+	void		(*xpo_detach)(struct svc_xprt *);
+	void		(*xpo_free)(struct svc_xprt *);
+	int		(*xpo_secure_port)(struct svc_rqst *);
+	void		(*xpo_adjust_wspace)(struct svc_xprt *);
+};
+
+struct svc_xprt_class {
+	const char		*xcl_name;
+	struct module		*xcl_owner;
+	struct svc_xprt_ops	*xcl_ops;
+	struct list_head	xcl_list;
+	u32			xcl_max_payload;
+	int			xcl_ident;
+};
+
+/*
+ * This is embedded in an object that wants a callback before deleting
+ * an xprt; intended for use by NFSv4.1, which needs to know when a
+ * client's tcp connection (and hence possibly a backchannel) goes away.
+ */
+struct svc_xpt_user {
+	struct list_head list;
+	void (*callback)(struct svc_xpt_user *);
+};
+
+struct svc_xprt {
+	struct svc_xprt_class	*xpt_class;
+	struct svc_xprt_ops	*xpt_ops;
+	struct kref		xpt_ref;
+	struct list_head	xpt_list;
+	struct list_head	xpt_ready;
+	unsigned long		xpt_flags;
+#define	XPT_BUSY	0		/* enqueued/receiving */
+#define	XPT_CONN	1		/* conn pending */
+#define	XPT_CLOSE	2		/* dead or dying */
+#define	XPT_DATA	3		/* data pending */
+#define	XPT_TEMP	4		/* connected transport */
+#define	XPT_DEAD	6		/* transport closed */
+#define	XPT_CHNGBUF	7		/* need to change snd/rcv buf sizes */
+#define	XPT_DEFERRED	8		/* deferred request pending */
+#define	XPT_OLD		9		/* used for xprt aging mark+sweep */
+#define XPT_LISTENER	10		/* listening endpoint */
+#define XPT_CACHE_AUTH	11		/* cache auth info */
+#define XPT_LOCAL	12		/* connection from loopback interface */
+
+	struct svc_serv		*xpt_server;	/* service for transport */
+	atomic_t    	    	xpt_reserved;	/* space on outq that is rsvd */
+	struct mutex		xpt_mutex;	/* to serialize sending data */
+	spinlock_t		xpt_lock;	/* protects xpt_deferred
+						 * and xpt_auth_cache; also
+						 * held around xpt_users updates */
+	void			*xpt_auth_cache;/* auth cache */
+	struct list_head	xpt_deferred;	/* deferred requests that need
+						 * to be revisited */
+	struct sockaddr_storage	xpt_local;	/* local address */
+	size_t			xpt_locallen;	/* length of address */
+	struct sockaddr_storage	xpt_remote;	/* remote peer's address */
+	size_t			xpt_remotelen;	/* length of address */
+	struct rpc_wait_queue	xpt_bc_pending;	/* backchannel wait queue */
+	struct list_head	xpt_users;	/* callbacks on free */
+
+	struct net		*xpt_net;
+	struct rpc_xprt		*xpt_bc_xprt;	/* NFSv4.1 backchannel */
+};
+
+/* Remove @u from its list (and re-initialise it) under xpt_lock. */
+static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u)
+{
+	spin_lock(&xpt->xpt_lock);
+	list_del_init(&u->list);
+	spin_unlock(&xpt->xpt_lock);
+}
+
+/*
+ * Add @u to xpt->xpt_users under xpt_lock.  Returns 0 on success, or
+ * -ENOTCONN (without adding @u) when XPT_CLOSE is already set.
+ */
+static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u)
+{
+	spin_lock(&xpt->xpt_lock);
+	if (test_bit(XPT_CLOSE, &xpt->xpt_flags)) {
+		/*
+		 * The connection is about to be deleted soon (or,
+		 * worse, may already be deleted--in which case we've
+		 * already notified the xpt_users).
+		 */
+		spin_unlock(&xpt->xpt_lock);
+		return -ENOTCONN;
+	}
+	list_add(&u->list, &xpt->xpt_users);
+	spin_unlock(&xpt->xpt_lock);
+	return 0;
+}
+
+int	svc_reg_xprt_class(struct svc_xprt_class *);
+void	svc_unreg_xprt_class(struct svc_xprt_class *);
+void	svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
+		      struct svc_serv *);
+int	svc_create_xprt(struct svc_serv *, const char *, struct net *,
+			const int, const unsigned short, int);
+void	svc_xprt_enqueue(struct svc_xprt *xprt);
+void	svc_xprt_put(struct svc_xprt *xprt);
+void	svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
+void	svc_close_xprt(struct svc_xprt *xprt);
+int	svc_port_is_privileged(struct sockaddr *sin);
+int	svc_print_xprts(char *buf, int maxlen);
+struct	svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
+			struct net *net, const sa_family_t af,
+			const unsigned short port);
+int	svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen);
+void	svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *xprt);
+
+static inline void svc_xprt_get(struct svc_xprt *xprt)
+{
+	kref_get(&xprt->xpt_ref);
+}
+/* Copy @salen bytes of @sa into xpt_local; @salen is not range-checked here. */
+static inline void svc_xprt_set_local(struct svc_xprt *xprt,
+				      const struct sockaddr *sa,
+				      const size_t salen)
+{
+	memcpy(&xprt->xpt_local, sa, salen);
+	xprt->xpt_locallen = salen;
+}
+/* Copy @salen bytes of @sa into xpt_remote; @salen is not range-checked here. */
+static inline void svc_xprt_set_remote(struct svc_xprt *xprt,
+				       const struct sockaddr *sa,
+				       const size_t salen)
+{
+	memcpy(&xprt->xpt_remote, sa, salen);
+	xprt->xpt_remotelen = salen;
+}
+/* Port of @sa in host byte order; 0 for families other than AF_INET/AF_INET6. */
+static inline unsigned short svc_addr_port(const struct sockaddr *sa)
+{
+	const struct sockaddr_in *sin = (const struct sockaddr_in *)sa;
+	const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sa;
+
+	switch (sa->sa_family) {
+	case AF_INET:
+		return ntohs(sin->sin_port);
+	case AF_INET6:
+		return ntohs(sin6->sin6_port);
+	}
+
+	return 0;
+}
+
+/* Size of the sockaddr for @sa's family; calls BUG() for any other family. */
+static inline size_t svc_addr_len(const struct sockaddr *sa)
+{
+	switch (sa->sa_family) {
+	case AF_INET:
+		return sizeof(struct sockaddr_in);
+	case AF_INET6:
+		return sizeof(struct sockaddr_in6);
+	}
+	BUG();
+}
+
+static inline unsigned short svc_xprt_local_port(const struct svc_xprt *xprt)
+{
+	return svc_addr_port((const struct sockaddr *)&xprt->xpt_local);
+}
+
+static inline unsigned short svc_xprt_remote_port(const struct svc_xprt *xprt)
+{
+	return svc_addr_port((const struct sockaddr *)&xprt->xpt_remote);
+}
+
+/*
+ * Format @addr and its port into @buf (at most @len bytes, via snprintf)
+ * and return @buf.  Unknown families produce an "unknown address type"
+ * message instead.
+ */
+static inline char *__svc_print_addr(const struct sockaddr *addr,
+				     char *buf, const size_t len)
+{
+	const struct sockaddr_in *sin = (const struct sockaddr_in *)addr;
+	const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)addr;
+
+	switch (addr->sa_family) {
+	case AF_INET:
+		snprintf(buf, len, "%pI4, port=%u", &sin->sin_addr,
+			ntohs(sin->sin_port));
+		break;
+
+	case AF_INET6:
+		snprintf(buf, len, "%pI6, port=%u",
+			 &sin6->sin6_addr,
+			ntohs(sin6->sin6_port));
+		break;
+
+	default:
+		snprintf(buf, len, "unknown address type: %d", addr->sa_family);
+		break;
+	}
+
+	return buf;
+}
+#endif /* SUNRPC_SVC_XPRT_H */
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
new file mode 100644
index 000000000..8d71d6577
--- /dev/null
+++ b/include/linux/sunrpc/svcauth.h
@@ -0,0 +1,200 @@
+/*
+ * linux/include/linux/sunrpc/svcauth.h
+ *
+ * RPC server-side authentication stuff.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef _LINUX_SUNRPC_SVCAUTH_H_
+#define _LINUX_SUNRPC_SVCAUTH_H_
+
+#ifdef __KERNEL__
+
+#include <linux/string.h>
+#include <linux/sunrpc/msg_prot.h>
+#include <linux/sunrpc/cache.h>
+#include <linux/sunrpc/gss_api.h>
+#include <linux/hash.h>
+#include <linux/cred.h>
+
+struct svc_cred {
+	kuid_t			cr_uid;
+	kgid_t			cr_gid;
+	struct group_info	*cr_group_info;
+	u32			cr_flavor; /* pseudoflavor */
+	char			*cr_principal; /* for gss */
+	struct gss_api_mech	*cr_gss_mech;
+};
+
+/*
+ * Clear the three pointer members of @cred.  cr_uid, cr_gid and cr_flavor
+ * are left untouched.
+ */
+static inline void init_svc_cred(struct svc_cred *cred)
+{
+	cred->cr_group_info = NULL;
+	cred->cr_principal = NULL;
+	cred->cr_gss_mech = NULL;
+}
+
+/*
+ * Drop what the pointer members of @cred refer to (group info reference,
+ * principal string, gss mechanism), then reset them with init_svc_cred()
+ * so that a second call does not release them again.
+ */
+static inline void free_svc_cred(struct svc_cred *cred)
+{
+	if (cred->cr_group_info)
+		put_group_info(cred->cr_group_info);
+	kfree(cred->cr_principal);
+	gss_mech_put(cred->cr_gss_mech);
+	init_svc_cred(cred);
+}
+
+struct svc_rqst;		/* forward decl */
+struct in6_addr;
+
+/* Authentication is done in the context of a domain.
+ *
+ * Currently, the nfs server uses the auth_domain to stand
+ * for the "client" listed in /etc/exports.
+ *
+ * More generally, a domain might represent a group of clients using
+ * a common mechanism for authentication and having a common mapping
+ * between local identity (uid) and network identity.  All clients
+ * in a domain have similar general access rights.  Each domain can
+ * contain multiple principals which will have different specific rights
+ * based on normal Discretionary Access Control.
+ *
+ * A domain is created by an authentication flavour module based on name
+ * only.  Userspace then fills in detail on demand.
+ *
+ * In the case of auth_unix and auth_null, the auth_domain is also
+ * associated with entries in another cache representing the mapping
+ * of ip addresses to the given client.
+ */
+struct auth_domain {
+	struct kref		ref;
+	struct hlist_node	hash;
+	char			*name;
+	struct auth_ops		*flavour;
+};
+
+/*
+ * Each authentication flavour registers an auth_ops
+ * structure.
+ * name is simply the name.
+ * flavour gives the auth flavour. It determines where the flavour is registered
+ * accept() is given a request and should verify it.
+ *   It should inspect the authenticator and verifier, and possibly the data.
+ *    If there is a problem with the authentication *authp should be set.
+ *    The return value of accept() can indicate:
+ *      OK - authorised. client and credential are set in rqstp.
+ *           reqbuf points to arguments
+ *           resbuf points to good place for results.  verifier
+ *             is (probably) already in place.  Certainly space is
+ *	       reserved for it.
+ *      DROP - simply drop the request. It may have been deferred
+ *      GARBAGE - rpc garbage_args error
+ *      SYSERR - rpc system_err error
+ *      DENIED - authp holds reason for denial.
+ *      COMPLETE - the reply is encoded already and ready to be sent; no
+ *		further processing is necessary.  (This is used for processing
+ *		null procedure calls which are used to set up encryption
+ *		contexts.)
+ *
+ *   accept is passed the proc number so that it can accept NULL rpc requests
+ *   even if it cannot authenticate the client (as is sometimes appropriate).
+ *
+ * release() is given a request after the procedure has been run.
+ *  It should sign/encrypt the results if needed
+ * It should return:
+ *    OK - the resbuf is ready to be sent
+ *    DROP - the reply should be quietly dropped
+ *    DENIED - authp holds a reason for MSG_DENIED
+ *    SYSERR - rpc system_err
+ *
+ * domain_release()
+ *   This call releases a domain.
+ * set_client()
+ *   Given a pending request (struct svc_rqst), finds and assigns
+ *   an appropriate 'auth_domain' as the client.
+ */
+struct auth_ops {
+	char *	name;
+	struct module *owner;
+	int	flavour;
+	int	(*accept)(struct svc_rqst *rq, __be32 *authp);
+	int	(*release)(struct svc_rqst *rq);
+	void	(*domain_release)(struct auth_domain *);
+	int	(*set_client)(struct svc_rqst *rq);
+};
+
+#define	SVC_GARBAGE	1
+#define	SVC_SYSERR	2
+#define	SVC_VALID	3
+#define	SVC_NEGATIVE	4
+#define	SVC_OK		5
+#define	SVC_DROP	6
+#define	SVC_CLOSE	7	/* Like SVC_DROP, but request is definitely
+				 * lost so if there is a tcp connection, it
+				 * should be closed
+				 */
+#define	SVC_DENIED	8
+#define	SVC_PENDING	9
+#define	SVC_COMPLETE	10
+
+struct svc_xprt;
+
+extern int	svc_authenticate(struct svc_rqst *rqstp, __be32 *authp);
+extern int	svc_authorise(struct svc_rqst *rqstp);
+extern int	svc_set_client(struct svc_rqst *rqstp);
+extern int	svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops);
+extern void	svc_auth_unregister(rpc_authflavor_t flavor);
+
+extern struct auth_domain *unix_domain_find(char *name);
+extern void auth_domain_put(struct auth_domain *item);
+extern int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom);
+extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new);
+extern struct auth_domain *auth_domain_find(char *name);
+extern struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr);
+extern int auth_unix_forget_old(struct auth_domain *dom);
+extern void svcauth_unix_purge(struct net *net);
+extern void svcauth_unix_info_release(struct svc_xprt *xpt);
+extern int svcauth_unix_set_client(struct svc_rqst *rqstp);
+
+extern int unix_gid_cache_create(struct net *net);
+extern void unix_gid_cache_destroy(struct net *net);
+
+/*
+ * Hash the NUL-terminated string @name down to @bits bits.
+ *
+ * Bytes are shifted into 'l' one at a time; every sizeof(long) bytes the
+ * accumulated word is mixed into 'hash' with hash_long().  At the
+ * terminator the low byte of the length is fed in as a final byte and
+ * 'len' is set to -1, so that the increment brings it to 0: this forces
+ * one last mix and ends the loop.  The top @bits bits of the result are
+ * returned; @bits == 0 would shift by the full word width.
+ */
+static inline unsigned long hash_str(char *name, int bits)
+{
+	unsigned long hash = 0;
+	unsigned long l = 0;
+	int len = 0;
+	unsigned char c;
+	do {
+		if (unlikely(!(c = *name++))) {
+			c = (char)len; len = -1;
+		}
+		l = (l << 8) | c;
+		len++;
+		if ((len & (BITS_PER_LONG/8-1))==0)
+			hash = hash_long(hash^l, BITS_PER_LONG);
+	} while (len);
+	return hash >> (BITS_PER_LONG - bits);
+}
+
+/*
+ * Same scheme as hash_str(), but over exactly @length bytes of @buf
+ * (embedded NUL bytes are hashed like any other byte).  The low byte of
+ * the length is again appended as the final input byte.
+ */
+static inline unsigned long hash_mem(char *buf, int length, int bits)
+{
+	unsigned long hash = 0;
+	unsigned long l = 0;
+	int len = 0;
+	unsigned char c;
+	do {
+		if (len == length) {
+			c = (char)len; len = -1;
+		} else
+			c = *buf++;
+		l = (l << 8) | c;
+		len++;
+		if ((len & (BITS_PER_LONG/8-1))==0)
+			hash = hash_long(hash^l, BITS_PER_LONG);
+	} while (len);
+	return hash >> (BITS_PER_LONG - bits);
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_SUNRPC_SVCAUTH_H_ */
diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h
new file mode 100644
index 000000000..726aff1a5
--- /dev/null
+++ b/include/linux/sunrpc/svcauth_gss.h
@@ -0,0 +1,27 @@
+/*
+ * linux/include/linux/sunrpc/svcauth_gss.h
+ *
+ * Bruce Fields <bfields@umich.edu>
+ * Copyright (c) 2002 The Regents of the University of Michigan
+ */
+
+#ifndef _LINUX_SUNRPC_SVCAUTH_GSS_H
+#define _LINUX_SUNRPC_SVCAUTH_GSS_H
+
+#ifdef __KERNEL__
+#include <linux/sched.h>
+#include <linux/sunrpc/types.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/svcauth.h>
+#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/auth_gss.h>
+
+int gss_svc_init(void);
+void gss_svc_shutdown(void);
+int gss_svc_init_net(struct net *net);
+void gss_svc_shutdown_net(struct net *net);
+int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name);
+u32 svcauth_gss_flavor(struct auth_domain *dom);
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
new file mode 100644
index 000000000..2e780134f
--- /dev/null
+++ b/include/linux/sunrpc/svcsock.h
@@ -0,0 +1,74 @@
+/*
+ * linux/include/linux/sunrpc/svcsock.h
+ *
+ * RPC server socket I/O.
+ * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#ifndef SUNRPC_SVCSOCK_H +#define SUNRPC_SVCSOCK_H + +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/svc_xprt.h> + +/* + * RPC server socket. + */ +struct svc_sock { + struct svc_xprt sk_xprt; + struct socket * sk_sock; /* berkeley socket layer */ + struct sock * sk_sk; /* INET layer */ + + /* We keep the old state_change and data_ready CB's here */ + void (*sk_ostate)(struct sock *); + void (*sk_odata)(struct sock *); + void (*sk_owspace)(struct sock *); + + /* private TCP part */ + /* On-the-wire fragment header: */ + __be32 sk_reclen; + /* As we receive a record, this includes the length received so + * far (including the fragment header): */ + u32 sk_tcplen; + /* Total length of the data (not including fragment headers) + * received so far in the fragments making up this rpc: */ + u32 sk_datalen; + + struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ +}; + +static inline u32 svc_sock_reclen(struct svc_sock *svsk) +{ + return ntohl(svsk->sk_reclen) & RPC_FRAGMENT_SIZE_MASK; +} + +static inline u32 svc_sock_final_rec(struct svc_sock *svsk) +{ + return ntohl(svsk->sk_reclen) & RPC_LAST_STREAM_FRAGMENT; +} + +/* + * Function prototypes. 
+ */ +void svc_close_net(struct svc_serv *, struct net *); +int svc_recv(struct svc_rqst *, long); +int svc_send(struct svc_rqst *); +void svc_drop(struct svc_rqst *); +void svc_sock_update_bufs(struct svc_serv *serv); +bool svc_alien_sock(struct net *net, int fd); +int svc_addsock(struct svc_serv *serv, const int fd, + char *name_return, const size_t len); +void svc_init_xprt_sock(void); +void svc_cleanup_xprt_sock(void); +struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot); +void svc_sock_destroy(struct svc_xprt *); + +/* + * svc_makesock socket characteristics + */ +#define SVC_SOCK_DEFAULTS (0U) +#define SVC_SOCK_ANONYMOUS (1U << 0) /* don't register with pmap */ +#define SVC_SOCK_TEMPORARY (1U << 1) /* flag socket as temporary */ + +#endif /* SUNRPC_SVCSOCK_H */ diff --git a/include/linux/sunrpc/timer.h b/include/linux/sunrpc/timer.h new file mode 100644 index 000000000..697d6e69d --- /dev/null +++ b/include/linux/sunrpc/timer.h @@ -0,0 +1,49 @@ +/* + * linux/include/linux/sunrpc/timer.h + * + * Declarations for the RPC transport timer. 
/*
 * Round-trip-time estimator state.  The per-timer arrays are indexed by
 * (timer - 1); timer value 0 has no slot.
 */
struct rpc_rtt {
	unsigned long timeo;	/* default timeout value */
	unsigned long srtt[5];	/* smoothed round trip time << 3 */
	unsigned long sdrtt[5];	/* smoothed medium deviation of RTT */
	int ntimeouts[5];	/* Number of timeouts for the last request */
};


extern void rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo);
extern void rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m);
extern unsigned long rpc_calc_rto(struct rpc_rtt *rt, unsigned timer);

/*
 * rpc_set_timeo - record the timeout count of the latest request.
 * @rt:     estimator state to update
 * @timer:  1-based timer index; 0 or any value without a slot in
 *          rt->ntimeouts is ignored
 * @ntimeo: number of timeouts the request experienced
 *
 * If @ntimeo is lower than the stored count, the stored count is decayed
 * by one (never below zero).  Otherwise the stored count is replaced by
 * @ntimeo, capped at 8.
 */
static inline void rpc_set_timeo(struct rpc_rtt *rt, int timer, int ntimeo)
{
	const int nslots = sizeof(rt->ntimeouts) / sizeof(rt->ntimeouts[0]);
	int *t;

	/* reject indices that would land outside ntimeouts[] */
	if (timer < 1 || timer > nslots)
		return;
	t = &rt->ntimeouts[timer - 1];
	if (ntimeo < *t) {
		if (*t > 0)
			(*t)--;
	} else {
		if (ntimeo > 8)
			ntimeo = 8;
		*t = ntimeo;
	}
}

/*
 * rpc_ntimeo - fetch the stored timeout count for @timer.
 *
 * Returns 0 for timer 0 and for any index without a slot in
 * rt->ntimeouts; otherwise returns rt->ntimeouts[timer - 1].
 */
static inline int rpc_ntimeo(struct rpc_rtt *rt, int timer)
{
	const int nslots = sizeof(rt->ntimeouts) / sizeof(rt->ntimeouts[0]);

	if (timer < 1 || timer > nslots)
		return 0;
	return rt->ntimeouts[timer - 1];
}
+ * + * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> + */ + +#ifndef _LINUX_SUNRPC_TYPES_H_ +#define _LINUX_SUNRPC_TYPES_H_ + +#include <linux/timer.h> +#include <linux/workqueue.h> +#include <linux/sunrpc/debug.h> +#include <linux/list.h> + +/* + * Shorthands + */ +#define signalled() (signal_pending(current)) + +#endif /* _LINUX_SUNRPC_TYPES_H_ */ diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h new file mode 100644 index 000000000..70c6b92e1 --- /dev/null +++ b/include/linux/sunrpc/xdr.h @@ -0,0 +1,235 @@ +/* + * XDR standard data types and function declarations + * + * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de> + * + * Based on: + * RFC 4506 "XDR: External Data Representation Standard", May 2006 + */ + +#ifndef _SUNRPC_XDR_H_ +#define _SUNRPC_XDR_H_ + +#ifdef __KERNEL__ + +#include <linux/uio.h> +#include <asm/byteorder.h> +#include <asm/unaligned.h> +#include <linux/scatterlist.h> + +/* + * Buffer adjustment + */ +#define XDR_QUADLEN(l) (((l) + 3) >> 2) + +/* + * Generic opaque `network object.' At the kernel level, this type + * is used only by lockd. + */ +#define XDR_MAX_NETOBJ 1024 +struct xdr_netobj { + unsigned int len; + u8 * data; +}; + +/* + * This is the legacy generic XDR function. rqstp is either a rpc_rqst + * (client side) or svc_rqst pointer (server side). + * Encode functions always assume there's enough room in the buffer. + */ +typedef int (*kxdrproc_t)(void *rqstp, __be32 *data, void *obj); + +/* + * Basic structure for transmission/reception of a client XDR message. + * Features a header (for a linear buffer containing RPC headers + * and the data payload for short messages), and then an array of + * pages. + * The tail iovec allows you to append data after the page array. Its + * main interest is for appending padding to the pages in order to + * satisfy the int_32-alignment requirements in RFC1832. 
+ * + * For the future, we might want to string several of these together + * in a list if anybody wants to make use of NFSv4 COMPOUND + * operations and/or has a need for scatter/gather involving pages. + */ +struct xdr_buf { + struct kvec head[1], /* RPC header + non-page data */ + tail[1]; /* Appended after page data */ + + struct page ** pages; /* Array of pages */ + unsigned int page_base, /* Start of page data */ + page_len, /* Length of page data */ + flags; /* Flags for data disposition */ +#define XDRBUF_READ 0x01 /* target of file read */ +#define XDRBUF_WRITE 0x02 /* source of file write */ + + unsigned int buflen, /* Total length of storage buffer */ + len; /* Length of XDR encoded message */ +}; + +/* + * pre-xdr'ed macros. + */ + +#define xdr_zero cpu_to_be32(0) +#define xdr_one cpu_to_be32(1) +#define xdr_two cpu_to_be32(2) + +#define rpc_success cpu_to_be32(RPC_SUCCESS) +#define rpc_prog_unavail cpu_to_be32(RPC_PROG_UNAVAIL) +#define rpc_prog_mismatch cpu_to_be32(RPC_PROG_MISMATCH) +#define rpc_proc_unavail cpu_to_be32(RPC_PROC_UNAVAIL) +#define rpc_garbage_args cpu_to_be32(RPC_GARBAGE_ARGS) +#define rpc_system_err cpu_to_be32(RPC_SYSTEM_ERR) +#define rpc_drop_reply cpu_to_be32(RPC_DROP_REPLY) + +#define rpc_auth_ok cpu_to_be32(RPC_AUTH_OK) +#define rpc_autherr_badcred cpu_to_be32(RPC_AUTH_BADCRED) +#define rpc_autherr_rejectedcred cpu_to_be32(RPC_AUTH_REJECTEDCRED) +#define rpc_autherr_badverf cpu_to_be32(RPC_AUTH_BADVERF) +#define rpc_autherr_rejectedverf cpu_to_be32(RPC_AUTH_REJECTEDVERF) +#define rpc_autherr_tooweak cpu_to_be32(RPC_AUTH_TOOWEAK) +#define rpcsec_gsserr_credproblem cpu_to_be32(RPCSEC_GSS_CREDPROBLEM) +#define rpcsec_gsserr_ctxproblem cpu_to_be32(RPCSEC_GSS_CTXPROBLEM) +#define rpc_autherr_oldseqnum cpu_to_be32(101) + +/* + * Miscellaneous XDR helper functions + */ +__be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len); +__be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len); +__be32 
*xdr_encode_string(__be32 *p, const char *s); +__be32 *xdr_decode_string_inplace(__be32 *p, char **sp, unsigned int *lenp, + unsigned int maxlen); +__be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *); +__be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); + +void xdr_inline_pages(struct xdr_buf *, unsigned int, + struct page **, unsigned int, unsigned int); +void xdr_terminate_string(struct xdr_buf *, const u32); + +static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len) +{ + return xdr_encode_opaque(p, s, len); +} + +/* + * Decode 64bit quantities (NFSv3 support) + */ +static inline __be32 * +xdr_encode_hyper(__be32 *p, __u64 val) +{ + put_unaligned_be64(val, p); + return p + 2; +} + +static inline __be32 * +xdr_decode_hyper(__be32 *p, __u64 *valp) +{ + *valp = get_unaligned_be64(p); + return p + 2; +} + +static inline __be32 * +xdr_decode_opaque_fixed(__be32 *p, void *ptr, unsigned int len) +{ + memcpy(ptr, p, len); + return p + XDR_QUADLEN(len); +} + +/* + * Adjust kvec to reflect end of xdr'ed data (RPC client XDR) + */ +static inline int +xdr_adjust_iovec(struct kvec *iov, __be32 *p) +{ + return iov->iov_len = ((u8 *) p - (u8 *) iov->iov_base); +} + +/* + * XDR buffer helper functions + */ +extern void xdr_shift_buf(struct xdr_buf *, size_t); +extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); +extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); +extern void xdr_buf_trim(struct xdr_buf *, unsigned int); +extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int); +extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); +extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); + +/* + * Helper structure for copying from an sk_buff. 
+ */ +struct xdr_skb_reader { + struct sk_buff *skb; + unsigned int offset; + size_t count; + __wsum csum; +}; + +typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len); + +size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len); +extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *); +extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, + struct xdr_skb_reader *, xdr_skb_read_actor); + +extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32); +extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *); + +struct xdr_array2_desc; +typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem); +struct xdr_array2_desc { + unsigned int elem_size; + unsigned int array_len; + unsigned int array_maxlen; + xdr_xcode_elem_t xcode; +}; + +extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base, + struct xdr_array2_desc *desc); +extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base, + struct xdr_array2_desc *desc); +extern void _copy_from_pages(char *p, struct page **pages, size_t pgbase, + size_t len); + +/* + * Provide some simple tools for XDR buffer overflow-checking etc. + */ +struct xdr_stream { + __be32 *p; /* start of available buffer */ + struct xdr_buf *buf; /* XDR buffer to read/write */ + + __be32 *end; /* end of available buffer space */ + struct kvec *iov; /* pointer to the current kvec */ + struct kvec scratch; /* Scratch buffer */ + struct page **page_ptr; /* pointer to the current page */ + unsigned int nwords; /* Remaining decode buffer length */ +}; + +/* + * These are the xdr_stream style generic XDR encode and decode functions. 
+ */ +typedef void (*kxdreproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); +typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); + +extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); +extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); +extern void xdr_commit_encode(struct xdr_stream *xdr); +extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); +extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); +extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, + unsigned int base, unsigned int len); +extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr); +extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); +extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, unsigned int len); +extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); +extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); +extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); +extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); +extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); + +#endif /* __KERNEL__ */ + +#endif /* _SUNRPC_XDR_H_ */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h new file mode 100644 index 000000000..8b93ef53d --- /dev/null +++ b/include/linux/sunrpc/xprt.h @@ -0,0 +1,436 @@ +/* + * linux/include/linux/sunrpc/xprt.h + * + * Declarations for the RPC transport interface. 
+ * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#ifndef _LINUX_SUNRPC_XPRT_H +#define _LINUX_SUNRPC_XPRT_H + +#include <linux/uio.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/ktime.h> +#include <linux/sunrpc/sched.h> +#include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/msg_prot.h> + +#ifdef __KERNEL__ + +#define RPC_MIN_SLOT_TABLE (2U) +#define RPC_DEF_SLOT_TABLE (16U) +#define RPC_MAX_SLOT_TABLE_LIMIT (65536U) +#define RPC_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE_LIMIT + +#define RPC_CWNDSHIFT (8U) +#define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) +#define RPC_INITCWND RPC_CWNDSCALE +#define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) +#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) + +/* + * This describes a timeout strategy + */ +struct rpc_timeout { + unsigned long to_initval, /* initial timeout */ + to_maxval, /* max timeout */ + to_increment; /* if !exponential */ + unsigned int to_retries; /* max # of retries */ + unsigned char to_exponential; +}; + +enum rpc_display_format_t { + RPC_DISPLAY_ADDR = 0, + RPC_DISPLAY_PORT, + RPC_DISPLAY_PROTO, + RPC_DISPLAY_HEX_ADDR, + RPC_DISPLAY_HEX_PORT, + RPC_DISPLAY_NETID, + RPC_DISPLAY_MAX, +}; + +struct rpc_task; +struct rpc_xprt; +struct seq_file; + +/* + * This describes a complete RPC request + */ +struct rpc_rqst { + /* + * This is the user-visible part + */ + struct rpc_xprt * rq_xprt; /* RPC client */ + struct xdr_buf rq_snd_buf; /* send buffer */ + struct xdr_buf rq_rcv_buf; /* recv buffer */ + + /* + * This is the private part + */ + struct rpc_task * rq_task; /* RPC task data */ + struct rpc_cred * rq_cred; /* Bound cred */ + __be32 rq_xid; /* request XID */ + int rq_cong; /* has incremented xprt->cong */ + u32 rq_seqno; /* gss seq no. used on req. 
*/ + int rq_enc_pages_num; + struct page **rq_enc_pages; /* scratch pages for use by + gss privacy code */ + void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ + struct list_head rq_list; + + __u32 * rq_buffer; /* XDR encode buffer */ + size_t rq_callsize, + rq_rcvsize; + size_t rq_xmit_bytes_sent; /* total bytes sent */ + size_t rq_reply_bytes_recvd; /* total reply bytes */ + /* received */ + + struct xdr_buf rq_private_buf; /* The receive buffer + * used in the softirq. + */ + unsigned long rq_majortimeo; /* major timeout alarm */ + unsigned long rq_timeout; /* Current timeout value */ + ktime_t rq_rtt; /* round-trip time */ + unsigned int rq_retries; /* # of retries */ + unsigned int rq_connect_cookie; + /* A cookie used to track the + state of the transport + connection */ + + /* + * Partial send handling + */ + u32 rq_bytes_sent; /* Bytes we have sent */ + + ktime_t rq_xtime; /* transmit time stamp */ + int rq_ntrans; + +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + struct list_head rq_bc_list; /* Callback service list */ + unsigned long rq_bc_pa_state; /* Backchannel prealloc state */ + struct list_head rq_bc_pa_list; /* Backchannel prealloc list */ +#endif /* CONFIG_SUNRPC_BACKCHANEL */ +}; +#define rq_svec rq_snd_buf.head +#define rq_slen rq_snd_buf.len + +struct rpc_xprt_ops { + void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); + int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); + void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); + void (*alloc_slot)(struct rpc_xprt *xprt, struct rpc_task *task); + void (*rpcbind)(struct rpc_task *task); + void (*set_port)(struct rpc_xprt *xprt, unsigned short port); + void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); + void * (*buf_alloc)(struct rpc_task *task, size_t size); + void (*buf_free)(void *buffer); + int (*send_request)(struct rpc_task *task); + void (*set_retrans_timeout)(struct rpc_task *task); + void 
(*timer)(struct rpc_xprt *xprt, struct rpc_task *task); + void (*release_request)(struct rpc_task *task); + void (*close)(struct rpc_xprt *xprt); + void (*destroy)(struct rpc_xprt *xprt); + void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); +}; + +/* + * RPC transport identifiers + * + * To preserve compatibility with the historical use of raw IP protocol + * id's for transport selection, UDP and TCP identifiers are specified + * with the previous values. No such restriction exists for new transports, + * except that they may not collide with these values (17 and 6, + * respectively). + */ +#define XPRT_TRANSPORT_BC (1 << 31) +enum xprt_transports { + XPRT_TRANSPORT_UDP = IPPROTO_UDP, + XPRT_TRANSPORT_TCP = IPPROTO_TCP, + XPRT_TRANSPORT_BC_TCP = IPPROTO_TCP | XPRT_TRANSPORT_BC, + XPRT_TRANSPORT_RDMA = 256, + XPRT_TRANSPORT_LOCAL = 257, +}; + +struct rpc_xprt { + atomic_t count; /* Reference count */ + struct rpc_xprt_ops * ops; /* transport methods */ + + const struct rpc_timeout *timeout; /* timeout parms */ + struct sockaddr_storage addr; /* server address */ + size_t addrlen; /* size of server address */ + int prot; /* IP protocol */ + + unsigned long cong; /* current congestion */ + unsigned long cwnd; /* congestion window */ + + size_t max_payload; /* largest RPC payload size, + in bytes */ + unsigned int tsh_size; /* size of transport specific + header */ + + struct rpc_wait_queue binding; /* requests waiting on rpcbind */ + struct rpc_wait_queue sending; /* requests waiting to send */ + struct rpc_wait_queue pending; /* requests in flight */ + struct rpc_wait_queue backlog; /* waiting for slot */ + struct list_head free; /* free slots */ + unsigned int max_reqs; /* max number of slots */ + unsigned int min_reqs; /* min number of slots */ + atomic_t num_reqs; /* total slots */ + unsigned long state; /* transport state */ + unsigned char resvport : 1; /* use a reserved port */ + unsigned int swapper; /* we're swapping over this + transport */ + 
unsigned int bind_index; /* bind function index */ + + /* + * Connection of transports + */ + unsigned long bind_timeout, + reestablish_timeout; + unsigned int connect_cookie; /* A cookie that gets bumped + every time the transport + is reconnected */ + + /* + * Disconnection of idle transports + */ + struct work_struct task_cleanup; + struct timer_list timer; + unsigned long last_used, + idle_timeout; + + /* + * Send stuff + */ + spinlock_t transport_lock; /* lock transport info */ + spinlock_t reserve_lock; /* lock slot table */ + u32 xid; /* Next XID value to use */ + struct rpc_task * snd_task; /* Task blocked in send */ + struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + struct svc_serv *bc_serv; /* The RPC service which will */ + /* process the callback */ + unsigned int bc_alloc_count; /* Total number of preallocs */ + spinlock_t bc_pa_lock; /* Protects the preallocated + * items */ + struct list_head bc_pa_list; /* List of preallocated + * backchannel rpc_rqst's */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ + struct list_head recv; + + struct { + unsigned long bind_count, /* total number of binds */ + connect_count, /* total number of connects */ + connect_start, /* connect start timestamp */ + connect_time, /* jiffies waiting for connect */ + sends, /* how many complete requests */ + recvs, /* how many complete requests */ + bad_xids, /* lookup_rqst didn't find XID */ + max_slots; /* max rpc_slots used */ + + unsigned long long req_u, /* average requests on the wire */ + bklog_u, /* backlog queue utilization */ + sending_u, /* send q utilization */ + pending_u; /* pend q utilization */ + } stat; + + struct net *xprt_net; + const char *servername; + const char *address_strings[RPC_DISPLAY_MAX]; +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) + struct dentry *debugfs; /* debugfs directory */ +#endif +}; + +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +/* + * Backchannel flags + */ +#define RPC_BC_PA_IN_USE 0x0001 /* Preallocated 
/*
 * bc_prealloc - does @req have RPC_BC_PA_IN_USE set in rq_bc_pa_state?
 *
 * Without CONFIG_SUNRPC_BACKCHANNEL the rq_bc_pa_state field does not
 * exist and the answer is always 0.
 *
 * NOTE(review): RPC_BC_PA_IN_USE is defined as 0x0001 and is handed to
 * test_bit() as a bit number, not as a mask - confirm every user of the
 * flag goes through the bitops helpers with the same constant.
 */
static inline int bc_prealloc(struct rpc_rqst *req)
{
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
	return test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
#else
	return 0;
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
}
int num_prealloc, + unsigned int max_req); +void xprt_free(struct rpc_xprt *); + +/** + * xprt_get - return a reference to an RPC transport. + * @xprt: pointer to the transport + * + */ +static inline struct rpc_xprt *xprt_get(struct rpc_xprt *xprt) +{ + if (atomic_inc_not_zero(&xprt->count)) + return xprt; + return NULL; +} + +static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p) +{ + return p + xprt->tsh_size; +} + +/* + * Transport switch helper functions + */ +int xprt_register_transport(struct xprt_class *type); +int xprt_unregister_transport(struct xprt_class *type); +int xprt_load_transport(const char *); +void xprt_set_retrans_timeout_def(struct rpc_task *task); +void xprt_set_retrans_timeout_rtt(struct rpc_task *task); +void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); +void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action); +void xprt_write_space(struct rpc_xprt *xprt); +void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result); +struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid); +void xprt_complete_rqst(struct rpc_task *task, int copied); +void xprt_release_rqst_cong(struct rpc_task *task); +void xprt_disconnect_done(struct rpc_xprt *xprt); +void xprt_force_disconnect(struct rpc_xprt *xprt); +void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); +int xs_swapper(struct rpc_xprt *xprt, int enable); + +bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); +void xprt_unlock_connect(struct rpc_xprt *, void *); + +/* + * Reserved bit positions in xprt->state + */ +#define XPRT_LOCKED (0) +#define XPRT_CONNECTED (1) +#define XPRT_CONNECTING (2) +#define XPRT_CLOSE_WAIT (3) +#define XPRT_BOUND (4) +#define XPRT_BINDING (5) +#define XPRT_CLOSING (6) +#define XPRT_CONGESTED (9) + +static inline void xprt_set_connected(struct rpc_xprt *xprt) +{ + set_bit(XPRT_CONNECTED, &xprt->state); +} + +static inline void 
xprt_clear_connected(struct rpc_xprt *xprt) +{ + clear_bit(XPRT_CONNECTED, &xprt->state); +} + +static inline int xprt_connected(struct rpc_xprt *xprt) +{ + return test_bit(XPRT_CONNECTED, &xprt->state); +} + +static inline int xprt_test_and_set_connected(struct rpc_xprt *xprt) +{ + return test_and_set_bit(XPRT_CONNECTED, &xprt->state); +} + +static inline int xprt_test_and_clear_connected(struct rpc_xprt *xprt) +{ + return test_and_clear_bit(XPRT_CONNECTED, &xprt->state); +} + +static inline void xprt_clear_connecting(struct rpc_xprt *xprt) +{ + smp_mb__before_atomic(); + clear_bit(XPRT_CONNECTING, &xprt->state); + smp_mb__after_atomic(); +} + +static inline int xprt_connecting(struct rpc_xprt *xprt) +{ + return test_bit(XPRT_CONNECTING, &xprt->state); +} + +static inline int xprt_test_and_set_connecting(struct rpc_xprt *xprt) +{ + return test_and_set_bit(XPRT_CONNECTING, &xprt->state); +} + +static inline void xprt_set_bound(struct rpc_xprt *xprt) +{ + test_and_set_bit(XPRT_BOUND, &xprt->state); +} + +static inline int xprt_bound(struct rpc_xprt *xprt) +{ + return test_bit(XPRT_BOUND, &xprt->state); +} + +static inline void xprt_clear_bound(struct rpc_xprt *xprt) +{ + clear_bit(XPRT_BOUND, &xprt->state); +} + +static inline void xprt_clear_binding(struct rpc_xprt *xprt) +{ + smp_mb__before_atomic(); + clear_bit(XPRT_BINDING, &xprt->state); + smp_mb__after_atomic(); +} + +static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt) +{ + return test_and_set_bit(XPRT_BINDING, &xprt->state); +} + +#endif /* __KERNEL__*/ + +#endif /* _LINUX_SUNRPC_XPRT_H */ diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h new file mode 100644 index 000000000..c984c8598 --- /dev/null +++ b/include/linux/sunrpc/xprtrdma.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _LINUX_SUNRPC_XPRTRDMA_H +#define _LINUX_SUNRPC_XPRTRDMA_H + +/* + * Constants. Max RPC/NFS header is big enough to account for + * additional marshaling buffers passed down by Linux client. 
+ * + * RDMA header is currently fixed max size, and is big enough for a + * fully-chunked NFS message (read chunks are the largest). Note only + * a single chunk type per message is supported currently. + */ +#define RPCRDMA_MIN_SLOT_TABLE (2U) +#define RPCRDMA_DEF_SLOT_TABLE (32U) +#define RPCRDMA_MAX_SLOT_TABLE (256U) + +#define RPCRDMA_DEF_INLINE (1024) /* default inline max */ + +#define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ + +/* memory registration strategies */ +enum rpcrdma_memreg { + RPCRDMA_BOUNCEBUFFERS = 0, + RPCRDMA_REGISTER, + RPCRDMA_MEMWINDOWS, + RPCRDMA_MEMWINDOWS_ASYNC, + RPCRDMA_MTHCAFMR, + RPCRDMA_FRMR, + RPCRDMA_ALLPHYSICAL, + RPCRDMA_LAST +}; + +#endif /* _LINUX_SUNRPC_XPRTRDMA_H */ diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h new file mode 100644 index 000000000..7591788e9 --- /dev/null +++ b/include/linux/sunrpc/xprtsock.h @@ -0,0 +1,81 @@ +/* + * linux/include/linux/sunrpc/xprtsock.h + * + * Declarations for the RPC transport socket provider. 
+ */ + +#ifndef _LINUX_SUNRPC_XPRTSOCK_H +#define _LINUX_SUNRPC_XPRTSOCK_H + +#ifdef __KERNEL__ + +int init_socket_xprt(void); +void cleanup_socket_xprt(void); + +#define RPC_MIN_RESVPORT (1U) +#define RPC_MAX_RESVPORT (65535U) +#define RPC_DEF_MIN_RESVPORT (665U) +#define RPC_DEF_MAX_RESVPORT (1023U) + +struct sock_xprt { + struct rpc_xprt xprt; + + /* + * Network layer + */ + struct socket * sock; + struct sock * inet; + + /* + * State of TCP reply receive + */ + __be32 tcp_fraghdr, + tcp_xid, + tcp_calldir; + + u32 tcp_offset, + tcp_reclen; + + unsigned long tcp_copied, + tcp_flags; + + /* + * Connection of transports + */ + struct delayed_work connect_worker; + struct sockaddr_storage srcaddr; + unsigned short srcport; + + /* + * UDP socket buffer size parameters + */ + size_t rcvsize, + sndsize; + + /* + * Saved socket callback addresses + */ + void (*old_data_ready)(struct sock *); + void (*old_state_change)(struct sock *); + void (*old_write_space)(struct sock *); + void (*old_error_report)(struct sock *); +}; + +/* + * TCP receive state flags + */ +#define TCP_RCV_LAST_FRAG (1UL << 0) +#define TCP_RCV_COPY_FRAGHDR (1UL << 1) +#define TCP_RCV_COPY_XID (1UL << 2) +#define TCP_RCV_COPY_DATA (1UL << 3) +#define TCP_RCV_READ_CALLDIR (1UL << 4) +#define TCP_RCV_COPY_CALLDIR (1UL << 5) + +/* + * TCP RPC flags + */ +#define TCP_RPC_REPLY (1UL << 6) + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_SUNRPC_XPRTSOCK_H */ |