diff options
Diffstat (limited to 'arch/s390/hypfs')
-rw-r--r-- | arch/s390/hypfs/Makefile | 8 | ||||
-rw-r--r-- | arch/s390/hypfs/hypfs.h | 76 | ||||
-rw-r--r-- | arch/s390/hypfs/hypfs_dbfs.c | 104 | ||||
-rw-r--r-- | arch/s390/hypfs/hypfs_diag.c | 769 | ||||
-rw-r--r-- | arch/s390/hypfs/hypfs_diag0c.c | 135 | ||||
-rw-r--r-- | arch/s390/hypfs/hypfs_sprp.c | 141 | ||||
-rw-r--r-- | arch/s390/hypfs/hypfs_vm.c | 287 | ||||
-rw-r--r-- | arch/s390/hypfs/inode.c | 522 |
8 files changed, 2042 insertions, 0 deletions
diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile
new file mode 100644
index 000000000..2ee25ba25
--- /dev/null
+++ b/arch/s390/hypfs/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the linux hypfs filesystem routines.
+#
+
+obj-$(CONFIG_S390_HYPFS_FS) += s390_hypfs.o
+
+s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o hypfs_sprp.o
+s390_hypfs-objs += hypfs_diag0c.o
diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
new file mode 100644
index 000000000..eecde500e
--- /dev/null
+++ b/arch/s390/hypfs/hypfs.h
@@ -0,0 +1,76 @@
+/*
+ * Hypervisor filesystem for Linux on s390.
+ *
+ * Copyright IBM Corp. 2006
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#ifndef _HYPFS_H_
+#define _HYPFS_H_
+
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/debugfs.h>
+#include <linux/workqueue.h>
+#include <linux/kref.h>
+#include <asm/hypfs.h>
+
+#define REG_FILE_MODE 0440
+#define UPDATE_FILE_MODE 0220
+#define DIR_MODE 0550
+
+extern struct dentry *hypfs_mkdir(struct dentry *parent, const char *name);
+
+extern struct dentry *hypfs_create_u64(struct dentry *dir, const char *name,
+				       __u64 value);
+
+extern struct dentry *hypfs_create_str(struct dentry *dir, const char *name,
+				       char *string);
+
+/* LPAR Hypervisor */
+extern int hypfs_diag_init(void);
+extern void hypfs_diag_exit(void);
+extern int hypfs_diag_create_files(struct dentry *root);
+
+/* VM Hypervisor */
+extern int hypfs_vm_init(void);
+extern void hypfs_vm_exit(void);
+extern int hypfs_vm_create_files(struct dentry *root);
+
+/* VM diagnose 0c */
+int hypfs_diag0c_init(void);
+void hypfs_diag0c_exit(void);
+
+/* Set Partition-Resource Parameter */
+int hypfs_sprp_init(void);
+void hypfs_sprp_exit(void);
+
+/* debugfs interface */
+struct hypfs_dbfs_file;
+
+/* Per-read snapshot handed from data_create() to the read path */
+struct hypfs_dbfs_data {
+	void *buf;		/* start of the data copied to user space */
+	void *buf_free_ptr;	/* pointer passed to data_free() */
+	size_t size;		/* number of valid bytes at buf */
+	struct hypfs_dbfs_file *dbfs_file;
+};
+
+struct hypfs_dbfs_file {
+	const char *name;
+	int (*data_create)(void **data, void **data_free_ptr,
+			   size_t *size);
+	void (*data_free)(const void *buf_free_ptr);
+	long (*unlocked_ioctl) (struct file *, unsigned int,
+				unsigned long);
+
+	/* Private data for hypfs_dbfs.c */
+	struct mutex lock;
+	struct dentry *dentry;
+};
+
+extern int hypfs_dbfs_init(void);
+extern void hypfs_dbfs_exit(void);
+extern int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df);
+extern void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df);
+
+#endif /* _HYPFS_H_ */
diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c
new file mode 100644
index 000000000..752f6df3e
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_dbfs.c
@@ -0,0 +1,104 @@
+/*
+ * Hypervisor filesystem for Linux on s390 - debugfs interface
+ *
+ * Copyright IBM Corp. 2010
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/slab.h>
+#include "hypfs.h"
+
+static struct dentry *dbfs_dir;
+
+/*
+ * Allocate the bookkeeping struct for one read and bind it to file "f".
+ * Only dbfs_file is set here; buf, buf_free_ptr and size are filled in
+ * later by the data_create() callback. Returns NULL if out of memory.
+ */
+static struct hypfs_dbfs_data *hypfs_dbfs_data_alloc(struct hypfs_dbfs_file *f)
+{
+	struct hypfs_dbfs_data *data;
+
+	data = kmalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return NULL;
+	data->dbfs_file = f;
+	return data;
+}
+
+/*
+ * Release the snapshot through the owner's data_free() callback and then
+ * free the bookkeeping struct itself.
+ */
+static void hypfs_dbfs_data_free(struct hypfs_dbfs_data *data)
+{
+	data->dbfs_file->data_free(data->buf_free_ptr);
+	kfree(data);
+}
+
+/*
+ * read() handler: create a fresh snapshot via data_create() under df->lock
+ * and copy it to user space. Only reads starting at offset 0 return data;
+ * any other offset reports end of file.
+ */
+static ssize_t dbfs_read(struct file *file, char __user *buf,
+			 size_t size, loff_t *ppos)
+{
+	struct hypfs_dbfs_data *data;
+	struct hypfs_dbfs_file *df;
+	ssize_t rc;
+
+	if (*ppos != 0)
+		return 0;
+
+	df = file_inode(file)->i_private;
+	mutex_lock(&df->lock);
+	data = hypfs_dbfs_data_alloc(df);
+	if (!data) {
+		mutex_unlock(&df->lock);
+		return -ENOMEM;
+	}
+	rc = df->data_create(&data->buf, &data->buf_free_ptr, &data->size);
+	if (rc) {
+		mutex_unlock(&df->lock);
+		/* data_create() failed: there is no buffer to hand to data_free() */
+		kfree(data);
+		return rc;
+	}
+	mutex_unlock(&df->lock);
+
+	rc = simple_read_from_buffer(buf, size, ppos, data->buf, data->size);
+	hypfs_dbfs_data_free(data);
+	return rc;
+}
+
+/*
+ * ioctl() handler: forward to the file's unlocked_ioctl() callback while
+ * holding df->lock; files without a callback get -ENOTTY.
+ */
+static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct hypfs_dbfs_file *df = file_inode(file)->i_private;
+	long rc;
+
+	mutex_lock(&df->lock);
+	if (df->unlocked_ioctl)
+		rc = df->unlocked_ioctl(file, cmd, arg);
+	else
+		rc = -ENOTTY;
+	mutex_unlock(&df->lock);
+	return rc;
+}
+
+static const struct file_operations dbfs_ops = {
+	.read		= dbfs_read,
+	.llseek		= no_llseek,
+	.unlocked_ioctl = dbfs_ioctl,
+};
+
+/*
+ * Create the debugfs file described by "df" below the s390_hypfs directory.
+ * Returns 0 on success or the error encoded in the returned dentry.
+ */
+int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df)
+{
+	/*
+	 * The lock must be usable before the file exists: dbfs_read() and
+	 * dbfs_ioctl() take it, and they can be called as soon as
+	 * debugfs_create_file() has made the file visible.
+	 */
+	mutex_init(&df->lock);
+	df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df,
+					 &dbfs_ops);
+	return PTR_ERR_OR_ZERO(df->dentry);
+}
+
+/* Remove the debugfs file that hypfs_dbfs_create_file() created for "df" */
+void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df)
+{
+	debugfs_remove(df->dentry);
+}
+
+/* Create the "s390_hypfs" debugfs directory that holds all dbfs files */
+int hypfs_dbfs_init(void)
+{
+	dbfs_dir = debugfs_create_dir("s390_hypfs", NULL);
+	return PTR_ERR_OR_ZERO(dbfs_dir);
+}
+
+/* Remove the "s390_hypfs" debugfs directory */
+void hypfs_dbfs_exit(void)
+{
+	debugfs_remove(dbfs_dir);
+}
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
new file mode 100644
index 000000000..5eeffeefa
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -0,0 +1,769 @@
+/*
+ * Hypervisor filesystem for Linux on s390. Diag 204 and 224
+ * implementation.
+ *
+ * Copyright IBM Corp.
2006, 2008
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+/* Prefix used by the pr_*() messages of this file */
+#define KMSG_COMPONENT "hypfs"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <asm/ebcdic.h>
+#include "hypfs.h"
+
+#define LPAR_NAME_LEN 8 /* lpar name len in diag 204 data */
+#define CPU_NAME_LEN 16 /* type name len of cpus in diag224 name table */
+#define TMP_SIZE 64 /* size of temporary buffers */
+
+/* Value stored in the version field of the debugfs diag 204 header */
+#define DBFS_D204_HDR_VERSION 0
+
+/*
+ * diag 204 subcodes. The subcode is OR-ed with an enum diag204_format
+ * value when the diagnose is issued.
+ */
+enum diag204_sc {
+ SUBC_STIB4 = 4, /* store, used with the simple data format */
+ SUBC_RSI = 5, /* query the size of the subcode 6/7 data */
+ SUBC_STIB6 = 6, /* store, extended data format */
+ SUBC_STIB7 = 7 /* like subcode 6, plus secondary cpu information */
+};
+
+/* The two available diag 204 data formats */
+enum diag204_format {
+ INFO_SIMPLE = 0,
+ INFO_EXT = 0x00010000
+};
+
+/* bit is set in flags, when physical cpu info is included in diag 204 data */
+#define LPAR_PHYS_FLG 0x80
+
+static char *diag224_cpu_names; /* diag 224 name table */
+static enum diag204_sc diag204_store_sc; /* used subcode for store */
+static enum diag204_format diag204_info_type; /* used diag 204 data format */
+
+static void *diag204_buf; /* 4K aligned buffer for diag204 data */
+static void *diag204_buf_vmalloc; /* vmalloc pointer for diag204 data */
+static int diag204_buf_pages; /* number of pages for diag204 data */
+
+/*
+ * NOTE(review): nothing in the code visible here ever assigns this pointer;
+ * it is only handed to debugfs_remove(). Confirm whether it is still needed.
+ */
+static struct dentry *dbfs_d204_file;
+
+/*
+ * DIAG 204 data structures and member access functions.
+ *
+ * Since we have two different diag 204 data formats for old and new s390
+ * machines, we do not access the structs directly, but use getter functions for
+ * each struct member instead. This should make the code more readable.
+ */
+
+/* Time information block: leading header of the diag 204 data */
+
+struct info_blk_hdr {
+	__u8 npar;
+	__u8 flags;
+	__u16 tslice;
+	__u16 phys_cpus;
+	__u16 this_part;
+	__u64 curtod;
+} __attribute__ ((packed));
+
+struct x_info_blk_hdr {
+	__u8 npar;
+	__u8 flags;
+	__u16 tslice;
+	__u16 phys_cpus;
+	__u16 this_part;
+	__u64 curtod1;
+	__u64 curtod2;
+	char reserved[40];
+} __attribute__ ((packed));
+
+/* Size in bytes of the time information block for format "type" */
+static inline int info_blk_hdr__size(enum diag204_format type)
+{
+	return type == INFO_SIMPLE ? sizeof(struct info_blk_hdr)
+				   : sizeof(struct x_info_blk_hdr);
+}
+
+/* npar field of the time information block */
+static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr)
+{
+	return type == INFO_SIMPLE ? ((struct info_blk_hdr *)hdr)->npar
+				   : ((struct x_info_blk_hdr *)hdr)->npar;
+}
+
+/* flags field of the time information block */
+static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr)
+{
+	return type == INFO_SIMPLE ? ((struct info_blk_hdr *)hdr)->flags
+				   : ((struct x_info_blk_hdr *)hdr)->flags;
+}
+
+/* phys_cpus field of the time information block */
+static inline __u16 info_blk_hdr__pcpus(enum diag204_format type, void *hdr)
+{
+	return type == INFO_SIMPLE ? ((struct info_blk_hdr *)hdr)->phys_cpus
+				   : ((struct x_info_blk_hdr *)hdr)->phys_cpus;
+}
+
+/* Partition header: one per partition, followed by its cpu info blocks */
+
+struct part_hdr {
+	__u8 pn;
+	__u8 cpus;
+	char reserved[6];
+	char part_name[LPAR_NAME_LEN];
+} __attribute__ ((packed));
+
+struct x_part_hdr {
+	__u8 pn;
+	__u8 cpus;
+	__u8 rcpus;
+	__u8 pflag;
+	__u32 mlu;
+	char part_name[LPAR_NAME_LEN];
+	char lpc_name[8];
+	char os_name[8];
+	__u64 online_cs;
+	__u64 online_es;
+	__u8 upid;
+	char reserved1[3];
+	__u32 group_mlu;
+	char group_name[8];
+	char reserved2[32];
+} __attribute__ ((packed));
+
+/* Size in bytes of the partition header for format "type" */
+static inline int part_hdr__size(enum diag204_format type)
+{
+	return type == INFO_SIMPLE ? sizeof(struct part_hdr)
+				   : sizeof(struct x_part_hdr);
+}
+
+/*
+ * Number of cpu info blocks following the header: "cpus" in the simple
+ * format, "rcpus" in the extended one.
+ */
+static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr)
+{
+	return type == INFO_SIMPLE ? ((struct part_hdr *)hdr)->cpus
+				   : ((struct x_part_hdr *)hdr)->rcpus;
+}
+
+/*
+ * Copy the partition name into "name", convert it from EBCDIC to ASCII,
+ * terminate it and strip whitespace. "name" needs room for
+ * LPAR_NAME_LEN + 1 bytes.
+ */
+static inline void part_hdr__part_name(enum diag204_format type, void *hdr,
+				       char *name)
+{
+	const char *src;
+
+	src = type == INFO_SIMPLE ? ((struct part_hdr *)hdr)->part_name
+				  : ((struct x_part_hdr *)hdr)->part_name;
+	memcpy(name, src, LPAR_NAME_LEN);
+	EBCASC(name, LPAR_NAME_LEN);
+	name[LPAR_NAME_LEN] = 0;
+	strim(name);
+}
+
+struct cpu_info {
+	__u16 cpu_addr;
+	char reserved1[2];
+	__u8 ctidx;
+	__u8 cflag;
+	__u16 weight;
+	__u64 acc_time;
+	__u64 lp_time;
+} __attribute__ ((packed));
+
+struct x_cpu_info {
+	__u16 cpu_addr;
+	char reserved1[2];
+	__u8 ctidx;
+	__u8 cflag;
+	__u16 weight;
+	__u64 acc_time;
+	__u64 lp_time;
+	__u16 min_weight;
+	__u16 cur_weight;
+	__u16 max_weight;
+	char reseved2[2];
+	__u64 online_time;
+	__u64 wait_time;
+	__u32 pma_weight;
+	__u32 polar_weight;
+	char reserved3[40];
+} __attribute__ ((packed));
+
+/* CPU info block: accessors for struct cpu_info / struct x_cpu_info */
+
+/* Size in bytes of one cpu info block for format "type" */
+static inline int cpu_info__size(enum diag204_format type)
+{
+	return type == INFO_SIMPLE ? sizeof(struct cpu_info)
+				   : sizeof(struct x_cpu_info);
+}
+
+/* ctidx field; callers pass it to diag224_idx2name() */
+static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr)
+{
+	return type == INFO_SIMPLE ? ((struct cpu_info *)hdr)->ctidx
+				   : ((struct x_cpu_info *)hdr)->ctidx;
+}
+
+/* cpu_addr field */
+static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr)
+{
+	return type == INFO_SIMPLE ? ((struct cpu_info *)hdr)->cpu_addr
+				   : ((struct x_cpu_info *)hdr)->cpu_addr;
+}
+
+/* acc_time field */
+static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr)
+{
+	return type == INFO_SIMPLE ? ((struct cpu_info *)hdr)->acc_time
+				   : ((struct x_cpu_info *)hdr)->acc_time;
+}
+
+/* lp_time field */
+static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr)
+{
+	return type == INFO_SIMPLE ? ((struct cpu_info *)hdr)->lp_time
+				   : ((struct x_cpu_info *)hdr)->lp_time;
+}
+
+/* online_time field; the simple format has none, so 0 is reported */
+static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr)
+{
+	return type == INFO_SIMPLE ? 0
+				   : ((struct x_cpu_info *)hdr)->online_time;
+}
+
+/* Physical header: precedes the physical cpu info blocks */
+
+struct phys_hdr {
+	char reserved1[1];
+	__u8 cpus;
+	char reserved2[6];
+	char mgm_name[8];
+} __attribute__ ((packed));
+
+struct x_phys_hdr {
+	char reserved1[1];
+	__u8 cpus;
+	char reserved2[6];
+	char mgm_name[8];
+	char reserved3[80];
+} __attribute__ ((packed));
+
+/* Size in bytes of the physical header for format "type" */
+static inline int phys_hdr__size(enum diag204_format type)
+{
+	return type == INFO_SIMPLE ? sizeof(struct phys_hdr)
+				   : sizeof(struct x_phys_hdr);
+}
+
+/* cpus field: number of physical cpu info blocks that follow */
+static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr)
+{
+	return type == INFO_SIMPLE ? ((struct phys_hdr *)hdr)->cpus
+				   : ((struct x_phys_hdr *)hdr)->cpus;
+}
+
+/* Physical CPU info block */
+
+struct phys_cpu {
+	__u16 cpu_addr;
+	char reserved1[2];
+	__u8 ctidx;
+	char reserved2[3];
+	__u64 mgm_time;
+	char reserved3[8];
+} __attribute__ ((packed));
+
+struct x_phys_cpu {
+	__u16 cpu_addr;
+	char reserved1[2];
+	__u8 ctidx;
+	char reserved2[3];
+	__u64 mgm_time;
+	char reserved3[80];
+} __attribute__ ((packed));
+
+/* Size in bytes of one physical cpu info block for format "type" */
+static inline int phys_cpu__size(enum diag204_format type)
+{
+	return type == INFO_SIMPLE ? sizeof(struct phys_cpu)
+				   : sizeof(struct x_phys_cpu);
+}
+
+/* cpu_addr field */
+static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr)
+{
+	return type == INFO_SIMPLE ? ((struct phys_cpu *)hdr)->cpu_addr
+				   : ((struct x_phys_cpu *)hdr)->cpu_addr;
+}
+
+/* mgm_time field */
+static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr)
+{
+	return type == INFO_SIMPLE ? ((struct phys_cpu *)hdr)->mgm_time
+				   : ((struct x_phys_cpu *)hdr)->mgm_time;
+}
+
+/* ctidx field (a __u8 in both layouts, returned widened to __u64) */
+static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
+{
+	return type == INFO_SIMPLE ? ((struct phys_cpu *)hdr)->ctidx
+				   : ((struct x_phys_cpu *)hdr)->ctidx;
+}
+
+/* Diagnose 204 functions */
+
+/*
+ * Issue diagnose 0x204 with "subcode" in register 0 and "size" in
+ * register 1. Returns -1 if register 0 is non-zero afterwards, otherwise
+ * the value left in register 1.
+ */
+static int diag204(unsigned long subcode, unsigned long size, void *addr)
+{
+	register unsigned long _subcode asm("0") = subcode;
+	register unsigned long _size asm("1") = size;
+
+	asm volatile(
+		" diag %2,%0,0x204\n"
+		"0:\n"
+		EX_TABLE(0b,0b)
+		: "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory");
+	if (!_subcode)
+		return _size;
+	return -1;
+}
+
+/*
+ * For the old diag subcode 4 with simple data format we have to use real
+ * memory. If we use subcode 6 or 7 with extended data format, we can (and
+ * should) use vmalloc, since we need a lot of memory in that case. Currently
+ * up to 93 pages!
+ */
+
+/* Release the cached diag 204 buffer, whichever allocator provided it */
+static void diag204_free_buffer(void)
+{
+	if (diag204_buf) {
+		if (!diag204_buf_vmalloc) {
+			free_pages((unsigned long) diag204_buf, 0);
+		} else {
+			vfree(diag204_buf_vmalloc);
+			diag204_buf_vmalloc = NULL;
+		}
+		diag204_buf = NULL;
+	}
+}
+
+/* Round "ptr" up to the next page boundary */
+static void *page_align_ptr(void *ptr)
+{
+	unsigned long addr = (unsigned long) ptr;
+
+	return (void *) PAGE_ALIGN(addr);
+}
+
+/*
+ * Allocate a "pages" sized, page aligned buffer with vmalloc. One extra
+ * page is allocated so that the start can be aligned.
+ */
+static void *diag204_alloc_vbuf(int pages)
+{
+	void *raw = vmalloc(PAGE_SIZE * (pages + 1));
+
+	diag204_buf_vmalloc = raw;
+	if (!raw)
+		return ERR_PTR(-ENOMEM);
+	diag204_buf_pages = pages;
+	diag204_buf = page_align_ptr(raw);
+	return diag204_buf;
+}
+
+/* Allocate the single page buffer used with the simple data format */
+static void *diag204_alloc_rbuf(void)
+{
+	diag204_buf = (void*)__get_free_pages(GFP_KERNEL,0);
+	if (diag204_buf) {
+		diag204_buf_pages = 1;
+		return diag204_buf;
+	}
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Return the cached diag 204 buffer, allocating it on first use. The
+ * buffer size in pages is stored in "*pages". Returns an ERR_PTR value
+ * on failure.
+ */
+static void *diag204_get_buffer(enum diag204_format fmt, int *pages)
+{
+	/* Reuse the buffer if there already is one */
+	if (diag204_buf) {
+		*pages = diag204_buf_pages;
+		return diag204_buf;
+	}
+
+	if (fmt == INFO_SIMPLE) {
+		*pages = 1;
+		return diag204_alloc_rbuf();
+	}
+
+	/* INFO_EXT: ask for the required number of pages first */
+	*pages = diag204((unsigned long)SUBC_RSI |
+			 (unsigned long)INFO_EXT, 0, NULL);
+	if (*pages > 0)
+		return diag204_alloc_vbuf(*pages);
+	return ERR_PTR(-ENOSYS);
+}
+
+/*
+ * diag204_probe() has to find out, which type of diagnose 204 implementation
+ * we have on our machine. Currently there are three possible scenarios:
+ * - subcode 4 + simple data format (only one page)
+ * - subcode 4-6 + extended data format
+ * - subcode 4-7 + extended data format
+ *
+ * Subcode 5 is used to retrieve the size of the data, provided by subcodes
+ * 6 and 7. Subcode 7 basically has the same function as subcode 6. In addition
+ * to subcode 6 it provides also information about secondary cpus.
+ * In order to get as much information as possible, we first try
+ * subcode 7, then 6 and if both fail, we use subcode 4.
+ */
+
+static int diag204_probe(void)
+{
+	void *buf;
+	int pages, rc;
+
+	buf = diag204_get_buffer(INFO_EXT, &pages);
+	if (!IS_ERR(buf)) {
+		if (diag204((unsigned long)SUBC_STIB7 |
+			    (unsigned long)INFO_EXT, pages, buf) >= 0) {
+			diag204_store_sc = SUBC_STIB7;
+			diag204_info_type = INFO_EXT;
+			goto out;
+		}
+		if (diag204((unsigned long)SUBC_STIB6 |
+			    (unsigned long)INFO_EXT, pages, buf) >= 0) {
+			diag204_store_sc = SUBC_STIB6;
+			diag204_info_type = INFO_EXT;
+			goto out;
+		}
+		diag204_free_buffer();
+	}
+
+	/* subcodes 6 and 7 failed, now try subcode 4 */
+
+	buf = diag204_get_buffer(INFO_SIMPLE, &pages);
+	if (IS_ERR(buf)) {
+		rc = PTR_ERR(buf);
+		goto fail_alloc;
+	}
+	if (diag204((unsigned long)SUBC_STIB4 |
+		    (unsigned long)INFO_SIMPLE, pages, buf) >= 0) {
+		diag204_store_sc = SUBC_STIB4;
+		diag204_info_type = INFO_SIMPLE;
+		goto out;
+	} else {
+		rc = -ENOSYS;
+		goto fail_store;
+	}
+out:
+	rc = 0;
+fail_store:
+	/* The probe buffer is always released; it is re-allocated on demand */
+	diag204_free_buffer();
+fail_alloc:
+	return rc;
+}
+
+/*
+ * Store diag 204 data into "buf" ("pages" pages) using the subcode and
+ * format selected by diag204_probe(). Returns 0 or -ENOSYS.
+ */
+static int diag204_do_store(void *buf, int pages)
+{
+	int rc;
+
+	rc = diag204((unsigned long) diag204_store_sc |
+		     (unsigned long) diag204_info_type, pages, buf);
+	return rc < 0 ? -ENOSYS : 0;
+}
+
+/*
+ * Get the (cached) diag 204 buffer and fill it with current data.
+ * Returns the buffer or an ERR_PTR value.
+ */
+static void *diag204_store(void)
+{
+	void *buf;
+	int pages, rc;
+
+	buf = diag204_get_buffer(diag204_info_type, &pages);
+	if (IS_ERR(buf))
+		return buf;
+	rc = diag204_do_store(buf, pages);
+	if (rc)
+		return ERR_PTR(rc);
+	return buf;
+}
+
+/* Diagnose 224 functions */
+
+/*
+ * Issue diagnose 0x224 for "ptr". rc starts as -EOPNOTSUPP and is only
+ * cleared by the instruction following the diagnose; the exception table
+ * entry skips that instruction if the diagnose faults.
+ */
+static int diag224(void *ptr)
+{
+	int rc = -EOPNOTSUPP;
+
+	asm volatile(
+		" diag %1,%2,0x224\n"
+		"0: lhi %0,0x0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "+d" (rc) :"d" (0), "d" (ptr) : "memory");
+	return rc;
+}
+
+/*
+ * Allocate the diag 224 name table, fill it and convert the names from
+ * EBCDIC to ASCII. Returns 0, -ENOMEM or -EOPNOTSUPP.
+ */
+static int diag224_get_name_table(void)
+{
+	/* memory must be below 2GB */
+	diag224_cpu_names = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA);
+	if (!diag224_cpu_names)
+		return -ENOMEM;
+	if (diag224(diag224_cpu_names)) {
+		kfree(diag224_cpu_names);
+		/* do not leave a dangling pointer behind for later kfree() */
+		diag224_cpu_names = NULL;
+		return -EOPNOTSUPP;
+	}
+	EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16);
+	return 0;
+}
+
+/* Free the diag 224 name table */
+static void diag224_delete_name_table(void)
+{
+	kfree(diag224_cpu_names);
+}
+
+/*
+ * Copy the type name with the given index from the diag 224 name table
+ * into "name", terminate it and strip whitespace. "name" needs room for
+ * CPU_NAME_LEN + 1 bytes. Always returns 0.
+ */
+static int diag224_idx2name(int index, char *name)
+{
+	memcpy(name, diag224_cpu_names + ((index + 1) * CPU_NAME_LEN),
+	       CPU_NAME_LEN);
+	name[CPU_NAME_LEN] = 0;
+	strim(name);
+	return 0;
+}
+
+struct dbfs_d204_hdr {
+	u64	len;		/* Length of d204 buffer without header */
+	u16	version;	/* Version of header */
+	u8	sc;		/* Used subcode */
+	char	reserved[53];
+} __attribute__ ((packed));
+
+struct dbfs_d204 {
+	struct dbfs_d204_hdr	hdr;	/* 64 byte header */
+	char			buf[];	/* d204 buffer */
+} __attribute__ ((packed));
+
+/*
+ * Hypfs DBFS callback: create a snapshot consisting of a dbfs_d204_hdr
+ * followed by freshly stored diag 204 data.
+ */
+static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size)
+{
+	struct dbfs_d204 *d204;
+	int rc, buf_size;
+	void *base;
+
+	buf_size = PAGE_SIZE * (diag204_buf_pages + 1) + sizeof(d204->hdr);
+	base = vzalloc(buf_size);
+	if (!base)
+		return -ENOMEM;
+	/* Place the header so that the data area behind it is page aligned */
+	d204 = page_align_ptr(base + sizeof(d204->hdr)) - sizeof(d204->hdr);
+	rc = diag204_do_store(d204->buf, diag204_buf_pages);
+	if (rc) {
+		vfree(base);
+		return rc;
+	}
+	d204->hdr.version = DBFS_D204_HDR_VERSION;
+	d204->hdr.len = PAGE_SIZE * diag204_buf_pages;
+	d204->hdr.sc = diag204_store_sc;
+	*data = d204;
+	*data_free_ptr = base;
+	*size = d204->hdr.len + sizeof(struct dbfs_d204_hdr);
+	return 0;
+}
+
+static struct hypfs_dbfs_file dbfs_file_d204 = {
+	.name		= "diag_204",
+	.data_create	= dbfs_d204_create,
+	.data_free	= vfree,
+};
+
+/*
+ * Probe diag 204, create the "diag_204" debugfs file when the extended
+ * format is available and, on LPAR, load the diag 224 name table.
+ * Returns 0 or a negative error code.
+ */
+__init int hypfs_diag_init(void)
+{
+	int rc;
+
+	if (diag204_probe()) {
+		pr_err("The hardware system does not support hypfs\n");
+		return -ENODATA;
+	}
+	if (diag204_info_type == INFO_EXT) {
+		rc = hypfs_dbfs_create_file(&dbfs_file_d204);
+		if (rc)
+			return rc;
+	}
+	if (MACHINE_IS_LPAR) {
+		rc = diag224_get_name_table();
+		if (rc) {
+			pr_err("The hardware system does not provide all "
+			       "functions required by hypfs\n");
+			/*
+			 * Undo hypfs_dbfs_create_file(). If the file was
+			 * never created its dentry is still NULL.
+			 */
+			hypfs_dbfs_remove_file(&dbfs_file_d204);
+			return rc;
+		}
+	}
+	return 0;
+}
+
+/* Release everything hypfs_diag_init() and later diag 204 stores set up */
+void hypfs_diag_exit(void)
+{
+	debugfs_remove(dbfs_d204_file);
+	diag224_delete_name_table();
+	diag204_free_buffer();
+	hypfs_dbfs_remove_file(&dbfs_file_d204);
+}
+
+/*
+ * Functions to create the directory structure
+ * *******************************************
+ */
+
+/*
+ * Create the directory for one logical cpu below "cpus_dir" together
+ * with its "mgmtime", "cputime", "onlinetime" (extended format only)
+ * and "type" files. Returns 0 or a negative error code.
+ */
+static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
+{
+	struct dentry *cpu_dir;
+	char buffer[TMP_SIZE];
+	void *rc;
+
+	snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type,
+							    cpu_info));
+	cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+	/* Do not hand an error pointer to the file creation helpers */
+	if (IS_ERR(cpu_dir))
+		return PTR_ERR(cpu_dir);
+	rc = hypfs_create_u64(cpu_dir, "mgmtime",
+			      cpu_info__acc_time(diag204_info_type, cpu_info) -
+			      cpu_info__lp_time(diag204_info_type, cpu_info));
+	if (IS_ERR(rc))
+		return PTR_ERR(rc);
+	rc = hypfs_create_u64(cpu_dir, "cputime",
+			      cpu_info__lp_time(diag204_info_type, cpu_info));
+	if (IS_ERR(rc))
+		return PTR_ERR(rc);
+	if (diag204_info_type == INFO_EXT) {
+		rc = hypfs_create_u64(cpu_dir, "onlinetime",
+				      cpu_info__online_time(diag204_info_type,
+							    cpu_info));
+		if (IS_ERR(rc))
+			return PTR_ERR(rc);
+	}
+	diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer);
+	rc = hypfs_create_str(cpu_dir, "type", buffer);
+	return PTR_ERR_OR_ZERO(rc);
+}
+
+/*
+ * Create the directory tree for one partition below "systems_dir".
+ * Returns a pointer to the data following this partition's cpu info
+ * blocks, or an ERR_PTR value.
+ */
+static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
+{
+	struct dentry *cpus_dir;
+	struct dentry *lpar_dir;
+	char lpar_name[LPAR_NAME_LEN + 1];
+	void *cpu_info;
+	int i;
+
+	part_hdr__part_name(diag204_info_type, part_hdr, lpar_name);
+	lpar_name[LPAR_NAME_LEN] = 0;
+	lpar_dir = hypfs_mkdir(systems_dir, lpar_name);
+	if (IS_ERR(lpar_dir))
+		return lpar_dir;
+	cpus_dir = hypfs_mkdir(lpar_dir, "cpus");
+	if (IS_ERR(cpus_dir))
+		return cpus_dir;
+	cpu_info = part_hdr + part_hdr__size(diag204_info_type);
+	for (i = 0; i < part_hdr__rcpus(diag204_info_type, part_hdr); i++) {
+		int rc;
+		rc = hypfs_create_cpu_files(cpus_dir, cpu_info);
+		if (rc)
+			return ERR_PTR(rc);
+		cpu_info += cpu_info__size(diag204_info_type);
+	}
+	return cpu_info;
+}
+
+/*
+ * Create the directory for one physical cpu below "cpus_dir" together
+ * with its "mgmtime" and "type" files. Returns 0 or a negative error code.
+ */
+static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
+{
+	struct dentry *cpu_dir;
+	char buffer[TMP_SIZE];
+	void *rc;
+
+	snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type,
+							    cpu_info));
+	cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+	if (IS_ERR(cpu_dir))
+		return PTR_ERR(cpu_dir);
+	rc = hypfs_create_u64(cpu_dir, "mgmtime",
+			      phys_cpu__mgm_time(diag204_info_type, cpu_info));
+	if (IS_ERR(rc))
+		return PTR_ERR(rc);
+	diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer);
+	rc = hypfs_create_str(cpu_dir, "type", buffer);
+	return PTR_ERR_OR_ZERO(rc);
+}
+
+/*
+ * Create the "cpus" directory for the physical cpus below "parent_dir".
+ * Returns a pointer to the data following the physical cpu info blocks,
+ * or an ERR_PTR value.
+ */
+static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr)
+{
+	int i;
+	void *cpu_info;
+	struct dentry *cpus_dir;
+
+	cpus_dir = hypfs_mkdir(parent_dir, "cpus");
+	if (IS_ERR(cpus_dir))
+		return cpus_dir;
+	cpu_info = phys_hdr + phys_hdr__size(diag204_info_type);
+	for (i = 0; i < phys_hdr__cpus(diag204_info_type, phys_hdr); i++) {
+		int rc;
+		rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info);
+		if (rc)
+			return ERR_PTR(rc);
+		cpu_info += phys_cpu__size(diag204_info_type);
+	}
+	return cpu_info;
+}
+
+/*
+ * Store current diag 204 data and build the complete LPAR directory tree
+ * ("systems", optional physical "cpus", "hyp") below "root".
+ * Returns 0 or a negative error code.
+ */
+int hypfs_diag_create_files(struct dentry *root)
+{
+	struct dentry *systems_dir, *hyp_dir;
+	void *time_hdr, *part_hdr;
+	int i, rc;
+	void *buffer, *ptr;
+
+	buffer = diag204_store();
+	if (IS_ERR(buffer))
+		return PTR_ERR(buffer);
+
+	systems_dir = hypfs_mkdir(root, "systems");
+	if (IS_ERR(systems_dir)) {
+		rc = PTR_ERR(systems_dir);
+		goto err_out;
+	}
+	time_hdr = (struct x_info_blk_hdr *)buffer;
+	part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type);
+	for (i = 0; i < info_blk_hdr__npar(diag204_info_type, time_hdr); i++) {
+		/* each call returns the start of the next partition header */
+		part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr);
+		if (IS_ERR(part_hdr)) {
+			rc = PTR_ERR(part_hdr);
+			goto err_out;
+		}
+	}
+	if (info_blk_hdr__flags(diag204_info_type, time_hdr) & LPAR_PHYS_FLG) {
+		ptr = hypfs_create_phys_files(root, part_hdr);
+		if (IS_ERR(ptr)) {
+			rc = PTR_ERR(ptr);
+			goto err_out;
+		}
+	}
+	hyp_dir = hypfs_mkdir(root, "hyp");
+	if (IS_ERR(hyp_dir)) {
+		rc = PTR_ERR(hyp_dir);
+		goto err_out;
+	}
+	ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor");
+	if (IS_ERR(ptr)) {
+		rc = PTR_ERR(ptr);
+		goto err_out;
+	}
+	rc = 0;
+
+err_out:
+	return rc;
+}
diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c
new file mode 100644
index 000000000..24c747a0f
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_diag0c.c
@@ -0,0 +1,135 @@
+/*
+ * Hypervisor filesystem for Linux on s390
+ *
+ * Diag 0C implementation
+ *
+ * Copyright IBM Corp.
2014
+ */
+
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <asm/hypfs.h>
+#include "hypfs.h"
+
+#define DBFS_D0C_HDR_VERSION 0
+
+/*
+ * Execute diagnose 0c in 31 bit mode
+ */
+static void diag0c(struct hypfs_diag0c_entry *entry)
+{
+	asm volatile (
+		" sam31\n"
+		" diag %0,%0,0x0c\n"
+		" sam64\n"
+		: /* no output register */
+		: "a" (entry)
+		: "memory");
+}
+
+/*
+ * Get hypfs_diag0c_entry from CPU vector and store diag0c data
+ */
+static void diag0c_fn(void *data)
+{
+	diag0c(((void **) data)[smp_processor_id()]);
+}
+
+/*
+ * Allocate buffer and store diag 0c data
+ *
+ * On success the number of entries is stored in "*count" and the buffer
+ * (header plus one entry per online CPU) is returned; the caller frees it
+ * with kfree(). Returns ERR_PTR(-ENOMEM) if an allocation fails.
+ */
+static void *diag0c_store(unsigned int *count)
+{
+	struct hypfs_diag0c_data *diag0c_data;
+	unsigned int cpu_count, cpu, i;
+	void **cpu_vec;
+
+	get_online_cpus();
+	cpu_count = num_online_cpus();
+	/*
+	 * The vector is indexed by CPU number, so it has to cover every
+	 * possible CPU id and not just the number of possible CPUs.
+	 * kmalloc_array() also checks the size calculation for overflow.
+	 */
+	cpu_vec = kmalloc_array(nr_cpu_ids, sizeof(*cpu_vec), GFP_KERNEL);
+	if (!cpu_vec)
+		goto fail_put_online_cpus;
+	/* Note: Diag 0c needs 8 byte alignment and real storage */
+	diag0c_data = kzalloc(sizeof(struct hypfs_diag0c_hdr) +
+			      cpu_count * sizeof(struct hypfs_diag0c_entry),
+			      GFP_KERNEL | GFP_DMA);
+	if (!diag0c_data)
+		goto fail_kfree_cpu_vec;
+	i = 0;
+	/* Fill CPU vector for each online CPU */
+	for_each_online_cpu(cpu) {
+		diag0c_data->entry[i].cpu = cpu;
+		cpu_vec[cpu] = &diag0c_data->entry[i++];
+	}
+	/* Collect data from all CPUs */
+	on_each_cpu(diag0c_fn, cpu_vec, 1);
+	*count = cpu_count;
+	kfree(cpu_vec);
+	put_online_cpus();
+	return diag0c_data;
+
+fail_kfree_cpu_vec:
+	kfree(cpu_vec);
+fail_put_online_cpus:
+	put_online_cpus();
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Hypfs DBFS callback: Free diag 0c data
+ */
+static void dbfs_diag0c_free(const void *data)
+{
+	kfree(data);
+}
+
+/*
+ * Hypfs DBFS callback: Create diag 0c data
+ *
+ * Stores the diag 0c data, fills in the header (TOD clock, payload length,
+ * version, entry count) and returns the buffer through "data" and
+ * "data_free_ptr"; "*size" covers header plus payload.
+ */
+static int dbfs_diag0c_create(void **data, void **data_free_ptr, size_t *size)
+{
+	struct hypfs_diag0c_data *diag0c_data;
+	unsigned int count;
+
+	diag0c_data = diag0c_store(&count);
+	if (IS_ERR(diag0c_data))
+		return PTR_ERR(diag0c_data);
+	memset(&diag0c_data->hdr, 0, sizeof(diag0c_data->hdr));
+	get_tod_clock_ext(diag0c_data->hdr.tod_ext);
+	diag0c_data->hdr.len = count * sizeof(struct hypfs_diag0c_entry);
+	diag0c_data->hdr.version = DBFS_D0C_HDR_VERSION;
+	diag0c_data->hdr.count = count;
+	*data = diag0c_data;
+	*data_free_ptr = diag0c_data;
+	*size = diag0c_data->hdr.len + sizeof(struct hypfs_diag0c_hdr);
+	return 0;
+}
+
+/*
+ * Hypfs DBFS file structure
+ */
+static struct hypfs_dbfs_file dbfs_file_0c = {
+	.name		= "diag_0c",
+	.data_create	= dbfs_diag0c_create,
+	.data_free	= dbfs_diag0c_free,
+};
+
+/*
+ * Initialize diag 0c interface for z/VM
+ */
+int __init hypfs_diag0c_init(void)
+{
+	if (!MACHINE_IS_VM)
+		return 0;
+	return hypfs_dbfs_create_file(&dbfs_file_0c);
+}
+
+/*
+ * Shutdown diag 0c interface for z/VM
+ */
+void hypfs_diag0c_exit(void)
+{
+	if (!MACHINE_IS_VM)
+		return;
+	hypfs_dbfs_remove_file(&dbfs_file_0c);
+}
diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c
new file mode 100644
index 000000000..f043c3c7e
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_sprp.c
@@ -0,0 +1,141 @@
+/*
+ * Hypervisor filesystem for Linux on s390.
+ * Set Partition-Resource Parameter interface.
+ *
+ * Copyright IBM Corp.
2013
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/compat.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <asm/compat.h>
+#include <asm/sclp.h>
+#include "hypfs.h"
+
+#define DIAG304_SET_WEIGHTS	0
+#define DIAG304_QUERY_PRP	1
+#define DIAG304_SET_CAPPING	2
+
+#define DIAG304_CMD_MAX		2
+
+/*
+ * Issue diagnose 0x304 with the data address in register 2 and the
+ * command in register 4; the value left in register 3 is returned.
+ */
+static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd)
+{
+	register unsigned long _data asm("2") = (unsigned long) data;
+	register unsigned long _rc asm("3");
+	register unsigned long _cmd asm("4") = cmd;
+
+	asm volatile("diag %1,%2,0x304\n"
+		     : "=d" (_rc) : "d" (_data), "d" (_cmd) : "memory");
+
+	return _rc;
+}
+
+/* Hypfs DBFS callback: release the page handed out by hypfs_sprp_create() */
+static void hypfs_sprp_free(const void *data)
+{
+	free_page((unsigned long) data);
+}
+
+/*
+ * Hypfs DBFS callback: query the partition-resource parameters into a
+ * zeroed page. Returns 0 with one page of data, -ENOMEM, or -EIO if the
+ * diagnose does not report 1.
+ */
+static int hypfs_sprp_create(void **data_ptr, void **free_ptr, size_t *size)
+{
+	void *page = (void *) get_zeroed_page(GFP_KERNEL);
+
+	if (!page)
+		return -ENOMEM;
+
+	if (hypfs_sprp_diag304(page, DIAG304_QUERY_PRP) == 1) {
+		*data_ptr = *free_ptr = page;
+		*size = PAGE_SIZE;
+		return 0;
+	}
+
+	/* Query failed: report no data and drop the page again */
+	*data_ptr = *free_ptr = NULL;
+	*size = 0;
+	free_page((unsigned long) page);
+	return -EIO;
+}
+
+/*
+ * Execute one HYPFS_DIAG304 request described by the struct hypfs_diag304
+ * at "user_area". The set commands copy a page in from user space before
+ * the diagnose, the query command copies a page back out after it, and
+ * the request struct including the diagnose result is always written back.
+ */
+static int __hypfs_sprp_ioctl(void __user *user_area)
+{
+	struct hypfs_diag304 diag304;
+	unsigned long cmd;
+	void __user *udata;
+	void *data;
+	int rc = -EFAULT;
+
+	if (copy_from_user(&diag304, user_area, sizeof(diag304)))
+		return -EFAULT;
+	if ((diag304.args[0] >> 8) != 0 || diag304.args[1] > DIAG304_CMD_MAX)
+		return -EINVAL;
+
+	data = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+	if (!data)
+		return -ENOMEM;
+
+	udata = (void __user *)(unsigned long) diag304.data;
+	if ((diag304.args[1] == DIAG304_SET_WEIGHTS ||
+	     diag304.args[1] == DIAG304_SET_CAPPING) &&
+	    copy_from_user(data, udata, PAGE_SIZE))
+		goto out;
+
+	cmd = *(unsigned long *) &diag304.args[0];
+	diag304.rc = hypfs_sprp_diag304(data, cmd);
+
+	if (diag304.args[1] == DIAG304_QUERY_PRP &&
+	    copy_to_user(udata, data, PAGE_SIZE))
+		goto out;
+
+	if (!copy_to_user(user_area, &diag304, sizeof(diag304)))
+		rc = 0;
+out:
+	free_page((unsigned long) data);
+	return rc;
+}
+
+/*
+ * ioctl entry point of the "diag_304" file. Requires CAP_SYS_ADMIN and
+ * accepts HYPFS_DIAG304 only; every other command yields -ENOTTY.
+ */
+static long hypfs_sprp_ioctl(struct file *file, unsigned int cmd,
+			     unsigned long arg)
+{
+	void __user *argp;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	argp = is_compat_task() ? compat_ptr(arg) : (void __user *) arg;
+
+	/* unknown ioctl number */
+	if (cmd != HYPFS_DIAG304)
+		return -ENOTTY;
+
+	return __hypfs_sprp_ioctl(argp);
+}
+
+static struct hypfs_dbfs_file hypfs_sprp_file = {
+	.name		= "diag_304",
+	.data_create	= hypfs_sprp_create,
+	.data_free	= hypfs_sprp_free,
+	.unlocked_ioctl = hypfs_sprp_ioctl,
+};
+
+/* Create the "diag_304" file when sclp_has_sprp() reports the facility */
+int hypfs_sprp_init(void)
+{
+	if (sclp_has_sprp())
+		return hypfs_dbfs_create_file(&hypfs_sprp_file);
+	return 0;
+}
+
+/* Remove the "diag_304" file again (only created with the facility) */
+void hypfs_sprp_exit(void)
+{
+	if (sclp_has_sprp())
+		hypfs_dbfs_remove_file(&hypfs_sprp_file);
+}
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
new file mode 100644
index 000000000..afbe07907
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -0,0 +1,287 @@
+/*
+ * Hypervisor filesystem for Linux on s390. z/VM implementation.
+ *
+ * Copyright IBM Corp.
2006 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/vmalloc.h> +#include <asm/ebcdic.h> +#include <asm/timex.h> +#include "hypfs.h" + +#define NAME_LEN 8 +#define DBFS_D2FC_HDR_VERSION 0 + +static char local_guest[] = " "; +static char all_guests[] = "* "; +static char *guest_query; + +struct diag2fc_data { + __u32 version; + __u32 flags; + __u64 used_cpu; + __u64 el_time; + __u64 mem_min_kb; + __u64 mem_max_kb; + __u64 mem_share_kb; + __u64 mem_used_kb; + __u32 pcpus; + __u32 lcpus; + __u32 vcpus; + __u32 ocpus; + __u32 cpu_max; + __u32 cpu_shares; + __u32 cpu_use_samp; + __u32 cpu_delay_samp; + __u32 page_wait_samp; + __u32 idle_samp; + __u32 other_samp; + __u32 total_samp; + char guest_name[NAME_LEN]; +}; + +struct diag2fc_parm_list { + char userid[NAME_LEN]; + char aci_grp[NAME_LEN]; + __u64 addr; + __u32 size; + __u32 fmt; +}; + +static int diag2fc(int size, char* query, void *addr) +{ + unsigned long residual_cnt; + unsigned long rc; + struct diag2fc_parm_list parm_list; + + memcpy(parm_list.userid, query, NAME_LEN); + ASCEBC(parm_list.userid, NAME_LEN); + parm_list.addr = (unsigned long) addr ; + parm_list.size = size; + parm_list.fmt = 0x02; + memset(parm_list.aci_grp, 0x40, NAME_LEN); + rc = -1; + + asm volatile( + " diag %0,%1,0x2fc\n" + "0:\n" + EX_TABLE(0b,0b) + : "=d" (residual_cnt), "+d" (rc) : "0" (&parm_list) : "memory"); + + if ((rc != 0 ) && (rc != -2)) + return rc; + else + return -residual_cnt; +} + +/* + * Allocate buffer for "query" and store diag 2fc at "offset" + */ +static void *diag2fc_store(char *query, unsigned int *count, int offset) +{ + void *data; + int size; + + do { + size = diag2fc(0, query, NULL); + if (size < 0) + return ERR_PTR(-EACCES); + data = vmalloc(size + offset); + if (!data) + return ERR_PTR(-ENOMEM); + if (diag2fc(size, query, data + offset) == 0) + break; + vfree(data); + } while (1); + *count = (size / 
sizeof(struct diag2fc_data)); + + return data; +} + +static void diag2fc_free(const void *data) +{ + vfree(data); +} + +#define ATTRIBUTE(dir, name, member) \ +do { \ + void *rc; \ + rc = hypfs_create_u64(dir, name, member); \ + if (IS_ERR(rc)) \ + return PTR_ERR(rc); \ +} while(0) + +static int hpyfs_vm_create_guest(struct dentry *systems_dir, + struct diag2fc_data *data) +{ + char guest_name[NAME_LEN + 1] = {}; + struct dentry *guest_dir, *cpus_dir, *samples_dir, *mem_dir; + int dedicated_flag, capped_value; + + capped_value = (data->flags & 0x00000006) >> 1; + dedicated_flag = (data->flags & 0x00000008) >> 3; + + /* guest dir */ + memcpy(guest_name, data->guest_name, NAME_LEN); + EBCASC(guest_name, NAME_LEN); + strim(guest_name); + guest_dir = hypfs_mkdir(systems_dir, guest_name); + if (IS_ERR(guest_dir)) + return PTR_ERR(guest_dir); + ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time); + + /* logical cpu information */ + cpus_dir = hypfs_mkdir(guest_dir, "cpus"); + if (IS_ERR(cpus_dir)) + return PTR_ERR(cpus_dir); + ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu); + ATTRIBUTE(cpus_dir, "capped", capped_value); + ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag); + ATTRIBUTE(cpus_dir, "count", data->vcpus); + /* + * Note: The "weight_min" attribute got the wrong name. + * The value represents the number of non-stopped (operating) + * CPUS. 
+ */ + ATTRIBUTE(cpus_dir, "weight_min", data->ocpus); + ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max); + ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares); + + /* memory information */ + mem_dir = hypfs_mkdir(guest_dir, "mem"); + if (IS_ERR(mem_dir)) + return PTR_ERR(mem_dir); + ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb); + ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb); + ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb); + ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb); + + /* samples */ + samples_dir = hypfs_mkdir(guest_dir, "samples"); + if (IS_ERR(samples_dir)) + return PTR_ERR(samples_dir); + ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp); + ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp); + ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp); + ATTRIBUTE(samples_dir, "idle", data->idle_samp); + ATTRIBUTE(samples_dir, "other", data->other_samp); + ATTRIBUTE(samples_dir, "total", data->total_samp); + return 0; +} + +int hypfs_vm_create_files(struct dentry *root) +{ + struct dentry *dir, *file; + struct diag2fc_data *data; + unsigned int count = 0; + int rc, i; + + data = diag2fc_store(guest_query, &count, 0); + if (IS_ERR(data)) + return PTR_ERR(data); + + /* Hpervisor Info */ + dir = hypfs_mkdir(root, "hyp"); + if (IS_ERR(dir)) { + rc = PTR_ERR(dir); + goto failed; + } + file = hypfs_create_str(dir, "type", "z/VM Hypervisor"); + if (IS_ERR(file)) { + rc = PTR_ERR(file); + goto failed; + } + + /* physical cpus */ + dir = hypfs_mkdir(root, "cpus"); + if (IS_ERR(dir)) { + rc = PTR_ERR(dir); + goto failed; + } + file = hypfs_create_u64(dir, "count", data->lcpus); + if (IS_ERR(file)) { + rc = PTR_ERR(file); + goto failed; + } + + /* guests */ + dir = hypfs_mkdir(root, "systems"); + if (IS_ERR(dir)) { + rc = PTR_ERR(dir); + goto failed; + } + + for (i = 0; i < count; i++) { + rc = hpyfs_vm_create_guest(dir, &(data[i])); + if (rc) + goto failed; + } + diag2fc_free(data); + return 0; + +failed: + diag2fc_free(data); + return 
rc; +} + +struct dbfs_d2fc_hdr { + u64 len; /* Length of d2fc buffer without header */ + u16 version; /* Version of header */ + char tod_ext[STORE_CLOCK_EXT_SIZE]; /* TOD clock for d2fc */ + u64 count; /* Number of VM guests in d2fc buffer */ + char reserved[30]; +} __attribute__ ((packed)); + +struct dbfs_d2fc { + struct dbfs_d2fc_hdr hdr; /* 64 byte header */ + char buf[]; /* d2fc buffer */ +} __attribute__ ((packed)); + +static int dbfs_diag2fc_create(void **data, void **data_free_ptr, size_t *size) +{ + struct dbfs_d2fc *d2fc; + unsigned int count; + + d2fc = diag2fc_store(guest_query, &count, sizeof(d2fc->hdr)); + if (IS_ERR(d2fc)) + return PTR_ERR(d2fc); + get_tod_clock_ext(d2fc->hdr.tod_ext); + d2fc->hdr.len = count * sizeof(struct diag2fc_data); + d2fc->hdr.version = DBFS_D2FC_HDR_VERSION; + d2fc->hdr.count = count; + memset(&d2fc->hdr.reserved, 0, sizeof(d2fc->hdr.reserved)); + *data = d2fc; + *data_free_ptr = d2fc; + *size = d2fc->hdr.len + sizeof(struct dbfs_d2fc_hdr); + return 0; +} + +static struct hypfs_dbfs_file dbfs_file_2fc = { + .name = "diag_2fc", + .data_create = dbfs_diag2fc_create, + .data_free = diag2fc_free, +}; + +int hypfs_vm_init(void) +{ + if (!MACHINE_IS_VM) + return 0; + if (diag2fc(0, all_guests, NULL) > 0) + guest_query = all_guests; + else if (diag2fc(0, local_guest, NULL) > 0) + guest_query = local_guest; + else + return -EACCES; + return hypfs_dbfs_create_file(&dbfs_file_2fc); +} + +void hypfs_vm_exit(void) +{ + if (!MACHINE_IS_VM) + return; + hypfs_dbfs_remove_file(&dbfs_file_2fc); +} diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c new file mode 100644 index 000000000..2eeb0a0f5 --- /dev/null +++ b/arch/s390/hypfs/inode.c @@ -0,0 +1,522 @@ +/* + * Hypervisor filesystem for Linux on s390. + * + * Copyright IBM Corp. 
2006, 2008 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#define KMSG_COMPONENT "hypfs" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/vfs.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <linux/time.h> +#include <linux/parser.h> +#include <linux/sysfs.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/mount.h> +#include <linux/uio.h> +#include <asm/ebcdic.h> +#include "hypfs.h" + +#define HYPFS_MAGIC 0x687970 /* ASCII 'hyp' */ +#define TMP_SIZE 64 /* size of temporary buffers */ + +static struct dentry *hypfs_create_update_file(struct dentry *dir); + +struct hypfs_sb_info { + kuid_t uid; /* uid used for files and dirs */ + kgid_t gid; /* gid used for files and dirs */ + struct dentry *update_file; /* file to trigger update */ + time_t last_update; /* last update time in secs since 1970 */ + struct mutex lock; /* lock to protect update process */ +}; + +static const struct file_operations hypfs_file_ops; +static struct file_system_type hypfs_type; +static const struct super_operations hypfs_s_ops; + +/* start of list of all dentries, which have to be deleted on update */ +static struct dentry *hypfs_last_dentry; + +static void hypfs_update_update(struct super_block *sb) +{ + struct hypfs_sb_info *sb_info = sb->s_fs_info; + struct inode *inode = d_inode(sb_info->update_file); + + sb_info->last_update = get_seconds(); + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; +} + +/* directory tree removal functions */ + +static void hypfs_add_dentry(struct dentry *dentry) +{ + dentry->d_fsdata = hypfs_last_dentry; + hypfs_last_dentry = dentry; +} + +static inline int hypfs_positive(struct dentry *dentry) +{ + return d_really_is_positive(dentry) && !d_unhashed(dentry); +} + +static void hypfs_remove(struct dentry *dentry) +{ + struct dentry *parent; + + parent = dentry->d_parent; + 
mutex_lock(&d_inode(parent)->i_mutex); + if (hypfs_positive(dentry)) { + if (d_is_dir(dentry)) + simple_rmdir(d_inode(parent), dentry); + else + simple_unlink(d_inode(parent), dentry); + } + d_delete(dentry); + dput(dentry); + mutex_unlock(&d_inode(parent)->i_mutex); +} + +static void hypfs_delete_tree(struct dentry *root) +{ + while (hypfs_last_dentry) { + struct dentry *next_dentry; + next_dentry = hypfs_last_dentry->d_fsdata; + hypfs_remove(hypfs_last_dentry); + hypfs_last_dentry = next_dentry; + } +} + +static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode) +{ + struct inode *ret = new_inode(sb); + + if (ret) { + struct hypfs_sb_info *hypfs_info = sb->s_fs_info; + ret->i_ino = get_next_ino(); + ret->i_mode = mode; + ret->i_uid = hypfs_info->uid; + ret->i_gid = hypfs_info->gid; + ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; + if (S_ISDIR(mode)) + set_nlink(ret, 2); + } + return ret; +} + +static void hypfs_evict_inode(struct inode *inode) +{ + clear_inode(inode); + kfree(inode->i_private); +} + +static int hypfs_open(struct inode *inode, struct file *filp) +{ + char *data = file_inode(filp)->i_private; + struct hypfs_sb_info *fs_info; + + if (filp->f_mode & FMODE_WRITE) { + if (!(inode->i_mode & S_IWUGO)) + return -EACCES; + } + if (filp->f_mode & FMODE_READ) { + if (!(inode->i_mode & S_IRUGO)) + return -EACCES; + } + + fs_info = inode->i_sb->s_fs_info; + if(data) { + mutex_lock(&fs_info->lock); + filp->private_data = kstrdup(data, GFP_KERNEL); + if (!filp->private_data) { + mutex_unlock(&fs_info->lock); + return -ENOMEM; + } + mutex_unlock(&fs_info->lock); + } + return nonseekable_open(inode, filp); +} + +static ssize_t hypfs_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + char *data = file->private_data; + size_t available = strlen(data); + loff_t pos = iocb->ki_pos; + size_t count; + + if (pos < 0) + return -EINVAL; + if (pos >= available || !iov_iter_count(to)) + return 0; + 
count = copy_to_iter(data + pos, available - pos, to); + if (!count) + return -EFAULT; + iocb->ki_pos = pos + count; + file_accessed(file); + return count; +} + +static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + int rc; + struct super_block *sb = file_inode(iocb->ki_filp)->i_sb; + struct hypfs_sb_info *fs_info = sb->s_fs_info; + size_t count = iov_iter_count(from); + + /* + * Currently we only allow one update per second for two reasons: + * 1. diag 204 is VERY expensive + * 2. If several processes do updates in parallel and then read the + * hypfs data, the likelihood of collisions is reduced, if we restrict + * the minimum update interval. A collision occurs, if during the + * data gathering of one process another process triggers an update + * If the first process wants to ensure consistent data, it has + * to restart data collection in this case. + */ + mutex_lock(&fs_info->lock); + if (fs_info->last_update == get_seconds()) { + rc = -EBUSY; + goto out; + } + hypfs_delete_tree(sb->s_root); + if (MACHINE_IS_VM) + rc = hypfs_vm_create_files(sb->s_root); + else + rc = hypfs_diag_create_files(sb->s_root); + if (rc) { + pr_err("Updating the hypfs tree failed\n"); + hypfs_delete_tree(sb->s_root); + goto out; + } + hypfs_update_update(sb); + rc = count; + iov_iter_advance(from, count); +out: + mutex_unlock(&fs_info->lock); + return rc; +} + +static int hypfs_release(struct inode *inode, struct file *filp) +{ + kfree(filp->private_data); + return 0; +} + +enum { opt_uid, opt_gid, opt_err }; + +static const match_table_t hypfs_tokens = { + {opt_uid, "uid=%u"}, + {opt_gid, "gid=%u"}, + {opt_err, NULL} +}; + +static int hypfs_parse_options(char *options, struct super_block *sb) +{ + char *str; + substring_t args[MAX_OPT_ARGS]; + kuid_t uid; + kgid_t gid; + + if (!options) + return 0; + while ((str = strsep(&options, ",")) != NULL) { + int token, option; + struct hypfs_sb_info *hypfs_info = sb->s_fs_info; + + if (!*str) + continue; + token = 
match_token(str, hypfs_tokens, args); + switch (token) { + case opt_uid: + if (match_int(&args[0], &option)) + return -EINVAL; + uid = make_kuid(current_user_ns(), option); + if (!uid_valid(uid)) + return -EINVAL; + hypfs_info->uid = uid; + break; + case opt_gid: + if (match_int(&args[0], &option)) + return -EINVAL; + gid = make_kgid(current_user_ns(), option); + if (!gid_valid(gid)) + return -EINVAL; + hypfs_info->gid = gid; + break; + case opt_err: + default: + pr_err("%s is not a valid mount option\n", str); + return -EINVAL; + } + } + return 0; +} + +static int hypfs_show_options(struct seq_file *s, struct dentry *root) +{ + struct hypfs_sb_info *hypfs_info = root->d_sb->s_fs_info; + + seq_printf(s, ",uid=%u", from_kuid_munged(&init_user_ns, hypfs_info->uid)); + seq_printf(s, ",gid=%u", from_kgid_munged(&init_user_ns, hypfs_info->gid)); + return 0; +} + +static int hypfs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct inode *root_inode; + struct dentry *root_dentry; + int rc = 0; + struct hypfs_sb_info *sbi; + + sbi = kzalloc(sizeof(struct hypfs_sb_info), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + mutex_init(&sbi->lock); + sbi->uid = current_uid(); + sbi->gid = current_gid(); + sb->s_fs_info = sbi; + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = HYPFS_MAGIC; + sb->s_op = &hypfs_s_ops; + if (hypfs_parse_options(data, sb)) + return -EINVAL; + root_inode = hypfs_make_inode(sb, S_IFDIR | 0755); + if (!root_inode) + return -ENOMEM; + root_inode->i_op = &simple_dir_inode_operations; + root_inode->i_fop = &simple_dir_operations; + sb->s_root = root_dentry = d_make_root(root_inode); + if (!root_dentry) + return -ENOMEM; + if (MACHINE_IS_VM) + rc = hypfs_vm_create_files(root_dentry); + else + rc = hypfs_diag_create_files(root_dentry); + if (rc) + return rc; + sbi->update_file = hypfs_create_update_file(root_dentry); + if (IS_ERR(sbi->update_file)) + return PTR_ERR(sbi->update_file); + 
hypfs_update_update(sb); + pr_info("Hypervisor filesystem mounted\n"); + return 0; +} + +static struct dentry *hypfs_mount(struct file_system_type *fst, int flags, + const char *devname, void *data) +{ + return mount_single(fst, flags, data, hypfs_fill_super); +} + +static void hypfs_kill_super(struct super_block *sb) +{ + struct hypfs_sb_info *sb_info = sb->s_fs_info; + + if (sb->s_root) + hypfs_delete_tree(sb->s_root); + if (sb_info->update_file) + hypfs_remove(sb_info->update_file); + kfree(sb->s_fs_info); + sb->s_fs_info = NULL; + kill_litter_super(sb); +} + +static struct dentry *hypfs_create_file(struct dentry *parent, const char *name, + char *data, umode_t mode) +{ + struct dentry *dentry; + struct inode *inode; + + mutex_lock(&d_inode(parent)->i_mutex); + dentry = lookup_one_len(name, parent, strlen(name)); + if (IS_ERR(dentry)) { + dentry = ERR_PTR(-ENOMEM); + goto fail; + } + inode = hypfs_make_inode(parent->d_sb, mode); + if (!inode) { + dput(dentry); + dentry = ERR_PTR(-ENOMEM); + goto fail; + } + if (S_ISREG(mode)) { + inode->i_fop = &hypfs_file_ops; + if (data) + inode->i_size = strlen(data); + else + inode->i_size = 0; + } else if (S_ISDIR(mode)) { + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + inc_nlink(d_inode(parent)); + } else + BUG(); + inode->i_private = data; + d_instantiate(dentry, inode); + dget(dentry); +fail: + mutex_unlock(&d_inode(parent)->i_mutex); + return dentry; +} + +struct dentry *hypfs_mkdir(struct dentry *parent, const char *name) +{ + struct dentry *dentry; + + dentry = hypfs_create_file(parent, name, NULL, S_IFDIR | DIR_MODE); + if (IS_ERR(dentry)) + return dentry; + hypfs_add_dentry(dentry); + return dentry; +} + +static struct dentry *hypfs_create_update_file(struct dentry *dir) +{ + struct dentry *dentry; + + dentry = hypfs_create_file(dir, "update", NULL, + S_IFREG | UPDATE_FILE_MODE); + /* + * We do not put the update file on the 'delete' list with + * hypfs_add_dentry(), since it 
should not be removed when the tree + * is updated. + */ + return dentry; +} + +struct dentry *hypfs_create_u64(struct dentry *dir, + const char *name, __u64 value) +{ + char *buffer; + char tmp[TMP_SIZE]; + struct dentry *dentry; + + snprintf(tmp, TMP_SIZE, "%llu\n", (unsigned long long int)value); + buffer = kstrdup(tmp, GFP_KERNEL); + if (!buffer) + return ERR_PTR(-ENOMEM); + dentry = + hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE); + if (IS_ERR(dentry)) { + kfree(buffer); + return ERR_PTR(-ENOMEM); + } + hypfs_add_dentry(dentry); + return dentry; +} + +struct dentry *hypfs_create_str(struct dentry *dir, + const char *name, char *string) +{ + char *buffer; + struct dentry *dentry; + + buffer = kmalloc(strlen(string) + 2, GFP_KERNEL); + if (!buffer) + return ERR_PTR(-ENOMEM); + sprintf(buffer, "%s\n", string); + dentry = + hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE); + if (IS_ERR(dentry)) { + kfree(buffer); + return ERR_PTR(-ENOMEM); + } + hypfs_add_dentry(dentry); + return dentry; +} + +static const struct file_operations hypfs_file_ops = { + .open = hypfs_open, + .release = hypfs_release, + .read_iter = hypfs_read_iter, + .write_iter = hypfs_write_iter, + .llseek = no_llseek, +}; + +static struct file_system_type hypfs_type = { + .owner = THIS_MODULE, + .name = "s390_hypfs", + .mount = hypfs_mount, + .kill_sb = hypfs_kill_super +}; +MODULE_ALIAS_FS("s390_hypfs"); + +static const struct super_operations hypfs_s_ops = { + .statfs = simple_statfs, + .evict_inode = hypfs_evict_inode, + .show_options = hypfs_show_options, +}; + +static int __init hypfs_init(void) +{ + int rc; + + rc = hypfs_dbfs_init(); + if (rc) + return rc; + if (hypfs_diag_init()) { + rc = -ENODATA; + goto fail_dbfs_exit; + } + if (hypfs_vm_init()) { + rc = -ENODATA; + goto fail_hypfs_diag_exit; + } + if (hypfs_sprp_init()) { + rc = -ENODATA; + goto fail_hypfs_vm_exit; + } + if (hypfs_diag0c_init()) { + rc = -ENODATA; + goto fail_hypfs_sprp_exit; + } + rc = 
sysfs_create_mount_point(hypervisor_kobj, "s390"); + if (rc) + goto fail_hypfs_diag0c_exit; + rc = register_filesystem(&hypfs_type); + if (rc) + goto fail_filesystem; + return 0; + +fail_filesystem: + sysfs_remove_mount_point(hypervisor_kobj, "s390"); +fail_hypfs_diag0c_exit: + hypfs_diag0c_exit(); +fail_hypfs_sprp_exit: + hypfs_sprp_exit(); +fail_hypfs_vm_exit: + hypfs_vm_exit(); +fail_hypfs_diag_exit: + hypfs_diag_exit(); +fail_dbfs_exit: + hypfs_dbfs_exit(); + pr_err("Initialization of hypfs failed with rc=%i\n", rc); + return rc; +} + +static void __exit hypfs_exit(void) +{ + unregister_filesystem(&hypfs_type); + sysfs_remove_mount_point(hypervisor_kobj, "s390"); + hypfs_diag0c_exit(); + hypfs_sprp_exit(); + hypfs_vm_exit(); + hypfs_diag_exit(); + hypfs_dbfs_exit(); +} + +module_init(hypfs_init) +module_exit(hypfs_exit) + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michael Holzheu <holzheu@de.ibm.com>"); +MODULE_DESCRIPTION("s390 Hypervisor Filesystem"); |