From d26f4ddb48463e2ff798859505af1cc520e75685 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Fabian=20Silva=20Delgado?= Date: Wed, 23 Nov 2016 01:35:55 -0300 Subject: Linux-libre 4.8.10-gnu --- include/linux/acpi.h | 1 + include/linux/backing-dev-defs.h | 2 + include/linux/blk_types.h | 16 +++- include/linux/blkdev.h | 21 +++++- include/linux/frontswap.h | 5 +- include/linux/fs.h | 3 + include/linux/sunrpc/svc_rdma.h | 9 +++ include/linux/wbt.h | 141 ++++++++++++++++++++++++++++++++++++ include/linux/writeback.h | 10 +++ include/net/ip.h | 1 - include/net/ip6_tunnel.h | 1 + include/net/sock.h | 4 +- include/net/tcp.h | 1 + include/trace/events/wbt.h | 153 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/atm_zatm.h | 1 - 15 files changed, 361 insertions(+), 8 deletions(-) create mode 100644 include/linux/wbt.h create mode 100644 include/trace/events/wbt.h (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c5eaf2f80..67d1d3ebb 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -318,6 +318,7 @@ struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); bool acpi_isa_irq_available(int irq); +void acpi_penalize_sci_irq(int irq, int trigger, int polarity); void acpi_pci_irq_disable (struct pci_dev *dev); extern int ec_read(u8 addr, u8 *val); diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index c357f27d5..dc5f76d7f 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -116,6 +116,8 @@ struct bdi_writeback { struct list_head work_list; struct delayed_work dwork; /* work item used for writeback */ + unsigned long dirty_sleep; /* last wait */ + struct list_head bdi_node; /* anchored at bdi->wb_list */ #ifdef CONFIG_CGROUP_WRITEBACK diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 436f43f87..95fbfa1fe 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -155,6 +155,7 @@ enum rq_flag_bits { __REQ_INTEGRITY, /* I/O includes block integrity payload */ __REQ_FUA, /* forced unit access */ __REQ_PREFLUSH, /* request for cache flush */ + __REQ_BG, /* background activity */ /* bio only flags */ __REQ_RAHEAD, /* read ahead, can fail anytime */ @@ -198,7 +199,7 @@ enum rq_flag_bits { (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | REQ_NOIDLE | \ - REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE) + REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE | REQ_BG) #define REQ_CLONE_MASK REQ_COMMON_MASK /* This mask is used for both bio and request merge checking */ @@ -223,6 +224,7 @@ enum rq_flag_bits { #define REQ_COPY_USER (1ULL << __REQ_COPY_USER) #define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) #define REQ_FLUSH_SEQ (1ULL << __REQ_FLUSH_SEQ) +#define REQ_BG (1ULL << __REQ_BG) #define REQ_IO_STAT (1ULL << __REQ_IO_STAT) #define REQ_MIXED_MERGE (1ULL << __REQ_MIXED_MERGE) #define REQ_PM (1ULL << __REQ_PM) @@ -264,4 +266,16 @@ static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) return cookie & ((1u << BLK_QC_T_SHIFT) - 1); } +#define BLK_RQ_STAT_BATCH 64 + +struct blk_rq_stat { + s64 mean; + u64 min; + u64 max; + s32 nr_samples; + s32 nr_batch; + u64 batch; + s64 time; +}; + #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9de483535..17d4bc5d4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -24,6 +24,7 @@ #include #include #include +#include struct module; struct scsi_ioctl_command; @@ -37,10 +38,11 @@ struct bsg_job; struct blkcg_gq; struct blk_flush_queue; struct pr_ops; +struct rq_wb; #define BLKDEV_MIN_RQ 4 #ifdef CONFIG_PCK_INTERACTIVE -#define BLKDEV_MAX_RQ 16 +#define BLKDEV_MAX_RQ 512 #else #define BLKDEV_MAX_RQ 128 /* Default maximum */ #endif @@ -155,6 +157,7 @@ struct request { struct gendisk *rq_disk; struct hd_struct *part; unsigned long start_time; + struct wb_issue_stat wb_stat; #ifdef CONFIG_BLK_CGROUP struct request_list *rl; /* rl this rq is alloced from */ unsigned long long start_time_ns; @@ -306,6 +309,8 @@ struct request_queue { int nr_rqs[2]; /* # allocated [a]sync rqs */ int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ + struct rq_wb *rq_wb; + /* * If blkcg is not used, @q->root_rl serves all requests. If blkcg * is used, root blkg allocates from @q->root_rl and all other @@ -331,6 +336,8 @@ struct request_queue { struct blk_mq_ctx __percpu *queue_ctx; unsigned int nr_queues; + unsigned int queue_depth; + /* hw dispatch queues */ struct blk_mq_hw_ctx **queue_hw_ctx; unsigned int nr_hw_queues; @@ -416,6 +423,9 @@ struct request_queue { unsigned int nr_sorted; unsigned int in_flight[2]; + + struct blk_rq_stat rq_stats[2]; + /* * Number of active block driver functions for which blk_drain_queue() * must wait. Must be incremented around functions that unlock the @@ -687,6 +697,14 @@ static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b) return false; } +static inline unsigned int blk_queue_depth(struct request_queue *q) +{ + if (q->queue_depth) + return q->queue_depth; + + return q->nr_requests; +} + /* * q->prep_rq_fn return values */ @@ -1003,6 +1021,7 @@ extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); +extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); extern void blk_set_default_limits(struct queue_limits *lim); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index c46d2aa16..1d18af034 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -106,8 +106,9 @@ static inline void frontswap_invalidate_area(unsigned type) static inline void frontswap_init(unsigned type, unsigned long *map) { - if (frontswap_enabled()) - __frontswap_init(type, map); +#ifdef CONFIG_FRONTSWAP + __frontswap_init(type, map); +#endif } #endif /* _LINUX_FRONTSWAP_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 1b5999474..6f180afdd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -189,6 +189,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded * by a cache flush and data is guaranteed to be on * non-volatile media on completion. + * WRITE_BG Background write. This is for background activity like + * the periodic flush and background threshold writeback * */ #define RW_MASK REQ_OP_WRITE @@ -202,6 +204,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define WRITE_FLUSH (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH) #define WRITE_FUA (REQ_SYNC | REQ_NOIDLE | REQ_FUA) #define WRITE_FLUSH_FUA (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA) +#define WRITE_BG (REQ_NOIDLE | REQ_BG) /* * Attribute flags. These should be or-ed together to figure out what diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index d6917b896..3584bc886 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -86,6 +86,7 @@ struct svc_rdma_op_ctxt { unsigned long flags; enum dma_data_direction direction; int count; + unsigned int mapped_sges; struct ib_sge sge[RPCSVC_MAXPAGES]; struct page *pages[RPCSVC_MAXPAGES]; }; @@ -193,6 +194,14 @@ struct svcxprt_rdma { #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD +/* Track DMA maps for this transport and context */ +static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma, + struct svc_rdma_op_ctxt *ctxt) +{ + ctxt->mapped_sges++; + atomic_inc(&rdma->sc_dma_used); +} + /* svc_rdma_backchannel.c */ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp, diff --git a/include/linux/wbt.h b/include/linux/wbt.h new file mode 100644 index 000000000..68ba75e3a --- /dev/null +++ b/include/linux/wbt.h @@ -0,0 +1,141 @@ +#ifndef WB_THROTTLE_H +#define WB_THROTTLE_H + +#include +#include +#include +#include + +enum wbt_flags { + WBT_TRACKED = 1, /* write, tracked for throttling */ + WBT_READ = 2, /* read */ + WBT_KSWAPD = 4, /* write, from kswapd */ + + WBT_NR_BITS = 3, /* number of bits */ +}; + +enum { + /* + * Set aside 3 bits for state, rest is a time stamp + */ + ISSUE_STAT_SHIFT = 64 - WBT_NR_BITS, + ISSUE_STAT_MASK = ~((1ULL << ISSUE_STAT_SHIFT) - 1), + ISSUE_STAT_TIME_MASK = ~ISSUE_STAT_MASK, + + WBT_NUM_RWQ = 2, +}; + +struct wb_issue_stat { + u64 time; +}; + +static inline void wbt_issue_stat_set_time(struct wb_issue_stat *stat) +{ + stat->time = (stat->time & ISSUE_STAT_MASK) | + (ktime_to_ns(ktime_get()) & ISSUE_STAT_TIME_MASK); +} + +static inline u64 wbt_issue_stat_get_time(struct wb_issue_stat *stat) +{ + return stat->time & ISSUE_STAT_TIME_MASK; +} + +static inline void wbt_clear_state(struct wb_issue_stat *stat) +{ + stat->time &= ISSUE_STAT_TIME_MASK; +} + +static inline enum wbt_flags wbt_stat_to_mask(struct wb_issue_stat *stat) +{ + return (stat->time & ISSUE_STAT_MASK) >> ISSUE_STAT_SHIFT; +} + +static inline void wbt_track(struct wb_issue_stat *stat, enum wbt_flags wb_acct) +{ + stat->time |= ((u64) wb_acct) << ISSUE_STAT_SHIFT; +} + +static inline bool wbt_is_tracked(struct wb_issue_stat *stat) +{ + return (stat->time >> ISSUE_STAT_SHIFT) & WBT_TRACKED; +} + +static inline bool wbt_is_read(struct wb_issue_stat *stat) +{ + return (stat->time >> ISSUE_STAT_SHIFT) & WBT_READ; +} + +struct wb_stat_ops { + void (*get)(void *, struct blk_rq_stat *); + bool (*is_current)(struct blk_rq_stat *); + void (*clear)(void *); +}; + +struct rq_wait { + wait_queue_head_t wait; + atomic_t inflight; +}; + +struct rq_wb { + /* + * Settings that govern how we throttle + */ + unsigned int wb_background; /* background writeback */ + unsigned int wb_normal; /* normal writeback */ + unsigned int wb_max; /* max throughput writeback */ + int scale_step; + bool scaled_max; + + /* + * Number of consecutive periods where we don't have enough + * information to make a firm scale up/down decision. + */ + unsigned int unknown_cnt; + + u64 win_nsec; /* default window size */ + u64 cur_win_nsec; /* current window size */ + + struct timer_list window_timer; + + s64 sync_issue; + void *sync_cookie; + + unsigned int wc; + unsigned int queue_depth; + + unsigned long last_issue; /* last non-throttled issue */ + unsigned long last_comp; /* last non-throttled comp */ + unsigned long min_lat_nsec; + struct backing_dev_info *bdi; + struct rq_wait rq_wait[WBT_NUM_RWQ]; + + struct wb_stat_ops *stat_ops; + void *ops_data; +}; + +static inline unsigned int wbt_inflight(struct rq_wb *rwb) +{ + unsigned int i, ret = 0; + + for (i = 0; i < WBT_NUM_RWQ; i++) + ret += atomic_read(&rwb->rq_wait[i].inflight); + + return ret; +} + +struct backing_dev_info; + +void __wbt_done(struct rq_wb *, enum wbt_flags); +void wbt_done(struct rq_wb *, struct wb_issue_stat *); +enum wbt_flags wbt_wait(struct rq_wb *, unsigned int, spinlock_t *); +struct rq_wb *wbt_init(struct backing_dev_info *, struct wb_stat_ops *, void *); +void wbt_exit(struct rq_wb *); +void wbt_update_limits(struct rq_wb *); +void wbt_requeue(struct rq_wb *, struct wb_issue_stat *); +void wbt_issue(struct rq_wb *, struct wb_issue_stat *); +void wbt_disable(struct rq_wb *); + +void wbt_set_queue_depth(struct rq_wb *, unsigned int); +void wbt_set_write_cache(struct rq_wb *, bool); + +#endif diff --git a/include/linux/writeback.h b/include/linux/writeback.h index fc1e16c25..e53abf2bf 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -100,6 +100,16 @@ struct writeback_control { #endif }; +static inline int wbc_to_write_flags(struct writeback_control *wbc) +{ + if (wbc->sync_mode == WB_SYNC_ALL) + return WRITE_SYNC; + else if (wbc->for_kupdate || wbc->for_background) + return WRITE_BG; + + return 0; +} + /* * A wb_domain represents a domain that wb's (bdi_writeback's) belong to * and are measured against each other in. There always is one global diff --git a/include/net/ip.h b/include/net/ip.h index 156b0c11b..0ccf6daf6 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -47,7 +47,6 @@ struct inet_skb_parm { #define IPSKB_REROUTED BIT(4) #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) -#define IPSKB_FRAG_SEGS BIT(7) u16 frag_max_size; }; diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 43a5a0e45..b01d5d1d7 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -145,6 +145,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, { int pkt_len, err; + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); pkt_len = skb->len - skb_inner_network_offset(skb); err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); if (unlikely(net_xmit_eval(err))) diff --git a/include/net/sock.h b/include/net/sock.h index 8741988e6..c26eab962 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1587,11 +1587,11 @@ static inline void sock_put(struct sock *sk) void sock_gen_put(struct sock *sk); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, - unsigned int trim_cap); + unsigned int trim_cap, bool refcounted); static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) { - return __sk_receive_skb(sk, skb, nested, 1); + return __sk_receive_skb(sk, skb, nested, 1, true); } static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) diff --git a/include/net/tcp.h b/include/net/tcp.h index a73218c3a..eb89754be 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1170,6 +1170,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp) } bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); +int tcp_filter(struct sock *sk, struct sk_buff *skb); #undef STATE_TRACE diff --git a/include/trace/events/wbt.h b/include/trace/events/wbt.h new file mode 100644 index 000000000..926c7ee0e --- /dev/null +++ b/include/trace/events/wbt.h @@ -0,0 +1,153 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM wbt + +#if !defined(_TRACE_WBT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_WBT_H + +#include +#include + +/** + * wbt_stat - trace stats for blk_wb + * @stat: array of read/write stats + */ +TRACE_EVENT(wbt_stat, + + TP_PROTO(struct backing_dev_info *bdi, struct blk_rq_stat *stat), + + TP_ARGS(bdi, stat), + + TP_STRUCT__entry( + __array(char, name, 32) + __field(s64, rmean) + __field(u64, rmin) + __field(u64, rmax) + __field(s64, rnr_samples) + __field(s64, rtime) + __field(s64, wmean) + __field(u64, wmin) + __field(u64, wmax) + __field(s64, wnr_samples) + __field(s64, wtime) + ), + + TP_fast_assign( + strncpy(__entry->name, dev_name(bdi->dev), 32); + __entry->rmean = stat[0].mean; + __entry->rmin = stat[0].min; + __entry->rmax = stat[0].max; + __entry->rnr_samples = stat[0].nr_samples; + __entry->wmean = stat[1].mean; + __entry->wmin = stat[1].min; + __entry->wmax = stat[1].max; + __entry->wnr_samples = stat[1].nr_samples; + ), + + TP_printk("%s: rmean=%llu, rmin=%llu, rmax=%llu, rsamples=%llu, " + "wmean=%llu, wmin=%llu, wmax=%llu, wsamples=%llu\n", + __entry->name, __entry->rmean, __entry->rmin, __entry->rmax, + __entry->rnr_samples, __entry->wmean, __entry->wmin, + __entry->wmax, __entry->wnr_samples) +); + +/** + * wbt_lat - trace latency event + * @lat: latency trigger + */ +TRACE_EVENT(wbt_lat, + + TP_PROTO(struct backing_dev_info *bdi, unsigned long lat), + + TP_ARGS(bdi, lat), + + TP_STRUCT__entry( + __array(char, name, 32) + __field(unsigned long, lat) + ), + + TP_fast_assign( + strncpy(__entry->name, dev_name(bdi->dev), 32); + __entry->lat = div_u64(lat, 1000); + ), + + TP_printk("%s: latency %lluus\n", __entry->name, + (unsigned long long) __entry->lat) +); + +/** + * wbt_step - trace wb event step + * @msg: context message + * @step: the current scale step count + * @window: the current monitoring window + * @bg: the current background queue limit + * @normal: the current normal writeback limit + * @max: the current max throughput writeback limit + */ +TRACE_EVENT(wbt_step, + + TP_PROTO(struct backing_dev_info *bdi, const char *msg, + int step, unsigned long window, unsigned int bg, + unsigned int normal, unsigned int max), + + TP_ARGS(bdi, msg, step, window, bg, normal, max), + + TP_STRUCT__entry( + __array(char, name, 32) + __field(const char *, msg) + __field(int, step) + __field(unsigned long, window) + __field(unsigned int, bg) + __field(unsigned int, normal) + __field(unsigned int, max) + ), + + TP_fast_assign( + strncpy(__entry->name, dev_name(bdi->dev), 32); + __entry->msg = msg; + __entry->step = step; + __entry->window = div_u64(window, 1000); + __entry->bg = bg; + __entry->normal = normal; + __entry->max = max; + ), + + TP_printk("%s: %s: step=%d, window=%luus, background=%u, normal=%u, max=%u\n", + __entry->name, __entry->msg, __entry->step, __entry->window, + __entry->bg, __entry->normal, __entry->max) +); + +/** + * wbt_timer - trace wb timer event + * @status: timer state status + * @step: the current scale step count + * @inflight: tracked writes inflight + */ +TRACE_EVENT(wbt_timer, + + TP_PROTO(struct backing_dev_info *bdi, unsigned int status, + int step, unsigned int inflight), + + TP_ARGS(bdi, status, step, inflight), + + TP_STRUCT__entry( + __array(char, name, 32) + __field(unsigned int, status) + __field(int, step) + __field(unsigned int, inflight) + ), + + TP_fast_assign( + strncpy(__entry->name, dev_name(bdi->dev), 32); + __entry->status = status; + __entry->step = step; + __entry->inflight = inflight; + ), + + TP_printk("%s: status=%u, step=%d, inflight=%u\n", __entry->name, + __entry->status, __entry->step, __entry->inflight) +); + +#endif /* _TRACE_WBT_H */ + +/* This part must be outside protection */ +#include diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h index 5cd4d4d2d..9c9c6ad55 100644 --- a/include/uapi/linux/atm_zatm.h +++ b/include/uapi/linux/atm_zatm.h @@ -14,7 +14,6 @@ #include #include -#include #define ZATM_GETPOOL _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc) /* get pool statistics */ -- cgit v1.2.3