summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c96
-rw-r--r--net/9p/client.c10
-rw-r--r--net/9p/trans_rdma.c4
-rw-r--r--net/9p/trans_virtio.c1
-rw-r--r--net/Kconfig3
-rw-r--r--net/appletalk/ddp.c2
-rw-r--r--net/atm/common.c4
-rw-r--r--net/atm/common.h2
-rw-r--r--net/atm/pvc.c2
-rw-r--r--net/atm/svc.c2
-rw-r--r--net/ax25/af_ax25.c35
-rw-r--r--net/ax25/ax25_in.c3
-rw-r--r--net/ax25/ax25_ip.c1
-rw-r--r--net/ax25/ax25_out.c1
-rw-r--r--net/ax25/ax25_subr.c1
-rw-r--r--net/ax25/ax25_uid.c1
-rw-r--r--net/batman-adv/Makefile6
-rw-r--r--net/batman-adv/bat_algo.h2
-rw-r--r--net/batman-adv/bat_iv_ogm.c210
-rw-r--r--net/batman-adv/bitarray.c6
-rw-r--r--net/batman-adv/bitarray.h8
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c56
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h12
-rw-r--r--net/batman-adv/debugfs.c47
-rw-r--r--net/batman-adv/debugfs.h43
-rw-r--r--net/batman-adv/distributed-arp-table.c65
-rw-r--r--net/batman-adv/distributed-arp-table.h15
-rw-r--r--net/batman-adv/fragmentation.c44
-rw-r--r--net/batman-adv/fragmentation.h11
-rw-r--r--net/batman-adv/gateway_client.c43
-rw-r--r--net/batman-adv/gateway_client.h10
-rw-r--r--net/batman-adv/gateway_common.c13
-rw-r--r--net/batman-adv/gateway_common.h9
-rw-r--r--net/batman-adv/hard-interface.c40
-rw-r--r--net/batman-adv/hard-interface.h13
-rw-r--r--net/batman-adv/hash.c8
-rw-r--r--net/batman-adv/hash.h33
-rw-r--r--net/batman-adv/icmp_socket.c35
-rw-r--r--net/batman-adv/icmp_socket.h9
-rw-r--r--net/batman-adv/main.c103
-rw-r--r--net/batman-adv/main.h40
-rw-r--r--net/batman-adv/multicast.c31
-rw-r--r--net/batman-adv/multicast.h8
-rw-r--r--net/batman-adv/network-coding.c53
-rw-r--r--net/batman-adv/network-coding.h15
-rw-r--r--net/batman-adv/originator.c36
-rw-r--r--net/batman-adv/originator.h28
-rw-r--r--net/batman-adv/packet.h5
-rw-r--r--net/batman-adv/routing.c38
-rw-r--r--net/batman-adv/routing.h12
-rw-r--r--net/batman-adv/send.c40
-rw-r--r--net/batman-adv/send.h15
-rw-r--r--net/batman-adv/soft-interface.c75
-rw-r--r--net/batman-adv/soft-interface.h13
-rw-r--r--net/batman-adv/sysfs.c62
-rw-r--r--net/batman-adv/sysfs.h12
-rw-r--r--net/batman-adv/translation-table.c123
-rw-r--r--net/batman-adv/translation-table.h11
-rw-r--r--net/batman-adv/types.h33
-rw-r--r--net/bluetooth/6lowpan.c15
-rw-r--r--net/bluetooth/Makefile3
-rw-r--r--net/bluetooth/bnep/sock.c2
-rw-r--r--net/bluetooth/cmtp/sock.c2
-rw-r--r--net/bluetooth/hci_conn.c4
-rw-r--r--net/bluetooth/hci_core.c153
-rw-r--r--net/bluetooth/hci_event.c113
-rw-r--r--net/bluetooth/hci_sock.c6
-rw-r--r--net/bluetooth/hidp/core.c1
-rw-r--r--net/bluetooth/hidp/sock.c2
-rw-r--r--net/bluetooth/l2cap_core.c17
-rw-r--r--net/bluetooth/l2cap_sock.c10
-rw-r--r--net/bluetooth/mgmt.c576
-rw-r--r--net/bluetooth/rfcomm/core.c2
-rw-r--r--net/bluetooth/rfcomm/sock.c28
-rw-r--r--net/bluetooth/sco.c13
-rw-r--r--net/bluetooth/smp.c158
-rw-r--r--net/bridge/Makefile2
-rw-r--r--net/bridge/br.c22
-rw-r--r--net/bridge/br_fdb.c38
-rw-r--r--net/bridge/br_forward.c28
-rw-r--r--net/bridge/br_if.c4
-rw-r--r--net/bridge/br_mdb.c18
-rw-r--r--net/bridge/br_multicast.c353
-rw-r--r--net/bridge/br_netfilter_hooks.c (renamed from net/bridge/br_netfilter.c)322
-rw-r--r--net/bridge/br_netfilter_ipv6.c245
-rw-r--r--net/bridge/br_netlink.c40
-rw-r--r--net/bridge/br_private.h13
-rw-r--r--net/bridge/br_stp.c18
-rw-r--r--net/bridge/br_stp_if.c15
-rw-r--r--net/bridge/br_stp_timer.c4
-rw-r--r--net/bridge/br_sysfs_if.c2
-rw-r--r--net/bridge/br_vlan.c60
-rw-r--r--net/bridge/netfilter/ebt_stp.c6
-rw-r--r--net/bridge/netfilter/ebtables.c4
-rw-r--r--net/caif/caif_socket.c21
-rw-r--r--net/can/af_can.c2
-rw-r--r--net/can/gw.c68
-rw-r--r--net/ceph/ceph_common.c66
-rw-r--r--net/ceph/crush/crush.c13
-rw-r--r--net/ceph/crush/crush_ln_table.h32
-rw-r--r--net/ceph/crush/hash.c8
-rw-r--r--net/ceph/crush/mapper.c148
-rw-r--r--net/ceph/messenger.c29
-rw-r--r--net/ceph/mon_client.c13
-rw-r--r--net/ceph/osd_client.c42
-rw-r--r--net/ceph/pagevec.c5
-rw-r--r--net/core/datagram.c57
-rw-r--r--net/core/dev.c263
-rw-r--r--net/core/dst.c4
-rw-r--r--net/core/ethtool.c13
-rw-r--r--net/core/filter.c261
-rw-r--r--net/core/flow_dissector.c658
-rw-r--r--net/core/gen_estimator.c13
-rw-r--r--net/core/neighbour.c3
-rw-r--r--net/core/net-sysfs.c10
-rw-r--r--net/core/net_namespace.c133
-rw-r--r--net/core/netclassid_cgroup.c3
-rw-r--r--net/core/netevent.c5
-rw-r--r--net/core/pktgen.c124
-rw-r--r--net/core/request_sock.c8
-rw-r--r--net/core/rtnetlink.c279
-rw-r--r--net/core/secure_seq.c2
-rw-r--r--net/core/skbuff.c388
-rw-r--r--net/core/sock.c64
-rw-r--r--net/core/sock_diag.c85
-rw-r--r--net/core/stream.c6
-rw-r--r--net/core/utils.c12
-rw-r--r--net/dccp/diag.c1
-rw-r--r--net/dccp/proto.c2
-rw-r--r--net/decnet/af_decnet.c8
-rw-r--r--net/dsa/dsa.c6
-rw-r--r--net/dsa/slave.c59
-rw-r--r--net/ethernet/eth.c15
-rw-r--r--net/ieee802154/6lowpan/core.c28
-rw-r--r--net/ieee802154/6lowpan/reassembly.c6
-rw-r--r--net/ieee802154/6lowpan/tx.c5
-rw-r--r--net/ieee802154/core.c2
-rw-r--r--net/ieee802154/nl-mac.c39
-rw-r--r--net/ieee802154/nl-phy.c10
-rw-r--r--net/ieee802154/nl802154.c316
-rw-r--r--net/ieee802154/rdev-ops.h23
-rw-r--r--net/ieee802154/socket.c22
-rw-r--r--net/ieee802154/trace.h38
-rw-r--r--net/ipv4/Kconfig24
-rw-r--r--net/ipv4/Makefile3
-rw-r--r--net/ipv4/af_inet.c11
-rw-r--r--net/ipv4/arp.c16
-rw-r--r--net/ipv4/datagram.c16
-rw-r--r--net/ipv4/devinet.c16
-rw-r--r--net/ipv4/esp4.c201
-rw-r--r--net/ipv4/fib_frontend.c29
-rw-r--r--net/ipv4/fib_lookup.h1
-rw-r--r--net/ipv4/fib_rules.c5
-rw-r--r--net/ipv4/fib_semantics.c138
-rw-r--r--net/ipv4/fib_trie.c84
-rw-r--r--net/ipv4/geneve_core.c (renamed from net/ipv4/geneve.c)10
-rw-r--r--net/ipv4/igmp.c165
-rw-r--r--net/ipv4/inet_connection_sock.c21
-rw-r--r--net/ipv4/inet_diag.c60
-rw-r--r--net/ipv4/inet_fragment.c40
-rw-r--r--net/ipv4/inet_hashtables.c56
-rw-r--r--net/ipv4/inet_timewait_sock.c2
-rw-r--r--net/ipv4/ip_forward.c18
-rw-r--r--net/ipv4/ip_fragment.c64
-rw-r--r--net/ipv4/ip_output.c88
-rw-r--r--net/ipv4/ip_sockglue.c7
-rw-r--r--net/ipv4/ip_tunnel.c8
-rw-r--r--net/ipv4/ip_tunnel_core.c20
-rw-r--r--net/ipv4/ipip.c3
-rw-r--r--net/ipv4/netfilter.c9
-rw-r--r--net/ipv4/netfilter/Kconfig3
-rw-r--r--net/ipv4/netfilter/arp_tables.c111
-rw-r--r--net/ipv4/netfilter/ip_tables.c99
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c5
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c7
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c2
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/route.c35
-rw-r--r--net/ipv4/syncookies.c10
-rw-r--r--net/ipv4/sysctl_net_ipv4.c15
-rw-r--r--net/ipv4/tcp.c104
-rw-r--r--net/ipv4/tcp_cdg.c433
-rw-r--r--net/ipv4/tcp_dctcp.c26
-rw-r--r--net/ipv4/tcp_diag.c6
-rw-r--r--net/ipv4/tcp_input.c142
-rw-r--r--net/ipv4/tcp_ipv4.c17
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_offload.c4
-rw-r--r--net/ipv4/tcp_output.c106
-rw-r--r--net/ipv4/tcp_timer.c4
-rw-r--r--net/ipv4/udp.c13
-rw-r--r--net/ipv4/udp_diag.c2
-rw-r--r--net/ipv4/udp_tunnel.c8
-rw-r--r--net/ipv6/Makefile1
-rw-r--r--net/ipv6/addrconf.c2
-rw-r--r--net/ipv6/af_inet6.c6
-rw-r--r--net/ipv6/datagram.c20
-rw-r--r--net/ipv6/esp6.c201
-rw-r--r--net/ipv6/icmp.c6
-rw-r--r--net/ipv6/inet6_hashtables.c8
-rw-r--r--net/ipv6/ip6_fib.c27
-rw-r--r--net/ipv6/ip6_flowlabel.c4
-rw-r--r--net/ipv6/ip6_gre.c1
-rw-r--r--net/ipv6/ip6_input.c6
-rw-r--r--net/ipv6/ip6_offload.c2
-rw-r--r--net/ipv6/ip6_output.c60
-rw-r--r--net/ipv6/ip6_tunnel.c2
-rw-r--r--net/ipv6/ip6_udp_tunnel.c6
-rw-r--r--net/ipv6/mcast_snoop.c216
-rw-r--r--net/ipv6/ndisc.c8
-rw-r--r--net/ipv6/netfilter.c2
-rw-r--r--net/ipv6/netfilter/Kconfig3
-rw-r--r--net/ipv6/netfilter/ip6_tables.c100
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c19
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c6
-rw-r--r--net/ipv6/output_core.c14
-rw-r--r--net/ipv6/raw.c11
-rw-r--r--net/ipv6/reassembly.c8
-rw-r--r--net/ipv6/route.c599
-rw-r--r--net/ipv6/syncookies.c19
-rw-r--r--net/ipv6/sysctl_net_ipv6.c8
-rw-r--r--net/ipv6/tcp_ipv6.c18
-rw-r--r--net/ipv6/xfrm6_policy.c20
-rw-r--r--net/ipx/af_ipx.c2
-rw-r--r--net/irda/af_irda.c2
-rw-r--r--net/irda/timer.c4
-rw-r--r--net/iucv/af_iucv.c10
-rw-r--r--net/key/af_key.c49
-rw-r--r--net/l2tp/l2tp_core.c15
-rw-r--r--net/l2tp/l2tp_ppp.c4
-rw-r--r--net/llc/af_llc.c6
-rw-r--r--net/llc/llc_conn.c6
-rw-r--r--net/mac80211/Kconfig16
-rw-r--r--net/mac80211/aes_ccm.c33
-rw-r--r--net/mac80211/aes_gcm.c33
-rw-r--r--net/mac80211/aes_gmac.c14
-rw-r--r--net/mac80211/agg-tx.c4
-rw-r--r--net/mac80211/cfg.c213
-rw-r--r--net/mac80211/chan.c10
-rw-r--r--net/mac80211/debugfs.c177
-rw-r--r--net/mac80211/debugfs_key.c17
-rw-r--r--net/mac80211/debugfs_sta.c85
-rw-r--r--net/mac80211/driver-ops.h13
-rw-r--r--net/mac80211/ethtool.c3
-rw-r--r--net/mac80211/ibss.c5
-rw-r--r--net/mac80211/ieee80211_i.h36
-rw-r--r--net/mac80211/iface.c99
-rw-r--r--net/mac80211/key.c96
-rw-r--r--net/mac80211/key.h7
-rw-r--r--net/mac80211/led.c268
-rw-r--r--net/mac80211/led.h44
-rw-r--r--net/mac80211/main.c31
-rw-r--r--net/mac80211/mesh_hwmp.c35
-rw-r--r--net/mac80211/mesh_plink.c49
-rw-r--r--net/mac80211/mlme.c247
-rw-r--r--net/mac80211/offchannel.c2
-rw-r--r--net/mac80211/pm.c20
-rw-r--r--net/mac80211/rate.c18
-rw-r--r--net/mac80211/rate.h14
-rw-r--r--net/mac80211/rc80211_minstrel.c11
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c2
-rw-r--r--net/mac80211/rx.c227
-rw-r--r--net/mac80211/scan.c18
-rw-r--r--net/mac80211/sta_info.c24
-rw-r--r--net/mac80211/sta_info.h44
-rw-r--r--net/mac80211/status.c163
-rw-r--r--net/mac80211/tdls.c58
-rw-r--r--net/mac80211/trace.h42
-rw-r--r--net/mac80211/tx.c553
-rw-r--r--net/mac80211/util.c6
-rw-r--r--net/mac80211/wpa.c10
-rw-r--r--net/mac802154/Kconfig1
-rw-r--r--net/mac802154/Makefile4
-rw-r--r--net/mac802154/cfg.c101
-rw-r--r--net/mac802154/driver-ops.h96
-rw-r--r--net/mac802154/ieee802154_i.h9
-rw-r--r--net/mac802154/iface.c156
-rw-r--r--net/mac802154/llsec.c44
-rw-r--r--net/mac802154/mac_cmd.c42
-rw-r--r--net/mac802154/main.c32
-rw-r--r--net/mac802154/mib.c63
-rw-r--r--net/mac802154/rx.c13
-rw-r--r--net/mac802154/trace.c9
-rw-r--r--net/mac802154/trace.h272
-rw-r--r--net/mac802154/util.c5
-rw-r--r--net/mpls/mpls_gso.c2
-rw-r--r--net/netfilter/Kconfig31
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/core.c38
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h44
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c44
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c59
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c27
-rw-r--r--net/netfilter/ipset/ip_set_core.c387
-rw-r--r--net/netfilter/ipset/ip_set_getport.c19
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h736
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c72
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c87
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c98
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c91
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c96
-rw-r--r--net/netfilter/ipset/ip_set_hash_mac.c30
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c73
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c250
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c146
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c86
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c176
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c422
-rw-r--r--net/netfilter/ipset/pfxlen.c16
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c16
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c78
-rw-r--r--net/netfilter/ipvs/ip_vs_sched.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c32
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c60
-rw-r--r--net/netfilter/nf_conntrack_core.c72
-rw-r--r--net/netfilter/nf_conntrack_expect.c3
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c4
-rw-r--r--net/netfilter/nf_conntrack_netlink.c5
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c8
-rw-r--r--net/netfilter/nf_internals.h1
-rw-r--r--net/netfilter/nf_queue.c19
-rw-r--r--net/netfilter/nf_synproxy_core.c14
-rw-r--r--net/netfilter/nf_tables_api.c117
-rw-r--r--net/netfilter/nf_tables_core.c7
-rw-r--r--net/netfilter/nf_tables_netdev.c258
-rw-r--r--net/netfilter/nfnetlink.c38
-rw-r--r--net/netfilter/nfnetlink_log.c2
-rw-r--r--net/netfilter/nfnetlink_queue_core.c63
-rw-r--r--net/netfilter/nft_compat.c2
-rw-r--r--net/netfilter/x_tables.c55
-rw-r--r--net/netfilter/xt_CT.c15
-rw-r--r--net/netfilter/xt_IDLETIMER.c1
-rw-r--r--net/netfilter/xt_TCPMSS.c6
-rw-r--r--net/netfilter/xt_TEE.c1
-rw-r--r--net/netfilter/xt_addrtype.c2
-rw-r--r--net/netfilter/xt_mark.c1
-rw-r--r--net/netfilter/xt_set.c47
-rw-r--r--net/netfilter/xt_socket.c59
-rw-r--r--net/netlink/af_netlink.c258
-rw-r--r--net/netrom/af_netrom.c4
-rw-r--r--net/netrom/nr_route.c1
-rw-r--r--net/nfc/af_nfc.c2
-rw-r--r--net/nfc/llcp.h2
-rw-r--r--net/nfc/llcp_core.c2
-rw-r--r--net/nfc/llcp_sock.c8
-rw-r--r--net/nfc/nci/Kconfig7
-rw-r--r--net/nfc/nci/Makefile3
-rw-r--r--net/nfc/nci/core.c105
-rw-r--r--net/nfc/nci/hci.c11
-rw-r--r--net/nfc/nci/ntf.c10
-rw-r--r--net/nfc/nci/rsp.c10
-rw-r--r--net/nfc/nci/uart.c494
-rw-r--r--net/nfc/netlink.c55
-rw-r--r--net/nfc/nfc.h2
-rw-r--r--net/nfc/rawsock.c4
-rw-r--r--net/openvswitch/Kconfig2
-rw-r--r--net/openvswitch/actions.c39
-rw-r--r--net/openvswitch/datapath.c20
-rw-r--r--net/openvswitch/datapath.h2
-rw-r--r--net/openvswitch/flow.c4
-rw-r--r--net/openvswitch/flow_netlink.c2
-rw-r--r--net/openvswitch/flow_table.c2
-rw-r--r--net/openvswitch/vport-geneve.c5
-rw-r--r--net/packet/af_packet.c199
-rw-r--r--net/packet/internal.h13
-rw-r--r--net/phonet/af_phonet.c2
-rw-r--r--net/phonet/pep.c2
-rw-r--r--net/rds/af_rds.c52
-rw-r--r--net/rds/bind.c4
-rw-r--r--net/rds/ib.h23
-rw-r--r--net/rds/ib_cm.c43
-rw-r--r--net/rds/ib_recv.c4
-rw-r--r--net/rds/ib_send.c55
-rw-r--r--net/rds/info.c2
-rw-r--r--net/rds/iw_cm.c7
-rw-r--r--net/rds/iw_send.c18
-rw-r--r--net/rds/rdma_transport.c34
-rw-r--r--net/rds/rds.h9
-rw-r--r--net/rds/transport.c23
-rw-r--r--net/rfkill/core.c12
-rw-r--r--net/rfkill/rfkill-gpio.c24
-rw-r--r--net/rose/af_rose.c7
-rw-r--r--net/rose/rose_link.c1
-rw-r--r--net/rose/rose_route.c1
-rw-r--r--net/rxrpc/af_rxrpc.c2
-rw-r--r--net/rxrpc/ar-local.c4
-rw-r--r--net/sched/Kconfig11
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c16
-rw-r--r--net/sched/act_bpf.c59
-rw-r--r--net/sched/act_mirred.c4
-rw-r--r--net/sched/act_pedit.c10
-rw-r--r--net/sched/cls_bpf.c18
-rw-r--r--net/sched/cls_flow.c33
-rw-r--r--net/sched/cls_flower.c691
-rw-r--r--net/sched/cls_u32.c13
-rw-r--r--net/sched/em_ipset.c4
-rw-r--r--net/sched/sch_api.c17
-rw-r--r--net/sched/sch_choke.c33
-rw-r--r--net/sched/sch_codel.c15
-rw-r--r--net/sched/sch_fq_codel.c61
-rw-r--r--net/sched/sch_gred.c28
-rw-r--r--net/sched/sch_hhf.c19
-rw-r--r--net/sched/sch_ingress.c59
-rw-r--r--net/sched/sch_netem.c4
-rw-r--r--net/sched/sch_plug.c1
-rw-r--r--net/sched/sch_qfq.c3
-rw-r--r--net/sched/sch_sfb.c24
-rw-r--r--net/sched/sch_sfq.c29
-rw-r--r--net/sctp/ipv6.c7
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/sctp/sm_make_chunk.c7
-rw-r--r--net/sctp/sm_sideeffect.c2
-rw-r--r--net/sctp/socket.c12
-rw-r--r--net/socket.c7
-rw-r--r--net/sunrpc/Kconfig28
-rw-r--r--net/sunrpc/Makefile5
-rw-r--r--net/sunrpc/auth.c2
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c8
-rw-r--r--net/sunrpc/backchannel_rqst.c132
-rw-r--r--net/sunrpc/bc_svc.c63
-rw-r--r--net/sunrpc/clnt.c114
-rw-r--r--net/sunrpc/debugfs.c78
-rw-r--r--net/sunrpc/svc.c38
-rw-r--r--net/sunrpc/xprt.c7
-rw-r--r--net/sunrpc/xprtrdma/Makefile14
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c120
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c229
-rw-r--r--net/sunrpc/xprtrdma/module.c46
-rw-r--r--net/sunrpc/xprtrdma/physical_ops.c14
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c8
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c8
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_marshal.c140
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c6
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c16
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c119
-rw-r--r--net/sunrpc/xprtrdma/transport.c56
-rw-r--r--net/sunrpc/xprtrdma/verbs.c348
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h49
-rw-r--r--net/sunrpc/xprtsock.c182
-rw-r--r--net/switchdev/switchdev.c955
-rw-r--r--net/tipc/addr.c7
-rw-r--r--net/tipc/addr.h8
-rw-r--r--net/tipc/bcast.c46
-rw-r--r--net/tipc/bcast.h1
-rw-r--r--net/tipc/bearer.c20
-rw-r--r--net/tipc/bearer.h2
-rw-r--r--net/tipc/core.c4
-rw-r--r--net/tipc/core.h37
-rw-r--r--net/tipc/link.c313
-rw-r--r--net/tipc/link.h60
-rw-r--r--net/tipc/msg.c51
-rw-r--r--net/tipc/msg.h37
-rw-r--r--net/tipc/name_table.c34
-rw-r--r--net/tipc/net.c1
-rw-r--r--net/tipc/netlink_compat.c137
-rw-r--r--net/tipc/node.c3
-rw-r--r--net/tipc/node.h2
-rw-r--r--net/tipc/server.c6
-rw-r--r--net/tipc/socket.c11
-rw-r--r--net/tipc/subscr.c242
-rw-r--r--net/tipc/subscr.h18
-rw-r--r--net/unix/af_unix.c267
-rw-r--r--net/vmw_vsock/af_vsock.c7
-rw-r--r--net/vmw_vsock/vmci_transport.c2
-rw-r--r--net/wireless/chan.c100
-rw-r--r--net/wireless/core.h1
-rw-r--r--net/wireless/nl80211.c21
-rw-r--r--net/wireless/reg.c12
-rw-r--r--net/wireless/sme.c4
-rw-r--r--net/wireless/sysfs.c14
-rw-r--r--net/wireless/trace.h11
-rw-r--r--net/wireless/util.c3
-rw-r--r--net/x25/af_x25.c8
-rw-r--r--net/xfrm/xfrm_algo.c28
-rw-r--r--net/xfrm/xfrm_input.c12
-rw-r--r--net/xfrm/xfrm_output.c12
-rw-r--r--net/xfrm/xfrm_policy.c42
-rw-r--r--net/xfrm/xfrm_state.c4
-rw-r--r--net/xfrm/xfrm_user.c40
480 files changed, 17314 insertions, 8657 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 59555f0f8..d2cd9de4b 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -618,6 +618,92 @@ out:
return err;
}
+static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ struct sk_buff *p, **pp = NULL;
+ struct vlan_hdr *vhdr;
+ unsigned int hlen, off_vlan;
+ const struct packet_offload *ptype;
+ __be16 type;
+ int flush = 1;
+
+ off_vlan = skb_gro_offset(skb);
+ hlen = off_vlan + sizeof(*vhdr);
+ vhdr = skb_gro_header_fast(skb, off_vlan);
+ if (skb_gro_header_hard(skb, hlen)) {
+ vhdr = skb_gro_header_slow(skb, hlen, off_vlan);
+ if (unlikely(!vhdr))
+ goto out;
+ }
+
+ type = vhdr->h_vlan_encapsulated_proto;
+
+ rcu_read_lock();
+ ptype = gro_find_receive_by_type(type);
+ if (!ptype)
+ goto out_unlock;
+
+ flush = 0;
+
+ for (p = *head; p; p = p->next) {
+ struct vlan_hdr *vhdr2;
+
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ vhdr2 = (struct vlan_hdr *)(p->data + off_vlan);
+ if (compare_vlan_header(vhdr, vhdr2))
+ NAPI_GRO_CB(p)->same_flow = 0;
+ }
+
+ skb_gro_pull(skb, sizeof(*vhdr));
+ skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr));
+ pp = ptype->callbacks.gro_receive(head, skb);
+
+out_unlock:
+ rcu_read_unlock();
+out:
+ NAPI_GRO_CB(skb)->flush |= flush;
+
+ return pp;
+}
+
+static int vlan_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + nhoff);
+ __be16 type = vhdr->h_vlan_encapsulated_proto;
+ struct packet_offload *ptype;
+ int err = -ENOENT;
+
+ rcu_read_lock();
+ ptype = gro_find_complete_by_type(type);
+ if (ptype)
+ err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*vhdr));
+
+ rcu_read_unlock();
+ return err;
+}
+
+static struct packet_offload vlan_packet_offloads[] __read_mostly = {
+ {
+ .type = cpu_to_be16(ETH_P_8021Q),
+ .priority = 10,
+ .callbacks = {
+ .gro_receive = vlan_gro_receive,
+ .gro_complete = vlan_gro_complete,
+ },
+ },
+ {
+ .type = cpu_to_be16(ETH_P_8021AD),
+ .priority = 10,
+ .callbacks = {
+ .gro_receive = vlan_gro_receive,
+ .gro_complete = vlan_gro_complete,
+ },
+ },
+};
+
static int __net_init vlan_init_net(struct net *net)
{
struct vlan_net *vn = net_generic(net, vlan_net_id);
@@ -645,6 +731,7 @@ static struct pernet_operations vlan_net_ops = {
static int __init vlan_proto_init(void)
{
int err;
+ unsigned int i;
pr_info("%s v%s\n", vlan_fullname, vlan_version);
@@ -668,6 +755,9 @@ static int __init vlan_proto_init(void)
if (err < 0)
goto err5;
+ for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
+ dev_add_offload(&vlan_packet_offloads[i]);
+
vlan_ioctl_set(vlan_ioctl_handler);
return 0;
@@ -685,7 +775,13 @@ err0:
static void __exit vlan_cleanup_module(void)
{
+ unsigned int i;
+
vlan_ioctl_set(NULL);
+
+ for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
+ dev_remove_offload(&vlan_packet_offloads[i]);
+
vlan_netlink_fini();
unregister_netdevice_notifier(&vlan_notifier_block);
diff --git a/net/9p/client.c b/net/9p/client.c
index 81925b923..ea79ee9a7 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1541,6 +1541,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
struct p9_client *clnt = fid->clnt;
struct p9_req_t *req;
int total = 0;
+ *err = 0;
p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n",
fid->fid, (unsigned long long) offset, (int)iov_iter_count(to));
@@ -1583,6 +1584,10 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
p9_free_req(clnt, req);
break;
}
+ if (rsize < count) {
+ pr_err("bogus RREAD count (%d > %d)\n", count, rsize);
+ count = rsize;
+ }
p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
if (!count) {
@@ -1616,6 +1621,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
struct p9_client *clnt = fid->clnt;
struct p9_req_t *req;
int total = 0;
+ *err = 0;
p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n",
fid->fid, (unsigned long long) offset,
@@ -1650,6 +1656,10 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
p9_free_req(clnt, req);
break;
}
+ if (rsize < count) {
+ pr_err("bogus RWRITE count (%d > %d)\n", count, rsize);
+ count = rsize;
+ }
p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 3533d2a53..37a78d20c 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -648,6 +648,7 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
struct rdma_conn_param conn_param;
struct ib_qp_init_attr qp_attr;
struct ib_device_attr devattr;
+ struct ib_cq_init_attr cq_attr = {};
/* Parse the transport specific mount options */
err = parse_opts(args, &opts);
@@ -705,9 +706,10 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
goto error;
/* Create the Completion Queue */
+ cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1;
rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
cq_event_handler, client,
- opts.sq_depth + opts.rq_depth + 1, 0);
+ &cq_attr);
if (IS_ERR(rdma->cq))
goto error;
ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 9dd49ca67..6e70ddb15 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -704,6 +704,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
mutex_unlock(&virtio_9p_lock);
+ vdev->config->reset(vdev);
vdev->config->del_vqs(vdev);
sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
diff --git a/net/Kconfig b/net/Kconfig
index 44dd5786e..57a7c5af3 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -45,6 +45,9 @@ config COMPAT_NETLINK_MESSAGES
Newly written code should NEVER need this option but do
compat-independent messages instead!
+config NET_INGRESS
+ bool
+
menu "Networking options"
source "net/packet/Kconfig"
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 3b7ad43c7..d5871ac49 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1030,7 +1030,7 @@ static int atalk_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
goto out;
rc = -ENOMEM;
- sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto);
+ sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, kern);
if (!sk)
goto out;
rc = 0;
diff --git a/net/atm/common.c b/net/atm/common.c
index ed0466637..49a872db7 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -141,7 +141,7 @@ static struct proto vcc_proto = {
.release_cb = vcc_release_cb,
};
-int vcc_create(struct net *net, struct socket *sock, int protocol, int family)
+int vcc_create(struct net *net, struct socket *sock, int protocol, int family, int kern)
{
struct sock *sk;
struct atm_vcc *vcc;
@@ -149,7 +149,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family)
sock->sk = NULL;
if (sock->type == SOCK_STREAM)
return -EINVAL;
- sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto);
+ sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto, kern);
if (!sk)
return -ENOMEM;
sock_init_data(sock, sk);
diff --git a/net/atm/common.h b/net/atm/common.h
index 4d6f5b206..959436b87 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -10,7 +10,7 @@
#include <linux/poll.h> /* for poll_table */
-int vcc_create(struct net *net, struct socket *sock, int protocol, int family);
+int vcc_create(struct net *net, struct socket *sock, int protocol, int family, int kern);
int vcc_release(struct socket *sock);
int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index ae0324021..040207ec3 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -136,7 +136,7 @@ static int pvc_create(struct net *net, struct socket *sock, int protocol,
return -EAFNOSUPPORT;
sock->ops = &pvc_proto_ops;
- return vcc_create(net, sock, protocol, PF_ATMPVC);
+ return vcc_create(net, sock, protocol, PF_ATMPVC, kern);
}
static const struct net_proto_family pvc_family_ops = {
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 1ba23f501..3fa0a9ee9 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -660,7 +660,7 @@ static int svc_create(struct net *net, struct socket *sock, int protocol,
return -EAFNOSUPPORT;
sock->ops = &svc_proto_ops;
- error = vcc_create(net, sock, protocol, AF_ATMSVC);
+ error = vcc_create(net, sock, protocol, AF_ATMSVC, kern);
if (error)
return error;
ATM_SD(sock)->local.sas_family = AF_ATMSVC;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 330c1f4a5..ae3a47f9d 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -40,7 +40,6 @@
#include <linux/notifier.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
-#include <linux/netfilter.h>
#include <linux/sysctl.h>
#include <linux/init.h>
#include <linux/spinlock.h>
@@ -58,7 +57,7 @@ static const struct proto_ops ax25_proto_ops;
static void ax25_free_sock(struct sock *sk)
{
- ax25_cb_put(ax25_sk(sk));
+ ax25_cb_put(sk_to_ax25(sk));
}
/*
@@ -307,7 +306,7 @@ void ax25_destroy_socket(ax25_cb *ax25)
while ((skb = skb_dequeue(&ax25->sk->sk_receive_queue)) != NULL) {
if (skb->sk != ax25->sk) {
/* A pending connection */
- ax25_cb *sax25 = ax25_sk(skb->sk);
+ ax25_cb *sax25 = sk_to_ax25(skb->sk);
/* Queue the unaccepted socket for death */
sock_orphan(skb->sk);
@@ -552,7 +551,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
return -EFAULT;
lock_sock(sk);
- ax25 = ax25_sk(sk);
+ ax25 = sk_to_ax25(sk);
switch (optname) {
case AX25_WINDOW:
@@ -698,7 +697,7 @@ static int ax25_getsockopt(struct socket *sock, int level, int optname,
length = min_t(unsigned int, maxlen, sizeof(int));
lock_sock(sk);
- ax25 = ax25_sk(sk);
+ ax25 = sk_to_ax25(sk);
switch (optname) {
case AX25_WINDOW:
@@ -797,7 +796,7 @@ out:
static struct proto ax25_proto = {
.name = "AX25",
.owner = THIS_MODULE,
- .obj_size = sizeof(struct sock),
+ .obj_size = sizeof(struct ax25_sock),
};
static int ax25_create(struct net *net, struct socket *sock, int protocol,
@@ -855,11 +854,11 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
return -ESOCKTNOSUPPORT;
}
- sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto);
+ sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, kern);
if (sk == NULL)
return -ENOMEM;
- ax25 = sk->sk_protinfo = ax25_create_cb();
+ ax25 = ax25_sk(sk)->cb = ax25_create_cb();
if (!ax25) {
sk_free(sk);
return -ENOMEM;
@@ -881,7 +880,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
struct sock *sk;
ax25_cb *ax25, *oax25;
- sk = sk_alloc(sock_net(osk), PF_AX25, GFP_ATOMIC, osk->sk_prot);
+ sk = sk_alloc(sock_net(osk), PF_AX25, GFP_ATOMIC, osk->sk_prot, 0);
if (sk == NULL)
return NULL;
@@ -911,7 +910,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
sk->sk_state = TCP_ESTABLISHED;
sock_copy_flags(sk, osk);
- oax25 = ax25_sk(osk);
+ oax25 = sk_to_ax25(osk);
ax25->modulus = oax25->modulus;
ax25->backoff = oax25->backoff;
@@ -939,7 +938,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
}
}
- sk->sk_protinfo = ax25;
+ ax25_sk(sk)->cb = ax25;
sk->sk_destruct = ax25_free_sock;
ax25->sk = sk;
@@ -957,7 +956,7 @@ static int ax25_release(struct socket *sock)
sock_hold(sk);
sock_orphan(sk);
lock_sock(sk);
- ax25 = ax25_sk(sk);
+ ax25 = sk_to_ax25(sk);
if (sk->sk_type == SOCK_SEQPACKET) {
switch (ax25->state) {
@@ -1067,7 +1066,7 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
lock_sock(sk);
- ax25 = ax25_sk(sk);
+ ax25 = sk_to_ax25(sk);
if (!sock_flag(sk, SOCK_ZAPPED)) {
err = -EINVAL;
goto out;
@@ -1114,7 +1113,7 @@ static int __must_check ax25_connect(struct socket *sock,
struct sockaddr *uaddr, int addr_len, int flags)
{
struct sock *sk = sock->sk;
- ax25_cb *ax25 = ax25_sk(sk), *ax25t;
+ ax25_cb *ax25 = sk_to_ax25(sk), *ax25t;
struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr;
ax25_digi *digi = NULL;
int ct = 0, err = 0;
@@ -1395,7 +1394,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
memset(fsa, 0, sizeof(*fsa));
lock_sock(sk);
- ax25 = ax25_sk(sk);
+ ax25 = sk_to_ax25(sk);
if (peer != 0) {
if (sk->sk_state != TCP_ESTABLISHED) {
@@ -1447,7 +1446,7 @@ static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
return -EINVAL;
lock_sock(sk);
- ax25 = ax25_sk(sk);
+ ax25 = sk_to_ax25(sk);
if (sock_flag(sk, SOCK_ZAPPED)) {
err = -EADDRNOTAVAIL;
@@ -1622,7 +1621,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (skb == NULL)
goto out;
- if (!ax25_sk(sk)->pidincl)
+ if (!sk_to_ax25(sk)->pidincl)
skb_pull(skb, 1); /* Remove PID */
skb_reset_transport_header(skb);
@@ -1763,7 +1762,7 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCAX25GETINFO:
case SIOCAX25GETINFOOLD: {
- ax25_cb *ax25 = ax25_sk(sk);
+ ax25_cb *ax25 = sk_to_ax25(sk);
struct ax25_info_struct ax25_info;
ax25_info.t1 = ax25->t1 / HZ;
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 7ed8ab724..bb5a0e4e9 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -23,7 +23,6 @@
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
-#include <linux/netfilter.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <asm/uaccess.h>
@@ -354,7 +353,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
return 0;
}
- ax25 = ax25_sk(make);
+ ax25 = sk_to_ax25(make);
skb_set_owner_r(skb, make);
skb_queue_head(&sk->sk_receive_queue, skb);
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 7c646bb2c..b563a3f5f 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -31,7 +31,6 @@
#include <linux/notifier.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
-#include <linux/netfilter.h>
#include <linux/sysctl.h>
#include <net/ip.h>
#include <net/arp.h>
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index be2acab9b..8ddd41baa 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -24,7 +24,6 @@
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
-#include <linux/netfilter.h>
#include <net/sock.h>
#include <asm/uaccess.h>
#include <linux/fcntl.h>
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 1997538a5..3b78e8473 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -264,6 +264,7 @@ void ax25_disconnect(ax25_cb *ax25, int reason)
{
ax25_clear_queues(ax25);
+ ax25_stop_heartbeat(ax25);
ax25_stop_t1timer(ax25);
ax25_stop_t2timer(ax25);
ax25_stop_t3timer(ax25);
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index 71c4badbc..4ad2fb7bc 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -34,7 +34,6 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
-#include <linux/netfilter.h>
#include <linux/sysctl.h>
#include <linux/export.h>
#include <net/ip.h>
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index eb7d8c038..21434ab79 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
#
-# Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
#
@@ -20,7 +20,7 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv.o
batman-adv-y += bat_iv_ogm.o
batman-adv-y += bitarray.o
batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o
-batman-adv-y += debugfs.o
+batman-adv-$(CONFIG_DEBUG_FS) += debugfs.o
batman-adv-$(CONFIG_BATMAN_ADV_DAT) += distributed-arp-table.o
batman-adv-y += fragmentation.o
batman-adv-y += gateway_client.o
@@ -29,6 +29,7 @@ batman-adv-y += hard-interface.o
batman-adv-y += hash.o
batman-adv-y += icmp_socket.o
batman-adv-y += main.o
+batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o
batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o
batman-adv-y += originator.o
batman-adv-y += routing.o
@@ -36,4 +37,3 @@ batman-adv-y += send.o
batman-adv-y += soft-interface.o
batman-adv-y += sysfs.o
batman-adv-y += translation-table.o
-batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 4e49666f8..4e59cf3eb 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 00e00e09b..753383c22 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -15,20 +15,50 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+#include "bat_algo.h"
#include "main.h"
-#include "translation-table.h"
+
+#include <linux/atomic.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/byteorder/generic.h>
+#include <linux/cache.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/pkt_sched.h>
+#include <linux/printk.h>
+#include <linux/random.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+#include "bitarray.h"
+#include "hard-interface.h"
+#include "hash.h"
+#include "network-coding.h"
#include "originator.h"
+#include "packet.h"
#include "routing.h"
-#include "gateway_common.h"
-#include "gateway_client.h"
-#include "hard-interface.h"
#include "send.h"
-#include "bat_algo.h"
-#include "network-coding.h"
+#include "translation-table.h"
/**
* enum batadv_dup_status - duplicate status
- * @BATADV_NO_DUP: the packet is a duplicate
+ * @BATADV_NO_DUP: the packet is no duplicate
* @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the
* neighbor)
* @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor
@@ -55,7 +85,7 @@ static void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index,
}
/**
- * batadv_ring_buffer_set - compute the average of all non-zero values stored
+ * batadv_ring_buffer_avg - compute the average of all non-zero values stored
* in the given ring buffer
* @lq_recv: pointer to the ring buffer
*
@@ -64,7 +94,9 @@ static void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index,
static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[])
{
const uint8_t *ptr;
- uint16_t count = 0, i = 0, sum = 0;
+ uint16_t count = 0;
+ uint16_t i = 0;
+ uint16_t sum = 0;
ptr = lq_recv;
@@ -308,7 +340,6 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
struct batadv_ogm_packet *batadv_ogm_packet;
unsigned char *ogm_buff;
uint32_t random_seqno;
- int res = -ENOMEM;
/* randomize initial seqno to avoid collision */
get_random_bytes(&random_seqno, sizeof(random_seqno));
@@ -317,7 +348,7 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
hard_iface->bat_iv.ogm_buff_len = BATADV_OGM_HLEN;
ogm_buff = kmalloc(hard_iface->bat_iv.ogm_buff_len, GFP_ATOMIC);
if (!ogm_buff)
- goto out;
+ return -ENOMEM;
hard_iface->bat_iv.ogm_buff = ogm_buff;
@@ -329,10 +360,7 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
batadv_ogm_packet->reserved = 0;
batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE;
- res = 0;
-
-out:
- return res;
+ return 0;
}
static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface)
@@ -396,8 +424,8 @@ static uint8_t batadv_hop_penalty(uint8_t tq,
}
/* is there another aggregated packet here? */
-static int batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len,
- __be16 tvlv_len)
+static bool batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len,
+ __be16 tvlv_len)
{
int next_buff_pos = 0;
@@ -413,7 +441,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
struct batadv_hard_iface *hard_iface)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
- char *fwd_str;
+ const char *fwd_str;
uint8_t packet_num;
int16_t buff_pos;
struct batadv_ogm_packet *batadv_ogm_packet;
@@ -451,7 +479,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
batadv_ogm_packet->orig,
ntohl(batadv_ogm_packet->seqno),
batadv_ogm_packet->tq, batadv_ogm_packet->ttl,
- (batadv_ogm_packet->flags & BATADV_DIRECTLINK ?
+ ((batadv_ogm_packet->flags & BATADV_DIRECTLINK) ?
"on" : "off"),
hard_iface->net_dev->name,
hard_iface->net_dev->dev_addr);
@@ -548,58 +576,62 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet,
* - the send time is within our MAX_AGGREGATION_MS time
* - the resulting packet wont be bigger than
* MAX_AGGREGATION_BYTES
+ * otherwise aggregation is not possible
*/
- if (time_before(send_time, forw_packet->send_time) &&
- time_after_eq(aggregation_end_time, forw_packet->send_time) &&
- (aggregated_bytes <= BATADV_MAX_AGGREGATION_BYTES)) {
- /* check aggregation compatibility
- * -> direct link packets are broadcasted on
- * their interface only
- * -> aggregate packet if the current packet is
- * a "global" packet as well as the base
- * packet
- */
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if)
- goto out;
-
- /* packet is not leaving on the same interface. */
- if (forw_packet->if_outgoing != if_outgoing)
- goto out;
+ if (!time_before(send_time, forw_packet->send_time) ||
+ !time_after_eq(aggregation_end_time, forw_packet->send_time))
+ return false;
+
+ if (aggregated_bytes > BATADV_MAX_AGGREGATION_BYTES)
+ return false;
+
+ /* packet is not leaving on the same interface. */
+ if (forw_packet->if_outgoing != if_outgoing)
+ return false;
+
+ /* check aggregation compatibility
+ * -> direct link packets are broadcasted on
+ * their interface only
+ * -> aggregate packet if the current packet is
+ * a "global" packet as well as the base
+ * packet
+ */
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if)
+ return false;
- /* packets without direct link flag and high TTL
- * are flooded through the net
- */
- if ((!directlink) &&
- (!(batadv_ogm_packet->flags & BATADV_DIRECTLINK)) &&
- (batadv_ogm_packet->ttl != 1) &&
-
- /* own packets originating non-primary
- * interfaces leave only that interface
- */
- ((!forw_packet->own) ||
- (forw_packet->if_incoming == primary_if))) {
- res = true;
- goto out;
- }
+ /* packets without direct link flag and high TTL
+ * are flooded through the net
+ */
+ if (!directlink &&
+ !(batadv_ogm_packet->flags & BATADV_DIRECTLINK) &&
+ batadv_ogm_packet->ttl != 1 &&
+
+ /* own packets originating non-primary
+ * interfaces leave only that interface
+ */
+ (!forw_packet->own ||
+ forw_packet->if_incoming == primary_if)) {
+ res = true;
+ goto out;
+ }
- /* if the incoming packet is sent via this one
- * interface only - we still can aggregate
- */
- if ((directlink) &&
- (new_bat_ogm_packet->ttl == 1) &&
- (forw_packet->if_incoming == if_incoming) &&
-
- /* packets from direct neighbors or
- * own secondary interface packets
- * (= secondary interface packets in general)
- */
- (batadv_ogm_packet->flags & BATADV_DIRECTLINK ||
- (forw_packet->own &&
- forw_packet->if_incoming != primary_if))) {
- res = true;
- goto out;
- }
+ /* if the incoming packet is sent via this one
+ * interface only - we still can aggregate
+ */
+ if (directlink &&
+ new_bat_ogm_packet->ttl == 1 &&
+ forw_packet->if_incoming == if_incoming &&
+
+ /* packets from direct neighbors or
+ * own secondary interface packets
+ * (= secondary interface packets in general)
+ */
+ (batadv_ogm_packet->flags & BATADV_DIRECTLINK ||
+ (forw_packet->own &&
+ forw_packet->if_incoming != primary_if))) {
+ res = true;
+ goto out;
}
out:
@@ -642,19 +674,16 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
if (!batadv_atomic_dec_not_zero(&bat_priv->batman_queue_left)) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"batman packet queue full\n");
- goto out;
+ goto out_free_outgoing;
}
}
forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC);
- if (!forw_packet_aggr) {
- if (!own_packet)
- atomic_inc(&bat_priv->batman_queue_left);
- goto out;
- }
+ if (!forw_packet_aggr)
+ goto out_nomem;
- if ((atomic_read(&bat_priv->aggregated_ogms)) &&
- (packet_len < BATADV_MAX_AGGREGATION_BYTES))
+ if (atomic_read(&bat_priv->aggregated_ogms) &&
+ packet_len < BATADV_MAX_AGGREGATION_BYTES)
skb_size = BATADV_MAX_AGGREGATION_BYTES;
else
skb_size = packet_len;
@@ -662,12 +691,8 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
skb_size += ETH_HLEN;
forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size);
- if (!forw_packet_aggr->skb) {
- if (!own_packet)
- atomic_inc(&bat_priv->batman_queue_left);
- kfree(forw_packet_aggr);
- goto out;
- }
+ if (!forw_packet_aggr->skb)
+ goto out_free_forw_packet;
forw_packet_aggr->skb->priority = TC_PRIO_CONTROL;
skb_reserve(forw_packet_aggr->skb, ETH_HLEN);
@@ -699,7 +724,12 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
send_time - jiffies);
return;
-out:
+out_free_forw_packet:
+ kfree(forw_packet_aggr);
+out_nomem:
+ if (!own_packet)
+ atomic_inc(&bat_priv->batman_queue_left);
+out_free_outgoing:
batadv_hardif_free_ref(if_outgoing);
out_free_incoming:
batadv_hardif_free_ref(if_incoming);
@@ -752,13 +782,13 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv,
unsigned long max_aggregation_jiffies;
batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff;
- direct_link = batadv_ogm_packet->flags & BATADV_DIRECTLINK ? 1 : 0;
+ direct_link = !!(batadv_ogm_packet->flags & BATADV_DIRECTLINK);
max_aggregation_jiffies = msecs_to_jiffies(BATADV_MAX_AGGREGATION_MS);
/* find position for the packet in the forward queue */
spin_lock_bh(&bat_priv->forw_bat_list_lock);
/* own packets are not to be aggregated */
- if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) {
+ if (atomic_read(&bat_priv->aggregated_ogms) && !own_packet) {
hlist_for_each_entry(forw_packet_pos,
&bat_priv->forw_bat_list, list) {
if (batadv_iv_ogm_can_aggregate(batadv_ogm_packet,
@@ -1034,9 +1064,10 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
batadv_orig_node_free_ref(orig_tmp);
if (!neigh_node)
goto unlock;
- } else
+ } else {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Updating existing last-hop neighbor of originator\n");
+ }
rcu_read_unlock();
neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
@@ -1081,7 +1112,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
* won't consider it either
*/
if (router_ifinfo &&
- (neigh_ifinfo->bat_iv.tq_avg == router_ifinfo->bat_iv.tq_avg)) {
+ neigh_ifinfo->bat_iv.tq_avg == router_ifinfo->bat_iv.tq_avg) {
orig_node_tmp = router->orig_node;
spin_lock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock);
if_num = router->if_incoming->if_num;
@@ -1356,8 +1387,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
out:
spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
batadv_orig_node_free_ref(orig_node);
- if (orig_ifinfo)
- batadv_orig_ifinfo_free_ref(orig_ifinfo);
+ batadv_orig_ifinfo_free_ref(orig_ifinfo);
return ret;
}
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index e3da07a64..cf68c3283 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
@@ -15,10 +15,10 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "main.h"
#include "bitarray.h"
+#include "main.h"
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
/* shift the packet array by n places. */
static void batadv_bitmap_shift_left(unsigned long *seq_bits, int32_t n)
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index 2acaafe60..0c2456225 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
@@ -18,6 +18,12 @@
#ifndef _NET_BATMAN_ADV_BITARRAY_H_
#define _NET_BATMAN_ADV_BITARRAY_H_
+#include "main.h"
+
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+
/* Returns 1 if the corresponding bit in the given seq_bits indicates true
* and curr_seqno is within range of last_seqno. Otherwise returns 0.
*/
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index ac4b96ecc..ba0609292 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
*
@@ -15,19 +15,41 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "main.h"
-#include "hash.h"
-#include "hard-interface.h"
-#include "originator.h"
#include "bridge_loop_avoidance.h"
-#include "translation-table.h"
-#include "send.h"
+#include "main.h"
-#include <linux/etherdevice.h>
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/compiler.h>
#include <linux/crc16.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
#include <linux/if_arp.h>
-#include <net/arp.h>
+#include <linux/if_ether.h>
#include <linux/if_vlan.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/workqueue.h>
+#include <net/arp.h>
+
+#include "hard-interface.h"
+#include "hash.h"
+#include "originator.h"
+#include "packet.h"
+#include "translation-table.h"
static const uint8_t batadv_announce_mac[4] = {0x43, 0x05, 0x43, 0x05};
@@ -42,12 +64,8 @@ static inline uint32_t batadv_choose_claim(const void *data, uint32_t size)
struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
uint32_t hash = 0;
- hash = batadv_hash_bytes(hash, &claim->addr, sizeof(claim->addr));
- hash = batadv_hash_bytes(hash, &claim->vid, sizeof(claim->vid));
-
- hash += (hash << 3);
- hash ^= (hash >> 11);
- hash += (hash << 15);
+ hash = jhash(&claim->addr, sizeof(claim->addr), hash);
+ hash = jhash(&claim->vid, sizeof(claim->vid), hash);
return hash % size;
}
@@ -59,12 +77,8 @@ static inline uint32_t batadv_choose_backbone_gw(const void *data,
const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
uint32_t hash = 0;
- hash = batadv_hash_bytes(hash, &claim->addr, sizeof(claim->addr));
- hash = batadv_hash_bytes(hash, &claim->vid, sizeof(claim->vid));
-
- hash += (hash << 3);
- hash ^= (hash >> 11);
- hash += (hash << 15);
+ hash = jhash(&claim->addr, sizeof(claim->addr), hash);
+ hash = jhash(&claim->vid, sizeof(claim->vid), hash);
return hash % size;
}
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 43c985d92..028269038 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
*
@@ -18,6 +18,16 @@
#ifndef _NET_BATMAN_ADV_BLA_H_
#define _NET_BATMAN_ADV_BLA_H_
+#include "main.h"
+
+#include <linux/types.h>
+
+struct batadv_hard_iface;
+struct batadv_orig_node;
+struct batadv_priv;
+struct seq_file;
+struct sk_buff;
+
#ifdef CONFIG_BATMAN_ADV_BLA
int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
unsigned short vid, bool is_bcast);
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index a4972874c..c4c1e8030 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -15,21 +15,42 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+#include "debugfs.h"
#include "main.h"
+#include <linux/compiler.h>
#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/fcntl.h>
+#include <linux/fs.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/poll.h>
+#include <linux/printk.h>
+#include <linux/sched.h> /* for linux/wait.h */
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+#include <linux/stddef.h>
+#include <linux/stringify.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/wait.h>
+#include <stdarg.h>
-#include "debugfs.h"
-#include "translation-table.h"
-#include "originator.h"
-#include "hard-interface.h"
-#include "gateway_common.h"
-#include "gateway_client.h"
-#include "soft-interface.h"
-#include "icmp_socket.h"
#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
+#include "gateway_client.h"
+#include "icmp_socket.h"
#include "network-coding.h"
+#include "originator.h"
+#include "translation-table.h"
static struct dentry *batadv_debugfs;
@@ -482,11 +503,7 @@ rem_attr:
debugfs_remove_recursive(hard_iface->debug_dir);
hard_iface->debug_dir = NULL;
out:
-#ifdef CONFIG_DEBUG_FS
return -ENOMEM;
-#else
- return 0;
-#endif /* CONFIG_DEBUG_FS */
}
/**
@@ -541,11 +558,7 @@ rem_attr:
debugfs_remove_recursive(bat_priv->debug_dir);
bat_priv->debug_dir = NULL;
out:
-#ifdef CONFIG_DEBUG_FS
return -ENOMEM;
-#else
- return 0;
-#endif /* CONFIG_DEBUG_FS */
}
void batadv_debugfs_del_meshif(struct net_device *dev)
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 37c4d6ddd..187acdc85 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -18,8 +18,17 @@
#ifndef _NET_BATMAN_ADV_DEBUGFS_H_
#define _NET_BATMAN_ADV_DEBUGFS_H_
+#include "main.h"
+
+#include <linux/kconfig.h>
+
+struct batadv_hard_iface;
+struct net_device;
+
#define BATADV_DEBUGFS_SUBDIR "batman_adv"
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+
void batadv_debugfs_init(void);
void batadv_debugfs_destroy(void);
int batadv_debugfs_add_meshif(struct net_device *dev);
@@ -27,4 +36,36 @@ void batadv_debugfs_del_meshif(struct net_device *dev);
int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface);
void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface);
+#else
+
+static inline void batadv_debugfs_init(void)
+{
+}
+
+static inline void batadv_debugfs_destroy(void)
+{
+}
+
+static inline int batadv_debugfs_add_meshif(struct net_device *dev)
+{
+ return 0;
+}
+
+static inline void batadv_debugfs_del_meshif(struct net_device *dev)
+{
+}
+
+static inline
+int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
+{
+ return 0;
+}
+
+static inline
+void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface)
+{
+}
+
+#endif
+
#endif /* _NET_BATMAN_ADV_DEBUGFS_H_ */
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index aad022dd1..6d0b471ee 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
@@ -15,18 +15,36 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include <linux/if_ether.h>
+#include "distributed-arp-table.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
#include <linux/if_arp.h>
+#include <linux/if_ether.h>
#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/workqueue.h>
#include <net/arp.h>
-#include "main.h"
-#include "hash.h"
-#include "distributed-arp-table.h"
#include "hard-interface.h"
+#include "hash.h"
#include "originator.h"
#include "send.h"
-#include "types.h"
#include "translation-table.h"
static void batadv_dat_purge(struct work_struct *work);
@@ -206,9 +224,22 @@ static uint32_t batadv_hash_dat(const void *data, uint32_t size)
{
uint32_t hash = 0;
const struct batadv_dat_entry *dat = data;
+ const unsigned char *key;
+ uint32_t i;
- hash = batadv_hash_bytes(hash, &dat->ip, sizeof(dat->ip));
- hash = batadv_hash_bytes(hash, &dat->vid, sizeof(dat->vid));
+ key = (const unsigned char *)&dat->ip;
+ for (i = 0; i < sizeof(dat->ip); i++) {
+ hash += key[i];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+
+ key = (const unsigned char *)&dat->vid;
+ for (i = 0; i < sizeof(dat->vid); i++) {
+ hash += key[i];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
hash += (hash << 3);
hash ^= (hash >> 11);
@@ -1107,6 +1138,9 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
* @bat_priv: the bat priv with all the soft interface information
* @skb: packet to check
* @hdr_size: size of the encapsulation header
+ *
+ * Returns true if the packet was snooped and consumed by DAT. False if the
+ * packet has to be delivered to the interface
*/
bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
struct sk_buff *skb, int hdr_size)
@@ -1114,7 +1148,7 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
uint16_t type;
__be32 ip_src, ip_dst;
uint8_t *hw_src, *hw_dst;
- bool ret = false;
+ bool dropped = false;
unsigned short vid;
if (!atomic_read(&bat_priv->distributed_arp_table))
@@ -1143,12 +1177,17 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
/* if this REPLY is directed to a client of mine, let's deliver the
* packet to the interface
*/
- ret = !batadv_is_my_client(bat_priv, hw_dst, vid);
+ dropped = !batadv_is_my_client(bat_priv, hw_dst, vid);
+
+ /* if this REPLY is sent on behalf of a client of mine, let's drop the
+ * packet because the client will reply by itself
+ */
+ dropped |= batadv_is_my_client(bat_priv, hw_src, vid);
out:
- if (ret)
+ if (dropped)
kfree_skb(skb);
- /* if ret == false -> packet has to be delivered to the interface */
- return ret;
+ /* if dropped == false -> deliver to the interface */
+ return dropped;
}
/**
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index 2fe0764c6..3181507eb 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
@@ -18,12 +18,19 @@
#ifndef _NET_BATMAN_ADV_DISTRIBUTED_ARP_TABLE_H_
#define _NET_BATMAN_ADV_DISTRIBUTED_ARP_TABLE_H_
-#ifdef CONFIG_BATMAN_ADV_DAT
+#include "main.h"
+
+#include <linux/compiler.h>
+#include <linux/netdevice.h>
+#include <linux/types.h>
-#include "types.h"
#include "originator.h"
+#include "packet.h"
-#include <linux/if_arp.h>
+struct seq_file;
+struct sk_buff;
+
+#ifdef CONFIG_BATMAN_ADV_DAT
/* BATADV_DAT_ADDR_MAX - maximum address value in the DHT space */
#define BATADV_DAT_ADDR_MAX ((batadv_dat_addr_t)~(batadv_dat_addr_t)0)
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 3d1dcaa3e..c0f0d01ab 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2013-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2015 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <martin@hundeboll.net>
*
@@ -15,12 +15,28 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "main.h"
#include "fragmentation.h"
-#include "send.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/pkt_sched.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+
+#include "hard-interface.h"
#include "originator.h"
+#include "packet.h"
#include "routing.h"
-#include "hard-interface.h"
+#include "send.h"
#include "soft-interface.h"
/**
@@ -161,6 +177,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
hlist_add_head(&frag_entry_new->list, &chain->head);
chain->size = skb->len - hdr_size;
chain->timestamp = jiffies;
+ chain->total_size = ntohs(frag_packet->total_size);
ret = true;
goto out;
}
@@ -195,9 +212,11 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
out:
if (chain->size > batadv_frag_size_limit() ||
- ntohs(frag_packet->total_size) > batadv_frag_size_limit()) {
+ chain->total_size != ntohs(frag_packet->total_size) ||
+ chain->total_size > batadv_frag_size_limit()) {
/* Clear chain if total size of either the list or the packet
- * exceeds the maximum size of one merged packet.
+ * exceeds the maximum size of one merged packet. Don't allow
+ * packets to have different total_size.
*/
batadv_frag_clear_chain(&chain->head);
chain->size = 0;
@@ -228,19 +247,13 @@ err:
* Returns the merged skb or NULL on error.
*/
static struct sk_buff *
-batadv_frag_merge_packets(struct hlist_head *chain, struct sk_buff *skb)
+batadv_frag_merge_packets(struct hlist_head *chain)
{
struct batadv_frag_packet *packet;
struct batadv_frag_list_entry *entry;
struct sk_buff *skb_out = NULL;
int size, hdr_size = sizeof(struct batadv_frag_packet);
- /* Make sure incoming skb has non-bogus data. */
- packet = (struct batadv_frag_packet *)skb->data;
- size = ntohs(packet->total_size);
- if (size > batadv_frag_size_limit())
- goto free;
-
/* Remove first entry, as this is the destination for the rest of the
* fragments.
*/
@@ -249,6 +262,9 @@ batadv_frag_merge_packets(struct hlist_head *chain, struct sk_buff *skb)
skb_out = entry->skb;
kfree(entry);
+ packet = (struct batadv_frag_packet *)skb_out->data;
+ size = ntohs(packet->total_size);
+
/* Make room for the rest of the fragments. */
if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) {
kfree_skb(skb_out);
@@ -304,7 +320,7 @@ bool batadv_frag_skb_buffer(struct sk_buff **skb,
if (hlist_empty(&head))
goto out;
- skb_out = batadv_frag_merge_packets(&head, *skb);
+ skb_out = batadv_frag_merge_packets(&head);
if (!skb_out)
goto out_err;
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index d848cf667..8b9877e70 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2013-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2015 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <martin@hundeboll.net>
*
@@ -18,6 +18,15 @@
#ifndef _NET_BATMAN_ADV_FRAGMENTATION_H_
#define _NET_BATMAN_ADV_FRAGMENTATION_H_
+#include "main.h"
+
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+struct sk_buff;
+
void batadv_frag_purge_orig(struct batadv_orig_node *orig,
bool (*check_cb)(struct batadv_frag_table_entry *));
bool batadv_frag_skb_fwd(struct sk_buff *skb,
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 090828cf1..cffa92dd9 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -15,18 +15,38 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "main.h"
-#include "sysfs.h"
#include "gateway_client.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/udp.h>
+
#include "gateway_common.h"
#include "hard-interface.h"
#include "originator.h"
-#include "translation-table.h"
+#include "packet.h"
#include "routing.h"
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/udp.h>
-#include <linux/if_vlan.h>
+#include "sysfs.h"
+#include "translation-table.h"
/* These are the offsets of the "hw type" and "hw address length" in the dhcp
* packet starting at the beginning of the dhcp header
@@ -419,6 +439,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
INIT_HLIST_NODE(&gw_node->list);
gw_node->orig_node = orig_node;
+ gw_node->bandwidth_down = ntohl(gateway->bandwidth_down);
+ gw_node->bandwidth_up = ntohl(gateway->bandwidth_up);
atomic_set(&gw_node->refcount, 1);
spin_lock_bh(&bat_priv->gw.list_lock);
@@ -733,11 +755,6 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
if (!pskb_may_pull(skb, *header_len + sizeof(*udphdr)))
return BATADV_DHCP_NO;
- /* skb->data might have been reallocated by pskb_may_pull() */
- ethhdr = eth_hdr(skb);
- if (ntohs(ethhdr->h_proto) == ETH_P_8021Q)
- ethhdr = (struct ethhdr *)(skb->data + VLAN_HLEN);
-
udphdr = (struct udphdr *)(skb->data + *header_len);
*header_len += sizeof(*udphdr);
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 7ee53bb7d..89565b451 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -18,6 +18,14 @@
#ifndef _NET_BATMAN_ADV_GATEWAY_CLIENT_H_
#define _NET_BATMAN_ADV_GATEWAY_CLIENT_H_
+#include "main.h"
+
+#include <linux/types.h>
+
+struct batadv_tvlv_gateway_data;
+struct seq_file;
+struct sk_buff;
+
void batadv_gw_check_client_stop(struct batadv_priv *bat_priv);
void batadv_gw_reselect(struct batadv_priv *bat_priv);
void batadv_gw_election(struct batadv_priv *bat_priv);
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 88a1bc380..39cf44cce 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -15,9 +15,18 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "main.h"
#include "gateway_common.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+
#include "gateway_client.h"
+#include "packet.h"
/**
* batadv_parse_gw_bandwidth - parse supplied string buffer to extract download
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index aa5116561..bd5c812ce 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -18,6 +18,13 @@
#ifndef _NET_BATMAN_ADV_GATEWAY_COMMON_H_
#define _NET_BATMAN_ADV_GATEWAY_COMMON_H_
+#include "main.h"
+
+#include <linux/types.h>
+
+struct batadv_priv;
+struct net_device;
+
enum batadv_gw_modes {
BATADV_GW_MODE_OFF,
BATADV_GW_MODE_CLIENT,
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index baf1f9843..f4a15d2e5 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -15,22 +15,36 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "main.h"
-#include "distributed-arp-table.h"
#include "hard-interface.h"
-#include "soft-interface.h"
-#include "send.h"
-#include "translation-table.h"
-#include "routing.h"
-#include "sysfs.h"
-#include "debugfs.h"
-#include "originator.h"
-#include "hash.h"
-#include "bridge_loop_avoidance.h"
-#include "gateway_client.h"
+#include "main.h"
+#include <linux/bug.h>
+#include <linux/byteorder/generic.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
+#include <linux/if.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <net/net_namespace.h>
+
+#include "bridge_loop_avoidance.h"
+#include "debugfs.h"
+#include "distributed-arp-table.h"
+#include "gateway_client.h"
+#include "originator.h"
+#include "packet.h"
+#include "send.h"
+#include "soft-interface.h"
+#include "sysfs.h"
+#include "translation-table.h"
void batadv_hardif_free_rcu(struct rcu_head *rcu)
{
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 1918cd50b..5a3142051 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -18,6 +18,17 @@
#ifndef _NET_BATMAN_ADV_HARD_INTERFACE_H_
#define _NET_BATMAN_ADV_HARD_INTERFACE_H_
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+struct net_device;
+
enum batadv_hard_if_state {
BATADV_IF_NOT_IN_USE,
BATADV_IF_TO_BE_REMOVED,
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 7c1c63080..e89f3146b 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
@@ -15,8 +15,12 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "main.h"
#include "hash.h"
+#include "main.h"
+
+#include <linux/fs.h>
+#include <linux/lockdep.h>
+#include <linux/slab.h>
/* clears the hash */
static void batadv_hash_init(struct batadv_hashtable *hash)
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 539fc1266..5065f50c9 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
@@ -18,7 +18,16 @@
#ifndef _NET_BATMAN_ADV_HASH_H_
#define _NET_BATMAN_ADV_HASH_H_
+#include "main.h"
+
+#include <linux/compiler.h>
#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+struct lock_class_key;
/* callback to a compare function. should compare 2 element datas for their
* keys, return 0 if same and not 0 if not same
@@ -80,28 +89,6 @@ static inline void batadv_hash_delete(struct batadv_hashtable *hash,
}
/**
- * batadv_hash_bytes - hash some bytes and add them to the previous hash
- * @hash: previous hash value
- * @data: data to be hashed
- * @size: number of bytes to be hashed
- *
- * Returns the new hash value.
- */
-static inline uint32_t batadv_hash_bytes(uint32_t hash, const void *data,
- uint32_t size)
-{
- const unsigned char *key = data;
- int i;
-
- for (i = 0; i < size; i++) {
- hash += key[i];
- hash += (hash << 10);
- hash ^= (hash >> 6);
- }
- return hash;
-}
-
-/**
* batadv_hash_add - adds data to the hashtable
* @hash: storage hash table
* @compare: callback to determine if 2 hash elements are identical
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 161ef8f17..07061bcba 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -15,14 +15,39 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+#include "icmp_socket.h"
#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/compiler.h>
#include <linux/debugfs.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/export.h>
+#include <linux/fcntl.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/pkt_sched.h>
+#include <linux/poll.h>
+#include <linux/printk.h>
+#include <linux/sched.h> /* for linux/wait.h */
+#include <linux/skbuff.h>
#include <linux/slab.h>
-#include "icmp_socket.h"
-#include "send.h"
-#include "hash.h"
-#include "originator.h"
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <linux/wait.h>
+
#include "hard-interface.h"
+#include "originator.h"
+#include "packet.h"
+#include "send.h"
static struct batadv_socket_client *batadv_socket_client_hash[256];
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index 0c33950aa..7de7fce4b 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -18,6 +18,13 @@
#ifndef _NET_BATMAN_ADV_ICMP_SOCKET_H_
#define _NET_BATMAN_ADV_ICMP_SOCKET_H_
+#include "main.h"
+
+#include <linux/types.h>
+
+struct batadv_icmp_header;
+struct batadv_priv;
+
#define BATADV_ICMP_SOCKET "socket"
void batadv_socket_init(void);
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 12fc77bef..8457097f1 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -15,31 +15,53 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/byteorder/generic.h>
#include <linux/crc32c.h>
-#include <linux/highmem.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
#include <linux/if_vlan.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/pkt_sched.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/workqueue.h>
#include <net/dsfield.h>
-#include "main.h"
-#include "sysfs.h"
+#include <net/rtnetlink.h>
+
+#include "bat_algo.h"
+#include "bridge_loop_avoidance.h"
#include "debugfs.h"
+#include "distributed-arp-table.h"
+#include "gateway_client.h"
+#include "gateway_common.h"
+#include "hard-interface.h"
+#include "icmp_socket.h"
+#include "multicast.h"
+#include "network-coding.h"
+#include "originator.h"
+#include "packet.h"
#include "routing.h"
#include "send.h"
-#include "originator.h"
#include "soft-interface.h"
-#include "icmp_socket.h"
#include "translation-table.h"
-#include "hard-interface.h"
-#include "gateway_client.h"
-#include "bridge_loop_avoidance.h"
-#include "distributed-arp-table.h"
-#include "multicast.h"
-#include "gateway_common.h"
-#include "hash.h"
-#include "bat_algo.h"
-#include "network-coding.h"
-#include "fragmentation.h"
/* List manipulations on hardif_list have to be rtnl_lock()'ed,
* list traversals just rcu-locked
@@ -209,10 +231,13 @@ void batadv_mesh_free(struct net_device *soft_iface)
* interfaces in the current mesh
* @bat_priv: the bat priv with all the soft interface information
* @addr: the address to check
+ *
+ * Returns 'true' if the mac address was found, false otherwise.
*/
-int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr)
+bool batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr)
{
const struct batadv_hard_iface *hard_iface;
+ bool is_my_mac = false;
rcu_read_lock();
list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
@@ -223,12 +248,12 @@ int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr)
continue;
if (batadv_compare_eth(hard_iface->net_dev->dev_addr, addr)) {
- rcu_read_unlock();
- return 1;
+ is_my_mac = true;
+ break;
}
}
rcu_read_unlock();
- return 0;
+ return is_my_mac;
}
/**
@@ -510,14 +535,12 @@ static struct batadv_algo_ops *batadv_algo_get(char *name)
int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
{
struct batadv_algo_ops *bat_algo_ops_tmp;
- int ret;
bat_algo_ops_tmp = batadv_algo_get(bat_algo_ops->name);
if (bat_algo_ops_tmp) {
pr_info("Trying to register already registered routing algorithm: %s\n",
bat_algo_ops->name);
- ret = -EEXIST;
- goto out;
+ return -EEXIST;
}
/* all algorithms must implement all ops (for now) */
@@ -531,32 +554,26 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
!bat_algo_ops->bat_neigh_is_equiv_or_better) {
pr_info("Routing algo '%s' does not implement required ops\n",
bat_algo_ops->name);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
INIT_HLIST_NODE(&bat_algo_ops->list);
hlist_add_head(&bat_algo_ops->list, &batadv_algo_list);
- ret = 0;
-out:
- return ret;
+ return 0;
}
int batadv_algo_select(struct batadv_priv *bat_priv, char *name)
{
struct batadv_algo_ops *bat_algo_ops;
- int ret = -EINVAL;
bat_algo_ops = batadv_algo_get(name);
if (!bat_algo_ops)
- goto out;
+ return -EINVAL;
bat_priv->bat_algo_ops = bat_algo_ops;
- ret = 0;
-out:
- return ret;
+ return 0;
}
int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
@@ -819,15 +836,15 @@ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
new_buff = kmalloc(min_packet_len + additional_packet_len, GFP_ATOMIC);
/* keep old buffer if kmalloc should fail */
- if (new_buff) {
- memcpy(new_buff, *packet_buff, min_packet_len);
- kfree(*packet_buff);
- *packet_buff = new_buff;
- *packet_buff_len = min_packet_len + additional_packet_len;
- return true;
- }
+ if (!new_buff)
+ return false;
+
+ memcpy(new_buff, *packet_buff, min_packet_len);
+ kfree(*packet_buff);
+ *packet_buff = new_buff;
+ *packet_buff_len = min_packet_len + additional_packet_len;
- return false;
+ return true;
}
/**
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 4d2318829..41d27c787 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -24,7 +24,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2015.0"
+#define BATADV_SOURCE_VERSION "2015.1"
#endif
/* B.A.T.M.A.N. parameters */
@@ -44,7 +44,7 @@
#define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */
#define BATADV_TT_WORK_PERIOD 5000 /* 5 seconds */
#define BATADV_ORIG_WORK_PERIOD 1000 /* 1 second */
-#define BATADV_DAT_ENTRY_TIMEOUT (5*60000) /* 5 mins in milliseconds */
+#define BATADV_DAT_ENTRY_TIMEOUT (5 * 60000) /* 5 mins in milliseconds */
/* sliding packet range of received originator messages in sequence numbers
* (should be a multiple of our word size)
*/
@@ -163,28 +163,26 @@ enum batadv_uev_type {
/* Kernel headers */
-#include <linux/mutex.h> /* mutex */
-#include <linux/module.h> /* needed by all modules */
-#include <linux/netdevice.h> /* netdevice */
-#include <linux/etherdevice.h> /* ethernet address classification */
-#include <linux/if_ether.h> /* ethernet header */
-#include <linux/poll.h> /* poll_table */
-#include <linux/kthread.h> /* kernel threads */
-#include <linux/pkt_sched.h> /* schedule types */
-#include <linux/workqueue.h> /* workqueue */
+#include <linux/atomic.h>
+#include <linux/bitops.h> /* for packet.h */
+#include <linux/compiler.h>
+#include <linux/cpumask.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h> /* for packet.h */
+#include <linux/netdevice.h>
+#include <linux/printk.h>
+#include <linux/types.h>
#include <linux/percpu.h>
-#include <linux/slab.h>
-#include <net/sock.h> /* struct sock */
-#include <net/addrconf.h> /* ipv6 address stuff */
-#include <linux/ip.h>
-#include <net/rtnetlink.h>
#include <linux/jiffies.h>
-#include <linux/seq_file.h>
#include <linux/if_vlan.h>
#include "types.h"
-#define BATADV_PRINT_VID(vid) (vid & BATADV_VLAN_HAS_TAG ? \
+struct batadv_ogm_packet;
+struct seq_file;
+struct sk_buff;
+
+#define BATADV_PRINT_VID(vid) ((vid & BATADV_VLAN_HAS_TAG) ? \
(int)(vid & VLAN_VID_MASK) : -1)
extern char batadv_routing_algo[];
@@ -195,7 +193,7 @@ extern struct workqueue_struct *batadv_event_workqueue;
int batadv_mesh_init(struct net_device *soft_iface);
void batadv_mesh_free(struct net_device *soft_iface);
-int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr);
+bool batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr);
struct batadv_hard_iface *
batadv_seq_print_text_primary_if_get(struct seq_file *seq);
int batadv_max_header_len(void);
@@ -279,7 +277,7 @@ static inline void _batadv_dbg(int type __always_unused,
*
* note: can't use ether_addr_equal() as it requires aligned memory
*/
-static inline int batadv_compare_eth(const void *data1, const void *data2)
+static inline bool batadv_compare_eth(const void *data1, const void *data2)
{
return ether_addr_equal_unaligned(data1, data2);
}
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index b24e4bb64..7aa480b7e 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2015 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*
@@ -15,10 +15,33 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "main.h"
#include "multicast.h"
-#include "originator.h"
-#include "hard-interface.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+
+#include "packet.h"
#include "translation-table.h"
/**
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 3a44ebdb4..beb6e56c6 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2015 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*
@@ -18,6 +18,12 @@
#ifndef _NET_BATMAN_ADV_MULTICAST_H_
#define _NET_BATMAN_ADV_MULTICAST_H_
+#include "main.h"
+
+struct batadv_orig_node;
+struct batadv_priv;
+struct sk_buff;
+
/**
* batadv_forw_mode - the way a packet should be forwarded as
* @BATADV_FORW_ALL: forward the packet to all nodes (currently via classic
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 127cc4d73..f0a50f31d 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2015 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
*
@@ -15,15 +15,44 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+#include "network-coding.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/compiler.h>
#include <linux/debugfs.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/init.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/netdevice.h>
+#include <linux/printk.h>
+#include <linux/random.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/workqueue.h>
-#include "main.h"
+#include "hard-interface.h"
#include "hash.h"
-#include "network-coding.h"
-#include "send.h"
#include "originator.h"
-#include "hard-interface.h"
+#include "packet.h"
#include "routing.h"
+#include "send.h"
static struct lock_class_key batadv_nc_coding_hash_lock_class_key;
static struct lock_class_key batadv_nc_decoding_hash_lock_class_key;
@@ -155,7 +184,7 @@ err:
*/
void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv)
{
- atomic_set(&bat_priv->network_coding, 1);
+ atomic_set(&bat_priv->network_coding, 0);
bat_priv->nc.min_tq = 200;
bat_priv->nc.max_fwd_delay = 10;
bat_priv->nc.max_buffer_time = 200;
@@ -275,7 +304,7 @@ static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv,
* max_buffer time
*/
return batadv_has_timed_out(nc_path->last_valid,
- bat_priv->nc.max_buffer_time*10);
+ bat_priv->nc.max_buffer_time * 10);
}
/**
@@ -453,14 +482,8 @@ static uint32_t batadv_nc_hash_choose(const void *data, uint32_t size)
const struct batadv_nc_path *nc_path = data;
uint32_t hash = 0;
- hash = batadv_hash_bytes(hash, &nc_path->prev_hop,
- sizeof(nc_path->prev_hop));
- hash = batadv_hash_bytes(hash, &nc_path->next_hop,
- sizeof(nc_path->next_hop));
-
- hash += (hash << 3);
- hash ^= (hash >> 11);
- hash += (hash << 15);
+ hash = jhash(&nc_path->prev_hop, sizeof(nc_path->prev_hop), hash);
+ hash = jhash(&nc_path->next_hop, sizeof(nc_path->next_hop), hash);
return hash % size;
}
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index 358c0d686..5b79aa8c6 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2015 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
*
@@ -18,6 +18,19 @@
#ifndef _NET_BATMAN_ADV_NETWORK_CODING_H_
#define _NET_BATMAN_ADV_NETWORK_CODING_H_
+#include "main.h"
+
+#include <linux/types.h>
+
+struct batadv_nc_node;
+struct batadv_neigh_node;
+struct batadv_ogm_packet;
+struct batadv_orig_node;
+struct batadv_priv;
+struct net_device;
+struct seq_file;
+struct sk_buff;
+
#ifdef CONFIG_BATMAN_ADV_NC
void batadv_nc_status_update(struct net_device *net_dev);
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 90e805aba..018b7495a 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -15,19 +15,31 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+#include "originator.h"
#include "main.h"
+
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/netdevice.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
#include "distributed-arp-table.h"
-#include "originator.h"
-#include "hash.h"
-#include "translation-table.h"
-#include "routing.h"
+#include "fragmentation.h"
#include "gateway_client.h"
#include "hard-interface.h"
-#include "soft-interface.h"
-#include "bridge_loop_avoidance.h"
-#include "network-coding.h"
-#include "fragmentation.h"
+#include "hash.h"
#include "multicast.h"
+#include "network-coding.h"
+#include "routing.h"
+#include "translation-table.h"
/* hash class keys */
static struct lock_class_key batadv_orig_hash_lock_class_key;
@@ -197,13 +209,19 @@ static void batadv_neigh_node_free_rcu(struct rcu_head *rcu)
struct hlist_node *node_tmp;
struct batadv_neigh_node *neigh_node;
struct batadv_neigh_ifinfo *neigh_ifinfo;
+ struct batadv_algo_ops *bao;
neigh_node = container_of(rcu, struct batadv_neigh_node, rcu);
+ bao = neigh_node->orig_node->bat_priv->bat_algo_ops;
hlist_for_each_entry_safe(neigh_ifinfo, node_tmp,
&neigh_node->ifinfo_list, list) {
batadv_neigh_ifinfo_free_ref_now(neigh_ifinfo);
}
+
+ if (bao->bat_neigh_free)
+ bao->bat_neigh_free(neigh_node);
+
batadv_hardif_free_ref_now(neigh_node->if_incoming);
kfree(neigh_node);
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index aa4a43696..79734d302 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -18,8 +18,21 @@
#ifndef _NET_BATMAN_ADV_ORIGINATOR_H_
#define _NET_BATMAN_ADV_ORIGINATOR_H_
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <linux/if_ether.h>
+#include <linux/jhash.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+
#include "hash.h"
+struct seq_file;
+
int batadv_compare_orig(const struct hlist_node *node, const void *data2);
int batadv_originator_init(struct batadv_priv *bat_priv);
void batadv_originator_free(struct batadv_priv *bat_priv);
@@ -75,20 +88,9 @@ void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan);
*/
static inline uint32_t batadv_choose_orig(const void *data, uint32_t size)
{
- const unsigned char *key = data;
uint32_t hash = 0;
- size_t i;
-
- for (i = 0; i < 6; i++) {
- hash += key[i];
- hash += (hash << 10);
- hash ^= (hash >> 6);
- }
-
- hash += (hash << 3);
- hash ^= (hash >> 11);
- hash += (hash << 15);
+ hash = jhash(data, ETH_ALEN, hash);
return hash % size;
}
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index b81fbbf21..9e747c08d 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -18,6 +18,9 @@
#ifndef _NET_BATMAN_ADV_PACKET_H_
#define _NET_BATMAN_ADV_PACKET_H_
+#include <asm/byteorder.h>
+#include <linux/types.h>
+
/**
* enum batadv_packettype - types for batman-adv encapsulated packets
* @BATADV_IV_OGM: originator messages for B.A.T.M.A.N. IV
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index da83982bf..c360c0cd1 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -15,20 +15,36 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "main.h"
#include "routing.h"
-#include "send.h"
-#include "soft-interface.h"
-#include "hard-interface.h"
-#include "icmp_socket.h"
-#include "translation-table.h"
-#include "originator.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/jiffies.h>
+#include <linux/netdevice.h>
+#include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+
+#include "bitarray.h"
#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
-#include "network-coding.h"
#include "fragmentation.h"
-
-#include <linux/if_vlan.h>
+#include "hard-interface.h"
+#include "icmp_socket.h"
+#include "network-coding.h"
+#include "originator.h"
+#include "packet.h"
+#include "send.h"
+#include "soft-interface.h"
+#include "translation-table.h"
static int batadv_route_unicast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 557d3d12a..6bc29d33a 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -18,6 +18,16 @@
#ifndef _NET_BATMAN_ADV_ROUTING_H_
#define _NET_BATMAN_ADV_ROUTING_H_
+#include "main.h"
+
+#include <linux/types.h>
+
+struct batadv_hard_iface;
+struct batadv_neigh_node;
+struct batadv_orig_node;
+struct batadv_priv;
+struct sk_buff;
+
bool batadv_check_management_packet(struct sk_buff *skb,
struct batadv_hard_iface *hard_iface,
int header_len);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 3d64ed20c..0a01992e6 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -15,19 +15,37 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+#include "send.h"
#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/if.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/workqueue.h>
+
#include "distributed-arp-table.h"
-#include "send.h"
-#include "routing.h"
-#include "translation-table.h"
-#include "soft-interface.h"
-#include "hard-interface.h"
-#include "gateway_common.h"
+#include "fragmentation.h"
#include "gateway_client.h"
-#include "originator.h"
+#include "hard-interface.h"
#include "network-coding.h"
-#include "fragmentation.h"
-#include "multicast.h"
+#include "originator.h"
+#include "routing.h"
+#include "soft-interface.h"
+#include "translation-table.h"
static void batadv_send_outstanding_bcast_packet(struct work_struct *work);
@@ -255,8 +273,8 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
unsigned short vid)
{
- struct ethhdr *ethhdr;
struct batadv_unicast_packet *unicast_packet;
+ struct ethhdr *ethhdr;
int ret = NET_XMIT_DROP;
if (!orig_node)
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 38d0ec183..0536835fe 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -18,6 +18,19 @@
#ifndef _NET_BATMAN_ADV_SEND_H_
#define _NET_BATMAN_ADV_SEND_H_
+#include "main.h"
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include "packet.h"
+
+struct batadv_hard_iface;
+struct batadv_orig_node;
+struct batadv_priv;
+struct sk_buff;
+struct work_struct;
+
int batadv_send_skb_packet(struct sk_buff *skb,
struct batadv_hard_iface *hard_iface,
const uint8_t *dst_addr);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 5ec31d7de..a2fc843c2 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -15,26 +15,50 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "main.h"
#include "soft-interface.h"
-#include "hard-interface.h"
-#include "distributed-arp-table.h"
-#include "routing.h"
-#include "send.h"
-#include "debugfs.h"
-#include "translation-table.h"
-#include "hash.h"
-#include "gateway_common.h"
-#include "gateway_client.h"
-#include "sysfs.h"
-#include "originator.h"
-#include <linux/slab.h>
-#include <linux/ethtool.h>
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
+#include <linux/cache.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
#include <linux/if_vlan.h>
-#include "multicast.h"
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/netdevice.h>
+#include <linux/percpu.h>
+#include <linux/printk.h>
+#include <linux/random.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
#include "bridge_loop_avoidance.h"
+#include "debugfs.h"
+#include "distributed-arp-table.h"
+#include "gateway_client.h"
+#include "gateway_common.h"
+#include "hard-interface.h"
+#include "multicast.h"
#include "network-coding.h"
+#include "packet.h"
+#include "send.h"
+#include "sysfs.h"
+#include "translation-table.h"
static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd);
static void batadv_get_drvinfo(struct net_device *dev,
@@ -105,6 +129,7 @@ static struct net_device_stats *batadv_interface_stats(struct net_device *dev)
static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
{
struct batadv_priv *bat_priv = netdev_priv(dev);
+ struct batadv_softif_vlan *vlan;
struct sockaddr *addr = p;
uint8_t old_addr[ETH_ALEN];
@@ -115,12 +140,17 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
ether_addr_copy(dev->dev_addr, addr->sa_data);
/* only modify transtable if it has been initialized before */
- if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE) {
- batadv_tt_local_remove(bat_priv, old_addr, BATADV_NO_FLAGS,
+ if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
+ return 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) {
+ batadv_tt_local_remove(bat_priv, old_addr, vlan->vid,
"mac address changed", false);
- batadv_tt_local_add(dev, addr->sa_data, BATADV_NO_FLAGS,
+ batadv_tt_local_add(dev, addr->sa_data, vlan->vid,
BATADV_NULL_IFINDEX, BATADV_NO_MARK);
}
+ rcu_read_unlock();
return 0;
}
@@ -449,6 +479,9 @@ out:
*/
void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *vlan)
{
+ if (!vlan)
+ return;
+
if (atomic_dec_and_test(&vlan->refcount)) {
spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock);
hlist_del_rcu(&vlan->list);
@@ -732,7 +765,7 @@ static int batadv_softif_init_late(struct net_device *dev)
atomic_set(&bat_priv->aggregated_ogms, 1);
atomic_set(&bat_priv->bonding, 0);
#ifdef CONFIG_BATMAN_ADV_BLA
- atomic_set(&bat_priv->bridge_loop_avoidance, 0);
+ atomic_set(&bat_priv->bridge_loop_avoidance, 1);
#endif
#ifdef CONFIG_BATMAN_ADV_DAT
atomic_set(&bat_priv->distributed_arp_table, 1);
@@ -818,7 +851,7 @@ static int batadv_softif_slave_add(struct net_device *dev,
int ret = -EINVAL;
hard_iface = batadv_hardif_get_by_netdev(slave_dev);
- if (!hard_iface || hard_iface->soft_iface != NULL)
+ if (!hard_iface || hard_iface->soft_iface)
goto out;
ret = batadv_hardif_enable_interface(hard_iface, dev->name);
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index dbab22fd8..578e8a663 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -18,6 +18,17 @@
#ifndef _NET_BATMAN_ADV_SOFT_INTERFACE_H_
#define _NET_BATMAN_ADV_SOFT_INTERFACE_H_
+#include "main.h"
+
+#include <net/rtnetlink.h>
+
+struct batadv_hard_iface;
+struct batadv_orig_node;
+struct batadv_priv;
+struct batadv_softif_vlan;
+struct net_device;
+struct sk_buff;
+
int batadv_skb_head_push(struct sk_buff *skb, unsigned int len);
void batadv_interface_rx(struct net_device *soft_iface,
struct sk_buff *skb, struct batadv_hard_iface *recv_if,
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index a75dc12f9..d6a312a82 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -15,16 +15,35 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "main.h"
#include "sysfs.h"
-#include "translation-table.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/if.h>
+#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+
#include "distributed-arp-table.h"
-#include "network-coding.h"
-#include "originator.h"
+#include "gateway_client.h"
+#include "gateway_common.h"
#include "hard-interface.h"
+#include "network-coding.h"
+#include "packet.h"
#include "soft-interface.h"
-#include "gateway_common.h"
-#include "gateway_client.h"
static struct net_device *batadv_kobj_to_netdev(struct kobject *obj)
{
@@ -151,7 +170,7 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \
static BATADV_ATTR(_name, _mode, batadv_show_##_name, \
batadv_store_##_name)
-#define BATADV_ATTR_SIF_STORE_UINT(_name, _min, _max, _post_func) \
+#define BATADV_ATTR_SIF_STORE_UINT(_name, _var, _min, _max, _post_func) \
ssize_t batadv_store_##_name(struct kobject *kobj, \
struct attribute *attr, char *buff, \
size_t count) \
@@ -161,24 +180,24 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \
\
return __batadv_store_uint_attr(buff, count, _min, _max, \
_post_func, attr, \
- &bat_priv->_name, net_dev); \
+ &bat_priv->_var, net_dev); \
}
-#define BATADV_ATTR_SIF_SHOW_UINT(_name) \
+#define BATADV_ATTR_SIF_SHOW_UINT(_name, _var) \
ssize_t batadv_show_##_name(struct kobject *kobj, \
struct attribute *attr, char *buff) \
{ \
struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); \
\
- return sprintf(buff, "%i\n", atomic_read(&bat_priv->_name)); \
+ return sprintf(buff, "%i\n", atomic_read(&bat_priv->_var)); \
} \
/* Use this, if you are going to set [name] in the soft-interface
* (bat_priv) to an unsigned integer value
*/
-#define BATADV_ATTR_SIF_UINT(_name, _mode, _min, _max, _post_func) \
- static BATADV_ATTR_SIF_STORE_UINT(_name, _min, _max, _post_func)\
- static BATADV_ATTR_SIF_SHOW_UINT(_name) \
+#define BATADV_ATTR_SIF_UINT(_name, _var, _mode, _min, _max, _post_func)\
+ static BATADV_ATTR_SIF_STORE_UINT(_name, _var, _min, _max, _post_func)\
+ static BATADV_ATTR_SIF_SHOW_UINT(_name, _var) \
static BATADV_ATTR(_name, _mode, batadv_show_##_name, \
batadv_store_##_name)
@@ -540,19 +559,20 @@ BATADV_ATTR_SIF_BOOL(fragmentation, S_IRUGO | S_IWUSR, batadv_update_min_mtu);
static BATADV_ATTR(routing_algo, S_IRUGO, batadv_show_bat_algo, NULL);
static BATADV_ATTR(gw_mode, S_IRUGO | S_IWUSR, batadv_show_gw_mode,
batadv_store_gw_mode);
-BATADV_ATTR_SIF_UINT(orig_interval, S_IRUGO | S_IWUSR, 2 * BATADV_JITTER,
- INT_MAX, NULL);
-BATADV_ATTR_SIF_UINT(hop_penalty, S_IRUGO | S_IWUSR, 0, BATADV_TQ_MAX_VALUE,
- NULL);
-BATADV_ATTR_SIF_UINT(gw_sel_class, S_IRUGO | S_IWUSR, 1, BATADV_TQ_MAX_VALUE,
- batadv_post_gw_reselect);
+BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, S_IRUGO | S_IWUSR,
+ 2 * BATADV_JITTER, INT_MAX, NULL);
+BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, S_IRUGO | S_IWUSR, 0,
+ BATADV_TQ_MAX_VALUE, NULL);
+BATADV_ATTR_SIF_UINT(gw_sel_class, gw_sel_class, S_IRUGO | S_IWUSR, 1,
+ BATADV_TQ_MAX_VALUE, batadv_post_gw_reselect);
static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth,
batadv_store_gw_bwidth);
#ifdef CONFIG_BATMAN_ADV_MCAST
BATADV_ATTR_SIF_BOOL(multicast_mode, S_IRUGO | S_IWUSR, NULL);
#endif
#ifdef CONFIG_BATMAN_ADV_DEBUG
-BATADV_ATTR_SIF_UINT(log_level, S_IRUGO | S_IWUSR, 0, BATADV_DBG_ALL, NULL);
+BATADV_ATTR_SIF_UINT(log_level, log_level, S_IRUGO | S_IWUSR, 0,
+ BATADV_DBG_ALL, NULL);
#endif
#ifdef CONFIG_BATMAN_ADV_NC
BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR,
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index b715b60db..2294583f7 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
@@ -18,6 +18,16 @@
#ifndef _NET_BATMAN_ADV_SYSFS_H_
#define _NET_BATMAN_ADV_SYSFS_H_
+#include "main.h"
+
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+struct batadv_priv;
+struct batadv_softif_vlan;
+struct kobject;
+struct net_device;
+
#define BATADV_SYSFS_IF_MESH_SUBDIR "mesh"
#define BATADV_SYSFS_IF_BAT_SUBDIR "batman_adv"
/**
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 07b263a43..5809b39c1 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*
@@ -15,18 +15,41 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "main.h"
#include "translation-table.h"
-#include "soft-interface.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/byteorder/generic.h>
+#include <linux/compiler.h>
+#include <linux/crc32c.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/workqueue.h>
+#include <net/net_namespace.h>
+
+#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
-#include "send.h"
#include "hash.h"
-#include "originator.h"
-#include "routing.h"
-#include "bridge_loop_avoidance.h"
#include "multicast.h"
-
-#include <linux/crc32c.h>
+#include "originator.h"
+#include "packet.h"
+#include "soft-interface.h"
/* hash class keys */
static struct lock_class_key batadv_tt_local_hash_lock_class_key;
@@ -67,12 +90,8 @@ static inline uint32_t batadv_choose_tt(const void *data, uint32_t size)
uint32_t hash = 0;
tt = (struct batadv_tt_common_entry *)data;
- hash = batadv_hash_bytes(hash, &tt->addr, ETH_ALEN);
- hash = batadv_hash_bytes(hash, &tt->vid, sizeof(tt->vid));
-
- hash += (hash << 3);
- hash ^= (hash >> 11);
- hash += (hash << 15);
+ hash = jhash(&tt->addr, ETH_ALEN, hash);
+ hash = jhash(&tt->vid, sizeof(tt->vid), hash);
return hash % size;
}
@@ -575,6 +594,12 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
/* increase the refcounter of the related vlan */
vlan = batadv_softif_vlan_get(bat_priv, vid);
+ if (WARN(!vlan, "adding TT local entry %pM to non-existent VLAN %d",
+ addr, BATADV_PRINT_VID(vid))) {
+ kfree(tt_local);
+ tt_local = NULL;
+ goto out;
+ }
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n",
@@ -954,17 +979,17 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
" * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n",
tt_common_entry->addr,
BATADV_PRINT_VID(tt_common_entry->vid),
- (tt_common_entry->flags &
- BATADV_TT_CLIENT_ROAM ? 'R' : '.'),
+ ((tt_common_entry->flags &
+ BATADV_TT_CLIENT_ROAM) ? 'R' : '.'),
no_purge ? 'P' : '.',
- (tt_common_entry->flags &
- BATADV_TT_CLIENT_NEW ? 'N' : '.'),
- (tt_common_entry->flags &
- BATADV_TT_CLIENT_PENDING ? 'X' : '.'),
- (tt_common_entry->flags &
- BATADV_TT_CLIENT_WIFI ? 'W' : '.'),
- (tt_common_entry->flags &
- BATADV_TT_CLIENT_ISOLA ? 'I' : '.'),
+ ((tt_common_entry->flags &
+ BATADV_TT_CLIENT_NEW) ? 'N' : '.'),
+ ((tt_common_entry->flags &
+ BATADV_TT_CLIENT_PENDING) ? 'X' : '.'),
+ ((tt_common_entry->flags &
+ BATADV_TT_CLIENT_WIFI) ? 'W' : '.'),
+ ((tt_common_entry->flags &
+ BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'),
no_purge ? 0 : last_seen_secs,
no_purge ? 0 : last_seen_msecs,
vlan->tt.crc);
@@ -1015,6 +1040,7 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
struct batadv_tt_local_entry *tt_local_entry;
uint16_t flags, curr_flags = BATADV_NO_FLAGS;
struct batadv_softif_vlan *vlan;
+ void *tt_entry_exists;
tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid);
if (!tt_local_entry)
@@ -1042,11 +1068,22 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
* immediately purge it
*/
batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL);
- hlist_del_rcu(&tt_local_entry->common.hash_entry);
+
+ tt_entry_exists = batadv_hash_remove(bat_priv->tt.local_hash,
+ batadv_compare_tt,
+ batadv_choose_tt,
+ &tt_local_entry->common);
+ if (!tt_entry_exists)
+ goto out;
+
+ /* extra call to free the local tt entry */
batadv_tt_local_entry_free_ref(tt_local_entry);
/* decrease the reference held for this vlan */
vlan = batadv_softif_vlan_get(bat_priv, vid);
+ if (!vlan)
+ goto out;
+
batadv_softif_vlan_free_ref(vlan);
batadv_softif_vlan_free_ref(vlan);
@@ -1147,8 +1184,10 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
/* decrease the reference held for this vlan */
vlan = batadv_softif_vlan_get(bat_priv,
tt_common_entry->vid);
- batadv_softif_vlan_free_ref(vlan);
- batadv_softif_vlan_free_ref(vlan);
+ if (vlan) {
+ batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_free_ref(vlan);
+ }
batadv_tt_local_entry_free_ref(tt_local);
}
@@ -1528,10 +1567,10 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv,
BATADV_PRINT_VID(tt_global_entry->common.vid),
best_entry->ttvn, best_entry->orig_node->orig,
last_ttvn, vlan->tt.crc,
- (flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'),
- (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'),
- (flags & BATADV_TT_CLIENT_ISOLA ? 'I' : '.'),
- (flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.'));
+ ((flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'),
+ ((flags & BATADV_TT_CLIENT_WIFI) ? 'W' : '.'),
+ ((flags & BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'),
+ ((flags & BATADV_TT_CLIENT_TEMP) ? 'T' : '.'));
batadv_orig_node_vlan_free_ref(vlan);
}
@@ -1560,10 +1599,10 @@ print_list:
BATADV_PRINT_VID(tt_global_entry->common.vid),
orig_entry->ttvn, orig_entry->orig_node->orig,
last_ttvn, vlan->tt.crc,
- (flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'),
- (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'),
- (flags & BATADV_TT_CLIENT_ISOLA ? 'I' : '.'),
- (flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.'));
+ ((flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'),
+ ((flags & BATADV_TT_CLIENT_WIFI) ? 'W' : '.'),
+ ((flags & BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'),
+ ((flags & BATADV_TT_CLIENT_TEMP) ? 'T' : '.'));
batadv_orig_node_vlan_free_ref(vlan);
}
@@ -2529,7 +2568,7 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Received TT_REQUEST from %pM for ttvn: %u (%pM) [%c]\n",
req_src, tt_data->ttvn, req_dst,
- (tt_data->flags & BATADV_TT_FULL_TABLE ? 'F' : '.'));
+ ((tt_data->flags & BATADV_TT_FULL_TABLE) ? 'F' : '.'));
/* Let's get the orig node of the REAL destination */
req_dst_orig_node = batadv_orig_hash_find(bat_priv, req_dst);
@@ -2660,7 +2699,7 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Received TT_REQUEST from %pM for ttvn: %u (me) [%c]\n",
req_src, tt_data->ttvn,
- (tt_data->flags & BATADV_TT_FULL_TABLE ? 'F' : '.'));
+ ((tt_data->flags & BATADV_TT_FULL_TABLE) ? 'F' : '.'));
spin_lock_bh(&bat_priv->tt.commit_lock);
@@ -2899,7 +2938,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Received TT_RESPONSE from %pM for ttvn %d t_size: %d [%c]\n",
resp_src, tt_data->ttvn, num_entries,
- (tt_data->flags & BATADV_TT_FULL_TABLE ? 'F' : '.'));
+ ((tt_data->flags & BATADV_TT_FULL_TABLE) ? 'F' : '.'));
orig_node = batadv_orig_hash_find(bat_priv, resp_src);
if (!orig_node)
@@ -3188,8 +3227,10 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
/* decrease the reference held for this vlan */
vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid);
- batadv_softif_vlan_free_ref(vlan);
- batadv_softif_vlan_free_ref(vlan);
+ if (vlan) {
+ batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_free_ref(vlan);
+ }
batadv_tt_local_entry_free_ref(tt_local);
}
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index ad84d7b89..6acc25d3a 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*
@@ -18,6 +18,15 @@
#ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_
#define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_
+#include "main.h"
+
+#include <linux/types.h>
+
+struct batadv_orig_node;
+struct batadv_priv;
+struct net_device;
+struct seq_file;
+
int batadv_tt_init(struct batadv_priv *bat_priv);
bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
unsigned short vid, int ifindex, uint32_t mark);
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 9398c3fb4..67d634836 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -18,9 +18,23 @@
#ifndef _NET_BATMAN_ADV_TYPES_H_
#define _NET_BATMAN_ADV_TYPES_H_
+#ifndef _NET_BATMAN_ADV_MAIN_H_
+#error only "main.h" can be included directly
+#endif
+
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+#include <linux/sched.h> /* for linux/wait.h */
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
#include "packet.h"
-#include "bitarray.h"
-#include <linux/kernel.h>
+
+struct seq_file;
#ifdef CONFIG_BATMAN_ADV_DAT
@@ -132,6 +146,7 @@ struct batadv_orig_ifinfo {
* @timestamp: time (jiffie) of last received fragment
* @seqno: sequence number of the fragments in the list
* @size: accumulated size of packets in list
+ * @total_size: expected size of the assembled packet
*/
struct batadv_frag_table_entry {
struct hlist_head head;
@@ -139,6 +154,7 @@ struct batadv_frag_table_entry {
unsigned long timestamp;
uint16_t seqno;
uint16_t size;
+ uint16_t total_size;
};
/**
@@ -181,9 +197,10 @@ struct batadv_orig_node_vlan {
/**
* struct batadv_orig_bat_iv - B.A.T.M.A.N. IV private orig_node members
- * @bcast_own: bitfield containing the number of our OGMs this orig_node
- * rebroadcasted "back" to us (relative to last_real_seqno)
- * @bcast_own_sum: counted result of bcast_own
+ * @bcast_own: set of bitfields (one per hard interface) where each one counts
+ * the number of our OGMs this orig_node rebroadcasted "back" to us (relative
+ * to last_real_seqno). Every bitfield is BATADV_TQ_LOCAL_WINDOW_SIZE bits long.
+ * @bcast_own_sum: sum of bcast_own
* @ogm_cnt_lock: lock protecting bcast_own, bcast_own_sum,
* neigh_node->bat_iv.real_bits & neigh_node->bat_iv.real_packet_count
*/
@@ -1118,6 +1135,8 @@ struct batadv_forw_packet {
* @bat_neigh_is_equiv_or_better: check if neigh1 is equally good or better
* than neigh2 for their respective outgoing interface from the metric
* prospective
+ * @bat_neigh_free: free the resources allocated by the routing algorithm for a
+ * neigh_node object
* @bat_orig_print: print the originator table (optional)
* @bat_orig_free: free the resources allocated by the routing algorithm for an
* orig_node object
@@ -1135,6 +1154,7 @@ struct batadv_algo_ops {
void (*bat_primary_iface_set)(struct batadv_hard_iface *hard_iface);
void (*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface);
void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet);
+ /* neigh_node handling API */
int (*bat_neigh_cmp)(struct batadv_neigh_node *neigh1,
struct batadv_hard_iface *if_outgoing1,
struct batadv_neigh_node *neigh2,
@@ -1144,6 +1164,7 @@ struct batadv_algo_ops {
struct batadv_hard_iface *if_outgoing1,
struct batadv_neigh_node *neigh2,
struct batadv_hard_iface *if_outgoing2);
+ void (*bat_neigh_free)(struct batadv_neigh_node *neigh);
/* orig_node handling API */
void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq,
struct batadv_hard_iface *hard_iface);
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 1742b849f..2fb7b3064 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -192,7 +192,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_dev *dev,
if (ipv6_addr_any(nexthop))
return NULL;
} else {
- nexthop = rt6_nexthop(rt);
+ nexthop = rt6_nexthop(rt, daddr);
/* We need to remember the address because it is needed
* by bt_xmit() when sending the packet. In bt_xmit(), the
@@ -856,7 +856,7 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev)
set_dev_addr(netdev, &chan->src, chan->src_type);
netdev->netdev_ops = &netdev_ops;
- SET_NETDEV_DEV(netdev, &chan->conn->hcon->dev);
+ SET_NETDEV_DEV(netdev, &chan->conn->hcon->hdev->dev);
SET_NETDEV_DEVTYPE(netdev, &bt_type);
err = register_netdev(netdev);
@@ -928,7 +928,7 @@ static void delete_netdev(struct work_struct *work)
unregister_netdev(entry->netdev);
- /* The entry pointer is deleted in device_event() */
+ /* The entry pointer is deleted by the netdev destructor. */
}
static void chan_close_cb(struct l2cap_chan *chan)
@@ -937,7 +937,7 @@ static void chan_close_cb(struct l2cap_chan *chan)
struct lowpan_dev *dev = NULL;
struct lowpan_peer *peer;
int err = -ENOENT;
- bool last = false, removed = true;
+ bool last = false, remove = true;
BT_DBG("chan %p conn %p", chan, chan->conn);
@@ -948,7 +948,7 @@ static void chan_close_cb(struct l2cap_chan *chan)
/* If conn is set, then the netdev is also there and we should
* not remove it.
*/
- removed = false;
+ remove = false;
}
spin_lock(&devices_lock);
@@ -977,7 +977,7 @@ static void chan_close_cb(struct l2cap_chan *chan)
ifdown(dev->netdev);
- if (!removed) {
+ if (remove) {
INIT_WORK(&entry->delete_netdev, delete_netdev);
schedule_work(&entry->delete_netdev);
}
@@ -1208,8 +1208,6 @@ static void disconnect_all_peers(void)
list_del_rcu(&peer->list);
kfree_rcu(peer, rcu);
-
- module_put(THIS_MODULE);
}
spin_unlock(&devices_lock);
}
@@ -1418,7 +1416,6 @@ static int device_event(struct notifier_block *unused,
BT_DBG("Unregistered netdev %s %p",
netdev->name, netdev);
list_del(&entry->list);
- kfree(entry);
break;
}
}
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 9a8ea232d..29c12ae72 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -12,9 +12,10 @@ obj-$(CONFIG_BT_6LOWPAN) += bluetooth_6lowpan.o
bluetooth_6lowpan-y := 6lowpan.o
bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \
- hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \
+ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o lib.o \
a2mp.o amp.o ecc.o hci_request.o mgmt_util.o
+bluetooth-$(CONFIG_BT_BREDR) += sco.o
bluetooth-$(CONFIG_BT_DEBUGFS) += hci_debugfs.o
bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index bde2bdd9e..b5116fa98 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -202,7 +202,7 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto);
+ sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index d82787d41..ce86a7bae 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -205,7 +205,7 @@ static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto);
+ sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index ee5e59839..2c48bf0b5 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -276,7 +276,7 @@ u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency,
}
void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand,
- __u8 ltk[16])
+ __u8 ltk[16], __u8 key_size)
{
struct hci_dev *hdev = conn->hdev;
struct hci_cp_le_start_enc cp;
@@ -288,7 +288,7 @@ void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand,
cp.handle = cpu_to_le16(conn->handle);
cp.rand = rand;
cp.ediv = ediv;
- memcpy(cp.ltk, ltk, sizeof(cp.ltk));
+ memcpy(cp.ltk, ltk, key_size);
hci_send_cmd(hdev, HCI_OP_LE_START_ENC, sizeof(cp), &cp);
}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index c4802f3bd..2f8fb3306 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -94,7 +94,6 @@ static ssize_t dut_mode_write(struct file *file, const char __user *user_buf,
char buf[32];
size_t buf_size = min(count, (sizeof(buf)-1));
bool enable;
- int err;
if (!test_bit(HCI_UP, &hdev->flags))
return -ENETDOWN;
@@ -121,12 +120,8 @@ static ssize_t dut_mode_write(struct file *file, const char __user *user_buf,
if (IS_ERR(skb))
return PTR_ERR(skb);
- err = -bt_to_errno(skb->data[0]);
kfree_skb(skb);
- if (err < 0)
- return err;
-
hci_dev_change_flag(hdev, HCI_DUT_MODE);
return count;
@@ -1558,6 +1553,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
BT_DBG("%s %p", hdev->name, hdev);
if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) &&
+ !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
test_bit(HCI_UP, &hdev->flags)) {
/* Execute vendor specific shutdown routine */
if (hdev->shutdown)
@@ -1595,6 +1591,11 @@ static int hci_dev_do_close(struct hci_dev *hdev)
if (hci_dev_test_flag(hdev, HCI_MGMT))
cancel_delayed_work_sync(&hdev->rpa_expired);
+ if (hdev->adv_instance_timeout) {
+ cancel_delayed_work_sync(&hdev->adv_instance_expire);
+ hdev->adv_instance_timeout = 0;
+ }
+
/* Avoid potential lockdep warnings from the *_flush() calls by
* ensuring the workqueue is empty up front.
*/
@@ -2151,6 +2152,17 @@ static void hci_discov_off(struct work_struct *work)
mgmt_discoverable_timeout(hdev);
}
+static void hci_adv_timeout_expire(struct work_struct *work)
+{
+ struct hci_dev *hdev;
+
+ hdev = container_of(work, struct hci_dev, adv_instance_expire.work);
+
+ BT_DBG("%s", hdev->name);
+
+ mgmt_adv_timeout_expired(hdev);
+}
+
void hci_uuids_clear(struct hci_dev *hdev)
{
struct bt_uuid *uuid, *tmp;
@@ -2614,6 +2626,130 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr,
return 0;
}
+/* This function requires the caller holds hdev->lock */
+struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance)
+{
+ struct adv_info *adv_instance;
+
+ list_for_each_entry(adv_instance, &hdev->adv_instances, list) {
+ if (adv_instance->instance == instance)
+ return adv_instance;
+ }
+
+ return NULL;
+}
+
+/* This function requires the caller holds hdev->lock */
+struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance) {
+ struct adv_info *cur_instance;
+
+ cur_instance = hci_find_adv_instance(hdev, instance);
+ if (!cur_instance)
+ return NULL;
+
+ if (cur_instance == list_last_entry(&hdev->adv_instances,
+ struct adv_info, list))
+ return list_first_entry(&hdev->adv_instances,
+ struct adv_info, list);
+ else
+ return list_next_entry(cur_instance, list);
+}
+
+/* This function requires the caller holds hdev->lock */
+int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance)
+{
+ struct adv_info *adv_instance;
+
+ adv_instance = hci_find_adv_instance(hdev, instance);
+ if (!adv_instance)
+ return -ENOENT;
+
+ BT_DBG("%s removing %dMR", hdev->name, instance);
+
+ if (hdev->cur_adv_instance == instance && hdev->adv_instance_timeout) {
+ cancel_delayed_work(&hdev->adv_instance_expire);
+ hdev->adv_instance_timeout = 0;
+ }
+
+ list_del(&adv_instance->list);
+ kfree(adv_instance);
+
+ hdev->adv_instance_cnt--;
+
+ return 0;
+}
+
+/* This function requires the caller holds hdev->lock */
+void hci_adv_instances_clear(struct hci_dev *hdev)
+{
+ struct adv_info *adv_instance, *n;
+
+ if (hdev->adv_instance_timeout) {
+ cancel_delayed_work(&hdev->adv_instance_expire);
+ hdev->adv_instance_timeout = 0;
+ }
+
+ list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) {
+ list_del(&adv_instance->list);
+ kfree(adv_instance);
+ }
+
+ hdev->adv_instance_cnt = 0;
+}
+
+/* This function requires the caller holds hdev->lock */
+int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags,
+ u16 adv_data_len, u8 *adv_data,
+ u16 scan_rsp_len, u8 *scan_rsp_data,
+ u16 timeout, u16 duration)
+{
+ struct adv_info *adv_instance;
+
+ adv_instance = hci_find_adv_instance(hdev, instance);
+ if (adv_instance) {
+ memset(adv_instance->adv_data, 0,
+ sizeof(adv_instance->adv_data));
+ memset(adv_instance->scan_rsp_data, 0,
+ sizeof(adv_instance->scan_rsp_data));
+ } else {
+ if (hdev->adv_instance_cnt >= HCI_MAX_ADV_INSTANCES ||
+ instance < 1 || instance > HCI_MAX_ADV_INSTANCES)
+ return -EOVERFLOW;
+
+ adv_instance = kzalloc(sizeof(*adv_instance), GFP_KERNEL);
+ if (!adv_instance)
+ return -ENOMEM;
+
+ adv_instance->pending = true;
+ adv_instance->instance = instance;
+ list_add(&adv_instance->list, &hdev->adv_instances);
+ hdev->adv_instance_cnt++;
+ }
+
+ adv_instance->flags = flags;
+ adv_instance->adv_data_len = adv_data_len;
+ adv_instance->scan_rsp_len = scan_rsp_len;
+
+ if (adv_data_len)
+ memcpy(adv_instance->adv_data, adv_data, adv_data_len);
+
+ if (scan_rsp_len)
+ memcpy(adv_instance->scan_rsp_data,
+ scan_rsp_data, scan_rsp_len);
+
+ adv_instance->timeout = timeout;
+ adv_instance->remaining_time = timeout;
+
+ if (duration == 0)
+ adv_instance->duration = HCI_DEFAULT_ADV_DURATION;
+ else
+ adv_instance->duration = duration;
+
+ BT_DBG("%s for %dMR", hdev->name, instance);
+
+ return 0;
+}
+
struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list,
bdaddr_t *bdaddr, u8 type)
{
@@ -3019,6 +3155,9 @@ struct hci_dev *hci_alloc_dev(void)
hdev->manufacturer = 0xffff; /* Default to internal use */
hdev->inq_tx_power = HCI_TX_POWER_INVALID;
hdev->adv_tx_power = HCI_TX_POWER_INVALID;
+ hdev->adv_instance_cnt = 0;
+ hdev->cur_adv_instance = 0x00;
+ hdev->adv_instance_timeout = 0;
hdev->sniff_max_interval = 800;
hdev->sniff_min_interval = 80;
@@ -3060,6 +3199,7 @@ struct hci_dev *hci_alloc_dev(void)
INIT_LIST_HEAD(&hdev->pend_le_conns);
INIT_LIST_HEAD(&hdev->pend_le_reports);
INIT_LIST_HEAD(&hdev->conn_hash.list);
+ INIT_LIST_HEAD(&hdev->adv_instances);
INIT_WORK(&hdev->rx_work, hci_rx_work);
INIT_WORK(&hdev->cmd_work, hci_cmd_work);
@@ -3071,6 +3211,7 @@ struct hci_dev *hci_alloc_dev(void)
INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off);
INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work);
INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work);
+ INIT_DELAYED_WORK(&hdev->adv_instance_expire, hci_adv_timeout_expire);
skb_queue_head_init(&hdev->rx_q);
skb_queue_head_init(&hdev->cmd_q);
@@ -3082,7 +3223,6 @@ struct hci_dev *hci_alloc_dev(void)
hci_init_sysfs(hdev);
discovery_init(hdev);
- adv_info_init(hdev);
return hdev;
}
@@ -3253,6 +3393,7 @@ void hci_unregister_dev(struct hci_dev *hdev)
hci_smp_ltks_clear(hdev);
hci_smp_irks_clear(hdev);
hci_remote_oob_data_clear(hdev);
+ hci_adv_instances_clear(hdev);
hci_bdaddr_list_clear(&hdev->le_white_list);
hci_conn_params_clear_all(hdev);
hci_discovery_filter_clear(hdev);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 7b61be736..32363c2b7 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2603,6 +2603,63 @@ unlock:
hci_dev_unlock(hdev);
}
+static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status,
+ u16 opcode, struct sk_buff *skb)
+{
+ const struct hci_rp_read_enc_key_size *rp;
+ struct hci_conn *conn;
+ u16 handle;
+
+ BT_DBG("%s status 0x%02x", hdev->name, status);
+
+ if (!skb || skb->len < sizeof(*rp)) {
+ BT_ERR("%s invalid HCI Read Encryption Key Size response",
+ hdev->name);
+ return;
+ }
+
+ rp = (void *)skb->data;
+ handle = le16_to_cpu(rp->handle);
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!conn)
+ goto unlock;
+
+ /* If we fail to read the encryption key size, assume maximum
+ * (which is the same we do also when this HCI command isn't
+ * supported.
+ */
+ if (rp->status) {
+ BT_ERR("%s failed to read key size for handle %u", hdev->name,
+ handle);
+ conn->enc_key_size = HCI_LINK_KEY_SIZE;
+ } else {
+ conn->enc_key_size = rp->key_size;
+ }
+
+ if (conn->state == BT_CONFIG) {
+ conn->state = BT_CONNECTED;
+ hci_connect_cfm(conn, 0);
+ hci_conn_drop(conn);
+ } else {
+ u8 encrypt;
+
+ if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags))
+ encrypt = 0x00;
+ else if (test_bit(HCI_CONN_AES_CCM, &conn->flags))
+ encrypt = 0x02;
+ else
+ encrypt = 0x01;
+
+ hci_encrypt_cfm(conn, 0, encrypt);
+ }
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_encrypt_change *ev = (void *) skb->data;
@@ -2650,22 +2707,51 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
goto unlock;
}
- if (conn->state == BT_CONFIG) {
- if (!ev->status)
- conn->state = BT_CONNECTED;
+ /* In Secure Connections Only mode, do not allow any connections
+ * that are not encrypted with AES-CCM using a P-256 authenticated
+ * combination key.
+ */
+ if (hci_dev_test_flag(hdev, HCI_SC_ONLY) &&
+ (!test_bit(HCI_CONN_AES_CCM, &conn->flags) ||
+ conn->key_type != HCI_LK_AUTH_COMBINATION_P256)) {
+ hci_connect_cfm(conn, HCI_ERROR_AUTH_FAILURE);
+ hci_conn_drop(conn);
+ goto unlock;
+ }
+
+ /* Try reading the encryption key size for encrypted ACL links */
+ if (!ev->status && ev->encrypt && conn->type == ACL_LINK) {
+ struct hci_cp_read_enc_key_size cp;
+ struct hci_request req;
- /* In Secure Connections Only mode, do not allow any
- * connections that are not encrypted with AES-CCM
- * using a P-256 authenticated combination key.
+ /* Only send HCI_Read_Encryption_Key_Size if the
+ * controller really supports it. If it doesn't, assume
+ * the default size (16).
*/
- if (hci_dev_test_flag(hdev, HCI_SC_ONLY) &&
- (!test_bit(HCI_CONN_AES_CCM, &conn->flags) ||
- conn->key_type != HCI_LK_AUTH_COMBINATION_P256)) {
- hci_connect_cfm(conn, HCI_ERROR_AUTH_FAILURE);
- hci_conn_drop(conn);
- goto unlock;
+ if (!(hdev->commands[20] & 0x10)) {
+ conn->enc_key_size = HCI_LINK_KEY_SIZE;
+ goto notify;
}
+ hci_req_init(&req, hdev);
+
+ cp.handle = cpu_to_le16(conn->handle);
+ hci_req_add(&req, HCI_OP_READ_ENC_KEY_SIZE, sizeof(cp), &cp);
+
+ if (hci_req_run_skb(&req, read_enc_key_size_complete)) {
+ BT_ERR("Sending HCI Read Encryption Key Size failed");
+ conn->enc_key_size = HCI_LINK_KEY_SIZE;
+ goto notify;
+ }
+
+ goto unlock;
+ }
+
+notify:
+ if (conn->state == BT_CONFIG) {
+ if (!ev->status)
+ conn->state = BT_CONNECTED;
+
hci_connect_cfm(conn, ev->status);
hci_conn_drop(conn);
} else
@@ -4955,7 +5041,8 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
goto not_found;
}
- memcpy(cp.ltk, ltk->val, sizeof(ltk->val));
+ memcpy(cp.ltk, ltk->val, ltk->enc_size);
+ memset(cp.ltk + ltk->enc_size, 0, sizeof(cp.ltk) - ltk->enc_size);
cp.handle = cpu_to_le16(conn->handle);
conn->pending_sec_level = smp_ltk_sec_level(ltk);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index e11a5cfda..f2d30d115 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -503,9 +503,9 @@ static int hci_sock_release(struct socket *sock)
if (hdev) {
if (hci_pi(sk)->channel == HCI_CHANNEL_USER) {
- mgmt_index_added(hdev);
- hci_dev_clear_flag(hdev, HCI_USER_CHANNEL);
hci_dev_close(hdev->id);
+ hci_dev_clear_flag(hdev, HCI_USER_CHANNEL);
+ mgmt_index_added(hdev);
}
atomic_dec(&hdev->promisc);
@@ -1389,7 +1389,7 @@ static int hci_sock_create(struct net *net, struct socket *sock, int protocol,
sock->ops = &hci_sock_ops;
- sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto);
+ sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 9070dfd6b..f1a117f8c 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -915,6 +915,7 @@ static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr,
session->conn = l2cap_conn_get(conn);
session->user.probe = hidp_session_probe;
session->user.remove = hidp_session_remove;
+ INIT_LIST_HEAD(&session->user.list);
session->ctrl_sock = ctrl_sock;
session->intr_sock = intr_sock;
skb_queue_head_init(&session->ctrl_transmit);
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index cb3fdde19..008ba439b 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -235,7 +235,7 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto);
+ sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index dad419782..45fffa413 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1601,7 +1601,7 @@ int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user)
hci_dev_lock(hdev);
- if (user->list.next || user->list.prev) {
+ if (!list_empty(&user->list)) {
ret = -EINVAL;
goto out_unlock;
}
@@ -1631,12 +1631,10 @@ void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user)
hci_dev_lock(hdev);
- if (!user->list.next || !user->list.prev)
+ if (list_empty(&user->list))
goto out_unlock;
- list_del(&user->list);
- user->list.next = NULL;
- user->list.prev = NULL;
+ list_del_init(&user->list);
user->remove(conn, user);
out_unlock:
@@ -1650,9 +1648,7 @@ static void l2cap_unregister_all_users(struct l2cap_conn *conn)
while (!list_empty(&conn->users)) {
user = list_first_entry(&conn->users, struct l2cap_user, list);
- list_del(&user->list);
- user->list.next = NULL;
- user->list.prev = NULL;
+ list_del_init(&user->list);
user->remove(conn, user);
}
}
@@ -7442,7 +7438,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
mutex_unlock(&conn->chan_lock);
}
-int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
+void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
{
struct l2cap_conn *conn = hcon->l2cap_data;
struct l2cap_hdr *hdr;
@@ -7485,7 +7481,7 @@ int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
if (len == skb->len) {
/* Complete frame received */
l2cap_recv_frame(conn, skb);
- return 0;
+ return;
}
BT_DBG("Start: total len %d, frag len %d", len, skb->len);
@@ -7544,7 +7540,6 @@ int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
drop:
kfree_skb(skb);
- return 0;
}
static struct hci_cb l2cap_cb = {
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index a7278f05e..244287706 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -43,7 +43,7 @@ static struct bt_sock_list l2cap_sk_list = {
static const struct proto_ops l2cap_sock_ops;
static void l2cap_sock_init(struct sock *sk, struct sock *parent);
static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
- int proto, gfp_t prio);
+ int proto, gfp_t prio, int kern);
bool l2cap_is_socket(struct socket *sock)
{
@@ -1193,7 +1193,7 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan)
}
sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP,
- GFP_ATOMIC);
+ GFP_ATOMIC, 0);
if (!sk) {
release_sock(parent);
return NULL;
@@ -1523,12 +1523,12 @@ static struct proto l2cap_proto = {
};
static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
- int proto, gfp_t prio)
+ int proto, gfp_t prio, int kern)
{
struct sock *sk;
struct l2cap_chan *chan;
- sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto);
+ sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, kern);
if (!sk)
return NULL;
@@ -1574,7 +1574,7 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol,
sock->ops = &l2cap_sock_ops;
- sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC);
+ sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 7fd87e713..92720f3fe 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -38,7 +38,7 @@
#include "mgmt_util.h"
#define MGMT_VERSION 1
-#define MGMT_REVISION 9
+#define MGMT_REVISION 10
static const u16 mgmt_commands[] = {
MGMT_OP_READ_INDEX_LIST,
@@ -832,6 +832,20 @@ static struct mgmt_pending_cmd *pending_find_data(u16 opcode,
return mgmt_pending_find_data(HCI_CHANNEL_CONTROL, opcode, hdev, data);
}
+static u8 get_current_adv_instance(struct hci_dev *hdev)
+{
+ /* The "Set Advertising" setting supersedes the "Add Advertising"
+ * setting. Here we set the advertising data based on which
+ * setting was set. When neither apply, default to the global settings,
+ * represented by instance "0".
+ */
+ if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) &&
+ !hci_dev_test_flag(hdev, HCI_ADVERTISING))
+ return hdev->cur_adv_instance;
+
+ return 0x00;
+}
+
static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr)
{
u8 ad_len = 0;
@@ -858,19 +872,25 @@ static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr)
return ad_len;
}
-static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 *ptr)
+static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance,
+ u8 *ptr)
{
+ struct adv_info *adv_instance;
+
+ adv_instance = hci_find_adv_instance(hdev, instance);
+ if (!adv_instance)
+ return 0;
+
/* TODO: Set the appropriate entries based on advertising instance flags
* here once flags other than 0 are supported.
*/
- memcpy(ptr, hdev->adv_instance.scan_rsp_data,
- hdev->adv_instance.scan_rsp_len);
+ memcpy(ptr, adv_instance->scan_rsp_data,
+ adv_instance->scan_rsp_len);
- return hdev->adv_instance.scan_rsp_len;
+ return adv_instance->scan_rsp_len;
}
-static void update_scan_rsp_data_for_instance(struct hci_request *req,
- u8 instance)
+static void update_inst_scan_rsp_data(struct hci_request *req, u8 instance)
{
struct hci_dev *hdev = req->hdev;
struct hci_cp_le_set_scan_rsp_data cp;
@@ -882,7 +902,7 @@ static void update_scan_rsp_data_for_instance(struct hci_request *req,
memset(&cp, 0, sizeof(cp));
if (instance)
- len = create_instance_scan_rsp_data(hdev, cp.data);
+ len = create_instance_scan_rsp_data(hdev, instance, cp.data);
else
len = create_default_scan_rsp_data(hdev, cp.data);
@@ -900,21 +920,7 @@ static void update_scan_rsp_data_for_instance(struct hci_request *req,
static void update_scan_rsp_data(struct hci_request *req)
{
- struct hci_dev *hdev = req->hdev;
- u8 instance;
-
- /* The "Set Advertising" setting supersedes the "Add Advertising"
- * setting. Here we set the scan response data based on which
- * setting was set. When neither apply, default to the global settings,
- * represented by instance "0".
- */
- if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) &&
- !hci_dev_test_flag(hdev, HCI_ADVERTISING))
- instance = 0x01;
- else
- instance = 0x00;
-
- update_scan_rsp_data_for_instance(req, instance);
+ update_inst_scan_rsp_data(req, get_current_adv_instance(req->hdev));
}
static u8 get_adv_discov_flags(struct hci_dev *hdev)
@@ -941,20 +947,6 @@ static u8 get_adv_discov_flags(struct hci_dev *hdev)
return 0;
}
-static u8 get_current_adv_instance(struct hci_dev *hdev)
-{
- /* The "Set Advertising" setting supersedes the "Add Advertising"
- * setting. Here we set the advertising data based on which
- * setting was set. When neither apply, default to the global settings,
- * represented by instance "0".
- */
- if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) &&
- !hci_dev_test_flag(hdev, HCI_ADVERTISING))
- return 0x01;
-
- return 0x00;
-}
-
static bool get_connectable(struct hci_dev *hdev)
{
struct mgmt_pending_cmd *cmd;
@@ -975,41 +967,65 @@ static bool get_connectable(struct hci_dev *hdev)
static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance)
{
u32 flags;
+ struct adv_info *adv_instance;
- if (instance > 0x01)
- return 0;
+ if (instance == 0x00) {
+ /* Instance 0 always manages the "Tx Power" and "Flags"
+ * fields
+ */
+ flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS;
- if (instance == 0x01)
- return hdev->adv_instance.flags;
+ /* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting
+ * corresponds to the "connectable" instance flag.
+ */
+ if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE))
+ flags |= MGMT_ADV_FLAG_CONNECTABLE;
- /* Instance 0 always manages the "Tx Power" and "Flags" fields */
- flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS;
+ return flags;
+ }
- /* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting corresponds
- * to the "connectable" instance flag.
- */
- if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE))
- flags |= MGMT_ADV_FLAG_CONNECTABLE;
+ adv_instance = hci_find_adv_instance(hdev, instance);
- return flags;
+ /* Return 0 when we got an invalid instance identifier. */
+ if (!adv_instance)
+ return 0;
+
+ return adv_instance->flags;
}
-static u8 get_adv_instance_scan_rsp_len(struct hci_dev *hdev, u8 instance)
+static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev)
{
- /* Ignore instance 0 and other unsupported instances */
- if (instance != 0x01)
+ u8 instance = get_current_adv_instance(hdev);
+ struct adv_info *adv_instance;
+
+ /* Ignore instance 0 */
+ if (instance == 0x00)
+ return 0;
+
+ adv_instance = hci_find_adv_instance(hdev, instance);
+ if (!adv_instance)
return 0;
/* TODO: Take into account the "appearance" and "local-name" flags here.
* These are currently being ignored as they are not supported.
*/
- return hdev->adv_instance.scan_rsp_len;
+ return adv_instance->scan_rsp_len;
}
static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
{
+ struct adv_info *adv_instance = NULL;
u8 ad_len = 0, flags = 0;
- u32 instance_flags = get_adv_instance_flags(hdev, instance);
+ u32 instance_flags;
+
+ /* Return 0 when the current instance identifier is invalid. */
+ if (instance) {
+ adv_instance = hci_find_adv_instance(hdev, instance);
+ if (!adv_instance)
+ return 0;
+ }
+
+ instance_flags = get_adv_instance_flags(hdev, instance);
/* The Add Advertising command allows userspace to set both the general
* and limited discoverable flags.
@@ -1043,12 +1059,11 @@ static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
}
}
- if (instance) {
- memcpy(ptr, hdev->adv_instance.adv_data,
- hdev->adv_instance.adv_data_len);
-
- ad_len += hdev->adv_instance.adv_data_len;
- ptr += hdev->adv_instance.adv_data_len;
+ if (adv_instance) {
+ memcpy(ptr, adv_instance->adv_data,
+ adv_instance->adv_data_len);
+ ad_len += adv_instance->adv_data_len;
+ ptr += adv_instance->adv_data_len;
}
/* Provide Tx Power only if we can provide a valid value for it */
@@ -1065,7 +1080,7 @@ static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
return ad_len;
}
-static void update_adv_data_for_instance(struct hci_request *req, u8 instance)
+static void update_inst_adv_data(struct hci_request *req, u8 instance)
{
struct hci_dev *hdev = req->hdev;
struct hci_cp_le_set_adv_data cp;
@@ -1093,10 +1108,7 @@ static void update_adv_data_for_instance(struct hci_request *req, u8 instance)
static void update_adv_data(struct hci_request *req)
{
- struct hci_dev *hdev = req->hdev;
- u8 instance = get_current_adv_instance(hdev);
-
- update_adv_data_for_instance(req, instance);
+ update_inst_adv_data(req, get_current_adv_instance(req->hdev));
}
int mgmt_update_adv_data(struct hci_dev *hdev)
@@ -1277,7 +1289,7 @@ static void enable_advertising(struct hci_request *req)
if (connectable)
cp.type = LE_ADV_IND;
- else if (get_adv_instance_scan_rsp_len(hdev, instance))
+ else if (get_cur_adv_instance_scan_rsp_len(hdev))
cp.type = LE_ADV_SCAN_IND;
else
cp.type = LE_ADV_NONCONN_IND;
@@ -1459,27 +1471,141 @@ static void advertising_removed(struct sock *sk, struct hci_dev *hdev,
mgmt_event(MGMT_EV_ADVERTISING_REMOVED, hdev, &ev, sizeof(ev), sk);
}
-static void clear_adv_instance(struct hci_dev *hdev)
+static int schedule_adv_instance(struct hci_request *req, u8 instance,
+ bool force) {
+ struct hci_dev *hdev = req->hdev;
+ struct adv_info *adv_instance = NULL;
+ u16 timeout;
+
+ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
+ !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))
+ return -EPERM;
+
+ if (hdev->adv_instance_timeout)
+ return -EBUSY;
+
+ adv_instance = hci_find_adv_instance(hdev, instance);
+ if (!adv_instance)
+ return -ENOENT;
+
+ /* A zero timeout means unlimited advertising. As long as there is
+ * only one instance, duration should be ignored. We still set a timeout
+ * in case further instances are being added later on.
+ *
+ * If the remaining lifetime of the instance is more than the duration
+ * then the timeout corresponds to the duration, otherwise it will be
+ * reduced to the remaining instance lifetime.
+ */
+ if (adv_instance->timeout == 0 ||
+ adv_instance->duration <= adv_instance->remaining_time)
+ timeout = adv_instance->duration;
+ else
+ timeout = adv_instance->remaining_time;
+
+ /* The remaining time is being reduced unless the instance is being
+ * advertised without time limit.
+ */
+ if (adv_instance->timeout)
+ adv_instance->remaining_time =
+ adv_instance->remaining_time - timeout;
+
+ hdev->adv_instance_timeout = timeout;
+ queue_delayed_work(hdev->workqueue,
+ &hdev->adv_instance_expire,
+ msecs_to_jiffies(timeout * 1000));
+
+ /* If we're just re-scheduling the same instance again then do not
+ * execute any HCI commands. This happens when a single instance is
+ * being advertised.
+ */
+ if (!force && hdev->cur_adv_instance == instance &&
+ hci_dev_test_flag(hdev, HCI_LE_ADV))
+ return 0;
+
+ hdev->cur_adv_instance = instance;
+ update_adv_data(req);
+ update_scan_rsp_data(req);
+ enable_advertising(req);
+
+ return 0;
+}
+
+static void cancel_adv_timeout(struct hci_dev *hdev)
{
- struct hci_request req;
+ if (hdev->adv_instance_timeout) {
+ hdev->adv_instance_timeout = 0;
+ cancel_delayed_work(&hdev->adv_instance_expire);
+ }
+}
- if (!hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))
- return;
+/* For a single instance:
+ * - force == true: The instance will be removed even when its remaining
+ * lifetime is not zero.
+ * - force == false: the instance will be deactivated but kept stored unless
+ * the remaining lifetime is zero.
+ *
+ * For instance == 0x00:
+ * - force == true: All instances will be removed regardless of their timeout
+ * setting.
+ * - force == false: Only instances that have a timeout will be removed.
+ */
+static void clear_adv_instance(struct hci_dev *hdev, struct hci_request *req,
+ u8 instance, bool force)
+{
+ struct adv_info *adv_instance, *n, *next_instance = NULL;
+ int err;
+ u8 rem_inst;
- if (hdev->adv_instance.timeout)
- cancel_delayed_work(&hdev->adv_instance.timeout_exp);
+ /* Cancel any timeout concerning the removed instance(s). */
+ if (!instance || hdev->cur_adv_instance == instance)
+ cancel_adv_timeout(hdev);
- memset(&hdev->adv_instance, 0, sizeof(hdev->adv_instance));
- advertising_removed(NULL, hdev, 1);
- hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE);
+ /* Get the next instance to advertise BEFORE we remove
+ * the current one. This can be the same instance again
+ * if there is only one instance.
+ */
+ if (instance && hdev->cur_adv_instance == instance)
+ next_instance = hci_get_next_instance(hdev, instance);
- if (!hdev_is_powered(hdev) ||
+ if (instance == 0x00) {
+ list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances,
+ list) {
+ if (!(force || adv_instance->timeout))
+ continue;
+
+ rem_inst = adv_instance->instance;
+ err = hci_remove_adv_instance(hdev, rem_inst);
+ if (!err)
+ advertising_removed(NULL, hdev, rem_inst);
+ }
+ hdev->cur_adv_instance = 0x00;
+ } else {
+ adv_instance = hci_find_adv_instance(hdev, instance);
+
+ if (force || (adv_instance && adv_instance->timeout &&
+ !adv_instance->remaining_time)) {
+ /* Don't advertise a removed instance. */
+ if (next_instance &&
+ next_instance->instance == instance)
+ next_instance = NULL;
+
+ err = hci_remove_adv_instance(hdev, instance);
+ if (!err)
+ advertising_removed(NULL, hdev, instance);
+ }
+ }
+
+ if (list_empty(&hdev->adv_instances)) {
+ hdev->cur_adv_instance = 0x00;
+ hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE);
+ }
+
+ if (!req || !hdev_is_powered(hdev) ||
hci_dev_test_flag(hdev, HCI_ADVERTISING))
return;
- hci_req_init(&req, hdev);
- disable_advertising(&req);
- hci_req_run(&req, NULL);
+ if (next_instance)
+ schedule_adv_instance(req, next_instance->instance, false);
}
static int clean_up_hci_state(struct hci_dev *hdev)
@@ -1497,8 +1623,7 @@ static int clean_up_hci_state(struct hci_dev *hdev)
hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
}
- if (hdev->adv_instance.timeout)
- clear_adv_instance(hdev);
+ clear_adv_instance(hdev, NULL, 0x00, false);
if (hci_dev_test_flag(hdev, HCI_LE_ADV))
disable_advertising(&req);
@@ -2453,6 +2578,9 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
val = !!cp->val;
enabled = lmp_host_le_capable(hdev);
+ if (!val)
+ clear_adv_instance(hdev, NULL, 0x00, true);
+
if (!hdev_is_powered(hdev) || val == enabled) {
bool changed = false;
@@ -4087,6 +4215,7 @@ static bool trigger_le_scan(struct hci_request *req, u16 interval, u8 *status)
return false;
}
+ cancel_adv_timeout(hdev);
disable_advertising(req);
}
@@ -4669,6 +4798,9 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status,
{
struct cmd_lookup match = { NULL, hdev };
struct hci_request req;
+ u8 instance;
+ struct adv_info *adv_instance;
+ int err;
hci_dev_lock(hdev);
@@ -4694,18 +4826,31 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status,
sock_put(match.sk);
/* If "Set Advertising" was just disabled and instance advertising was
- * set up earlier, then enable the advertising instance.
+ * set up earlier, then re-enable multi-instance advertising.
*/
if (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
- !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))
+ !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) ||
+ list_empty(&hdev->adv_instances))
goto unlock;
+ instance = hdev->cur_adv_instance;
+ if (!instance) {
+ adv_instance = list_first_entry_or_null(&hdev->adv_instances,
+ struct adv_info, list);
+ if (!adv_instance)
+ goto unlock;
+
+ instance = adv_instance->instance;
+ }
+
hci_req_init(&req, hdev);
- update_adv_data(&req);
- enable_advertising(&req);
+ err = schedule_adv_instance(&req, instance, true);
+
+ if (!err)
+ err = hci_req_run(&req, enable_advertising_instance);
- if (hci_req_run(&req, enable_advertising_instance) < 0)
+ if (err)
BT_ERR("Failed to re-configure advertising");
unlock:
@@ -4790,10 +4935,15 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
else
hci_dev_clear_flag(hdev, HCI_ADVERTISING_CONNECTABLE);
+ cancel_adv_timeout(hdev);
+
if (val) {
- /* Switch to instance "0" for the Set Advertising setting. */
- update_adv_data_for_instance(&req, 0);
- update_scan_rsp_data_for_instance(&req, 0);
+ /* Switch to instance "0" for the Set Advertising setting.
+ * We cannot use update_[adv|scan_rsp]_data() here as the
+ * HCI_ADVERTISING flag is not yet set.
+ */
+ update_inst_adv_data(&req, 0x00);
+ update_inst_scan_rsp_data(&req, 0x00);
enable_advertising(&req);
} else {
disable_advertising(&req);
@@ -6781,8 +6931,9 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
{
struct mgmt_rp_read_adv_features *rp;
size_t rp_len;
- int err;
+ int err, i;
bool instance;
+ struct adv_info *adv_instance;
u32 supported_flags;
BT_DBG("%s", hdev->name);
@@ -6795,12 +6946,9 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
rp_len = sizeof(*rp);
- /* Currently only one instance is supported, so just add 1 to the
- * response length.
- */
instance = hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE);
if (instance)
- rp_len++;
+ rp_len += hdev->adv_instance_cnt;
rp = kmalloc(rp_len, GFP_ATOMIC);
if (!rp) {
@@ -6813,14 +6961,18 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
rp->supported_flags = cpu_to_le32(supported_flags);
rp->max_adv_data_len = HCI_MAX_AD_LENGTH;
rp->max_scan_rsp_len = HCI_MAX_AD_LENGTH;
- rp->max_instances = 1;
+ rp->max_instances = HCI_MAX_ADV_INSTANCES;
- /* Currently only one instance is supported, so simply return the
- * current instance number.
- */
if (instance) {
- rp->num_instances = 1;
- rp->instance[0] = 1;
+ i = 0;
+ list_for_each_entry(adv_instance, &hdev->adv_instances, list) {
+ if (i >= hdev->adv_instance_cnt)
+ break;
+
+ rp->instance[i] = adv_instance->instance;
+ i++;
+ }
+ rp->num_instances = hdev->adv_instance_cnt;
} else {
rp->num_instances = 0;
}
@@ -6882,7 +7034,10 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status,
u16 opcode)
{
struct mgmt_pending_cmd *cmd;
+ struct mgmt_cp_add_advertising *cp;
struct mgmt_rp_add_advertising rp;
+ struct adv_info *adv_instance, *n;
+ u8 instance;
BT_DBG("status %d", status);
@@ -6890,16 +7045,32 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status,
cmd = pending_find(MGMT_OP_ADD_ADVERTISING, hdev);
- if (status) {
+ if (status)
hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE);
- memset(&hdev->adv_instance, 0, sizeof(hdev->adv_instance));
- advertising_removed(cmd ? cmd->sk : NULL, hdev, 1);
+
+ list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) {
+ if (!adv_instance->pending)
+ continue;
+
+ if (!status) {
+ adv_instance->pending = false;
+ continue;
+ }
+
+ instance = adv_instance->instance;
+
+ if (hdev->cur_adv_instance == instance)
+ cancel_adv_timeout(hdev);
+
+ hci_remove_adv_instance(hdev, instance);
+ advertising_removed(cmd ? cmd->sk : NULL, hdev, instance);
}
if (!cmd)
goto unlock;
- rp.instance = 0x01;
+ cp = cmd->param;
+ rp.instance = cp->instance;
if (status)
mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode,
@@ -6914,15 +7085,28 @@ unlock:
hci_dev_unlock(hdev);
}
-static void adv_timeout_expired(struct work_struct *work)
+void mgmt_adv_timeout_expired(struct hci_dev *hdev)
{
- struct hci_dev *hdev = container_of(work, struct hci_dev,
- adv_instance.timeout_exp.work);
+ u8 instance;
+ struct hci_request req;
+
+ hdev->adv_instance_timeout = 0;
- hdev->adv_instance.timeout = 0;
+ instance = get_current_adv_instance(hdev);
+ if (instance == 0x00)
+ return;
hci_dev_lock(hdev);
- clear_adv_instance(hdev);
+ hci_req_init(&req, hdev);
+
+ clear_adv_instance(hdev, &req, instance, false);
+
+ if (list_empty(&hdev->adv_instances))
+ disable_advertising(&req);
+
+ if (!skb_queue_empty(&req.cmd_q))
+ hci_req_run(&req, NULL);
+
hci_dev_unlock(hdev);
}
@@ -6934,7 +7118,10 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
u32 flags;
u32 supported_flags;
u8 status;
- u16 timeout;
+ u16 timeout, duration;
+ unsigned int prev_instance_cnt = hdev->adv_instance_cnt;
+ u8 schedule_instance = 0;
+ struct adv_info *next_instance;
int err;
struct mgmt_pending_cmd *cmd;
struct hci_request req;
@@ -6948,12 +7135,13 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
flags = __le32_to_cpu(cp->flags);
timeout = __le16_to_cpu(cp->timeout);
+ duration = __le16_to_cpu(cp->duration);
- /* The current implementation only supports adding one instance and only
- * a subset of the specified flags.
+ /* The current implementation only supports a subset of the specified
+ * flags.
*/
supported_flags = get_supported_adv_flags(hdev);
- if (cp->instance != 0x01 || (flags & ~supported_flags))
+ if (flags & ~supported_flags)
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
MGMT_STATUS_INVALID_PARAMS);
@@ -6981,38 +7169,51 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- INIT_DELAYED_WORK(&hdev->adv_instance.timeout_exp, adv_timeout_expired);
-
- hdev->adv_instance.flags = flags;
- hdev->adv_instance.adv_data_len = cp->adv_data_len;
- hdev->adv_instance.scan_rsp_len = cp->scan_rsp_len;
-
- if (cp->adv_data_len)
- memcpy(hdev->adv_instance.adv_data, cp->data, cp->adv_data_len);
-
- if (cp->scan_rsp_len)
- memcpy(hdev->adv_instance.scan_rsp_data,
- cp->data + cp->adv_data_len, cp->scan_rsp_len);
-
- if (hdev->adv_instance.timeout)
- cancel_delayed_work(&hdev->adv_instance.timeout_exp);
+ err = hci_add_adv_instance(hdev, cp->instance, flags,
+ cp->adv_data_len, cp->data,
+ cp->scan_rsp_len,
+ cp->data + cp->adv_data_len,
+ timeout, duration);
+ if (err < 0) {
+ err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
+ MGMT_STATUS_FAILED);
+ goto unlock;
+ }
- hdev->adv_instance.timeout = timeout;
+ /* Only trigger an advertising added event if a new instance was
+ * actually added.
+ */
+ if (hdev->adv_instance_cnt > prev_instance_cnt)
+ advertising_added(sk, hdev, cp->instance);
- if (timeout)
- queue_delayed_work(hdev->workqueue,
- &hdev->adv_instance.timeout_exp,
- msecs_to_jiffies(timeout * 1000));
+ hci_dev_set_flag(hdev, HCI_ADVERTISING_INSTANCE);
- if (!hci_dev_test_and_set_flag(hdev, HCI_ADVERTISING_INSTANCE))
- advertising_added(sk, hdev, 1);
+ if (hdev->cur_adv_instance == cp->instance) {
+ /* If the currently advertised instance is being changed then
+ * cancel the current advertising and schedule the next
+ * instance. If there is only one instance then the overridden
+ * advertising data will be visible right away.
+ */
+ cancel_adv_timeout(hdev);
+
+ next_instance = hci_get_next_instance(hdev, cp->instance);
+ if (next_instance)
+ schedule_instance = next_instance->instance;
+ } else if (!hdev->adv_instance_timeout) {
+ /* Immediately advertise the new instance if no other
+ * instance is currently being advertised.
+ */
+ schedule_instance = cp->instance;
+ }
- /* If the HCI_ADVERTISING flag is set or the device isn't powered then
- * we have no HCI communication to make. Simply return.
+ /* If the HCI_ADVERTISING flag is set or the device isn't powered or
+ * there is no instance to be advertised then we have no HCI
+ * communication to make. Simply return.
*/
if (!hdev_is_powered(hdev) ||
- hci_dev_test_flag(hdev, HCI_ADVERTISING)) {
- rp.instance = 0x01;
+ hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
+ !schedule_instance) {
+ rp.instance = cp->instance;
err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
goto unlock;
@@ -7030,11 +7231,11 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
hci_req_init(&req, hdev);
- update_adv_data(&req);
- update_scan_rsp_data(&req);
- enable_advertising(&req);
+ err = schedule_adv_instance(&req, schedule_instance, true);
+
+ if (!err)
+ err = hci_req_run(&req, add_advertising_complete);
- err = hci_req_run(&req, add_advertising_complete);
if (err < 0)
mgmt_pending_remove(cmd);
@@ -7048,6 +7249,7 @@ static void remove_advertising_complete(struct hci_dev *hdev, u8 status,
u16 opcode)
{
struct mgmt_pending_cmd *cmd;
+ struct mgmt_cp_remove_advertising *cp;
struct mgmt_rp_remove_advertising rp;
BT_DBG("status %d", status);
@@ -7062,7 +7264,8 @@ static void remove_advertising_complete(struct hci_dev *hdev, u8 status,
if (!cmd)
goto unlock;
- rp.instance = 1;
+ cp = cmd->param;
+ rp.instance = cp->instance;
mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, MGMT_STATUS_SUCCESS,
&rp, sizeof(rp));
@@ -7077,21 +7280,21 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
{
struct mgmt_cp_remove_advertising *cp = data;
struct mgmt_rp_remove_advertising rp;
- int err;
struct mgmt_pending_cmd *cmd;
struct hci_request req;
+ int err;
BT_DBG("%s", hdev->name);
- /* The current implementation only allows modifying instance no 1. A
- * value of 0 indicates that all instances should be cleared.
- */
- if (cp->instance > 1)
- return mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING,
- MGMT_STATUS_INVALID_PARAMS);
-
hci_dev_lock(hdev);
+ if (cp->instance && !hci_find_adv_instance(hdev, cp->instance)) {
+ err = mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_REMOVE_ADVERTISING,
+ MGMT_STATUS_INVALID_PARAMS);
+ goto unlock;
+ }
+
if (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) ||
pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) ||
pending_find(MGMT_OP_SET_LE, hdev)) {
@@ -7106,21 +7309,21 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- if (hdev->adv_instance.timeout)
- cancel_delayed_work(&hdev->adv_instance.timeout_exp);
-
- memset(&hdev->adv_instance, 0, sizeof(hdev->adv_instance));
+ hci_req_init(&req, hdev);
- advertising_removed(sk, hdev, 1);
+ clear_adv_instance(hdev, &req, cp->instance, true);
- hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE);
+ if (list_empty(&hdev->adv_instances))
+ disable_advertising(&req);
- /* If the HCI_ADVERTISING flag is set or the device isn't powered then
- * we have no HCI communication to make. Simply return.
+ /* If no HCI commands have been collected so far or the HCI_ADVERTISING
+ * flag is set or the device isn't powered then we have no HCI
+ * communication to make. Simply return.
*/
- if (!hdev_is_powered(hdev) ||
+ if (skb_queue_empty(&req.cmd_q) ||
+ !hdev_is_powered(hdev) ||
hci_dev_test_flag(hdev, HCI_ADVERTISING)) {
- rp.instance = 1;
+ rp.instance = cp->instance;
err = mgmt_cmd_complete(sk, hdev->id,
MGMT_OP_REMOVE_ADVERTISING,
MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
@@ -7134,9 +7337,6 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- hci_req_init(&req, hdev);
- disable_advertising(&req);
-
err = hci_req_run(&req, remove_advertising_complete);
if (err < 0)
mgmt_pending_remove(cmd);
@@ -7361,6 +7561,7 @@ static void powered_complete(struct hci_dev *hdev, u8 status, u16 opcode)
static int powered_update_hci(struct hci_dev *hdev)
{
struct hci_request req;
+ struct adv_info *adv_instance;
u8 link_sec;
hci_req_init(&req, hdev);
@@ -7400,14 +7601,27 @@ static int powered_update_hci(struct hci_dev *hdev)
* advertising data. This also applies to the case
* where BR/EDR was toggled during the AUTO_OFF phase.
*/
- if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) {
+ if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) &&
+ (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
+ !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))) {
update_adv_data(&req);
update_scan_rsp_data(&req);
}
- if (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
- hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))
+ if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) &&
+ hdev->cur_adv_instance == 0x00 &&
+ !list_empty(&hdev->adv_instances)) {
+ adv_instance = list_first_entry(&hdev->adv_instances,
+ struct adv_info, list);
+ hdev->cur_adv_instance = adv_instance->instance;
+ }
+
+ if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
enable_advertising(&req);
+ else if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) &&
+ hdev->cur_adv_instance)
+ schedule_adv_instance(&req, hdev->cur_adv_instance,
+ true);
restart_le_actions(&req);
}
@@ -7577,7 +7791,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent)
memset(&ev, 0, sizeof(ev));
/* Devices using resolvable or non-resolvable random addresses
- * without providing an indentity resolving key don't require
+ * without providing an identity resolving key don't require
* to store long term keys. Their addresses will change the
* next time around.
*
@@ -7603,7 +7817,12 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent)
if (key->type == SMP_LTK)
ev.key.master = 1;
- memcpy(ev.key.val, key->val, sizeof(key->val));
+ /* Make sure we copy only the significant bytes based on the
+ * encryption key size, and set the rest of the value to zeroes.
+ */
+ memcpy(ev.key.val, key->val, key->enc_size);
+ memset(ev.key.val + key->enc_size, 0,
+ sizeof(ev.key.val) - key->enc_size);
mgmt_event(MGMT_EV_NEW_LONG_TERM_KEY, hdev, &ev, sizeof(ev), NULL);
}
@@ -7617,7 +7836,7 @@ void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk)
/* For identity resolving keys from devices that are already
* using a public address or static random address, do not
* ask for storing this key. The identity resolving key really
- * is only mandatory for devices using resovlable random
+ * is only mandatory for devices using resolvable random
* addresses.
*
* Storing all identity resolving keys has the downside that
@@ -7646,7 +7865,7 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk,
memset(&ev, 0, sizeof(ev));
/* Devices using resolvable or non-resolvable random addresses
- * without providing an indentity resolving key don't require
+ * without providing an identity resolving key don't require
* to store signature resolving keys. Their addresses will change
* the next time around.
*
@@ -8387,13 +8606,24 @@ static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode)
void mgmt_reenable_advertising(struct hci_dev *hdev)
{
struct hci_request req;
+ u8 instance;
if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) &&
!hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))
return;
+ instance = get_current_adv_instance(hdev);
+
hci_req_init(&req, hdev);
- enable_advertising(&req);
+
+ if (instance) {
+ schedule_adv_instance(&req, instance, true);
+ } else {
+ update_adv_data(&req);
+ update_scan_rsp_data(&req);
+ enable_advertising(&req);
+ }
+
hci_req_run(&req, adv_enable_complete);
}
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 4fea24275..29709fbfd 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -200,7 +200,7 @@ static int rfcomm_l2sock_create(struct socket **sock)
BT_DBG("");
- err = sock_create_kern(PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP, sock);
+ err = sock_create_kern(&init_net, PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP, sock);
if (!err) {
struct sock *sk = (*sock)->sk;
sk->sk_data_ready = rfcomm_l2data_ready;
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 825e8fb51..7511df723 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -269,12 +269,12 @@ static struct proto rfcomm_proto = {
.obj_size = sizeof(struct rfcomm_pinfo)
};
-static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
+static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern)
{
struct rfcomm_dlc *d;
struct sock *sk;
- sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto);
+ sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, kern);
if (!sk)
return NULL;
@@ -324,7 +324,7 @@ static int rfcomm_sock_create(struct net *net, struct socket *sock,
sock->ops = &rfcomm_sock_ops;
- sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC);
+ sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern);
if (!sk)
return -ENOMEM;
@@ -334,16 +334,19 @@ static int rfcomm_sock_create(struct net *net, struct socket *sock,
static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
- struct sockaddr_rc *sa = (struct sockaddr_rc *) addr;
+ struct sockaddr_rc sa;
struct sock *sk = sock->sk;
- int chan = sa->rc_channel;
- int err = 0;
-
- BT_DBG("sk %p %pMR", sk, &sa->rc_bdaddr);
+ int len, err = 0;
if (!addr || addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
+ memset(&sa, 0, sizeof(sa));
+ len = min_t(unsigned int, sizeof(sa), addr_len);
+ memcpy(&sa, addr, len);
+
+ BT_DBG("sk %p %pMR", sk, &sa.rc_bdaddr);
+
lock_sock(sk);
if (sk->sk_state != BT_OPEN) {
@@ -358,12 +361,13 @@ static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr
write_lock(&rfcomm_sk_list.lock);
- if (chan && __rfcomm_get_listen_sock_by_addr(chan, &sa->rc_bdaddr)) {
+ if (sa.rc_channel &&
+ __rfcomm_get_listen_sock_by_addr(sa.rc_channel, &sa.rc_bdaddr)) {
err = -EADDRINUSE;
} else {
/* Save source address */
- bacpy(&rfcomm_pi(sk)->src, &sa->rc_bdaddr);
- rfcomm_pi(sk)->channel = chan;
+ bacpy(&rfcomm_pi(sk)->src, &sa.rc_bdaddr);
+ rfcomm_pi(sk)->channel = sa.rc_channel;
sk->sk_state = BT_BOUND;
}
@@ -969,7 +973,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
goto done;
}
- sk = rfcomm_sock_alloc(sock_net(parent), NULL, BTPROTO_RFCOMM, GFP_ATOMIC);
+ sk = rfcomm_sock_alloc(sock_net(parent), NULL, BTPROTO_RFCOMM, GFP_ATOMIC, 0);
if (!sk)
goto done;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 4322c833e..688a040c5 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -460,11 +460,11 @@ static struct proto sco_proto = {
.obj_size = sizeof(struct sco_pinfo)
};
-static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
+static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern)
{
struct sock *sk;
- sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto);
+ sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, kern);
if (!sk)
return NULL;
@@ -501,7 +501,7 @@ static int sco_sock_create(struct net *net, struct socket *sock, int protocol,
sock->ops = &sco_sock_ops;
- sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC);
+ sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern);
if (!sk)
return -ENOMEM;
@@ -1026,7 +1026,7 @@ static void sco_conn_ready(struct sco_conn *conn)
bh_lock_sock(parent);
sk = sco_sock_alloc(sock_net(parent), NULL,
- BTPROTO_SCO, GFP_ATOMIC);
+ BTPROTO_SCO, GFP_ATOMIC, 0);
if (!sk) {
bh_unlock_sock(parent);
sco_conn_unlock(conn);
@@ -1110,7 +1110,7 @@ static void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
sco_conn_del(hcon, bt_to_errno(reason));
}
-int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb)
+void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb)
{
struct sco_conn *conn = hcon->sco_data;
@@ -1121,12 +1121,11 @@ int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb)
if (skb->len) {
sco_recv_frame(conn, skb);
- return 0;
+ return;
}
drop:
kfree_skb(skb);
- return 0;
}
static struct hci_cb sco_cb = {
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 7b815bcc8..ad82324f7 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -33,6 +33,9 @@
#include "ecc.h"
#include "smp.h"
+#define SMP_DEV(hdev) \
+ ((struct smp_dev *)((struct l2cap_chan *)((hdev)->smp_data))->data)
+
/* Low-level debug macros to be used for stuff that we don't want
* accidentially in dmesg, i.e. the values of the various crypto keys
* and the inputs & outputs of crypto functions.
@@ -81,6 +84,9 @@ struct smp_dev {
u8 local_rand[16];
bool debug_key;
+ u8 min_key_size;
+ u8 max_key_size;
+
struct crypto_blkcipher *tfm_aes;
struct crypto_hash *tfm_cmac;
};
@@ -371,6 +377,8 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
uint8_t tmp[16], data[16];
int err;
+ SMP_DBG("k %16phN r %16phN", k, r);
+
if (!tfm) {
BT_ERR("tfm %p", tfm);
return -EINVAL;
@@ -400,6 +408,8 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
/* Most significant octet of encryptedData corresponds to data[0] */
swap_buf(data, r, 16);
+ SMP_DBG("r %16phN", r);
+
return err;
}
@@ -410,6 +420,10 @@ static int smp_c1(struct crypto_blkcipher *tfm_aes, const u8 k[16],
u8 p1[16], p2[16];
int err;
+ SMP_DBG("k %16phN r %16phN", k, r);
+ SMP_DBG("iat %u ia %6phN rat %u ra %6phN", _iat, ia, _rat, ra);
+ SMP_DBG("preq %7phN pres %7phN", preq, pres);
+
memset(p1, 0, 16);
/* p1 = pres || preq || _rat || _iat */
@@ -418,10 +432,7 @@ static int smp_c1(struct crypto_blkcipher *tfm_aes, const u8 k[16],
memcpy(p1 + 2, preq, 7);
memcpy(p1 + 9, pres, 7);
- /* p2 = padding || ia || ra */
- memcpy(p2, ra, 6);
- memcpy(p2 + 6, ia, 6);
- memset(p2 + 12, 0, 4);
+ SMP_DBG("p1 %16phN", p1);
/* res = r XOR p1 */
u128_xor((u128 *) res, (u128 *) r, (u128 *) p1);
@@ -433,6 +444,13 @@ static int smp_c1(struct crypto_blkcipher *tfm_aes, const u8 k[16],
return err;
}
+ /* p2 = padding || ia || ra */
+ memcpy(p2, ra, 6);
+ memcpy(p2 + 6, ia, 6);
+ memset(p2 + 12, 0, 4);
+
+ SMP_DBG("p2 %16phN", p2);
+
/* res = res XOR p2 */
u128_xor((u128 *) res, (u128 *) res, (u128 *) p2);
@@ -696,7 +714,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn,
if (rsp == NULL) {
req->io_capability = conn->hcon->io_capability;
req->oob_flag = oob_flag;
- req->max_key_size = SMP_MAX_ENC_KEY_SIZE;
+ req->max_key_size = SMP_DEV(hdev)->max_key_size;
req->init_key_dist = local_dist;
req->resp_key_dist = remote_dist;
req->auth_req = (authreq & AUTH_REQ_MASK(hdev));
@@ -707,7 +725,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn,
rsp->io_capability = conn->hcon->io_capability;
rsp->oob_flag = oob_flag;
- rsp->max_key_size = SMP_MAX_ENC_KEY_SIZE;
+ rsp->max_key_size = SMP_DEV(hdev)->max_key_size;
rsp->init_key_dist = req->init_key_dist & remote_dist;
rsp->resp_key_dist = req->resp_key_dist & local_dist;
rsp->auth_req = (authreq & AUTH_REQ_MASK(hdev));
@@ -718,10 +736,11 @@ static void build_pairing_cmd(struct l2cap_conn *conn,
static u8 check_enc_key_size(struct l2cap_conn *conn, __u8 max_key_size)
{
struct l2cap_chan *chan = conn->smp;
+ struct hci_dev *hdev = conn->hcon->hdev;
struct smp_chan *smp = chan->data;
- if ((max_key_size > SMP_MAX_ENC_KEY_SIZE) ||
- (max_key_size < SMP_MIN_ENC_KEY_SIZE))
+ if (max_key_size > SMP_DEV(hdev)->max_key_size ||
+ max_key_size < SMP_MIN_ENC_KEY_SIZE)
return SMP_ENC_KEY_SIZE;
smp->enc_key_size = max_key_size;
@@ -985,13 +1004,10 @@ static u8 smp_random(struct smp_chan *smp)
smp_s1(smp->tfm_aes, smp->tk, smp->rrnd, smp->prnd, stk);
- memset(stk + smp->enc_key_size, 0,
- SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size);
-
if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags))
return SMP_UNSPECIFIED;
- hci_le_start_enc(hcon, ediv, rand, stk);
+ hci_le_start_enc(hcon, ediv, rand, stk, smp->enc_key_size);
hcon->enc_key_size = smp->enc_key_size;
set_bit(HCI_CONN_STK_ENCRYPT, &hcon->flags);
} else {
@@ -1004,9 +1020,6 @@ static u8 smp_random(struct smp_chan *smp)
smp_s1(smp->tfm_aes, smp->tk, smp->prnd, smp->rrnd, stk);
- memset(stk + smp->enc_key_size, 0,
- SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size);
-
if (hcon->pending_sec_level == BT_SECURITY_HIGH)
auth = 1;
else
@@ -1144,9 +1157,6 @@ static void sc_add_ltk(struct smp_chan *smp)
else
auth = 0;
- memset(smp->tk + smp->enc_key_size, 0,
- SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size);
-
smp->ltk = hci_add_ltk(hcon->hdev, &hcon->dst, hcon->dst_type,
key_type, auth, smp->tk, smp->enc_key_size,
0, 0);
@@ -1268,7 +1278,14 @@ static void smp_distribute_keys(struct smp_chan *smp)
__le16 ediv;
__le64 rand;
- get_random_bytes(enc.ltk, sizeof(enc.ltk));
+ /* Make sure we generate only the significant amount of
+ * bytes based on the encryption key size, and set the rest
+ * of the value to zeroes.
+ */
+ get_random_bytes(enc.ltk, smp->enc_key_size);
+ memset(enc.ltk + smp->enc_key_size, 0,
+ sizeof(enc.ltk) - smp->enc_key_size);
+
get_random_bytes(&ediv, sizeof(ediv));
get_random_bytes(&rand, sizeof(rand));
@@ -1688,7 +1705,7 @@ static void build_bredr_pairing_cmd(struct smp_chan *smp,
req->init_key_dist = local_dist;
req->resp_key_dist = remote_dist;
- req->max_key_size = SMP_MAX_ENC_KEY_SIZE;
+ req->max_key_size = conn->hcon->enc_key_size;
smp->remote_key_dist = remote_dist;
@@ -1697,7 +1714,7 @@ static void build_bredr_pairing_cmd(struct smp_chan *smp,
memset(rsp, 0, sizeof(*rsp));
- rsp->max_key_size = SMP_MAX_ENC_KEY_SIZE;
+ rsp->max_key_size = conn->hcon->enc_key_size;
rsp->init_key_dist = req->init_key_dist & remote_dist;
rsp->resp_key_dist = req->resp_key_dist & local_dist;
@@ -2190,7 +2207,7 @@ static bool smp_ltk_encrypt(struct l2cap_conn *conn, u8 sec_level)
if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags))
return true;
- hci_le_start_enc(hcon, key->ediv, key->rand, key->val);
+ hci_le_start_enc(hcon, key->ediv, key->rand, key->val, key->enc_size);
hcon->enc_key_size = key->enc_size;
/* We never store STKs for master role, so clear this flag */
@@ -2742,7 +2759,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb)
sc_add_ltk(smp);
if (hcon->out) {
- hci_le_start_enc(hcon, 0, 0, smp->tk);
+ hci_le_start_enc(hcon, 0, 0, smp->tk, smp->enc_key_size);
hcon->enc_key_size = smp->enc_key_size;
}
@@ -3124,6 +3141,8 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
smp->tfm_aes = tfm_aes;
smp->tfm_cmac = tfm_cmac;
+ smp->min_key_size = SMP_MIN_ENC_KEY_SIZE;
+ smp->max_key_size = SMP_MAX_ENC_KEY_SIZE;
create_chan:
chan = l2cap_chan_create();
@@ -3246,6 +3265,94 @@ static const struct file_operations force_bredr_smp_fops = {
.llseek = default_llseek,
};
+static ssize_t le_min_key_size_read(struct file *file,
+ char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct hci_dev *hdev = file->private_data;
+ char buf[4];
+
+ snprintf(buf, sizeof(buf), "%2u\n", SMP_DEV(hdev)->min_key_size);
+
+ return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
+}
+
+static ssize_t le_min_key_size_write(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct hci_dev *hdev = file->private_data;
+ char buf[32];
+ size_t buf_size = min(count, (sizeof(buf) - 1));
+ u8 key_size;
+
+ if (copy_from_user(buf, user_buf, buf_size))
+ return -EFAULT;
+
+ buf[buf_size] = '\0';
+
+ sscanf(buf, "%hhu", &key_size);
+
+ if (key_size > SMP_DEV(hdev)->max_key_size ||
+ key_size < SMP_MIN_ENC_KEY_SIZE)
+ return -EINVAL;
+
+ SMP_DEV(hdev)->min_key_size = key_size;
+
+ return count;
+}
+
+static const struct file_operations le_min_key_size_fops = {
+ .open = simple_open,
+ .read = le_min_key_size_read,
+ .write = le_min_key_size_write,
+ .llseek = default_llseek,
+};
+
+static ssize_t le_max_key_size_read(struct file *file,
+ char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct hci_dev *hdev = file->private_data;
+ char buf[4];
+
+ snprintf(buf, sizeof(buf), "%2u\n", SMP_DEV(hdev)->max_key_size);
+
+ return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
+}
+
+static ssize_t le_max_key_size_write(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct hci_dev *hdev = file->private_data;
+ char buf[32];
+ size_t buf_size = min(count, (sizeof(buf) - 1));
+ u8 key_size;
+
+ if (copy_from_user(buf, user_buf, buf_size))
+ return -EFAULT;
+
+ buf[buf_size] = '\0';
+
+ sscanf(buf, "%hhu", &key_size);
+
+ if (key_size > SMP_MAX_ENC_KEY_SIZE ||
+ key_size < SMP_DEV(hdev)->min_key_size)
+ return -EINVAL;
+
+ SMP_DEV(hdev)->max_key_size = key_size;
+
+ return count;
+}
+
+static const struct file_operations le_max_key_size_fops = {
+ .open = simple_open,
+ .read = le_max_key_size_read,
+ .write = le_max_key_size_write,
+ .llseek = default_llseek,
+};
+
int smp_register(struct hci_dev *hdev)
{
struct l2cap_chan *chan;
@@ -3270,6 +3377,11 @@ int smp_register(struct hci_dev *hdev)
hdev->smp_data = chan;
+ debugfs_create_file("le_min_key_size", 0644, hdev->debugfs, hdev,
+ &le_min_key_size_fops);
+ debugfs_create_file("le_max_key_size", 0644, hdev->debugfs, hdev,
+ &le_max_key_size_fops);
+
/* If the controller does not support BR/EDR Secure Connections
* feature, then the BR/EDR SMP channel shall not be present.
*
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index fd7ee03c5..a1cda5d47 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -12,6 +12,8 @@ bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
bridge-$(subst m,y,$(CONFIG_BRIDGE_NETFILTER)) += br_nf_core.o
+br_netfilter-y := br_netfilter_hooks.o
+br_netfilter-$(subst m,y,$(CONFIG_IPV6)) += br_netfilter_ipv6.o
obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 02c24cf63..a1abe4936 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -121,13 +121,13 @@ static struct notifier_block br_device_notifier = {
.notifier_call = br_device_event
};
-static int br_netdev_switch_event(struct notifier_block *unused,
- unsigned long event, void *ptr)
+static int br_switchdev_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
{
- struct net_device *dev = netdev_switch_notifier_info_to_dev(ptr);
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
struct net_bridge_port *p;
struct net_bridge *br;
- struct netdev_switch_notifier_fdb_info *fdb_info;
+ struct switchdev_notifier_fdb_info *fdb_info;
int err = NOTIFY_DONE;
rtnl_lock();
@@ -138,14 +138,14 @@ static int br_netdev_switch_event(struct notifier_block *unused,
br = p->br;
switch (event) {
- case NETDEV_SWITCH_FDB_ADD:
+ case SWITCHDEV_FDB_ADD:
fdb_info = ptr;
err = br_fdb_external_learn_add(br, p, fdb_info->addr,
fdb_info->vid);
if (err)
err = notifier_from_errno(err);
break;
- case NETDEV_SWITCH_FDB_DEL:
+ case SWITCHDEV_FDB_DEL:
fdb_info = ptr;
err = br_fdb_external_learn_del(br, p, fdb_info->addr,
fdb_info->vid);
@@ -159,8 +159,8 @@ out:
return err;
}
-static struct notifier_block br_netdev_switch_notifier = {
- .notifier_call = br_netdev_switch_event,
+static struct notifier_block br_switchdev_notifier = {
+ .notifier_call = br_switchdev_event,
};
static void __net_exit br_net_exit(struct net *net)
@@ -214,7 +214,7 @@ static int __init br_init(void)
if (err)
goto err_out3;
- err = register_netdev_switch_notifier(&br_netdev_switch_notifier);
+ err = register_switchdev_notifier(&br_switchdev_notifier);
if (err)
goto err_out4;
@@ -235,7 +235,7 @@ static int __init br_init(void)
return 0;
err_out5:
- unregister_netdev_switch_notifier(&br_netdev_switch_notifier);
+ unregister_switchdev_notifier(&br_switchdev_notifier);
err_out4:
unregister_netdevice_notifier(&br_device_notifier);
err_out3:
@@ -253,7 +253,7 @@ static void __exit br_deinit(void)
{
stp_proto_unregister(&br_stp_proto);
br_netlink_fini();
- unregister_netdev_switch_notifier(&br_netdev_switch_notifier);
+ unregister_switchdev_notifier(&br_switchdev_notifier);
unregister_netdevice_notifier(&br_device_notifier);
brioctl_set(NULL);
unregister_pernet_subsys(&br_net_ops);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 659fb9667..9e9875da0 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -24,6 +24,7 @@
#include <linux/atomic.h>
#include <asm/unaligned.h>
#include <linux/if_vlan.h>
+#include <net/switchdev.h>
#include "br_private.h"
static struct kmem_cache *br_fdb_cache __read_mostly;
@@ -130,11 +131,27 @@ static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr)
}
}
+static void fdb_del_external_learn(struct net_bridge_fdb_entry *f)
+{
+ struct switchdev_obj obj = {
+ .id = SWITCHDEV_OBJ_PORT_FDB,
+ .u.fdb = {
+ .addr = f->addr.addr,
+ .vid = f->vlan_id,
+ },
+ };
+
+ switchdev_port_obj_del(f->dst->dev, &obj);
+}
+
static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
{
if (f->is_static)
fdb_del_hw_addr(br, f->addr.addr);
+ if (f->added_by_external_learn)
+ fdb_del_external_learn(f);
+
hlist_del_rcu(&f->hlist);
fdb_notify(br, f, RTM_DELNEIGH);
call_rcu(&f->rcu, fdb_rcu_free);
@@ -313,9 +330,11 @@ void br_fdb_flush(struct net_bridge *br)
/* Flush all entries referring to a specific port.
* if do_all is set also flush static entries
+ * if vid is set delete all entries that match the vlan_id
*/
void br_fdb_delete_by_port(struct net_bridge *br,
const struct net_bridge_port *p,
+ u16 vid,
int do_all)
{
int i;
@@ -330,8 +349,9 @@ void br_fdb_delete_by_port(struct net_bridge *br,
if (f->dst != p)
continue;
- if (f->is_static && !do_all)
- continue;
+ if (!do_all)
+ if (f->is_static || (vid && f->vlan_id != vid))
+ continue;
if (f->is_local)
fdb_delete_local(br, p, f);
@@ -736,6 +756,12 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
struct net_bridge_fdb_entry *fdb;
bool modified = false;
+ /* If the port cannot learn allow only local and static entries */
+ if (!(state & NUD_PERMANENT) && !(state & NUD_NOARP) &&
+ !(source->state == BR_STATE_LEARNING ||
+ source->state == BR_STATE_FORWARDING))
+ return -EPERM;
+
fdb = fdb_find(head, addr, vid);
if (fdb == NULL) {
if (!(flags & NLM_F_CREATE))
@@ -867,13 +893,15 @@ out:
return err;
}
-static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vlan)
+static int fdb_delete_by_addr_and_port(struct net_bridge_port *p,
+ const u8 *addr, u16 vlan)
{
+ struct net_bridge *br = p->br;
struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)];
struct net_bridge_fdb_entry *fdb;
fdb = fdb_find(head, addr, vlan);
- if (!fdb)
+ if (!fdb || fdb->dst != p)
return -ENOENT;
fdb_delete(br, fdb);
@@ -886,7 +914,7 @@ static int __br_fdb_delete(struct net_bridge_port *p,
int err;
spin_lock_bh(&p->br->hash_lock);
- err = fdb_delete_by_addr(p->br, addr, vid);
+ err = fdb_delete_by_addr_and_port(p, addr, vid);
spin_unlock_bh(&p->br->hash_lock);
return err;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index e97572b5d..fa7bfced8 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -37,14 +37,30 @@ static inline int should_deliver(const struct net_bridge_port *p,
int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb)
{
- if (!is_skb_forwardable(skb->dev, skb)) {
- kfree_skb(skb);
- } else {
- skb_push(skb, ETH_HLEN);
- br_drop_fake_rtable(skb);
- dev_queue_xmit(skb);
+ if (!is_skb_forwardable(skb->dev, skb))
+ goto drop;
+
+ skb_push(skb, ETH_HLEN);
+ br_drop_fake_rtable(skb);
+ skb_sender_cpu_clear(skb);
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ (skb->protocol == htons(ETH_P_8021Q) ||
+ skb->protocol == htons(ETH_P_8021AD))) {
+ int depth;
+
+ if (!__vlan_get_protocol(skb, skb->protocol, &depth))
+ goto drop;
+
+ skb_set_network_header(skb, depth);
}
+ dev_queue_xmit(skb);
+
+ return 0;
+
+drop:
+ kfree_skb(skb);
return 0;
}
EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 1849d96b3..a538cb119 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -249,7 +249,7 @@ static void del_nbp(struct net_bridge_port *p)
list_del_rcu(&p->list);
nbp_vlan_flush(p);
- br_fdb_delete_by_port(br, p, 1);
+ br_fdb_delete_by_port(br, p, 0, 1);
nbp_update_port_count(br);
netdev_upper_dev_unlink(dev, br->dev);
@@ -278,7 +278,7 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)
del_nbp(p);
}
- br_fdb_delete_by_port(br, NULL, 1);
+ br_fdb_delete_by_port(br, NULL, 0, 1);
br_vlan_flush(br);
del_timer_sync(&br->gc_timer);
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index e29ad70b3..c94321955 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -323,6 +323,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
struct net_bridge_mdb_htable *mdb;
+ unsigned long now = jiffies;
int err;
mdb = mlock_dereference(br->mdb, br);
@@ -347,8 +348,9 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
if (unlikely(!p))
return -ENOMEM;
rcu_assign_pointer(*pp, p);
+ if (state == MDB_TEMPORARY)
+ mod_timer(&p->timer, now + br->multicast_membership_interval);
- br_mdb_notify(br->dev, port, group, RTM_NEWMDB);
return 0;
}
@@ -371,6 +373,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
if (!p || p->br != br || p->state == BR_STATE_DISABLED)
return -EINVAL;
+ memset(&ip, 0, sizeof(ip));
ip.proto = entry->addr.proto;
if (ip.proto == htons(ETH_P_IP))
ip.u.ip4 = entry->addr.u.ip4;
@@ -417,20 +420,14 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
if (!netif_running(br->dev) || br->multicast_disabled)
return -EINVAL;
+ memset(&ip, 0, sizeof(ip));
ip.proto = entry->addr.proto;
- if (ip.proto == htons(ETH_P_IP)) {
- if (timer_pending(&br->ip4_other_query.timer))
- return -EBUSY;
-
+ if (ip.proto == htons(ETH_P_IP))
ip.u.ip4 = entry->addr.u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
- } else {
- if (timer_pending(&br->ip6_other_query.timer))
- return -EBUSY;
-
+ else
ip.u.ip6 = entry->addr.u.ip6;
#endif
- }
spin_lock_bh(&br->multicast_lock);
mdb = mlock_dereference(br->mdb, br);
@@ -448,6 +445,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
if (p->port->state == BR_STATE_DISABLED)
goto unlock;
+ entry->state = p->state;
rcu_assign_pointer(*pp, p->next);
hlist_del_init(&p->mglist);
del_timer(&p->timer);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index ff667e18b..1285eaf5d 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -37,6 +37,18 @@
static void br_multicast_start_querier(struct net_bridge *br,
struct bridge_mcast_own_query *query);
+static void br_multicast_add_router(struct net_bridge *br,
+ struct net_bridge_port *port);
+static void br_ip4_multicast_leave_group(struct net_bridge *br,
+ struct net_bridge_port *port,
+ __be32 group,
+ __u16 vid);
+#if IS_ENABLED(CONFIG_IPV6)
+static void br_ip6_multicast_leave_group(struct net_bridge *br,
+ struct net_bridge_port *port,
+ const struct in6_addr *group,
+ __u16 vid);
+#endif
unsigned int br_mdb_rehash_seq;
static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
@@ -936,6 +948,8 @@ void br_multicast_enable_port(struct net_bridge_port *port)
#if IS_ENABLED(CONFIG_IPV6)
br_multicast_enable(&port->ip6_own_query);
#endif
+ if (port->multicast_router == 2 && hlist_unhashed(&port->rlist))
+ br_multicast_add_router(br, port);
out:
spin_unlock(&br->multicast_lock);
@@ -975,9 +989,6 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
int err = 0;
__be32 group;
- if (!pskb_may_pull(skb, sizeof(*ih)))
- return -EINVAL;
-
ih = igmpv3_report_hdr(skb);
num = ntohs(ih->ngrec);
len = sizeof(*ih);
@@ -1009,9 +1020,15 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
continue;
}
- err = br_ip4_multicast_add_group(br, port, group, vid);
- if (err)
- break;
+ if ((type == IGMPV3_CHANGE_TO_INCLUDE ||
+ type == IGMPV3_MODE_IS_INCLUDE) &&
+ ntohs(grec->grec_nsrcs) == 0) {
+ br_ip4_multicast_leave_group(br, port, group, vid);
+ } else {
+ err = br_ip4_multicast_add_group(br, port, group, vid);
+ if (err)
+ break;
+ }
}
return err;
@@ -1070,10 +1087,17 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
continue;
}
- err = br_ip6_multicast_add_group(br, port, &grec->grec_mca,
- vid);
- if (err)
- break;
+ if ((grec->grec_type == MLD2_CHANGE_TO_INCLUDE ||
+ grec->grec_type == MLD2_MODE_IS_INCLUDE) &&
+ ntohs(*nsrcs) == 0) {
+ br_ip6_multicast_leave_group(br, port, &grec->grec_mca,
+ vid);
+ } else {
+ err = br_ip6_multicast_add_group(br, port,
+ &grec->grec_mca, vid);
+ if (!err)
+ break;
+ }
}
return err;
@@ -1247,25 +1271,14 @@ static int br_ip4_multicast_query(struct net_bridge *br,
max_delay = 10 * HZ;
group = 0;
}
- } else {
- if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) {
- err = -EINVAL;
- goto out;
- }
-
+ } else if (skb->len >= sizeof(*ih3)) {
ih3 = igmpv3_query_hdr(skb);
if (ih3->nsrcs)
goto out;
max_delay = ih3->code ?
IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1;
- }
-
- /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer
- * all-systems destination addresses (224.0.0.1) for general queries
- */
- if (!group && iph->daddr != htonl(INADDR_ALLHOSTS_GROUP)) {
- err = -EINVAL;
+ } else {
goto out;
}
@@ -1328,12 +1341,6 @@ static int br_ip6_multicast_query(struct net_bridge *br,
(port && port->state == BR_STATE_DISABLED))
goto out;
- /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */
- if (!(ipv6_addr_type(&ip6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
- err = -EINVAL;
- goto out;
- }
-
if (skb->len == sizeof(*mld)) {
if (!pskb_may_pull(skb, sizeof(*mld))) {
err = -EINVAL;
@@ -1357,14 +1364,6 @@ static int br_ip6_multicast_query(struct net_bridge *br,
is_general_query = group && ipv6_addr_any(group);
- /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer
- * all-nodes destination address (ff02::1) for general queries
- */
- if (is_general_query && !ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) {
- err = -EINVAL;
- goto out;
- }
-
if (is_general_query) {
saddr.proto = htons(ETH_P_IPV6);
saddr.u.ip6 = ip6h->saddr;
@@ -1417,8 +1416,7 @@ br_multicast_leave_group(struct net_bridge *br,
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) ||
- (port && port->state == BR_STATE_DISABLED) ||
- timer_pending(&other_query->timer))
+ (port && port->state == BR_STATE_DISABLED))
goto out;
mdb = mlock_dereference(br->mdb, br);
@@ -1426,6 +1424,31 @@ br_multicast_leave_group(struct net_bridge *br,
if (!mp)
goto out;
+ if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) {
+ struct net_bridge_port_group __rcu **pp;
+
+ for (pp = &mp->ports;
+ (p = mlock_dereference(*pp, br)) != NULL;
+ pp = &p->next) {
+ if (p->port != port)
+ continue;
+
+ rcu_assign_pointer(*pp, p->next);
+ hlist_del_init(&p->mglist);
+ del_timer(&p->timer);
+ call_rcu_bh(&p->rcu, br_multicast_free_pg);
+ br_mdb_notify(br->dev, port, group, RTM_DELMDB);
+
+ if (!mp->ports && !mp->mglist &&
+ netif_running(br->dev))
+ mod_timer(&mp->timer, jiffies);
+ }
+ goto out;
+ }
+
+ if (timer_pending(&other_query->timer))
+ goto out;
+
if (br->multicast_querier) {
__br_multicast_send_query(br, port, &mp->addr);
@@ -1451,28 +1474,6 @@ br_multicast_leave_group(struct net_bridge *br,
}
}
- if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) {
- struct net_bridge_port_group __rcu **pp;
-
- for (pp = &mp->ports;
- (p = mlock_dereference(*pp, br)) != NULL;
- pp = &p->next) {
- if (p->port != port)
- continue;
-
- rcu_assign_pointer(*pp, p->next);
- hlist_del_init(&p->mglist);
- del_timer(&p->timer);
- call_rcu_bh(&p->rcu, br_multicast_free_pg);
- br_mdb_notify(br->dev, port, group, RTM_DELMDB);
-
- if (!mp->ports && !mp->mglist &&
- netif_running(br->dev))
- mod_timer(&mp->timer, jiffies);
- }
- goto out;
- }
-
now = jiffies;
time = now + br->multicast_last_member_count *
br->multicast_last_member_interval;
@@ -1556,74 +1557,22 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
struct sk_buff *skb,
u16 vid)
{
- struct sk_buff *skb2 = skb;
- const struct iphdr *iph;
+ struct sk_buff *skb_trimmed = NULL;
struct igmphdr *ih;
- unsigned int len;
- unsigned int offset;
int err;
- /* We treat OOM as packet loss for now. */
- if (!pskb_may_pull(skb, sizeof(*iph)))
- return -EINVAL;
-
- iph = ip_hdr(skb);
-
- if (iph->ihl < 5 || iph->version != 4)
- return -EINVAL;
-
- if (!pskb_may_pull(skb, ip_hdrlen(skb)))
- return -EINVAL;
-
- iph = ip_hdr(skb);
-
- if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
- return -EINVAL;
+ err = ip_mc_check_igmp(skb, &skb_trimmed);
- if (iph->protocol != IPPROTO_IGMP) {
- if (!ipv4_is_local_multicast(iph->daddr))
+ if (err == -ENOMSG) {
+ if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr))
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
return 0;
+ } else if (err < 0) {
+ return err;
}
- len = ntohs(iph->tot_len);
- if (skb->len < len || len < ip_hdrlen(skb))
- return -EINVAL;
-
- if (skb->len > len) {
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (!skb2)
- return -ENOMEM;
-
- err = pskb_trim_rcsum(skb2, len);
- if (err)
- goto err_out;
- }
-
- len -= ip_hdrlen(skb2);
- offset = skb_network_offset(skb2) + ip_hdrlen(skb2);
- __skb_pull(skb2, offset);
- skb_reset_transport_header(skb2);
-
- err = -EINVAL;
- if (!pskb_may_pull(skb2, sizeof(*ih)))
- goto out;
-
- switch (skb2->ip_summed) {
- case CHECKSUM_COMPLETE:
- if (!csum_fold(skb2->csum))
- break;
- /* fall through */
- case CHECKSUM_NONE:
- skb2->csum = 0;
- if (skb_checksum_complete(skb2))
- goto out;
- }
-
- err = 0;
-
BR_INPUT_SKB_CB(skb)->igmp = 1;
- ih = igmp_hdr(skb2);
+ ih = igmp_hdr(skb);
switch (ih->type) {
case IGMP_HOST_MEMBERSHIP_REPORT:
@@ -1632,21 +1581,19 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
err = br_ip4_multicast_add_group(br, port, ih->group, vid);
break;
case IGMPV3_HOST_MEMBERSHIP_REPORT:
- err = br_ip4_multicast_igmp3_report(br, port, skb2, vid);
+ err = br_ip4_multicast_igmp3_report(br, port, skb_trimmed, vid);
break;
case IGMP_HOST_MEMBERSHIP_QUERY:
- err = br_ip4_multicast_query(br, port, skb2, vid);
+ err = br_ip4_multicast_query(br, port, skb_trimmed, vid);
break;
case IGMP_HOST_LEAVE_MESSAGE:
br_ip4_multicast_leave_group(br, port, ih->group, vid);
break;
}
-out:
- __skb_push(skb2, offset);
-err_out:
- if (skb2 != skb)
- kfree_skb(skb2);
+ if (skb_trimmed && skb_trimmed != skb)
+ kfree_skb(skb_trimmed);
+
return err;
}
@@ -1656,138 +1603,42 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
struct sk_buff *skb,
u16 vid)
{
- struct sk_buff *skb2;
- const struct ipv6hdr *ip6h;
- u8 icmp6_type;
- u8 nexthdr;
- __be16 frag_off;
- unsigned int len;
- int offset;
+ struct sk_buff *skb_trimmed = NULL;
+ struct mld_msg *mld;
int err;
- if (!pskb_may_pull(skb, sizeof(*ip6h)))
- return -EINVAL;
-
- ip6h = ipv6_hdr(skb);
-
- /*
- * We're interested in MLD messages only.
- * - Version is 6
- * - MLD has always Router Alert hop-by-hop option
- * - But we do not support jumbrograms.
- */
- if (ip6h->version != 6)
- return 0;
-
- /* Prevent flooding this packet if there is no listener present */
- if (!ipv6_addr_is_ll_all_nodes(&ip6h->daddr))
- BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
-
- if (ip6h->nexthdr != IPPROTO_HOPOPTS ||
- ip6h->payload_len == 0)
- return 0;
-
- len = ntohs(ip6h->payload_len) + sizeof(*ip6h);
- if (skb->len < len)
- return -EINVAL;
-
- nexthdr = ip6h->nexthdr;
- offset = ipv6_skip_exthdr(skb, sizeof(*ip6h), &nexthdr, &frag_off);
+ err = ipv6_mc_check_mld(skb, &skb_trimmed);
- if (offset < 0 || nexthdr != IPPROTO_ICMPV6)
+ if (err == -ENOMSG) {
+ if (!ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr))
+ BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
return 0;
-
- /* Okay, we found ICMPv6 header */
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (!skb2)
- return -ENOMEM;
-
- err = -EINVAL;
- if (!pskb_may_pull(skb2, offset + sizeof(struct icmp6hdr)))
- goto out;
-
- len -= offset - skb_network_offset(skb2);
-
- __skb_pull(skb2, offset);
- skb_reset_transport_header(skb2);
- skb_postpull_rcsum(skb2, skb_network_header(skb2),
- skb_network_header_len(skb2));
-
- icmp6_type = icmp6_hdr(skb2)->icmp6_type;
-
- switch (icmp6_type) {
- case ICMPV6_MGM_QUERY:
- case ICMPV6_MGM_REPORT:
- case ICMPV6_MGM_REDUCTION:
- case ICMPV6_MLD2_REPORT:
- break;
- default:
- err = 0;
- goto out;
- }
-
- /* Okay, we found MLD message. Check further. */
- if (skb2->len > len) {
- err = pskb_trim_rcsum(skb2, len);
- if (err)
- goto out;
- err = -EINVAL;
- }
-
- ip6h = ipv6_hdr(skb2);
-
- switch (skb2->ip_summed) {
- case CHECKSUM_COMPLETE:
- if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb2->len,
- IPPROTO_ICMPV6, skb2->csum))
- break;
- /*FALLTHROUGH*/
- case CHECKSUM_NONE:
- skb2->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
- &ip6h->daddr,
- skb2->len,
- IPPROTO_ICMPV6, 0));
- if (__skb_checksum_complete(skb2))
- goto out;
+ } else if (err < 0) {
+ return err;
}
- err = 0;
-
BR_INPUT_SKB_CB(skb)->igmp = 1;
+ mld = (struct mld_msg *)skb_transport_header(skb);
- switch (icmp6_type) {
+ switch (mld->mld_type) {
case ICMPV6_MGM_REPORT:
- {
- struct mld_msg *mld;
- if (!pskb_may_pull(skb2, sizeof(*mld))) {
- err = -EINVAL;
- goto out;
- }
- mld = (struct mld_msg *)skb_transport_header(skb2);
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid);
break;
- }
case ICMPV6_MLD2_REPORT:
- err = br_ip6_multicast_mld2_report(br, port, skb2, vid);
+ err = br_ip6_multicast_mld2_report(br, port, skb_trimmed, vid);
break;
case ICMPV6_MGM_QUERY:
- err = br_ip6_multicast_query(br, port, skb2, vid);
+ err = br_ip6_multicast_query(br, port, skb_trimmed, vid);
break;
case ICMPV6_MGM_REDUCTION:
- {
- struct mld_msg *mld;
- if (!pskb_may_pull(skb2, sizeof(*mld))) {
- err = -EINVAL;
- goto out;
- }
- mld = (struct mld_msg *)skb_transport_header(skb2);
br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid);
- }
+ break;
}
-out:
- kfree_skb(skb2);
+ if (skb_trimmed && skb_trimmed != skb)
+ kfree_skb(skb_trimmed);
+
return err;
}
#endif
@@ -1949,11 +1800,9 @@ out:
int br_multicast_set_router(struct net_bridge *br, unsigned long val)
{
- int err = -ENOENT;
+ int err = -EINVAL;
spin_lock_bh(&br->multicast_lock);
- if (!netif_running(br->dev))
- goto unlock;
switch (val) {
case 0:
@@ -1964,13 +1813,8 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
br->multicast_router = val;
err = 0;
break;
-
- default:
- err = -EINVAL;
- break;
}
-unlock:
spin_unlock_bh(&br->multicast_lock);
return err;
@@ -1979,11 +1823,9 @@ unlock:
int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
{
struct net_bridge *br = p->br;
- int err = -ENOENT;
+ int err = -EINVAL;
spin_lock(&br->multicast_lock);
- if (!netif_running(br->dev) || p->state == BR_STATE_DISABLED)
- goto unlock;
switch (val) {
case 0:
@@ -2005,13 +1847,8 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
br_multicast_add_router(br, p);
break;
-
- default:
- err = -EINVAL;
- break;
}
-unlock:
spin_unlock(&br->multicast_lock);
return err;
@@ -2116,15 +1953,11 @@ unlock:
int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val)
{
- int err = -ENOENT;
+ int err = -EINVAL;
u32 old;
struct net_bridge_mdb_htable *mdb;
spin_lock_bh(&br->multicast_lock);
- if (!netif_running(br->dev))
- goto unlock;
-
- err = -EINVAL;
if (!is_power_of_2(val))
goto unlock;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter_hooks.c
index 60ddfbeb4..c8b9bcfe9 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -34,6 +34,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/addrconf.h>
#include <net/route.h>
#include <net/netfilter/br_netfilter.h>
@@ -110,27 +111,24 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb)
/* largest possible L2 header, see br_nf_dev_queue_xmit() */
#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
struct brnf_frag_data {
char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
u8 encap_size;
u8 size;
+ u16 vlan_tci;
+ __be16 vlan_proto;
};
static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);
#endif
-static struct nf_bridge_info *nf_bridge_info_get(const struct sk_buff *skb)
+static void nf_bridge_info_free(struct sk_buff *skb)
{
- return skb->nf_bridge;
-}
-
-static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
-{
- struct net_bridge_port *port;
-
- port = br_port_get_rcu(dev);
- return port ? &port->br->fake_rtable : NULL;
+ if (skb->nf_bridge) {
+ nf_bridge_put(skb->nf_bridge);
+ skb->nf_bridge = NULL;
+ }
}
static inline struct net_device *bridge_parent(const struct net_device *dev)
@@ -141,15 +139,6 @@ static inline struct net_device *bridge_parent(const struct net_device *dev)
return port ? port->br->dev : NULL;
}
-static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
-{
- skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC);
- if (likely(skb->nf_bridge))
- atomic_set(&(skb->nf_bridge->use), 1);
-
- return skb->nf_bridge;
-}
-
static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = skb->nf_bridge;
@@ -167,7 +156,7 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
return nf_bridge;
}
-static unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
+unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
{
switch (skb->protocol) {
case __cpu_to_be16(ETH_P_8021Q):
@@ -179,14 +168,6 @@ static unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
}
}
-static inline void nf_bridge_push_encap_header(struct sk_buff *skb)
-{
- unsigned int len = nf_bridge_encap_header_len(skb);
-
- skb_push(skb, len);
- skb->network_header -= len;
-}
-
static inline void nf_bridge_pull_encap_header(struct sk_buff *skb)
{
unsigned int len = nf_bridge_encap_header_len(skb);
@@ -208,7 +189,7 @@ static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb)
* expected format
*/
-static int br_parse_ip_options(struct sk_buff *skb)
+static int br_validate_ipv4(struct sk_buff *skb)
{
const struct iphdr *iph;
struct net_device *dev = skb->dev;
@@ -256,7 +237,7 @@ drop:
return -1;
}
-static void nf_bridge_update_protocol(struct sk_buff *skb)
+void nf_bridge_update_protocol(struct sk_buff *skb)
{
switch (skb->nf_bridge->orig_proto) {
case BRNF_PROTO_8021Q:
@@ -270,43 +251,12 @@ static void nf_bridge_update_protocol(struct sk_buff *skb)
}
}
-/* PF_BRIDGE/PRE_ROUTING *********************************************/
-/* Undo the changes made for ip6tables PREROUTING and continue the
- * bridge PRE_ROUTING hook. */
-static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
-{
- struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
- struct rtable *rt;
-
- if (nf_bridge->pkt_otherhost) {
- skb->pkt_type = PACKET_OTHERHOST;
- nf_bridge->pkt_otherhost = false;
- }
- nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
-
- rt = bridge_parent_rtable(nf_bridge->physindev);
- if (!rt) {
- kfree_skb(skb);
- return 0;
- }
- skb_dst_set_noref(skb, &rt->dst);
-
- skb->dev = nf_bridge->physindev;
- nf_bridge_update_protocol(skb);
- nf_bridge_push_encap_header(skb);
- NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb,
- skb->dev, NULL,
- br_handle_frame_finish, 1);
-
- return 0;
-}
-
/* Obtain the correct destination MAC address, while preserving the original
* source MAC address. If we already know this address, we just copy it. If we
* don't, we use the neighbour framework to find out. In both cases, we make
* sure that br_handle_frame_finish() is called afterwards.
*/
-static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
+int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
{
struct neighbour *neigh;
struct dst_entry *dst;
@@ -346,8 +296,9 @@ free_skb:
return 0;
}
-static bool daddr_was_changed(const struct sk_buff *skb,
- const struct nf_bridge_info *nf_bridge)
+static inline bool
+br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb,
+ const struct nf_bridge_info *nf_bridge)
{
return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr;
}
@@ -398,17 +349,15 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct rtable *rt;
int err;
- int frag_max_size;
- frag_max_size = IPCB(skb)->frag_max_size;
- BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size;
+ nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;
if (nf_bridge->pkt_otherhost) {
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->pkt_otherhost = false;
}
- nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
- if (daddr_was_changed(skb, nf_bridge)) {
+ nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING;
+ if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
struct in_device *in_dev = __in_dev_get_rcu(dev);
@@ -486,7 +435,7 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct
}
/* Some common code for IPv4/IPv6 */
-static struct net_device *setup_pre_routing(struct sk_buff *skb)
+struct net_device *setup_pre_routing(struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
@@ -509,106 +458,6 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb)
return skb->dev;
}
-/* We only check the length. A bridge shouldn't do any hop-by-hop stuff anyway */
-static int check_hbh_len(struct sk_buff *skb)
-{
- unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1);
- u32 pkt_len;
- const unsigned char *nh = skb_network_header(skb);
- int off = raw - nh;
- int len = (raw[1] + 1) << 3;
-
- if ((raw + len) - skb->data > skb_headlen(skb))
- goto bad;
-
- off += 2;
- len -= 2;
-
- while (len > 0) {
- int optlen = nh[off + 1] + 2;
-
- switch (nh[off]) {
- case IPV6_TLV_PAD1:
- optlen = 1;
- break;
-
- case IPV6_TLV_PADN:
- break;
-
- case IPV6_TLV_JUMBO:
- if (nh[off + 1] != 4 || (off & 3) != 2)
- goto bad;
- pkt_len = ntohl(*(__be32 *) (nh + off + 2));
- if (pkt_len <= IPV6_MAXPLEN ||
- ipv6_hdr(skb)->payload_len)
- goto bad;
- if (pkt_len > skb->len - sizeof(struct ipv6hdr))
- goto bad;
- if (pskb_trim_rcsum(skb,
- pkt_len + sizeof(struct ipv6hdr)))
- goto bad;
- nh = skb_network_header(skb);
- break;
- default:
- if (optlen > len)
- goto bad;
- break;
- }
- off += optlen;
- len -= optlen;
- }
- if (len == 0)
- return 0;
-bad:
- return -1;
-
-}
-
-/* Replicate the checks that IPv6 does on packet reception and pass the packet
- * to ip6tables, which doesn't support NAT, so things are fairly simple. */
-static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- const struct ipv6hdr *hdr;
- u32 pkt_len;
-
- if (skb->len < sizeof(struct ipv6hdr))
- return NF_DROP;
-
- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
- return NF_DROP;
-
- hdr = ipv6_hdr(skb);
-
- if (hdr->version != 6)
- return NF_DROP;
-
- pkt_len = ntohs(hdr->payload_len);
-
- if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
- if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
- return NF_DROP;
- if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
- return NF_DROP;
- }
- if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
- return NF_DROP;
-
- nf_bridge_put(skb->nf_bridge);
- if (!nf_bridge_alloc(skb))
- return NF_DROP;
- if (!setup_pre_routing(skb))
- return NF_DROP;
-
- skb->protocol = htons(ETH_P_IPV6);
- NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb,
- skb->dev, NULL,
- br_nf_pre_routing_finish_ipv6);
-
- return NF_STOLEN;
-}
-
/* Direct IPv6 traffic to br_nf_pre_routing_ipv6.
* Replicate the checks that IPv4 does on packet reception.
* Set skb->dev to the bridge device (i.e. parent of the
@@ -648,7 +497,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
nf_bridge_pull_encap_header_rcsum(skb);
- if (br_parse_ip_options(skb))
+ if (br_validate_ipv4(skb))
return NF_DROP;
nf_bridge_put(skb->nf_bridge);
@@ -692,12 +541,12 @@ static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb)
struct net_device *in;
if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) {
- int frag_max_size;
- if (skb->protocol == htons(ETH_P_IP)) {
- frag_max_size = IPCB(skb)->frag_max_size;
- BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size;
- }
+ if (skb->protocol == htons(ETH_P_IP))
+ nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;
+
+ if (skb->protocol == htons(ETH_P_IPV6))
+ nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
in = nf_bridge->physindev;
if (nf_bridge->pkt_otherhost) {
@@ -760,12 +609,15 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
}
if (pf == NFPROTO_IPV4) {
- int frag_max = BR_INPUT_SKB_CB(skb)->frag_max_size;
-
- if (br_parse_ip_options(skb))
+ if (br_validate_ipv4(skb))
return NF_DROP;
+ IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
+ }
- IPCB(skb)->frag_max_size = frag_max;
+ if (pf == NFPROTO_IPV6) {
+ if (br_validate_ipv6(skb))
+ return NF_DROP;
+ IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
}
nf_bridge->physoutdev = skb->dev;
@@ -815,7 +667,7 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
return NF_STOLEN;
}
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
{
struct brnf_frag_data *data;
@@ -829,56 +681,120 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
return 0;
}
+ if (data->vlan_tci) {
+ skb->vlan_tci = data->vlan_tci;
+ skb->vlan_proto = data->vlan_proto;
+ }
+
skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size);
__skb_push(skb, data->encap_size);
+ nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(sk, skb);
}
+#endif
+
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct sock *, struct sk_buff *))
+{
+ unsigned int mtu = ip_skb_dst_mtu(skb);
+ struct iphdr *iph = ip_hdr(skb);
+ struct rtable *rt = skb_rtable(skb);
+ struct net_device *dev = rt->dst.dev;
+
+ if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
+ (IPCB(skb)->frag_max_size &&
+ IPCB(skb)->frag_max_size > mtu))) {
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ return ip_do_fragment(sk, skb, output);
+}
+#endif
+
+static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
+{
+ if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
+ return PPPOE_SES_HLEN;
+ return 0;
+}
static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
{
- int ret;
- int frag_max_size;
+ struct nf_bridge_info *nf_bridge;
unsigned int mtu_reserved;
- if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP))
+ mtu_reserved = nf_bridge_mtu_reduction(skb);
+
+ if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) {
+ nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(sk, skb);
+ }
- mtu_reserved = nf_bridge_mtu_reduction(skb);
+ nf_bridge = nf_bridge_info_get(skb);
+
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
/* This is wrong! We should preserve the original fragment
* boundaries by preserving frag_list rather than refragmenting.
*/
- if (skb->len + mtu_reserved > skb->dev->mtu) {
+ if (skb->protocol == htons(ETH_P_IP)) {
struct brnf_frag_data *data;
- frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
- if (br_parse_ip_options(skb))
- /* Drop invalid packet */
- return NF_DROP;
- IPCB(skb)->frag_max_size = frag_max_size;
+ if (br_validate_ipv4(skb))
+ goto drop;
+
+ IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
nf_bridge_update_protocol(skb);
data = this_cpu_ptr(&brnf_frag_data_storage);
+
+ data->vlan_tci = skb->vlan_tci;
+ data->vlan_proto = skb->vlan_proto;
data->encap_size = nf_bridge_encap_header_len(skb);
data->size = ETH_HLEN + data->encap_size;
skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
data->size);
- ret = ip_fragment(sk, skb, br_nf_push_frag_xmit);
- } else {
- ret = br_dev_queue_push_xmit(sk, skb);
+ return br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit);
}
+#endif
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
+ struct brnf_frag_data *data;
- return ret;
-}
-#else
-static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
-{
- return br_dev_queue_push_xmit(sk, skb);
-}
+ if (br_validate_ipv6(skb))
+ goto drop;
+
+ IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
+
+ nf_bridge_update_protocol(skb);
+
+ data = this_cpu_ptr(&brnf_frag_data_storage);
+ data->encap_size = nf_bridge_encap_header_len(skb);
+ data->size = ETH_HLEN + data->encap_size;
+
+ skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
+ data->size);
+
+ if (v6ops)
+ return v6ops->fragment(sk, skb, br_nf_push_frag_xmit);
+
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
#endif
+ nf_bridge_info_free(skb);
+ return br_dev_queue_push_xmit(sk, skb);
+ drop:
+ kfree_skb(skb);
+ return 0;
+}
/* PF_BRIDGE/POST_ROUTING ********************************************/
static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
@@ -964,6 +880,8 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
nf_bridge->neigh_header,
ETH_HLEN - ETH_ALEN);
skb->dev = nf_bridge->physindev;
+
+ nf_bridge->physoutdev = NULL;
br_handle_frame_finish(NULL, skb);
}
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
new file mode 100644
index 000000000..13b7d1e3d
--- /dev/null
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -0,0 +1,245 @@
+/*
+ * Handle firewalling
+ * Linux ethernet bridge
+ *
+ * Authors:
+ * Lennert Buytenhek <buytenh@gnu.org>
+ * Bart De Schuymer <bdschuym@pandora.be>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Lennert dedicates this file to Kerstin Wurdinger.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/ip.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_arp.h>
+#include <linux/in_route.h>
+#include <linux/inetdevice.h>
+
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <net/route.h>
+#include <net/netfilter/br_netfilter.h>
+
+#include <asm/uaccess.h>
+#include "br_private.h"
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+/* We only check the length. A bridge shouldn't do any hop-by-hop stuff
+ * anyway
+ */
+static int br_nf_check_hbh_len(struct sk_buff *skb)
+{
+ unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1);
+ u32 pkt_len;
+ const unsigned char *nh = skb_network_header(skb);
+ int off = raw - nh;
+ int len = (raw[1] + 1) << 3;
+
+ if ((raw + len) - skb->data > skb_headlen(skb))
+ goto bad;
+
+ off += 2;
+ len -= 2;
+
+ while (len > 0) {
+ int optlen = nh[off + 1] + 2;
+
+ switch (nh[off]) {
+ case IPV6_TLV_PAD1:
+ optlen = 1;
+ break;
+
+ case IPV6_TLV_PADN:
+ break;
+
+ case IPV6_TLV_JUMBO:
+ if (nh[off + 1] != 4 || (off & 3) != 2)
+ goto bad;
+ pkt_len = ntohl(*(__be32 *)(nh + off + 2));
+ if (pkt_len <= IPV6_MAXPLEN ||
+ ipv6_hdr(skb)->payload_len)
+ goto bad;
+ if (pkt_len > skb->len - sizeof(struct ipv6hdr))
+ goto bad;
+ if (pskb_trim_rcsum(skb,
+ pkt_len + sizeof(struct ipv6hdr)))
+ goto bad;
+ nh = skb_network_header(skb);
+ break;
+ default:
+ if (optlen > len)
+ goto bad;
+ break;
+ }
+ off += optlen;
+ len -= optlen;
+ }
+ if (len == 0)
+ return 0;
+bad:
+ return -1;
+}
+
+int br_validate_ipv6(struct sk_buff *skb)
+{
+ const struct ipv6hdr *hdr;
+ struct net_device *dev = skb->dev;
+ struct inet6_dev *idev = __in6_dev_get(skb->dev);
+ u32 pkt_len;
+ u8 ip6h_len = sizeof(struct ipv6hdr);
+
+ if (!pskb_may_pull(skb, ip6h_len))
+ goto inhdr_error;
+
+ if (skb->len < ip6h_len)
+ goto drop;
+
+ hdr = ipv6_hdr(skb);
+
+ if (hdr->version != 6)
+ goto inhdr_error;
+
+ pkt_len = ntohs(hdr->payload_len);
+
+ if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
+ if (pkt_len + ip6h_len > skb->len) {
+ IP6_INC_STATS_BH(dev_net(dev), idev,
+ IPSTATS_MIB_INTRUNCATEDPKTS);
+ goto drop;
+ }
+ if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) {
+ IP6_INC_STATS_BH(dev_net(dev), idev,
+ IPSTATS_MIB_INDISCARDS);
+ goto drop;
+ }
+ }
+ if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb))
+ goto drop;
+
+ memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
+ /* No IP options in IPv6 header; however it should be
+ * checked if some next headers need special treatment
+ */
+ return 0;
+
+inhdr_error:
+ IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS);
+drop:
+ return -1;
+}
+
+static inline bool
+br_nf_ipv6_daddr_was_changed(const struct sk_buff *skb,
+ const struct nf_bridge_info *nf_bridge)
+{
+ return memcmp(&nf_bridge->ipv6_daddr, &ipv6_hdr(skb)->daddr,
+ sizeof(ipv6_hdr(skb)->daddr)) != 0;
+}
+
+/* PF_BRIDGE/PRE_ROUTING: Undo the changes made for ip6tables
+ * PREROUTING and continue the bridge PRE_ROUTING hook. See comment
+ * for br_nf_pre_routing_finish(), same logic is used here but
+ * equivalent IPv6 function ip6_route_input() called indirectly.
+ */
+static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
+{
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+ struct rtable *rt;
+ struct net_device *dev = skb->dev;
+ const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
+
+ nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
+
+ if (nf_bridge->pkt_otherhost) {
+ skb->pkt_type = PACKET_OTHERHOST;
+ nf_bridge->pkt_otherhost = false;
+ }
+ nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING;
+ if (br_nf_ipv6_daddr_was_changed(skb, nf_bridge)) {
+ skb_dst_drop(skb);
+ v6ops->route_input(skb);
+
+ if (skb_dst(skb)->error) {
+ kfree_skb(skb);
+ return 0;
+ }
+
+ if (skb_dst(skb)->dev == dev) {
+ skb->dev = nf_bridge->physindev;
+ nf_bridge_update_protocol(skb);
+ nf_bridge_push_encap_header(skb);
+ NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
+ sk, skb, skb->dev, NULL,
+ br_nf_pre_routing_finish_bridge,
+ 1);
+ return 0;
+ }
+ ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
+ skb->pkt_type = PACKET_HOST;
+ } else {
+ rt = bridge_parent_rtable(nf_bridge->physindev);
+ if (!rt) {
+ kfree_skb(skb);
+ return 0;
+ }
+ skb_dst_set_noref(skb, &rt->dst);
+ }
+
+ skb->dev = nf_bridge->physindev;
+ nf_bridge_update_protocol(skb);
+ nf_bridge_push_encap_header(skb);
+ NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb,
+ skb->dev, NULL,
+ br_handle_frame_finish, 1);
+
+ return 0;
+}
+
+/* Replicate the checks that IPv6 does on packet reception and pass the packet
+ * to ip6tables.
+ */
+unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_bridge_info *nf_bridge;
+
+ if (br_validate_ipv6(skb))
+ return NF_DROP;
+
+ nf_bridge_put(skb->nf_bridge);
+ if (!nf_bridge_alloc(skb))
+ return NF_DROP;
+ if (!setup_pre_routing(skb))
+ return NF_DROP;
+
+ nf_bridge = nf_bridge_info_get(skb);
+ nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr;
+
+ skb->protocol = htons(ETH_P_IPV6);
+ NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb,
+ skb->dev, NULL,
+ br_nf_pre_routing_finish_ipv6);
+
+ return NF_STOLEN;
+}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 4b5c23699..4d74a0639 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -112,6 +112,8 @@ static inline size_t br_port_info_size(void)
+ nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */
+ nla_total_size(1) /* IFLA_BRPORT_LEARNING */
+ nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */
+ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */
+ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */
+ 0;
}
@@ -457,6 +459,8 @@ static int br_afspec(struct net_bridge *br,
if (nla_len(attr) != sizeof(struct bridge_vlan_info))
return -EINVAL;
vinfo = nla_data(attr);
+ if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
+ return -EINVAL;
if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
if (vinfo_start)
return -EINVAL;
@@ -504,6 +508,8 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_FAST_LEAVE]= { .type = NLA_U8 },
[IFLA_BRPORT_LEARNING] = { .type = NLA_U8 },
[IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 },
+ [IFLA_BRPORT_PROXYARP] = { .type = NLA_U8 },
+ [IFLA_BRPORT_PROXYARP_WIFI] = { .type = NLA_U8 },
};
/* Change the state of the port and notify spanning tree */
@@ -586,7 +592,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
struct nlattr *afspec;
struct net_bridge_port *p;
struct nlattr *tb[IFLA_BRPORT_MAX + 1];
- int err = 0, ret_offload = 0;
+ int err = 0;
protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO);
afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
@@ -628,16 +634,6 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
afspec, RTM_SETLINK);
}
- if (p && !(flags & BRIDGE_FLAGS_SELF)) {
- /* set bridge attributes in hardware if supported
- */
- ret_offload = netdev_switch_port_bridge_setlink(dev, nlh,
- flags);
- if (ret_offload && ret_offload != -EOPNOTSUPP)
- br_warn(p->br, "error setting attrs on port %u(%s)\n",
- (unsigned int)p->port_no, p->dev->name);
- }
-
if (err == 0)
br_ifinfo_notify(RTM_NEWLINK, p);
out:
@@ -649,7 +645,7 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
{
struct nlattr *afspec;
struct net_bridge_port *p;
- int err = 0, ret_offload = 0;
+ int err = 0;
afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (!afspec)
@@ -668,16 +664,6 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
*/
br_ifinfo_notify(RTM_NEWLINK, p);
- if (p && !(flags & BRIDGE_FLAGS_SELF)) {
- /* del bridge attributes in hardware
- */
- ret_offload = netdev_switch_port_bridge_dellink(dev, nlh,
- flags);
- if (ret_offload && ret_offload != -EOPNOTSUPP)
- br_warn(p->br, "error deleting attrs on port %u (%s)\n",
- (unsigned int)p->port_no, p->dev->name);
- }
-
return err;
}
static int br_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -711,9 +697,17 @@ static int br_port_slave_changelink(struct net_device *brdev,
struct nlattr *tb[],
struct nlattr *data[])
{
+ struct net_bridge *br = netdev_priv(brdev);
+ int ret;
+
if (!data)
return 0;
- return br_setport(br_port_get_rtnl(dev), data);
+
+ spin_lock_bh(&br->lock);
+ ret = br_setport(br_port_get_rtnl(dev), data);
+ spin_unlock_bh(&br->lock);
+
+ return ret;
}
static int br_port_fill_slave_info(struct sk_buff *skb,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 3362c2940..8b21146b2 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -18,6 +18,7 @@
#include <linux/netpoll.h>
#include <linux/u64_stats_sync.h>
#include <net/route.h>
+#include <net/ip6_fib.h>
#include <linux/if_vlan.h>
#define BR_HASH_BITS 8
@@ -33,8 +34,8 @@
/* Control of forwarding link local multicast */
#define BR_GROUPFWD_DEFAULT 0
-/* Don't allow forwarding control protocols like STP and LLDP */
-#define BR_GROUPFWD_RESTRICTED 0x4007u
+/* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */
+#define BR_GROUPFWD_RESTRICTED 0x0007u
/* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */
#define BR_GROUPFWD_8021AD 0xB801u
@@ -214,7 +215,10 @@ struct net_bridge
spinlock_t hash_lock;
struct hlist_head hash[BR_HASH_SIZE];
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- struct rtable fake_rtable;
+ union {
+ struct rtable fake_rtable;
+ struct rt6_info fake_rt6_info;
+ };
bool nf_call_iptables;
bool nf_call_ip6tables;
bool nf_call_arptables;
@@ -304,7 +308,6 @@ struct br_input_skb_cb {
int mrouters_only;
#endif
- u16 frag_max_size;
bool proxyarp_replied;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
@@ -384,7 +387,7 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr);
void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr);
void br_fdb_cleanup(unsigned long arg);
void br_fdb_delete_by_port(struct net_bridge *br,
- const struct net_bridge_port *p, int do_all);
+ const struct net_bridge_port *p, u16 vid, int do_all);
struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
const unsigned char *addr, __u16 vid);
int br_fdb_test_addr(struct net_device *dev, unsigned char *addr);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index fb3ebe615..ed74ffaa8 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -39,10 +39,14 @@ void br_log_state(const struct net_bridge_port *p)
void br_set_state(struct net_bridge_port *p, unsigned int state)
{
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_PORT_STP_STATE,
+ .u.stp_state = state,
+ };
int err;
p->state = state;
- err = netdev_switch_port_stp_update(p->dev, state);
+ err = switchdev_port_attr_set(p->dev, &attr);
if (err && err != -EOPNOTSUPP)
br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
(unsigned int) p->port_no, p->dev->name);
@@ -205,8 +209,9 @@ void br_transmit_config(struct net_bridge_port *p)
br_send_config_bpdu(p, &bpdu);
p->topology_change_ack = 0;
p->config_pending = 0;
- mod_timer(&p->hold_timer,
- round_jiffies(jiffies + BR_HOLD_TIME));
+ if (p->br->stp_enabled == BR_KERNEL_STP)
+ mod_timer(&p->hold_timer,
+ round_jiffies(jiffies + BR_HOLD_TIME));
}
}
@@ -424,7 +429,6 @@ static void br_make_forwarding(struct net_bridge_port *p)
else
br_set_state(p, BR_STATE_LEARNING);
- br_multicast_enable_port(p);
br_log_state(p);
br_ifinfo_notify(RTM_NEWLINK, p);
@@ -458,6 +462,12 @@ void br_port_state_selection(struct net_bridge *br)
}
}
+ if (p->state != BR_STATE_BLOCKING)
+ br_multicast_enable_port(p);
+ /* Multicast is not disabled for the port when it goes in
+ * blocking state because the timers will expire and stop by
+ * themselves without sending more queries.
+ */
if (p->state == BR_STATE_FORWARDING)
++liveports;
}
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 7832d07f4..4ca449a16 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -48,7 +48,8 @@ void br_stp_enable_bridge(struct net_bridge *br)
struct net_bridge_port *p;
spin_lock_bh(&br->lock);
- mod_timer(&br->hello_timer, jiffies + br->hello_time);
+ if (br->stp_enabled == BR_KERNEL_STP)
+ mod_timer(&br->hello_timer, jiffies + br->hello_time);
mod_timer(&br->gc_timer, jiffies + HZ/10);
br_config_bpdu_generation(br);
@@ -111,7 +112,7 @@ void br_stp_disable_port(struct net_bridge_port *p)
del_timer(&p->forward_delay_timer);
del_timer(&p->hold_timer);
- br_fdb_delete_by_port(br, p, 0);
+ br_fdb_delete_by_port(br, p, 0, 0);
br_multicast_disable_port(p);
br_configuration_update(br);
@@ -127,6 +128,7 @@ static void br_stp_start(struct net_bridge *br)
int r;
char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL };
char *envp[] = { NULL };
+ struct net_bridge_port *p;
r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
@@ -140,6 +142,10 @@ static void br_stp_start(struct net_bridge *br)
if (r == 0) {
br->stp_enabled = BR_USER_STP;
br_debug(br, "userspace STP started\n");
+ /* Stop hello and hold timers */
+ del_timer(&br->hello_timer);
+ list_for_each_entry(p, &br->port_list, list)
+ del_timer(&p->hold_timer);
} else {
br->stp_enabled = BR_KERNEL_STP;
br_debug(br, "using kernel STP\n");
@@ -156,12 +162,17 @@ static void br_stp_stop(struct net_bridge *br)
int r;
char *argv[] = { BR_STP_PROG, br->dev->name, "stop", NULL };
char *envp[] = { NULL };
+ struct net_bridge_port *p;
if (br->stp_enabled == BR_USER_STP) {
r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
br_info(br, "userspace STP stopped, return code %d\n", r);
/* To start timers on any ports left in blocking */
+ mod_timer(&br->hello_timer, jiffies + br->hello_time);
+ list_for_each_entry(p, &br->port_list, list)
+ mod_timer(&p->hold_timer,
+ round_jiffies(jiffies + BR_HOLD_TIME));
spin_lock_bh(&br->lock);
br_port_state_selection(br);
spin_unlock_bh(&br->lock);
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 7caf7fae2..5f0f5af0e 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -40,7 +40,9 @@ static void br_hello_timer_expired(unsigned long arg)
if (br->dev->flags & IFF_UP) {
br_config_bpdu_generation(br);
- mod_timer(&br->hello_timer, round_jiffies(jiffies + br->hello_time));
+ if (br->stp_enabled != BR_USER_STP)
+ mod_timer(&br->hello_timer,
+ round_jiffies(jiffies + br->hello_time));
}
spin_unlock(&br->lock);
}
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 4905845a9..efe415ad8 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -160,7 +160,7 @@ static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL);
static int store_flush(struct net_bridge_port *p, unsigned long v)
{
- br_fdb_delete_by_port(p->br, p, 0); // Don't delete local entry
+ br_fdb_delete_by_port(p->br, p, 0, 0); // Don't delete local entry
return 0;
}
static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 13013fe8d..0d41f8183 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -2,6 +2,7 @@
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
+#include <net/switchdev.h>
#include "br_private.h"
@@ -36,6 +37,36 @@ static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags)
clear_bit(vid, v->untagged_bitmap);
}
+static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
+ u16 vid, u16 flags)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ int err;
+
+ /* If driver uses VLAN ndo ops, use 8021q to install vid
+ * on device, otherwise try switchdev ops to install vid.
+ */
+
+ if (ops->ndo_vlan_rx_add_vid) {
+ err = vlan_vid_add(dev, br->vlan_proto, vid);
+ } else {
+ struct switchdev_obj vlan_obj = {
+ .id = SWITCHDEV_OBJ_PORT_VLAN,
+ .u.vlan = {
+ .flags = flags,
+ .vid_begin = vid,
+ .vid_end = vid,
+ },
+ };
+
+ err = switchdev_port_obj_add(dev, &vlan_obj);
+ if (err == -EOPNOTSUPP)
+ err = 0;
+ }
+
+ return err;
+}
+
static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
{
struct net_bridge_port *p = NULL;
@@ -62,7 +93,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
* This ensures tagged traffic enters the bridge when
* promiscuous mode is disabled by br_manage_promisc().
*/
- err = vlan_vid_add(dev, br->vlan_proto, vid);
+ err = __vlan_vid_add(dev, br, vid, flags);
if (err)
return err;
}
@@ -86,6 +117,30 @@ out_filt:
return err;
}
+static void __vlan_vid_del(struct net_device *dev, struct net_bridge *br,
+ u16 vid)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ /* If driver uses VLAN ndo ops, use 8021q to delete vid
+ * on device, otherwise try switchdev ops to delete vid.
+ */
+
+ if (ops->ndo_vlan_rx_kill_vid) {
+ vlan_vid_del(dev, br->vlan_proto, vid);
+ } else {
+ struct switchdev_obj vlan_obj = {
+ .id = SWITCHDEV_OBJ_PORT_VLAN,
+ .u.vlan = {
+ .vid_begin = vid,
+ .vid_end = vid,
+ },
+ };
+
+ switchdev_port_obj_del(dev, &vlan_obj);
+ }
+}
+
static int __vlan_del(struct net_port_vlans *v, u16 vid)
{
if (!test_bit(vid, v->vlan_bitmap))
@@ -96,7 +151,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid)
if (v->port_idx) {
struct net_bridge_port *p = v->parent.port;
- vlan_vid_del(p->dev, p->br->vlan_proto, vid);
+ __vlan_vid_del(p->dev, p->br, vid);
}
clear_bit(vid, v->vlan_bitmap);
@@ -686,6 +741,7 @@ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
return -EINVAL;
br_fdb_find_delete_local(port->br, port, port->dev->dev_addr, vid);
+ br_fdb_delete_by_port(port->br, port, vid, 0);
return __vlan_del(pv, vid);
}
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 071d87214..0c4057006 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -164,8 +164,10 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
!(info->bitmask & EBT_STP_MASK))
return -EINVAL;
/* Make sure the match only receives stp frames */
- if (!ether_addr_equal(e->destmac, bridge_ula) ||
- !ether_addr_equal(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC))
+ if (!par->nft_compat &&
+ (!ether_addr_equal(e->destmac, bridge_ula) ||
+ !ether_addr_equal(e->destmsk, msk) ||
+ !(e->bitmask & EBT_DESTMAC)))
return -EINVAL;
return 0;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 91180a7fc..18ca4b24c 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -6,7 +6,7 @@
*
* ebtables.c,v 2.0, July, 2002
*
- * This code is stongly inspired on the iptables code which is
+ * This code is strongly inspired by the iptables code which is
* Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
*
* This program is free software; you can redistribute it and/or
@@ -139,7 +139,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
ethproto = h->h_proto;
if (e->bitmask & EBT_802_3) {
- if (FWINV2(ntohs(ethproto) >= ETH_P_802_3_MIN, EBT_IPROTO))
+ if (FWINV2(eth_proto_is_802_3(ethproto), EBT_IPROTO))
return 1;
} else if (!(e->bitmask & EBT_NOPROTO) &&
FWINV2(e->ethproto != ethproto, EBT_IPROTO))
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 112ad7848..cc8589191 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -121,12 +121,13 @@ static void caif_flow_ctrl(struct sock *sk, int mode)
* Copied from sock.c:sock_queue_rcv_skb(), but changed so packets are
* not dropped, but CAIF is sending flow off instead.
*/
-static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static void caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
int err;
unsigned long flags;
struct sk_buff_head *list = &sk->sk_receive_queue;
struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
+ bool queued = false;
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
(unsigned int)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) {
@@ -139,7 +140,8 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
err = sk_filter(sk, skb);
if (err)
- return err;
+ goto out;
+
if (!sk_rmem_schedule(sk, skb, skb->truesize) && rx_flow_is_on(cf_sk)) {
set_rx_flow_off(cf_sk);
net_dbg_ratelimited("sending flow OFF due to rmem_schedule\n");
@@ -147,21 +149,16 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
skb->dev = NULL;
skb_set_owner_r(skb, sk);
- /* Cache the SKB length before we tack it onto the receive
- * queue. Once it is added it no longer belongs to us and
- * may be freed by other threads of control pulling packets
- * from the queue.
- */
spin_lock_irqsave(&list->lock, flags);
- if (!sock_flag(sk, SOCK_DEAD))
+ queued = !sock_flag(sk, SOCK_DEAD);
+ if (queued)
__skb_queue_tail(list, skb);
spin_unlock_irqrestore(&list->lock, flags);
-
- if (!sock_flag(sk, SOCK_DEAD))
+out:
+ if (queued)
sk->sk_data_ready(sk);
else
kfree_skb(skb);
- return 0;
}
/* Packet Receive Callback function called from CAIF Stack */
@@ -1055,7 +1052,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
* is really not used at all in the net/core or socket.c but the
* initialization makes sure that sock->state is not uninitialized.
*/
- sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot);
+ sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 62c635f2b..166d43619 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -181,7 +181,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
sock->ops = cp->ops;
- sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot);
+ sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot, kern);
if (!sk) {
err = -ENOMEM;
goto errout;
diff --git a/net/can/gw.c b/net/can/gw.c
index a6f448e18..455168718 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -110,6 +110,7 @@ struct cf_mod {
void (*xor)(struct can_frame *cf, struct cgw_csum_xor *xor);
void (*crc8)(struct can_frame *cf, struct cgw_csum_crc8 *crc8);
} csumfunc;
+ u32 uid;
};
@@ -548,6 +549,11 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
goto cancel;
}
+ if (gwj->mod.uid) {
+ if (nla_put_u32(skb, CGW_MOD_UID, gwj->mod.uid) < 0)
+ goto cancel;
+ }
+
if (gwj->mod.csumfunc.crc8) {
if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN,
&gwj->mod.csum.crc8) < 0)
@@ -619,6 +625,7 @@ static const struct nla_policy cgw_policy[CGW_MAX+1] = {
[CGW_DST_IF] = { .type = NLA_U32 },
[CGW_FILTER] = { .len = sizeof(struct can_filter) },
[CGW_LIM_HOPS] = { .type = NLA_U8 },
+ [CGW_MOD_UID] = { .type = NLA_U32 },
};
/* check for common and gwtype specific attributes */
@@ -761,6 +768,10 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
else
mod->csumfunc.xor = cgw_csum_xor_neg;
}
+
+ if (tb[CGW_MOD_UID]) {
+ nla_memcpy(&mod->uid, tb[CGW_MOD_UID], sizeof(u32));
+ }
}
if (gwtype == CGW_TYPE_CAN_CAN) {
@@ -802,6 +813,8 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct rtcanmsg *r;
struct cgw_job *gwj;
+ struct cf_mod mod;
+ struct can_can_gw ccgw;
u8 limhops = 0;
int err = 0;
@@ -819,6 +832,36 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
if (r->gwtype != CGW_TYPE_CAN_CAN)
return -EINVAL;
+ err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops);
+ if (err < 0)
+ return err;
+
+ if (mod.uid) {
+
+ ASSERT_RTNL();
+
+ /* check for updating an existing job with identical uid */
+ hlist_for_each_entry(gwj, &cgw_list, list) {
+
+ if (gwj->mod.uid != mod.uid)
+ continue;
+
+ /* interfaces & filters must be identical */
+ if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw)))
+ return -EINVAL;
+
+ /* update modifications with disabled softirq & quit */
+ local_bh_disable();
+ memcpy(&gwj->mod, &mod, sizeof(mod));
+ local_bh_enable();
+ return 0;
+ }
+ }
+
+ /* ifindex == 0 is not allowed for job creation */
+ if (!ccgw.src_idx || !ccgw.dst_idx)
+ return -ENODEV;
+
gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL);
if (!gwj)
return -ENOMEM;
@@ -828,18 +871,14 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
gwj->deleted_frames = 0;
gwj->flags = r->flags;
gwj->gwtype = r->gwtype;
+ gwj->limit_hops = limhops;
- err = cgw_parse_attr(nlh, &gwj->mod, CGW_TYPE_CAN_CAN, &gwj->ccgw,
- &limhops);
- if (err < 0)
- goto out;
+ /* insert already parsed information */
+ memcpy(&gwj->mod, &mod, sizeof(mod));
+ memcpy(&gwj->ccgw, &ccgw, sizeof(ccgw));
err = -ENODEV;
- /* ifindex == 0 is not allowed for job creation */
- if (!gwj->ccgw.src_idx || !gwj->ccgw.dst_idx)
- goto out;
-
gwj->src.dev = __dev_get_by_index(&init_net, gwj->ccgw.src_idx);
if (!gwj->src.dev)
@@ -856,8 +895,6 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
if (gwj->dst.dev->type != ARPHRD_CAN)
goto out;
- gwj->limit_hops = limhops;
-
ASSERT_RTNL();
err = cgw_register_filter(gwj);
@@ -931,8 +968,15 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
if (gwj->limit_hops != limhops)
continue;
- if (memcmp(&gwj->mod, &mod, sizeof(mod)))
- continue;
+ /* we have a match when uid is enabled and identical */
+ if (gwj->mod.uid || mod.uid) {
+ if (gwj->mod.uid != mod.uid)
+ continue;
+ } else {
+ /* no uid => check for identical modifications */
+ if (memcmp(&gwj->mod, &mod, sizeof(mod)))
+ continue;
+ }
/* if (r->gwtype == CGW_TYPE_CAN_CAN) - is made sure here */
if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw)))
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 79e8f71ae..f30329f72 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -9,6 +9,7 @@
#include <keys/ceph-type.h>
#include <linux/module.h>
#include <linux/mount.h>
+#include <linux/nsproxy.h>
#include <linux/parser.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
@@ -16,8 +17,6 @@
#include <linux/statfs.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
-#include <linux/nsproxy.h>
-#include <net/net_namespace.h>
#include <linux/ceph/ceph_features.h>
@@ -131,6 +130,13 @@ int ceph_compare_options(struct ceph_options *new_opt,
int i;
int ret;
+ /*
+ * Don't bother comparing options if network namespaces don't
+ * match.
+ */
+ if (!net_eq(current->nsproxy->net_ns, read_pnet(&client->msgr.net)))
+ return -1;
+
ret = memcmp(opt1, opt2, ofs);
if (ret)
return ret;
@@ -335,9 +341,6 @@ ceph_parse_options(char *options, const char *dev_name,
int err = -ENOMEM;
substring_t argstr[MAX_OPT_ARGS];
- if (current->nsproxy->net_ns != &init_net)
- return ERR_PTR(-EINVAL);
-
opt = kzalloc(sizeof(*opt), GFP_KERNEL);
if (!opt)
return ERR_PTR(-ENOMEM);
@@ -352,8 +355,8 @@ ceph_parse_options(char *options, const char *dev_name,
/* start with defaults */
opt->flags = CEPH_OPT_DEFAULT;
opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
- opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
- opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */
+ opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
+ opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
/* get mon ip(s) */
/* ip1[:port1][,ip2[:port2]...] */
@@ -439,13 +442,32 @@ ceph_parse_options(char *options, const char *dev_name,
pr_warn("ignoring deprecated osdtimeout option\n");
break;
case Opt_osdkeepalivetimeout:
- opt->osd_keepalive_timeout = intval;
+ /* 0 isn't well defined right now, reject it */
+ if (intval < 1 || intval > INT_MAX / 1000) {
+ pr_err("osdkeepalive out of range\n");
+ err = -EINVAL;
+ goto out;
+ }
+ opt->osd_keepalive_timeout =
+ msecs_to_jiffies(intval * 1000);
break;
case Opt_osd_idle_ttl:
- opt->osd_idle_ttl = intval;
+ /* 0 isn't well defined right now, reject it */
+ if (intval < 1 || intval > INT_MAX / 1000) {
+ pr_err("osd_idle_ttl out of range\n");
+ err = -EINVAL;
+ goto out;
+ }
+ opt->osd_idle_ttl = msecs_to_jiffies(intval * 1000);
break;
case Opt_mount_timeout:
- opt->mount_timeout = intval;
+ /* 0 is "wait forever" (i.e. infinite timeout) */
+ if (intval < 0 || intval > INT_MAX / 1000) {
+ pr_err("mount_timeout out of range\n");
+ err = -EINVAL;
+ goto out;
+ }
+ opt->mount_timeout = msecs_to_jiffies(intval * 1000);
break;
case Opt_share:
@@ -512,12 +534,14 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
seq_puts(m, "notcp_nodelay,");
if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
- seq_printf(m, "mount_timeout=%d,", opt->mount_timeout);
+ seq_printf(m, "mount_timeout=%d,",
+ jiffies_to_msecs(opt->mount_timeout) / 1000);
if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
- seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl);
+ seq_printf(m, "osd_idle_ttl=%d,",
+ jiffies_to_msecs(opt->osd_idle_ttl) / 1000);
if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
seq_printf(m, "osdkeepalivetimeout=%d,",
- opt->osd_keepalive_timeout);
+ jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000);
/* drop redundant comma */
if (m->count != pos)
@@ -587,6 +611,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
fail_monc:
ceph_monc_stop(&client->monc);
fail:
+ ceph_messenger_fini(&client->msgr);
kfree(client);
return ERR_PTR(err);
}
@@ -600,8 +625,8 @@ void ceph_destroy_client(struct ceph_client *client)
/* unmount */
ceph_osdc_stop(&client->osdc);
-
ceph_monc_stop(&client->monc);
+ ceph_messenger_fini(&client->msgr);
ceph_debugfs_client_cleanup(client);
@@ -626,8 +651,8 @@ static int have_mon_and_osd_map(struct ceph_client *client)
*/
int __ceph_open_session(struct ceph_client *client, unsigned long started)
{
- int err;
- unsigned long timeout = client->options->mount_timeout * HZ;
+ unsigned long timeout = client->options->mount_timeout;
+ long err;
/* open session, and wait for mon and osd maps */
err = ceph_monc_open_session(&client->monc);
@@ -635,16 +660,15 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
return err;
while (!have_mon_and_osd_map(client)) {
- err = -EIO;
if (timeout && time_after_eq(jiffies, started + timeout))
- return err;
+ return -ETIMEDOUT;
/* wait */
dout("mount waiting for mon_map\n");
err = wait_event_interruptible_timeout(client->auth_wq,
have_mon_and_osd_map(client) || (client->auth_err < 0),
- timeout);
- if (err == -EINTR || err == -ERESTARTSYS)
+ ceph_timeout_jiffies(timeout));
+ if (err < 0)
return err;
if (client->auth_err < 0)
return client->auth_err;
@@ -721,5 +745,5 @@ module_exit(exit_ceph_lib);
MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
-MODULE_DESCRIPTION("Ceph filesystem for Linux");
+MODULE_DESCRIPTION("Ceph core library");
MODULE_LICENSE("GPL");
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 9d84ce4ea..80d7c3a97 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -1,15 +1,11 @@
-
#ifdef __KERNEL__
# include <linux/slab.h>
+# include <linux/crush/crush.h>
#else
-# include <stdlib.h>
-# include <assert.h>
-# define kfree(x) do { if (x) free(x); } while (0)
-# define BUG_ON(x) assert(!(x))
+# include "crush_compat.h"
+# include "crush.h"
#endif
-#include <linux/crush/crush.h>
-
const char *crush_bucket_alg_name(int alg)
{
switch (alg) {
@@ -134,6 +130,9 @@ void crush_destroy(struct crush_map *map)
kfree(map->rules);
}
+#ifndef __KERNEL__
+ kfree(map->choose_tries);
+#endif
kfree(map);
}
diff --git a/net/ceph/crush/crush_ln_table.h b/net/ceph/crush/crush_ln_table.h
index 6192c7fc9..aae534c90 100644
--- a/net/ceph/crush/crush_ln_table.h
+++ b/net/ceph/crush/crush_ln_table.h
@@ -10,20 +10,20 @@
*
*/
-#if defined(__linux__)
-#include <linux/types.h>
-#elif defined(__FreeBSD__)
-#include <sys/types.h>
-#endif
-
#ifndef CEPH_CRUSH_LN_H
#define CEPH_CRUSH_LN_H
+#ifdef __KERNEL__
+# include <linux/types.h>
+#else
+# include "crush_compat.h"
+#endif
-// RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0)
-// RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0)
-
-static int64_t __RH_LH_tbl[128*2+2] = {
+/*
+ * RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0)
+ * RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0)
+ */
+static __s64 __RH_LH_tbl[128*2+2] = {
0x0001000000000000ll, 0x0000000000000000ll, 0x0000fe03f80fe040ll, 0x000002dfca16dde1ll,
0x0000fc0fc0fc0fc1ll, 0x000005b9e5a170b4ll, 0x0000fa232cf25214ll, 0x0000088e68ea899all,
0x0000f83e0f83e0f9ll, 0x00000b5d69bac77ell, 0x0000f6603d980f67ll, 0x00000e26fd5c8555ll,
@@ -89,11 +89,12 @@ static int64_t __RH_LH_tbl[128*2+2] = {
0x0000820820820821ll, 0x0000fa2f045e7832ll, 0x000081848da8faf1ll, 0x0000fba577877d7dll,
0x0000810204081021ll, 0x0000fd1a708bbe11ll, 0x0000808080808081ll, 0x0000fe8df263f957ll,
0x0000800000000000ll, 0x0000ffff00000000ll,
- };
-
+};
- // LL_tbl[k] = 2^48*log2(1.0+k/2^15);
-static int64_t __LL_tbl[256] = {
+/*
+ * LL_tbl[k] = 2^48*log2(1.0+k/2^15)
+ */
+static __s64 __LL_tbl[256] = {
0x0000000000000000ull, 0x00000002e2a60a00ull, 0x000000070cb64ec5ull, 0x00000009ef50ce67ull,
0x0000000cd1e588fdull, 0x0000000fb4747e9cull, 0x0000001296fdaf5eull, 0x0000001579811b58ull,
0x000000185bfec2a1ull, 0x0000001b3e76a552ull, 0x0000001e20e8c380ull, 0x0000002103551d43ull,
@@ -160,7 +161,4 @@ static int64_t __LL_tbl[256] = {
0x000002d4562d2ec6ull, 0x000002d73330209dull, 0x000002da102d63b0ull, 0x000002dced24f814ull,
};
-
-
-
#endif
diff --git a/net/ceph/crush/hash.c b/net/ceph/crush/hash.c
index 5bb63e37a..ed123af49 100644
--- a/net/ceph/crush/hash.c
+++ b/net/ceph/crush/hash.c
@@ -1,6 +1,8 @@
-
-#include <linux/types.h>
-#include <linux/crush/hash.h>
+#ifdef __KERNEL__
+# include <linux/crush/hash.h>
+#else
+# include "hash.h"
+#endif
/*
* Robert Jenkins' function for mixing 32-bit values
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 5b47736d2..393bfb22d 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -1,27 +1,31 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Intel Corporation All Rights Reserved
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
#ifdef __KERNEL__
# include <linux/string.h>
# include <linux/slab.h>
# include <linux/bug.h>
# include <linux/kernel.h>
-# ifndef dprintk
-# define dprintk(args...)
-# endif
+# include <linux/crush/crush.h>
+# include <linux/crush/hash.h>
#else
-# include <string.h>
-# include <stdio.h>
-# include <stdlib.h>
-# include <assert.h>
-# define BUG_ON(x) assert(!(x))
-# define dprintk(args...) /* printf(args) */
-# define kmalloc(x, f) malloc(x)
-# define kfree(x) free(x)
+# include "crush_compat.h"
+# include "crush.h"
+# include "hash.h"
#endif
-
-#include <linux/crush/crush.h>
-#include <linux/crush/hash.h>
#include "crush_ln_table.h"
+#define dprintk(args...) /* printf(args) */
+
/*
* Implement the core CRUSH mapping algorithm.
*/
@@ -139,7 +143,7 @@ static int bucket_list_choose(struct crush_bucket_list *bucket,
int i;
for (i = bucket->h.size-1; i >= 0; i--) {
- __u64 w = crush_hash32_4(bucket->h.hash,x, bucket->h.items[i],
+ __u64 w = crush_hash32_4(bucket->h.hash, x, bucket->h.items[i],
r, bucket->h.id);
w &= 0xffff;
dprintk("list_choose i=%d x=%d r=%d item %d weight %x "
@@ -238,43 +242,46 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
return bucket->h.items[high];
}
-// compute 2^44*log2(input+1)
-uint64_t crush_ln(unsigned xin)
+/* compute 2^44*log2(input+1) */
+static __u64 crush_ln(unsigned int xin)
{
- unsigned x=xin, x1;
- int iexpon, index1, index2;
- uint64_t RH, LH, LL, xl64, result;
+ unsigned int x = xin, x1;
+ int iexpon, index1, index2;
+ __u64 RH, LH, LL, xl64, result;
- x++;
+ x++;
- // normalize input
- iexpon = 15;
- while(!(x&0x18000)) { x<<=1; iexpon--; }
+ /* normalize input */
+ iexpon = 15;
+ while (!(x & 0x18000)) {
+ x <<= 1;
+ iexpon--;
+ }
- index1 = (x>>8)<<1;
- // RH ~ 2^56/index1
- RH = __RH_LH_tbl[index1 - 256];
- // LH ~ 2^48 * log2(index1/256)
- LH = __RH_LH_tbl[index1 + 1 - 256];
+ index1 = (x >> 8) << 1;
+ /* RH ~ 2^56/index1 */
+ RH = __RH_LH_tbl[index1 - 256];
+ /* LH ~ 2^48 * log2(index1/256) */
+ LH = __RH_LH_tbl[index1 + 1 - 256];
- // RH*x ~ 2^48 * (2^15 + xf), xf<2^8
- xl64 = (int64_t)x * RH;
- xl64 >>= 48;
- x1 = xl64;
+ /* RH*x ~ 2^48 * (2^15 + xf), xf<2^8 */
+ xl64 = (__s64)x * RH;
+ xl64 >>= 48;
+ x1 = xl64;
- result = iexpon;
- result <<= (12 + 32);
+ result = iexpon;
+ result <<= (12 + 32);
- index2 = x1 & 0xff;
- // LL ~ 2^48*log2(1.0+index2/2^15)
- LL = __LL_tbl[index2];
+ index2 = x1 & 0xff;
+ /* LL ~ 2^48*log2(1.0+index2/2^15) */
+ LL = __LL_tbl[index2];
- LH = LH + LL;
+ LH = LH + LL;
- LH >>= (48-12 - 32);
- result += LH;
+ LH >>= (48 - 12 - 32);
+ result += LH;
- return result;
+ return result;
}
@@ -290,9 +297,9 @@ uint64_t crush_ln(unsigned xin)
static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
int x, int r)
{
- unsigned i, high = 0;
- unsigned u;
- unsigned w;
+ unsigned int i, high = 0;
+ unsigned int u;
+ unsigned int w;
__s64 ln, draw, high_draw = 0;
for (i = 0; i < bucket->h.size; i++) {
@@ -567,6 +574,10 @@ reject:
out[outpos] = item;
outpos++;
count--;
+#ifndef __KERNEL__
+ if (map->choose_tries && ftotal <= map->choose_total_tries)
+ map->choose_tries[ftotal]++;
+#endif
}
dprintk("CHOOSE returns %d\n", outpos);
@@ -610,6 +621,20 @@ static void crush_choose_indep(const struct crush_map *map,
}
for (ftotal = 0; left > 0 && ftotal < tries; ftotal++) {
+#ifdef DEBUG_INDEP
+ if (out2 && ftotal) {
+ dprintk("%u %d a: ", ftotal, left);
+ for (rep = outpos; rep < endpos; rep++) {
+ dprintk(" %d", out[rep]);
+ }
+ dprintk("\n");
+ dprintk("%u %d b: ", ftotal, left);
+ for (rep = outpos; rep < endpos; rep++) {
+ dprintk(" %d", out2[rep]);
+ }
+ dprintk("\n");
+ }
+#endif
for (rep = outpos; rep < endpos; rep++) {
if (out[rep] != CRUSH_ITEM_UNDEF)
continue;
@@ -726,6 +751,24 @@ static void crush_choose_indep(const struct crush_map *map,
out2[rep] = CRUSH_ITEM_NONE;
}
}
+#ifndef __KERNEL__
+ if (map->choose_tries && ftotal <= map->choose_total_tries)
+ map->choose_tries[ftotal]++;
+#endif
+#ifdef DEBUG_INDEP
+ if (out2) {
+ dprintk("%u %d a: ", ftotal, left);
+ for (rep = outpos; rep < endpos; rep++) {
+ dprintk(" %d", out[rep]);
+ }
+ dprintk("\n");
+ dprintk("%u %d b: ", ftotal, left);
+ for (rep = outpos; rep < endpos; rep++) {
+ dprintk(" %d", out2[rep]);
+ }
+ dprintk("\n");
+ }
+#endif
}
/**
@@ -790,8 +833,15 @@ int crush_do_rule(const struct crush_map *map,
switch (curstep->op) {
case CRUSH_RULE_TAKE:
- w[0] = curstep->arg1;
- wsize = 1;
+ if ((curstep->arg1 >= 0 &&
+ curstep->arg1 < map->max_devices) ||
+ (-1-curstep->arg1 < map->max_buckets &&
+ map->buckets[-1-curstep->arg1])) {
+ w[0] = curstep->arg1;
+ wsize = 1;
+ } else {
+ dprintk(" bad take value %d\n", curstep->arg1);
+ }
break;
case CRUSH_RULE_SET_CHOOSE_TRIES:
@@ -877,7 +927,7 @@ int crush_do_rule(const struct crush_map *map,
0);
} else {
out_size = ((numrep < (result_max-osize)) ?
- numrep : (result_max-osize));
+ numrep : (result_max-osize));
crush_choose_indep(
map,
map->buckets[-1-w[i]],
@@ -923,5 +973,3 @@ int crush_do_rule(const struct crush_map *map,
}
return result_len;
}
-
-
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 967080a9f..e3be1d22a 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -6,6 +6,7 @@
#include <linux/inet.h>
#include <linux/kthread.h>
#include <linux/net.h>
+#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/string.h>
@@ -278,7 +279,6 @@ static void _ceph_msgr_exit(void)
ceph_msgr_slab_exit();
BUG_ON(zero_page == NULL);
- kunmap(zero_page);
page_cache_release(zero_page);
zero_page = NULL;
}
@@ -480,8 +480,8 @@ static int ceph_tcp_connect(struct ceph_connection *con)
int ret;
BUG_ON(con->sock);
- ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM,
- IPPROTO_TCP, &sock);
+ ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family,
+ SOCK_STREAM, IPPROTO_TCP, &sock);
if (ret)
return ret;
sock->sk->sk_allocation = GFP_NOFS;
@@ -1545,7 +1545,7 @@ static int write_partial_message_data(struct ceph_connection *con)
page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
&last_piece);
ret = ceph_tcp_sendpage(con->sock, page, page_offset,
- length, last_piece);
+ length, !last_piece);
if (ret <= 0) {
if (do_datacrc)
msg->footer.data_crc = cpu_to_le32(crc);
@@ -1732,17 +1732,17 @@ static int verify_hello(struct ceph_connection *con)
static bool addr_is_blank(struct sockaddr_storage *ss)
{
+ struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr;
+ struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr;
+
switch (ss->ss_family) {
case AF_INET:
- return ((struct sockaddr_in *)ss)->sin_addr.s_addr == 0;
+ return addr->s_addr == htonl(INADDR_ANY);
case AF_INET6:
- return
- ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[0] == 0 &&
- ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[1] == 0 &&
- ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[2] == 0 &&
- ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[3] == 0;
+ return ipv6_addr_any(addr6);
+ default:
+ return true;
}
- return false;
}
static int addr_port(struct sockaddr_storage *ss)
@@ -2945,11 +2945,18 @@ void ceph_messenger_init(struct ceph_messenger *msgr,
msgr->tcp_nodelay = tcp_nodelay;
atomic_set(&msgr->stopping, 0);
+ write_pnet(&msgr->net, get_net(current->nsproxy->net_ns));
dout("%s %p\n", __func__, msgr);
}
EXPORT_SYMBOL(ceph_messenger_init);
+void ceph_messenger_fini(struct ceph_messenger *msgr)
+{
+ put_net(read_pnet(&msgr->net));
+}
+EXPORT_SYMBOL(ceph_messenger_fini);
+
static void clear_standby(struct ceph_connection *con)
{
/* come back from STANDBY? */
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 2b3cf05e8..9d6ff1215 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -298,21 +298,28 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
}
EXPORT_SYMBOL(ceph_monc_request_next_osdmap);
+/*
+ * Wait for an osdmap with a given epoch.
+ *
+ * @epoch: epoch to wait for
+ * @timeout: in jiffies, 0 means "wait forever"
+ */
int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
unsigned long timeout)
{
unsigned long started = jiffies;
- int ret;
+ long ret;
mutex_lock(&monc->mutex);
while (monc->have_osdmap < epoch) {
mutex_unlock(&monc->mutex);
- if (timeout != 0 && time_after_eq(jiffies, started + timeout))
+ if (timeout && time_after_eq(jiffies, started + timeout))
return -ETIMEDOUT;
ret = wait_event_interruptible_timeout(monc->client->auth_wq,
- monc->have_osdmap >= epoch, timeout);
+ monc->have_osdmap >= epoch,
+ ceph_timeout_jiffies(timeout));
if (ret < 0)
return ret;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index c4ec92392..50033677c 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -296,6 +296,9 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
case CEPH_OSD_OP_CMPXATTR:
ceph_osd_data_release(&op->xattr.osd_data);
break;
+ case CEPH_OSD_OP_STAT:
+ ceph_osd_data_release(&op->raw_data_in);
+ break;
default:
break;
}
@@ -450,7 +453,7 @@ __CEPH_FORALL_OSD_OPS(GENERATE_CASE)
*/
static struct ceph_osd_req_op *
_osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
- u16 opcode)
+ u16 opcode, u32 flags)
{
struct ceph_osd_req_op *op;
@@ -460,14 +463,15 @@ _osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
op = &osd_req->r_ops[which];
memset(op, 0, sizeof (*op));
op->op = opcode;
+ op->flags = flags;
return op;
}
void osd_req_op_init(struct ceph_osd_request *osd_req,
- unsigned int which, u16 opcode)
+ unsigned int which, u16 opcode, u32 flags)
{
- (void)_osd_req_op_init(osd_req, which, opcode);
+ (void)_osd_req_op_init(osd_req, which, opcode, flags);
}
EXPORT_SYMBOL(osd_req_op_init);
@@ -476,7 +480,8 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
u64 offset, u64 length,
u64 truncate_size, u32 truncate_seq)
{
- struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
+ opcode, 0);
size_t payload_len = 0;
BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
@@ -515,7 +520,8 @@ EXPORT_SYMBOL(osd_req_op_extent_update);
void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
u16 opcode, const char *class, const char *method)
{
- struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
+ opcode, 0);
struct ceph_pagelist *pagelist;
size_t payload_len = 0;
size_t size;
@@ -552,7 +558,8 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
u16 opcode, const char *name, const void *value,
size_t size, u8 cmp_op, u8 cmp_mode)
{
- struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
+ opcode, 0);
struct ceph_pagelist *pagelist;
size_t payload_len;
@@ -585,7 +592,8 @@ void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
unsigned int which, u16 opcode,
u64 cookie, u64 version, int flag)
{
- struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
+ opcode, 0);
BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH);
@@ -602,7 +610,8 @@ void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
u64 expected_write_size)
{
struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
- CEPH_OSD_OP_SETALLOCHINT);
+ CEPH_OSD_OP_SETALLOCHINT,
+ 0);
op->alloc_hint.expected_object_size = expected_object_size;
op->alloc_hint.expected_write_size = expected_write_size;
@@ -786,7 +795,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
}
if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) {
- osd_req_op_init(req, which, opcode);
+ osd_req_op_init(req, which, opcode, 0);
} else {
u32 object_size = le32_to_cpu(layout->fl_object_size);
u32 object_base = off - objoff;
@@ -1088,7 +1097,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc,
BUG_ON(!list_empty(&osd->o_osd_lru));
list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
- osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ;
+ osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl;
}
static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc,
@@ -1199,7 +1208,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o)
static void __schedule_osd_timeout(struct ceph_osd_client *osdc)
{
schedule_delayed_work(&osdc->timeout_work,
- osdc->client->options->osd_keepalive_timeout * HZ);
+ osdc->client->options->osd_keepalive_timeout);
}
static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
@@ -1567,10 +1576,9 @@ static void handle_timeout(struct work_struct *work)
{
struct ceph_osd_client *osdc =
container_of(work, struct ceph_osd_client, timeout_work.work);
+ struct ceph_options *opts = osdc->client->options;
struct ceph_osd_request *req;
struct ceph_osd *osd;
- unsigned long keepalive =
- osdc->client->options->osd_keepalive_timeout * HZ;
struct list_head slow_osds;
dout("timeout\n");
down_read(&osdc->map_sem);
@@ -1586,7 +1594,8 @@ static void handle_timeout(struct work_struct *work)
*/
INIT_LIST_HEAD(&slow_osds);
list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) {
- if (time_before(jiffies, req->r_stamp + keepalive))
+ if (time_before(jiffies,
+ req->r_stamp + opts->osd_keepalive_timeout))
break;
osd = req->r_osd;
@@ -1613,8 +1622,7 @@ static void handle_osds_timeout(struct work_struct *work)
struct ceph_osd_client *osdc =
container_of(work, struct ceph_osd_client,
osds_timeout_work.work);
- unsigned long delay =
- osdc->client->options->osd_idle_ttl * HZ >> 2;
+ unsigned long delay = osdc->client->options->osd_idle_ttl / 4;
dout("osds timeout\n");
down_read(&osdc->map_sem);
@@ -2619,7 +2627,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
osdc->event_count = 0;
schedule_delayed_work(&osdc->osds_timeout_work,
- round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ));
+ round_jiffies_relative(osdc->client->options->osd_idle_ttl));
err = -ENOMEM;
osdc->req_mempool = mempool_create_kmalloc_pool(10,
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 096d91447..d4f5f220a 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -51,10 +51,7 @@ void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
set_page_dirty_lock(pages[i]);
put_page(pages[i]);
}
- if (is_vmalloc_addr(pages))
- vfree(pages);
- else
- kfree(pages);
+ kvfree(pages);
}
EXPORT_SYMBOL(ceph_put_page_vector);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index b80fb91bb..617088aee 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -131,6 +131,35 @@ out_noerr:
goto out;
}
+static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
+{
+ struct sk_buff *nskb;
+
+ if (skb->peeked)
+ return skb;
+
+ /* We have to unshare an skb before modifying it. */
+ if (!skb_shared(skb))
+ goto done;
+
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return ERR_PTR(-ENOMEM);
+
+ skb->prev->next = nskb;
+ skb->next->prev = nskb;
+ nskb->prev = skb->prev;
+ nskb->next = skb->next;
+
+ consume_skb(skb);
+ skb = nskb;
+
+done:
+ skb->peeked = 1;
+
+ return skb;
+}
+
/**
* __skb_recv_datagram - Receive a datagram skbuff
* @sk: socket
@@ -165,7 +194,9 @@ out_noerr:
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
int *peeked, int *off, int *err)
{
+ struct sk_buff_head *queue = &sk->sk_receive_queue;
struct sk_buff *skb, *last;
+ unsigned long cpu_flags;
long timeo;
/*
* Caller is allowed not to check sk->sk_err before skb_recv_datagram()
@@ -184,8 +215,6 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
* Look at current nfs client by the way...
* However, this function was correct in any case. 8)
*/
- unsigned long cpu_flags;
- struct sk_buff_head *queue = &sk->sk_receive_queue;
int _off = *off;
last = (struct sk_buff *)queue;
@@ -199,7 +228,12 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
_off -= skb->len;
continue;
}
- skb->peeked = 1;
+
+ skb = skb_set_peeked(skb);
+ error = PTR_ERR(skb);
+ if (IS_ERR(skb))
+ goto unlock_err;
+
atomic_inc(&skb->users);
} else
__skb_unlink(skb, queue);
@@ -223,6 +257,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
return NULL;
+unlock_err:
+ spin_unlock_irqrestore(&queue->lock, cpu_flags);
no_packet:
*err = error;
return NULL;
@@ -622,7 +658,8 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
!skb->csum_complete_sw)
netdev_rx_csum_fault(skb->dev);
}
- skb->csum_valid = !sum;
+ if (!skb_shared(skb))
+ skb->csum_valid = !sum;
return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete_head);
@@ -642,11 +679,13 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb)
netdev_rx_csum_fault(skb->dev);
}
- /* Save full packet checksum */
- skb->csum = csum;
- skb->ip_summed = CHECKSUM_COMPLETE;
- skb->csum_complete_sw = 1;
- skb->csum_valid = !sum;
+ if (!skb_shared(skb)) {
+ /* Save full packet checksum */
+ skb->csum = csum;
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum_complete_sw = 1;
+ skb->csum_valid = !sum;
+ }
return sum;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index aa82f9ab6..a8e4dd430 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -135,6 +135,7 @@
#include <linux/if_macvlan.h>
#include <linux/errqueue.h>
#include <linux/hrtimer.h>
+#include <linux/netfilter_ingress.h>
#include "net-sysfs.h"
@@ -468,10 +469,14 @@ EXPORT_SYMBOL(dev_remove_pack);
*/
void dev_add_offload(struct packet_offload *po)
{
- struct list_head *head = &offload_base;
+ struct packet_offload *elem;
spin_lock(&offload_lock);
- list_add_rcu(&po->list, head);
+ list_for_each_entry(elem, &offload_base, list) {
+ if (po->priority < elem->priority)
+ break;
+ }
+ list_add_rcu(&po->list, elem->list.prev);
spin_unlock(&offload_lock);
}
EXPORT_SYMBOL(dev_add_offload);
@@ -672,10 +677,6 @@ int dev_get_iflink(const struct net_device *dev)
if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
return dev->netdev_ops->ndo_get_iflink(dev);
- /* If dev->rtnl_link_ops is set, it's a virtual interface. */
- if (dev->rtnl_link_ops)
- return 0;
-
return dev->ifindex;
}
EXPORT_SYMBOL(dev_get_iflink);
@@ -1630,7 +1631,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
}
EXPORT_SYMBOL(call_netdevice_notifiers);
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_INGRESS
static struct static_key ingress_needed __read_mostly;
void net_inc_ingress_queue(void)
@@ -2343,6 +2344,34 @@ void netif_device_attach(struct net_device *dev)
}
EXPORT_SYMBOL(netif_device_attach);
+/*
+ * Returns a Tx hash based on the given packet descriptor a Tx queues' number
+ * to be used as a distribution range.
+ */
+u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
+ unsigned int num_tx_queues)
+{
+ u32 hash;
+ u16 qoffset = 0;
+ u16 qcount = num_tx_queues;
+
+ if (skb_rx_queue_recorded(skb)) {
+ hash = skb_get_rx_queue(skb);
+ while (unlikely(hash >= num_tx_queues))
+ hash -= num_tx_queues;
+ return hash;
+ }
+
+ if (dev->num_tc) {
+ u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+ qoffset = dev->tc_to_txq[tc].offset;
+ qcount = dev->tc_to_txq[tc].count;
+ }
+
+ return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
+}
+EXPORT_SYMBOL(__skb_tx_hash);
+
static void skb_warn_bad_offload(const struct sk_buff *skb)
{
static const netdev_features_t null_features = 0;
@@ -2901,6 +2930,84 @@ int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(dev_loopback_xmit);
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+ struct xps_dev_maps *dev_maps;
+ struct xps_map *map;
+ int queue_index = -1;
+
+ rcu_read_lock();
+ dev_maps = rcu_dereference(dev->xps_maps);
+ if (dev_maps) {
+ map = rcu_dereference(
+ dev_maps->cpu_map[skb->sender_cpu - 1]);
+ if (map) {
+ if (map->len == 1)
+ queue_index = map->queues[0];
+ else
+ queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
+ map->len)];
+ if (unlikely(queue_index >= dev->real_num_tx_queues))
+ queue_index = -1;
+ }
+ }
+ rcu_read_unlock();
+
+ return queue_index;
+#else
+ return -1;
+#endif
+}
+
+static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+ int queue_index = sk_tx_queue_get(sk);
+
+ if (queue_index < 0 || skb->ooo_okay ||
+ queue_index >= dev->real_num_tx_queues) {
+ int new_index = get_xps_queue(dev, skb);
+ if (new_index < 0)
+ new_index = skb_tx_hash(dev, skb);
+
+ if (queue_index != new_index && sk &&
+ rcu_access_pointer(sk->sk_dst_cache))
+ sk_tx_queue_set(sk, new_index);
+
+ queue_index = new_index;
+ }
+
+ return queue_index;
+}
+
+struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+ struct sk_buff *skb,
+ void *accel_priv)
+{
+ int queue_index = 0;
+
+#ifdef CONFIG_XPS
+ if (skb->sender_cpu == 0)
+ skb->sender_cpu = raw_smp_processor_id() + 1;
+#endif
+
+ if (dev->real_num_tx_queues != 1) {
+ const struct net_device_ops *ops = dev->netdev_ops;
+ if (ops->ndo_select_queue)
+ queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
+ __netdev_pick_tx);
+ else
+ queue_index = __netdev_pick_tx(dev, skb);
+
+ if (!accel_priv)
+ queue_index = netdev_cap_txqueue(dev, queue_index);
+ }
+
+ skb_set_queue_mapping(skb, queue_index);
+ return netdev_get_tx_queue(dev, queue_index);
+}
+
/**
* __dev_queue_xmit - transmit a buffer
* @skb: buffer to transmit
@@ -3341,6 +3448,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
local_irq_save(flags);
rps_lock(sd);
+ if (!netif_running(skb->dev))
+ goto drop;
qlen = skb_queue_len(&sd->input_pkt_queue);
if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
if (qlen) {
@@ -3362,6 +3471,7 @@ enqueue:
goto enqueue;
}
+drop:
sd->dropped++;
rps_unlock(sd);
@@ -3513,66 +3623,47 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev,
EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
#endif
-#ifdef CONFIG_NET_CLS_ACT
-/* TODO: Maybe we should just force sch_ingress to be compiled in
- * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
- * a compare and 2 stores extra right now if we dont have it on
- * but have CONFIG_NET_CLS_ACT
- * NOTE: This doesn't stop any functionality; if you dont have
- * the ingress scheduler, you just can't add policies on ingress.
- *
- */
-static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
-{
- struct net_device *dev = skb->dev;
- u32 ttl = G_TC_RTTL(skb->tc_verd);
- int result = TC_ACT_OK;
- struct Qdisc *q;
-
- if (unlikely(MAX_RED_LOOP < ttl++)) {
- net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
- skb->skb_iif, dev->ifindex);
- return TC_ACT_SHOT;
- }
-
- skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
- skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
-
- q = rcu_dereference(rxq->qdisc);
- if (q != &noop_qdisc) {
- spin_lock(qdisc_lock(q));
- if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
- result = qdisc_enqueue_root(skb, q);
- spin_unlock(qdisc_lock(q));
- }
-
- return result;
-}
-
static inline struct sk_buff *handle_ing(struct sk_buff *skb,
struct packet_type **pt_prev,
int *ret, struct net_device *orig_dev)
{
- struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
+#ifdef CONFIG_NET_CLS_ACT
+ struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
+ struct tcf_result cl_res;
- if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc)
+ /* If there's at least one ingress present somewhere (so
+ * we get here via enabled static key), remaining devices
+ * that are not configured with an ingress qdisc will bail
+ * out here.
+ */
+ if (!cl)
return skb;
-
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
}
- switch (ing_filter(skb, rxq)) {
+ qdisc_skb_cb(skb)->pkt_len = skb->len;
+ skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
+ qdisc_bstats_update_cpu(cl->q, skb);
+
+ switch (tc_classify(skb, cl, &cl_res)) {
+ case TC_ACT_OK:
+ case TC_ACT_RECLASSIFY:
+ skb->tc_index = TC_H_MIN(cl_res.classid);
+ break;
case TC_ACT_SHOT:
+ qdisc_qstats_drop_cpu(cl->q);
case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
kfree_skb(skb);
return NULL;
+ default:
+ break;
}
-
+#endif /* CONFIG_NET_CLS_ACT */
return skb;
}
-#endif
/**
* netdev_rx_handler_register - register receive handler
@@ -3645,6 +3736,22 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
}
}
+static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
+ int *ret, struct net_device *orig_dev)
+{
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (nf_hook_ingress_active(skb)) {
+ if (*pt_prev) {
+ *ret = deliver_skb(skb, *pt_prev, orig_dev);
+ *pt_prev = NULL;
+ }
+
+ return nf_hook_ingress(skb);
+ }
+#endif /* CONFIG_NETFILTER_INGRESS */
+ return 0;
+}
+
static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
{
struct packet_type *ptype, *pt_prev;
@@ -3667,8 +3774,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
pt_prev = NULL;
- rcu_read_lock();
-
another_round:
skb->skb_iif = skb->dev->ifindex;
@@ -3678,7 +3783,7 @@ another_round:
skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
skb = skb_vlan_untag(skb);
if (unlikely(!skb))
- goto unlock;
+ goto out;
}
#ifdef CONFIG_NET_CLS_ACT
@@ -3704,13 +3809,17 @@ another_round:
}
skip_taps:
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_INGRESS
if (static_key_false(&ingress_needed)) {
skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
if (!skb)
- goto unlock;
- }
+ goto out;
+ if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
+ goto out;
+ }
+#endif
+#ifdef CONFIG_NET_CLS_ACT
skb->tc_verd = 0;
ncls:
#endif
@@ -3725,7 +3834,7 @@ ncls:
if (vlan_do_receive(&skb))
goto another_round;
else if (unlikely(!skb))
- goto unlock;
+ goto out;
}
rx_handler = rcu_dereference(skb->dev->rx_handler);
@@ -3737,7 +3846,7 @@ ncls:
switch (rx_handler(&skb)) {
case RX_HANDLER_CONSUMED:
ret = NET_RX_SUCCESS;
- goto unlock;
+ goto out;
case RX_HANDLER_ANOTHER:
goto another_round;
case RX_HANDLER_EXACT:
@@ -3791,8 +3900,7 @@ drop:
ret = NET_RX_DROP;
}
-unlock:
- rcu_read_unlock();
+out:
return ret;
}
@@ -3823,29 +3931,30 @@ static int __netif_receive_skb(struct sk_buff *skb)
static int netif_receive_skb_internal(struct sk_buff *skb)
{
+ int ret;
+
net_timestamp_check(netdev_tstamp_prequeue, skb);
if (skb_defer_rx_timestamp(skb))
return NET_RX_SUCCESS;
+ rcu_read_lock();
+
#ifdef CONFIG_RPS
if (static_key_false(&rps_needed)) {
struct rps_dev_flow voidflow, *rflow = &voidflow;
- int cpu, ret;
-
- rcu_read_lock();
-
- cpu = get_rps_cpu(skb->dev, skb, &rflow);
+ int cpu = get_rps_cpu(skb->dev, skb, &rflow);
if (cpu >= 0) {
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
rcu_read_unlock();
return ret;
}
- rcu_read_unlock();
}
#endif
- return __netif_receive_skb(skb);
+ ret = __netif_receive_skb(skb);
+ rcu_read_unlock();
+ return ret;
}
/**
@@ -4390,8 +4499,10 @@ static int process_backlog(struct napi_struct *napi, int quota)
struct sk_buff *skb;
while ((skb = __skb_dequeue(&sd->process_queue))) {
+ rcu_read_lock();
local_irq_enable();
__netif_receive_skb(skb);
+ rcu_read_unlock();
local_irq_disable();
input_queue_head_incr(sd);
if (++work >= quota) {
@@ -6027,6 +6138,7 @@ static void rollback_registered_many(struct list_head *head)
unlist_netdevice(dev);
dev->reg_state = NETREG_UNREGISTERING;
+ on_each_cpu(flush_backlog, dev, 1);
}
synchronize_net();
@@ -6297,7 +6409,8 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
struct netdev_queue *tx;
size_t sz = count * sizeof(*tx);
- BUG_ON(count < 1 || count > 0xffff);
+ if (count < 1 || count > 0xffff)
+ return -EINVAL;
tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
if (!tx) {
@@ -6313,6 +6426,17 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
return 0;
}
+void netif_tx_stop_all_queues(struct net_device *dev)
+{
+ unsigned int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+ netif_tx_stop_queue(txq);
+ }
+}
+EXPORT_SYMBOL(netif_tx_stop_all_queues);
+
/**
* register_netdevice - register a network device
* @dev: device to register
@@ -6650,8 +6774,6 @@ void netdev_run_todo(void)
dev->reg_state = NETREG_UNREGISTERED;
- on_each_cpu(flush_backlog, dev, 1);
-
netdev_wait_allrefs(dev);
/* paranoia */
@@ -6862,6 +6984,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->group = INIT_NETDEV_GROUP;
if (!dev->ethtool_ops)
dev->ethtool_ops = &default_ethtool_ops;
+
+ nf_hook_ingress_init(dev);
+
return dev;
free_all:
diff --git a/net/core/dst.c b/net/core/dst.c
index e956ce6d1..002144bea 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -284,7 +284,9 @@ void dst_release(struct dst_entry *dst)
int newrefcnt;
newrefcnt = atomic_dec_return(&dst->__refcnt);
- WARN_ON(newrefcnt < 0);
+ if (unlikely(newrefcnt < 0))
+ net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
+ __func__, dst, newrefcnt);
if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt)
call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 1d00b8922..b495ab179 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -98,7 +98,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_RXALL_BIT] = "rx-all",
[NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
[NETIF_F_BUSY_POLL_BIT] = "busy-poll",
- [NETIF_F_HW_SWITCH_OFFLOAD_BIT] = "hw-switch-offload",
};
static const char
@@ -107,6 +106,13 @@ rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = {
[ETH_RSS_HASH_XOR_BIT] = "xor",
};
+static const char
+tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
+ [ETHTOOL_ID_UNSPEC] = "Unspec",
+ [ETHTOOL_RX_COPYBREAK] = "rx-copybreak",
+ [ETHTOOL_TX_COPYBREAK] = "tx-copybreak",
+};
+
static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
{
struct ethtool_gfeatures cmd = {
@@ -195,6 +201,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
if (sset == ETH_SS_RSS_HASH_FUNCS)
return ARRAY_SIZE(rss_hash_func_strings);
+ if (sset == ETH_SS_TUNABLES)
+ return ARRAY_SIZE(tunable_strings);
+
if (ops->get_sset_count && ops->get_strings)
return ops->get_sset_count(dev, sset);
else
@@ -212,6 +221,8 @@ static void __ethtool_get_strings(struct net_device *dev,
else if (stringset == ETH_SS_RSS_HASH_FUNCS)
memcpy(data, rss_hash_func_strings,
sizeof(rss_hash_func_strings));
+ else if (stringset == ETH_SS_TUNABLES)
+ memcpy(data, tunable_strings, sizeof(tunable_strings));
else
/* ops->get_strings is valid because checked earlier */
ops->get_strings(dev, stringset, data);
diff --git a/net/core/filter.c b/net/core/filter.c
index bf831a85c..be3098fb6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -36,6 +36,7 @@
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
+#include <net/flow_dissector.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/uaccess.h>
@@ -45,6 +46,7 @@
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
+#include <net/sch_generic.h>
/**
* sk_filter - run a packet through a socket filter
@@ -355,8 +357,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
* for socket filters: ctx == 'struct sk_buff *', for seccomp:
* ctx == 'struct seccomp_data *'.
*/
-int bpf_convert_filter(struct sock_filter *prog, int len,
- struct bpf_insn *new_prog, int *new_len)
+static int bpf_convert_filter(struct sock_filter *prog, int len,
+ struct bpf_insn *new_prog, int *new_len)
{
int new_flen = 0, pass = 0, target, i;
struct bpf_insn *new_insn;
@@ -371,7 +373,8 @@ int bpf_convert_filter(struct sock_filter *prog, int len,
return -EINVAL;
if (new_prog) {
- addrs = kcalloc(len, sizeof(*addrs), GFP_KERNEL);
+ addrs = kcalloc(len, sizeof(*addrs),
+ GFP_KERNEL | __GFP_NOWARN);
if (!addrs)
return -ENOMEM;
}
@@ -751,7 +754,8 @@ static bool chk_code_allowed(u16 code_to_probe)
*
* Returns 0 if the rule set is legal or -EINVAL if not.
*/
-int bpf_check_classic(const struct sock_filter *filter, unsigned int flen)
+static int bpf_check_classic(const struct sock_filter *filter,
+ unsigned int flen)
{
bool anc_found;
int pc;
@@ -825,7 +829,6 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen)
return -EINVAL;
}
-EXPORT_SYMBOL(bpf_check_classic);
static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
const struct sock_fprog *fprog)
@@ -839,7 +842,9 @@ static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
fkprog = fp->orig_prog;
fkprog->len = fprog->len;
- fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL);
+
+ fkprog->filter = kmemdup(fp->insns, fsize,
+ GFP_KERNEL | __GFP_NOWARN);
if (!fkprog->filter) {
kfree(fp->orig_prog);
return -ENOMEM;
@@ -941,7 +946,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
* pass. At this time, the user BPF is stored in fp->insns.
*/
old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (!old_prog) {
err = -ENOMEM;
goto out_err;
@@ -988,7 +993,8 @@ out_err:
return ERR_PTR(err);
}
-static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp)
+static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
+ bpf_aux_classic_check_t trans)
{
int err;
@@ -1001,6 +1007,17 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp)
return ERR_PTR(err);
}
+ /* There might be additional checks and transformations
+ * needed on classic filters, f.e. in case of seccomp.
+ */
+ if (trans) {
+ err = trans(fp->insns, fp->len);
+ if (err) {
+ __bpf_prog_release(fp);
+ return ERR_PTR(err);
+ }
+ }
+
/* Probe if we can JIT compile the filter and if so, do
* the compilation of the filter.
*/
@@ -1050,7 +1067,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
/* bpf_prepare_filter() already takes care of freeing
* memory in case something goes wrong.
*/
- fp = bpf_prepare_filter(fp);
+ fp = bpf_prepare_filter(fp, NULL);
if (IS_ERR(fp))
return PTR_ERR(fp);
@@ -1059,6 +1076,53 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
}
EXPORT_SYMBOL_GPL(bpf_prog_create);
+/**
+ * bpf_prog_create_from_user - create an unattached filter from user buffer
+ * @pfp: the unattached filter that is created
+ * @fprog: the filter program
+ * @trans: post-classic verifier transformation handler
+ *
+ * This function effectively does the same as bpf_prog_create(), only
+ * that it builds up its insns buffer from user space provided buffer.
+ * It also allows for passing a bpf_aux_classic_check_t handler.
+ */
+int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
+ bpf_aux_classic_check_t trans)
+{
+ unsigned int fsize = bpf_classic_proglen(fprog);
+ struct bpf_prog *fp;
+
+ /* Make sure new filter is there and in the right amounts. */
+ if (fprog->filter == NULL)
+ return -EINVAL;
+
+ fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
+ if (!fp)
+ return -ENOMEM;
+
+ if (copy_from_user(fp->insns, fprog->filter, fsize)) {
+ __bpf_prog_free(fp);
+ return -EFAULT;
+ }
+
+ fp->len = fprog->len;
+ /* Since unattached filters are not copied back to user
+ * space through sk_get_filter(), we do not need to hold
+ * a copy here, and can spare us the work.
+ */
+ fp->orig_prog = NULL;
+
+ /* bpf_prepare_filter() already takes care of freeing
+ * memory in case something goes wrong.
+ */
+ fp = bpf_prepare_filter(fp, trans);
+ if (IS_ERR(fp))
+ return PTR_ERR(fp);
+
+ *pfp = fp;
+ return 0;
+}
+
void bpf_prog_destroy(struct bpf_prog *fp)
{
__bpf_prog_release(fp);
@@ -1135,7 +1199,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
/* bpf_prepare_filter() already takes care of freeing
* memory in case something goes wrong.
*/
- prog = bpf_prepare_filter(prog);
+ prog = bpf_prepare_filter(prog, NULL);
if (IS_ERR(prog))
return PTR_ERR(prog);
@@ -1175,21 +1239,6 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
return 0;
}
-/**
- * bpf_skb_clone_not_writable - is the header of a clone not writable
- * @skb: buffer to check
- * @len: length up to which to write, can be negative
- *
- * Returns true if modifying the header part of the cloned buffer
- * does require the data to be copied. I.e. this version works with
- * negative lengths needed for eBPF case!
- */
-static bool bpf_skb_clone_unwritable(const struct sk_buff *skb, int len)
-{
- return skb_header_cloned(skb) ||
- (int) skb_headroom(skb) + len > skb->hdr_len;
-}
-
#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1)
static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
@@ -1212,9 +1261,8 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
if (unlikely((u32) offset > 0xffff || len > sizeof(buf)))
return -EFAULT;
- offset -= skb->data - skb_mac_header(skb);
if (unlikely(skb_cloned(skb) &&
- bpf_skb_clone_unwritable(skb, offset + len)))
+ !skb_clone_writable(skb, offset + len)))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, len, buf);
@@ -1258,9 +1306,8 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
if (unlikely((u32) offset > 0xffff))
return -EFAULT;
- offset -= skb->data - skb_mac_header(skb);
if (unlikely(skb_cloned(skb) &&
- bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
+ !skb_clone_writable(skb, offset + sizeof(sum))))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1306,9 +1353,8 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
if (unlikely((u32) offset > 0xffff))
return -EFAULT;
- offset -= skb->data - skb_mac_header(skb);
if (unlikely(skb_cloned(skb) &&
- bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
+ !skb_clone_writable(skb, offset + sizeof(sum))))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1344,6 +1390,40 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = {
.arg5_type = ARG_ANYTHING,
};
+#define BPF_IS_REDIRECT_INGRESS(flags) ((flags) & 1)
+
+static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2;
+ struct net_device *dev;
+
+ dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
+ if (unlikely(!dev))
+ return -EINVAL;
+
+ if (unlikely(!(dev->flags & IFF_UP)))
+ return -EINVAL;
+
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!skb2))
+ return -ENOMEM;
+
+ if (BPF_IS_REDIRECT_INGRESS(flags))
+ return dev_forward_skb(dev, skb2);
+
+ skb2->dev = dev;
+ return dev_queue_xmit(skb2);
+}
+
+const struct bpf_func_proto bpf_clone_redirect_proto = {
+ .func = bpf_clone_redirect,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *
sk_filter_func_proto(enum bpf_func_id func_id)
{
@@ -1358,6 +1438,12 @@ sk_filter_func_proto(enum bpf_func_id func_id)
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
+ case BPF_FUNC_tail_call:
+ return &bpf_tail_call_proto;
+ case BPF_FUNC_ktime_get_ns:
+ return &bpf_ktime_get_ns_proto;
+ case BPF_FUNC_trace_printk:
+ return bpf_get_trace_printk_proto();
default:
return NULL;
}
@@ -1373,18 +1459,15 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_l3_csum_replace_proto;
case BPF_FUNC_l4_csum_replace:
return &bpf_l4_csum_replace_proto;
+ case BPF_FUNC_clone_redirect:
+ return &bpf_clone_redirect_proto;
default:
return sk_filter_func_proto(func_id);
}
}
-static bool sk_filter_is_valid_access(int off, int size,
- enum bpf_access_type type)
+static bool __is_valid_access(int off, int size, enum bpf_access_type type)
{
- /* only read is allowed */
- if (type != BPF_READ)
- return false;
-
/* check bounds */
if (off < 0 || off >= sizeof(struct __sk_buff))
return false;
@@ -1400,8 +1483,42 @@ static bool sk_filter_is_valid_access(int off, int size,
return true;
}
-static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
- struct bpf_insn *insn_buf)
+static bool sk_filter_is_valid_access(int off, int size,
+ enum bpf_access_type type)
+{
+ if (type == BPF_WRITE) {
+ switch (off) {
+ case offsetof(struct __sk_buff, cb[0]) ...
+ offsetof(struct __sk_buff, cb[4]):
+ break;
+ default:
+ return false;
+ }
+ }
+
+ return __is_valid_access(off, size, type);
+}
+
+static bool tc_cls_act_is_valid_access(int off, int size,
+ enum bpf_access_type type)
+{
+ if (type == BPF_WRITE) {
+ switch (off) {
+ case offsetof(struct __sk_buff, mark):
+ case offsetof(struct __sk_buff, tc_index):
+ case offsetof(struct __sk_buff, cb[0]) ...
+ offsetof(struct __sk_buff, cb[4]):
+ break;
+ default:
+ return false;
+ }
+ }
+ return __is_valid_access(off, size, type);
+}
+
+static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+ int src_reg, int ctx_off,
+ struct bpf_insn *insn_buf)
{
struct bpf_insn *insn = insn_buf;
@@ -1434,8 +1551,34 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
offsetof(struct sk_buff, priority));
break;
+ case offsetof(struct __sk_buff, ingress_ifindex):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, skb_iif));
+ break;
+
+ case offsetof(struct __sk_buff, ifindex):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
+
+ *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+ dst_reg, src_reg,
+ offsetof(struct sk_buff, dev));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg,
+ offsetof(struct net_device, ifindex));
+ break;
+
case offsetof(struct __sk_buff, mark):
- return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn);
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, mark));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, mark));
+ break;
case offsetof(struct __sk_buff, pkt_type):
return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn);
@@ -1450,6 +1593,38 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
case offsetof(struct __sk_buff, vlan_tci):
return convert_skb_access(SKF_AD_VLAN_TAG,
dst_reg, src_reg, insn);
+
+ case offsetof(struct __sk_buff, cb[0]) ...
+ offsetof(struct __sk_buff, cb[4]):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
+
+ ctx_off -= offsetof(struct __sk_buff, cb[0]);
+ ctx_off += offsetof(struct sk_buff, cb);
+ ctx_off += offsetof(struct qdisc_skb_cb, data);
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
+ else
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
+ break;
+
+ case offsetof(struct __sk_buff, tc_index):
+#ifdef CONFIG_NET_SCHED
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
+
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg,
+ offsetof(struct sk_buff, tc_index));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+ offsetof(struct sk_buff, tc_index));
+ break;
+#else
+ if (type == BPF_WRITE)
+ *insn++ = BPF_MOV64_REG(dst_reg, dst_reg);
+ else
+ *insn++ = BPF_MOV64_IMM(dst_reg, 0);
+ break;
+#endif
}
return insn - insn_buf;
@@ -1458,13 +1633,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
static const struct bpf_verifier_ops sk_filter_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
- .convert_ctx_access = sk_filter_convert_ctx_access,
+ .convert_ctx_access = bpf_net_convert_ctx_access,
};
static const struct bpf_verifier_ops tc_cls_act_ops = {
.get_func_proto = tc_cls_act_func_proto,
- .is_valid_access = sk_filter_is_valid_access,
- .convert_ctx_access = sk_filter_convert_ctx_access,
+ .is_valid_access = tc_cls_act_is_valid_access,
+ .convert_ctx_access = bpf_net_convert_ctx_access,
};
static struct bpf_prog_type_list sk_filter_type __read_mostly = {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 2c35c02a9..2a834c617 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1,3 +1,4 @@
+#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/export.h>
#include <linux/ip.h>
@@ -12,19 +13,60 @@
#include <linux/if_tunnel.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
-#include <net/flow_keys.h>
+#include <linux/stddef.h>
+#include <linux/if_ether.h>
+#include <linux/mpls.h>
+#include <net/flow_dissector.h>
#include <scsi/fc/fc_fcoe.h>
-/* copy saddr & daddr, possibly using 64bit load/store
- * Equivalent to : flow->src = iph->saddr;
- * flow->dst = iph->daddr;
- */
-static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph)
+static bool skb_flow_dissector_uses_key(struct flow_dissector *flow_dissector,
+ enum flow_dissector_key_id key_id)
+{
+ return flow_dissector->used_keys & (1 << key_id);
+}
+
+static void skb_flow_dissector_set_key(struct flow_dissector *flow_dissector,
+ enum flow_dissector_key_id key_id)
+{
+ flow_dissector->used_keys |= (1 << key_id);
+}
+
+static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
+ enum flow_dissector_key_id key_id,
+ void *target_container)
+{
+ return ((char *) target_container) + flow_dissector->offset[key_id];
+}
+
+void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
+ const struct flow_dissector_key *key,
+ unsigned int key_count)
{
- BUILD_BUG_ON(offsetof(typeof(*flow), dst) !=
- offsetof(typeof(*flow), src) + sizeof(flow->src));
- memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst));
+ unsigned int i;
+
+ memset(flow_dissector, 0, sizeof(*flow_dissector));
+
+ for (i = 0; i < key_count; i++, key++) {
+ /* User should make sure that every key target offset is withing
+ * boundaries of unsigned short.
+ */
+ BUG_ON(key->offset > USHRT_MAX);
+ BUG_ON(skb_flow_dissector_uses_key(flow_dissector,
+ key->key_id));
+
+ skb_flow_dissector_set_key(flow_dissector, key->key_id);
+ flow_dissector->offset[key->key_id] = key->offset;
+ }
+
+ /* Ensure that the dissector always includes control and basic key.
+ * That way we are able to avoid handling lack of these in fast path.
+ */
+ BUG_ON(!skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_CONTROL));
+ BUG_ON(!skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_BASIC));
}
+EXPORT_SYMBOL(skb_flow_dissector_init);
/**
* __skb_flow_get_ports - extract the upper layer ports and return them
@@ -63,18 +105,31 @@ EXPORT_SYMBOL(__skb_flow_get_ports);
/**
* __skb_flow_dissect - extract the flow_keys struct and return it
* @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
+ * @flow_dissector: list of keys to dissect
+ * @target_container: target structure to put dissected values into
* @data: raw buffer pointer to the packet, if NULL use skb->data
* @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
* @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
* @hlen: packet header length, if @data is NULL use skb_headlen(skb)
*
- * The function will try to retrieve the struct flow_keys from either the skbuff
- * or a raw buffer specified by the rest parameters
+ * The function will try to retrieve individual keys into target specified
+ * by flow_dissector from either the skbuff or a raw buffer specified by the
+ * rest parameters.
+ *
+ * Caller must take care of zeroing target container memory.
*/
-bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
+bool __skb_flow_dissect(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container,
void *data, __be16 proto, int nhoff, int hlen)
{
- u8 ip_proto;
+ struct flow_dissector_key_control *key_control;
+ struct flow_dissector_key_basic *key_basic;
+ struct flow_dissector_key_addrs *key_addrs;
+ struct flow_dissector_key_ports *key_ports;
+ struct flow_dissector_key_tags *key_tags;
+ struct flow_dissector_key_keyid *key_keyid;
+ u8 ip_proto = 0;
if (!data) {
data = skb->data;
@@ -83,7 +138,30 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
hlen = skb_headlen(skb);
}
- memset(flow, 0, sizeof(*flow));
+ /* It is ensured by skb_flow_dissector_init() that control key will
+ * be always present.
+ */
+ key_control = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_CONTROL,
+ target_container);
+
+ /* It is ensured by skb_flow_dissector_init() that basic key will
+ * be always present.
+ */
+ key_basic = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ target_container);
+
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct ethhdr *eth = eth_hdr(skb);
+ struct flow_dissector_key_eth_addrs *key_eth_addrs;
+
+ key_eth_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ target_container);
+ memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
+ }
again:
switch (proto) {
@@ -100,14 +178,15 @@ ip:
if (ip_is_fragment(iph))
ip_proto = 0;
- /* skip the address processing if skb is NULL. The assumption
- * here is that if there is no skb we are not looking for flow
- * info but lengths and protocols.
- */
- if (!skb)
+ if (!skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS))
break;
- iph_to_flow_copy_addrs(flow, iph);
+ key_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS, target_container);
+ memcpy(&key_addrs->v4addrs, &iph->saddr,
+ sizeof(key_addrs->v4addrs));
+ key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
break;
}
case htons(ETH_P_IPV6): {
@@ -123,25 +202,27 @@ ipv6:
ip_proto = iph->nexthdr;
nhoff += sizeof(struct ipv6hdr);
- /* see comment above in IPv4 section */
- if (!skb)
- break;
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+ struct flow_dissector_key_ipv6_addrs *key_ipv6_addrs;
+
+ key_ipv6_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ target_container);
- flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
- flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
+ memcpy(key_ipv6_addrs, &iph->saddr, sizeof(*key_ipv6_addrs));
+ key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ }
flow_label = ip6_flowlabel(iph);
if (flow_label) {
- /* Awesome, IPv6 packet has a flow label so we can
- * use that to represent the ports without any
- * further dissection.
- */
- flow->n_proto = proto;
- flow->ip_proto = ip_proto;
- flow->ports = flow_label;
- flow->thoff = (u16)nhoff;
-
- return true;
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
+ key_tags = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_FLOW_LABEL,
+ target_container);
+ key_tags->flow_label = ntohl(flow_label);
+ }
}
break;
@@ -155,6 +236,15 @@ ipv6:
if (!vlan)
return false;
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_VLANID)) {
+ key_tags = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_VLANID,
+ target_container);
+
+ key_tags->vlan_id = skb_vlan_tag_get_id(skb);
+ }
+
proto = vlan->h_vlan_encapsulated_proto;
nhoff += sizeof(*vlan);
goto again;
@@ -186,19 +276,58 @@ ipv6:
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
if (!hdr)
return false;
- flow->src = hdr->srcnode;
- flow->dst = 0;
- flow->n_proto = proto;
- flow->thoff = (u16)nhoff;
+ key_basic->n_proto = proto;
+ key_control->thoff = (u16)nhoff;
+
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_TIPC_ADDRS)) {
+ key_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_TIPC_ADDRS,
+ target_container);
+ key_addrs->tipcaddrs.srcnode = hdr->srcnode;
+ key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS;
+ }
+ return true;
+ }
+
+ case htons(ETH_P_MPLS_UC):
+ case htons(ETH_P_MPLS_MC): {
+ struct mpls_label *hdr, _hdr[2];
+mpls:
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
+ hlen, &_hdr);
+ if (!hdr)
+ return false;
+
+ if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >>
+ MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) {
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) {
+ key_keyid = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
+ target_container);
+ key_keyid->keyid = hdr[1].entry &
+ htonl(MPLS_LS_LABEL_MASK);
+ }
+
+ key_basic->n_proto = proto;
+ key_basic->ip_proto = ip_proto;
+ key_control->thoff = (u16)nhoff;
+
+ return true;
+ }
+
return true;
}
+
case htons(ETH_P_FCOE):
- flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
+ key_control->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
/* fall through */
default:
return false;
}
+ip_proto_again:
switch (ip_proto) {
case IPPROTO_GRE: {
struct gre_hdr {
@@ -213,30 +342,65 @@ ipv6:
* Only look inside GRE if version zero and no
* routing
*/
- if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) {
- proto = hdr->proto;
+ if (hdr->flags & (GRE_VERSION | GRE_ROUTING))
+ break;
+
+ proto = hdr->proto;
+ nhoff += 4;
+ if (hdr->flags & GRE_CSUM)
nhoff += 4;
- if (hdr->flags & GRE_CSUM)
- nhoff += 4;
- if (hdr->flags & GRE_KEY)
- nhoff += 4;
- if (hdr->flags & GRE_SEQ)
- nhoff += 4;
- if (proto == htons(ETH_P_TEB)) {
- const struct ethhdr *eth;
- struct ethhdr _eth;
-
- eth = __skb_header_pointer(skb, nhoff,
- sizeof(_eth),
- data, hlen, &_eth);
- if (!eth)
- return false;
- proto = eth->h_proto;
- nhoff += sizeof(*eth);
+ if (hdr->flags & GRE_KEY) {
+ const __be32 *keyid;
+ __be32 _keyid;
+
+ keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid),
+ data, hlen, &_keyid);
+
+ if (!keyid)
+ return false;
+
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_GRE_KEYID)) {
+ key_keyid = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_GRE_KEYID,
+ target_container);
+ key_keyid->keyid = *keyid;
}
- goto again;
+ nhoff += 4;
}
- break;
+ if (hdr->flags & GRE_SEQ)
+ nhoff += 4;
+ if (proto == htons(ETH_P_TEB)) {
+ const struct ethhdr *eth;
+ struct ethhdr _eth;
+
+ eth = __skb_header_pointer(skb, nhoff,
+ sizeof(_eth),
+ data, hlen, &_eth);
+ if (!eth)
+ return false;
+ proto = eth->h_proto;
+ nhoff += sizeof(*eth);
+ }
+ goto again;
+ }
+ case NEXTHDR_HOP:
+ case NEXTHDR_ROUTING:
+ case NEXTHDR_DEST: {
+ u8 _opthdr[2], *opthdr;
+
+ if (proto != htons(ETH_P_IPV6))
+ break;
+
+ opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr),
+ data, hlen, &_opthdr);
+ if (!opthdr)
+ return false;
+
+ ip_proto = opthdr[0];
+ nhoff += (opthdr[1] + 1) << 3;
+
+ goto ip_proto_again;
}
case IPPROTO_IPIP:
proto = htons(ETH_P_IP);
@@ -244,18 +408,25 @@ ipv6:
case IPPROTO_IPV6:
proto = htons(ETH_P_IPV6);
goto ipv6;
+ case IPPROTO_MPLS:
+ proto = htons(ETH_P_MPLS_UC);
+ goto mpls;
default:
break;
}
- flow->n_proto = proto;
- flow->ip_proto = ip_proto;
- flow->thoff = (u16) nhoff;
-
- /* unless skb is set we don't need to record port info */
- if (skb)
- flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
- data, hlen);
+ key_basic->n_proto = proto;
+ key_basic->ip_proto = ip_proto;
+ key_control->thoff = (u16)nhoff;
+
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS)) {
+ key_ports = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ target_container);
+ key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
+ data, hlen);
+ }
return true;
}
@@ -267,27 +438,109 @@ static __always_inline void __flow_hash_secret_init(void)
net_get_random_once(&hashrnd, sizeof(hashrnd));
}
-static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c)
+static __always_inline u32 __flow_hash_words(u32 *words, u32 length, u32 keyval)
{
- __flow_hash_secret_init();
- return jhash_3words(a, b, c, hashrnd);
+ return jhash2(words, length, keyval);
}
-static inline u32 __flow_hash_from_keys(struct flow_keys *keys)
+static inline void *flow_keys_hash_start(struct flow_keys *flow)
{
- u32 hash;
+ BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32));
+ return (void *)flow + FLOW_KEYS_HASH_OFFSET;
+}
+
+static inline size_t flow_keys_hash_length(struct flow_keys *flow)
+{
+ size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
+ BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
+ BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
+ sizeof(*flow) - sizeof(flow->addrs));
+
+ switch (flow->control.addr_type) {
+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+ diff -= sizeof(flow->addrs.v4addrs);
+ break;
+ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+ diff -= sizeof(flow->addrs.v6addrs);
+ break;
+ case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
+ diff -= sizeof(flow->addrs.tipcaddrs);
+ break;
+ }
+ return (sizeof(*flow) - diff) / sizeof(u32);
+}
+
+__be32 flow_get_u32_src(const struct flow_keys *flow)
+{
+ switch (flow->control.addr_type) {
+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+ return flow->addrs.v4addrs.src;
+ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+ return (__force __be32)ipv6_addr_hash(
+ &flow->addrs.v6addrs.src);
+ case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
+ return flow->addrs.tipcaddrs.srcnode;
+ default:
+ return 0;
+ }
+}
+EXPORT_SYMBOL(flow_get_u32_src);
- /* get a consistent hash (same value on both flow directions) */
- if (((__force u32)keys->dst < (__force u32)keys->src) ||
- (((__force u32)keys->dst == (__force u32)keys->src) &&
- ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) {
- swap(keys->dst, keys->src);
- swap(keys->port16[0], keys->port16[1]);
+__be32 flow_get_u32_dst(const struct flow_keys *flow)
+{
+ switch (flow->control.addr_type) {
+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+ return flow->addrs.v4addrs.dst;
+ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+ return (__force __be32)ipv6_addr_hash(
+ &flow->addrs.v6addrs.dst);
+ default:
+ return 0;
}
+}
+EXPORT_SYMBOL(flow_get_u32_dst);
- hash = __flow_hash_3words((__force u32)keys->dst,
- (__force u32)keys->src,
- (__force u32)keys->ports);
+static inline void __flow_hash_consistentify(struct flow_keys *keys)
+{
+ int addr_diff, i;
+
+ switch (keys->control.addr_type) {
+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+ addr_diff = (__force u32)keys->addrs.v4addrs.dst -
+ (__force u32)keys->addrs.v4addrs.src;
+ if ((addr_diff < 0) ||
+ (addr_diff == 0 &&
+ ((__force u16)keys->ports.dst <
+ (__force u16)keys->ports.src))) {
+ swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
+ swap(keys->ports.src, keys->ports.dst);
+ }
+ break;
+ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+ addr_diff = memcmp(&keys->addrs.v6addrs.dst,
+ &keys->addrs.v6addrs.src,
+ sizeof(keys->addrs.v6addrs.dst));
+ if ((addr_diff < 0) ||
+ (addr_diff == 0 &&
+ ((__force u16)keys->ports.dst <
+ (__force u16)keys->ports.src))) {
+ for (i = 0; i < 4; i++)
+ swap(keys->addrs.v6addrs.src.s6_addr32[i],
+ keys->addrs.v6addrs.dst.s6_addr32[i]);
+ swap(keys->ports.src, keys->ports.dst);
+ }
+ break;
+ }
+}
+
+static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
+{
+ u32 hash;
+
+ __flow_hash_consistentify(keys);
+
+ hash = __flow_hash_words((u32 *)flow_keys_hash_start(keys),
+ flow_keys_hash_length(keys), keyval);
if (!hash)
hash = 1;
@@ -296,12 +549,52 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys)
u32 flow_hash_from_keys(struct flow_keys *keys)
{
- return __flow_hash_from_keys(keys);
+ __flow_hash_secret_init();
+ return __flow_hash_from_keys(keys, hashrnd);
}
EXPORT_SYMBOL(flow_hash_from_keys);
-/*
- * __skb_get_hash: calculate a flow hash based on src/dst addresses
+static inline u32 ___skb_get_hash(const struct sk_buff *skb,
+ struct flow_keys *keys, u32 keyval)
+{
+ if (!skb_flow_dissect_flow_keys(skb, keys))
+ return 0;
+
+ return __flow_hash_from_keys(keys, keyval);
+}
+
+struct _flow_keys_digest_data {
+ __be16 n_proto;
+ u8 ip_proto;
+ u8 padding;
+ __be32 ports;
+ __be32 src;
+ __be32 dst;
+};
+
+void make_flow_keys_digest(struct flow_keys_digest *digest,
+ const struct flow_keys *flow)
+{
+ struct _flow_keys_digest_data *data =
+ (struct _flow_keys_digest_data *)digest;
+
+ BUILD_BUG_ON(sizeof(*data) > sizeof(*digest));
+
+ memset(digest, 0, sizeof(*digest));
+
+ data->n_proto = flow->basic.n_proto;
+ data->ip_proto = flow->basic.ip_proto;
+ data->ports = flow->ports.ports;
+ data->src = flow->addrs.v4addrs.src;
+ data->dst = flow->addrs.v4addrs.dst;
+}
+EXPORT_SYMBOL(make_flow_keys_digest);
+
+/**
+ * __skb_get_hash: calculate a flow hash
+ * @skb: sk_buff to calculate flow hash from
+ *
+ * This function calculates a flow hash based on src/dst addresses
* and src/dst port numbers. Sets hash in skb to non-zero hash value
* on success, zero indicates no valid hash. Also, sets l4_hash in skb
* if hash is a canonical 4-tuple hash over transport ports.
@@ -309,53 +602,34 @@ EXPORT_SYMBOL(flow_hash_from_keys);
void __skb_get_hash(struct sk_buff *skb)
{
struct flow_keys keys;
+ u32 hash;
- if (!skb_flow_dissect(skb, &keys))
- return;
+ __flow_hash_secret_init();
- if (keys.ports)
+ hash = ___skb_get_hash(skb, &keys, hashrnd);
+ if (!hash)
+ return;
+ if (keys.ports.ports)
skb->l4_hash = 1;
-
skb->sw_hash = 1;
-
- skb->hash = __flow_hash_from_keys(&keys);
+ skb->hash = hash;
}
EXPORT_SYMBOL(__skb_get_hash);
-/*
- * Returns a Tx hash based on the given packet descriptor a Tx queues' number
- * to be used as a distribution range.
- */
-u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
- unsigned int num_tx_queues)
+__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
{
- u32 hash;
- u16 qoffset = 0;
- u16 qcount = num_tx_queues;
-
- if (skb_rx_queue_recorded(skb)) {
- hash = skb_get_rx_queue(skb);
- while (unlikely(hash >= num_tx_queues))
- hash -= num_tx_queues;
- return hash;
- }
-
- if (dev->num_tc) {
- u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
- qoffset = dev->tc_to_txq[tc].offset;
- qcount = dev->tc_to_txq[tc].count;
- }
+ struct flow_keys keys;
- return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
+ return ___skb_get_hash(skb, &keys, perturb);
}
-EXPORT_SYMBOL(__skb_tx_hash);
+EXPORT_SYMBOL(skb_get_hash_perturb);
u32 __skb_get_poff(const struct sk_buff *skb, void *data,
const struct flow_keys *keys, int hlen)
{
- u32 poff = keys->thoff;
+ u32 poff = keys->control.thoff;
- switch (keys->ip_proto) {
+ switch (keys->basic.ip_proto) {
case IPPROTO_TCP: {
/* access doff as u8 to avoid unaligned access */
const u8 *doff;
@@ -396,8 +670,12 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
return poff;
}
-/* skb_get_poff() returns the offset to the payload as far as it could
- * be dissected. The main user is currently BPF, so that we can dynamically
+/**
+ * skb_get_poff - get the offset to the payload
+ * @skb: sk_buff to get the payload offset from
+ *
+ * The function will get the offset to the payload as far as it could
+ * be dissected. The main user is currently BPF, so that we can dynamically
* truncate packets without needing to push actual payload to the user
* space and can analyze headers only, instead.
*/
@@ -405,86 +683,76 @@ u32 skb_get_poff(const struct sk_buff *skb)
{
struct flow_keys keys;
- if (!skb_flow_dissect(skb, &keys))
+ if (!skb_flow_dissect_flow_keys(skb, &keys))
return 0;
return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
}
-static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+static const struct flow_dissector_key flow_keys_dissector_keys[] = {
+ {
+ .key_id = FLOW_DISSECTOR_KEY_CONTROL,
+ .offset = offsetof(struct flow_keys, control),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_BASIC,
+ .offset = offsetof(struct flow_keys, basic),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ .offset = offsetof(struct flow_keys, addrs.v4addrs),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ .offset = offsetof(struct flow_keys, addrs.v6addrs),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS,
+ .offset = offsetof(struct flow_keys, addrs.tipcaddrs),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_PORTS,
+ .offset = offsetof(struct flow_keys, ports),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_VLANID,
+ .offset = offsetof(struct flow_keys, tags),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
+ .offset = offsetof(struct flow_keys, tags),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
+ .offset = offsetof(struct flow_keys, keyid),
+ },
+};
+
+static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
+ {
+ .key_id = FLOW_DISSECTOR_KEY_CONTROL,
+ .offset = offsetof(struct flow_keys, control),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_BASIC,
+ .offset = offsetof(struct flow_keys, basic),
+ },
+};
+
+struct flow_dissector flow_keys_dissector __read_mostly;
+EXPORT_SYMBOL(flow_keys_dissector);
+
+struct flow_dissector flow_keys_buf_dissector __read_mostly;
+
+static int __init init_default_flow_dissectors(void)
{
-#ifdef CONFIG_XPS
- struct xps_dev_maps *dev_maps;
- struct xps_map *map;
- int queue_index = -1;
-
- rcu_read_lock();
- dev_maps = rcu_dereference(dev->xps_maps);
- if (dev_maps) {
- map = rcu_dereference(
- dev_maps->cpu_map[skb->sender_cpu - 1]);
- if (map) {
- if (map->len == 1)
- queue_index = map->queues[0];
- else
- queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
- map->len)];
- if (unlikely(queue_index >= dev->real_num_tx_queues))
- queue_index = -1;
- }
- }
- rcu_read_unlock();
-
- return queue_index;
-#else
- return -1;
-#endif
-}
-
-static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
-{
- struct sock *sk = skb->sk;
- int queue_index = sk_tx_queue_get(sk);
-
- if (queue_index < 0 || skb->ooo_okay ||
- queue_index >= dev->real_num_tx_queues) {
- int new_index = get_xps_queue(dev, skb);
- if (new_index < 0)
- new_index = skb_tx_hash(dev, skb);
-
- if (queue_index != new_index && sk &&
- rcu_access_pointer(sk->sk_dst_cache))
- sk_tx_queue_set(sk, new_index);
-
- queue_index = new_index;
- }
-
- return queue_index;
+ skb_flow_dissector_init(&flow_keys_dissector,
+ flow_keys_dissector_keys,
+ ARRAY_SIZE(flow_keys_dissector_keys));
+ skb_flow_dissector_init(&flow_keys_buf_dissector,
+ flow_keys_buf_dissector_keys,
+ ARRAY_SIZE(flow_keys_buf_dissector_keys));
+ return 0;
}
-struct netdev_queue *netdev_pick_tx(struct net_device *dev,
- struct sk_buff *skb,
- void *accel_priv)
-{
- int queue_index = 0;
-
-#ifdef CONFIG_XPS
- if (skb->sender_cpu == 0)
- skb->sender_cpu = raw_smp_processor_id() + 1;
-#endif
-
- if (dev->real_num_tx_queues != 1) {
- const struct net_device_ops *ops = dev->netdev_ops;
- if (ops->ndo_select_queue)
- queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
- __netdev_pick_tx);
- else
- queue_index = __netdev_pick_tx(dev, skb);
-
- if (!accel_priv)
- queue_index = netdev_cap_txqueue(dev, queue_index);
- }
-
- skb_set_queue_mapping(skb, queue_index);
- return netdev_get_tx_queue(dev, queue_index);
-}
+late_initcall_sync(init_default_flow_dissectors);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 9dfb88a93..92d886f4a 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -66,7 +66,7 @@
NOTES.
- * avbps is scaled by 2^5, avpps is scaled by 2^10.
+ * avbps and avpps are scaled by 2^5.
* both values are reported as 32 bit unsigned values. bps can
overflow for fast links : max speed being 34360Mbit/sec
* Minimal interval is HZ/4=250msec (it is the greatest common divisor
@@ -85,10 +85,10 @@ struct gen_estimator
struct gnet_stats_rate_est64 *rate_est;
spinlock_t *stats_lock;
int ewma_log;
+ u32 last_packets;
+ unsigned long avpps;
u64 last_bytes;
u64 avbps;
- u32 last_packets;
- u32 avpps;
struct rcu_head e_rcu;
struct rb_node node;
struct gnet_stats_basic_cpu __percpu *cpu_bstats;
@@ -118,8 +118,8 @@ static void est_timer(unsigned long arg)
rcu_read_lock();
list_for_each_entry_rcu(e, &elist[idx].list, list) {
struct gnet_stats_basic_packed b = {0};
+ unsigned long rate;
u64 brate;
- u32 rate;
spin_lock(e->stats_lock);
read_lock(&est_lock);
@@ -133,10 +133,11 @@ static void est_timer(unsigned long arg)
e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
e->rate_est->bps = (e->avbps+0xF)>>5;
- rate = (b.packets - e->last_packets)<<(12 - idx);
+ rate = b.packets - e->last_packets;
+ rate <<= (7 - idx);
e->last_packets = b.packets;
e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
- e->rate_est->pps = (e->avpps+0x1FF)>>10;
+ e->rate_est->pps = (e->avpps + 0xF) >> 5;
skip:
read_unlock(&est_lock);
spin_unlock(e->stats_lock);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2237c1b3c..84195dacb 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -913,6 +913,7 @@ static void neigh_timer_handler(unsigned long arg)
neigh->nud_state = NUD_PROBE;
neigh->updated = jiffies;
atomic_set(&neigh->probes, 0);
+ notify = 1;
next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
}
} else {
@@ -1155,6 +1156,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
if (new != old) {
neigh_del_timer(neigh);
+ if (new & NUD_PROBE)
+ atomic_set(&neigh->probes, 0);
if (new & NUD_IN_TIMER)
neigh_add_timer(neigh, (jiffies +
((new & NUD_REACHABLE) ?
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 4238d6da5..18b34d771 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -458,11 +458,15 @@ static ssize_t phys_switch_id_show(struct device *dev,
return restart_syscall();
if (dev_isalive(netdev)) {
- struct netdev_phys_item_id ppid;
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ .flags = SWITCHDEV_F_NO_RECURSE,
+ };
- ret = netdev_switch_parent_id_get(netdev, &ppid);
+ ret = switchdev_port_attr_get(netdev, &attr);
if (!ret)
- ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id);
+ ret = sprintf(buf, "%*phN\n", attr.u.ppid.id_len,
+ attr.u.ppid.id);
}
rtnl_unlock();
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 572af0011..2c2eb1b62 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -147,24 +147,17 @@ static void ops_free_list(const struct pernet_operations *ops,
}
}
-static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd,
- int id);
+/* should be called with nsid_lock held */
static int alloc_netid(struct net *net, struct net *peer, int reqid)
{
- int min = 0, max = 0, id;
-
- ASSERT_RTNL();
+ int min = 0, max = 0;
if (reqid >= 0) {
min = reqid;
max = reqid + 1;
}
- id = idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL);
- if (id >= 0)
- rtnl_net_notifyid(net, peer, RTM_NEWNSID, id);
-
- return id;
+ return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC);
}
/* This function is used by idr_for_each(). If net is equal to peer, the
@@ -180,11 +173,16 @@ static int net_eq_idr(int id, void *net, void *peer)
return 0;
}
-static int __peernet2id(struct net *net, struct net *peer, bool alloc)
+/* Should be called with nsid_lock held. If a new id is assigned, the bool alloc
+ * is set to true, thus the caller knows that the new id must be notified via
+ * rtnl.
+ */
+static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc)
{
int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
+ bool alloc_it = *alloc;
- ASSERT_RTNL();
+ *alloc = false;
/* Magic value for id 0. */
if (id == NET_ID_ZERO)
@@ -192,36 +190,77 @@ static int __peernet2id(struct net *net, struct net *peer, bool alloc)
if (id > 0)
return id;
- if (alloc)
- return alloc_netid(net, peer, -1);
+ if (alloc_it) {
+ id = alloc_netid(net, peer, -1);
+ *alloc = true;
+ return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
+ }
+
+ return NETNSA_NSID_NOT_ASSIGNED;
+}
+
+/* should be called with nsid_lock held */
+static int __peernet2id(struct net *net, struct net *peer)
+{
+ bool no = false;
- return -ENOENT;
+ return __peernet2id_alloc(net, peer, &no);
}
+static void rtnl_net_notifyid(struct net *net, int cmd, int id);
/* This function returns the id of a peer netns. If no id is assigned, one will
* be allocated and returned.
*/
+int peernet2id_alloc(struct net *net, struct net *peer)
+{
+ unsigned long flags;
+ bool alloc;
+ int id;
+
+ spin_lock_irqsave(&net->nsid_lock, flags);
+ alloc = atomic_read(&peer->count) == 0 ? false : true;
+ id = __peernet2id_alloc(net, peer, &alloc);
+ spin_unlock_irqrestore(&net->nsid_lock, flags);
+ if (alloc && id >= 0)
+ rtnl_net_notifyid(net, RTM_NEWNSID, id);
+ return id;
+}
+EXPORT_SYMBOL(peernet2id_alloc);
+
+/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
{
- bool alloc = atomic_read(&peer->count) == 0 ? false : true;
+ unsigned long flags;
int id;
- id = __peernet2id(net, peer, alloc);
- return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
+ spin_lock_irqsave(&net->nsid_lock, flags);
+ id = __peernet2id(net, peer);
+ spin_unlock_irqrestore(&net->nsid_lock, flags);
+ return id;
+}
+
+/* This function returns true is the peer netns has an id assigned into the
+ * current netns.
+ */
+bool peernet_has_id(struct net *net, struct net *peer)
+{
+ return peernet2id(net, peer) >= 0;
}
-EXPORT_SYMBOL(peernet2id);
struct net *get_net_ns_by_id(struct net *net, int id)
{
+ unsigned long flags;
struct net *peer;
if (id < 0)
return NULL;
rcu_read_lock();
+ spin_lock_irqsave(&net->nsid_lock, flags);
peer = idr_find(&net->netns_ids, id);
if (peer)
get_net(peer);
+ spin_unlock_irqrestore(&net->nsid_lock, flags);
rcu_read_unlock();
return peer;
@@ -242,6 +281,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
net->dev_base_seq = 1;
net->user_ns = user_ns;
idr_init(&net->netns_ids);
+ spin_lock_init(&net->nsid_lock);
list_for_each_entry(ops, &pernet_list, list) {
error = ops_init(ops, net);
@@ -362,14 +402,19 @@ static void cleanup_net(struct work_struct *work)
list_del_rcu(&net->list);
list_add_tail(&net->exit_list, &net_exit_list);
for_each_net(tmp) {
- int id = __peernet2id(tmp, net, false);
+ int id;
- if (id >= 0) {
- rtnl_net_notifyid(tmp, net, RTM_DELNSID, id);
+ spin_lock_irq(&tmp->nsid_lock);
+ id = __peernet2id(tmp, net);
+ if (id >= 0)
idr_remove(&tmp->netns_ids, id);
- }
+ spin_unlock_irq(&tmp->nsid_lock);
+ if (id >= 0)
+ rtnl_net_notifyid(tmp, RTM_DELNSID, id);
}
+ spin_lock_irq(&net->nsid_lock);
idr_destroy(&net->netns_ids);
+ spin_unlock_irq(&net->nsid_lock);
}
rtnl_unlock();
@@ -497,6 +542,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
+ unsigned long flags;
struct net *peer;
int nsid, err;
@@ -517,14 +563,19 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
if (IS_ERR(peer))
return PTR_ERR(peer);
- if (__peernet2id(net, peer, false) >= 0) {
+ spin_lock_irqsave(&net->nsid_lock, flags);
+ if (__peernet2id(net, peer) >= 0) {
+ spin_unlock_irqrestore(&net->nsid_lock, flags);
err = -EEXIST;
goto out;
}
err = alloc_netid(net, peer, nsid);
- if (err > 0)
+ spin_unlock_irqrestore(&net->nsid_lock, flags);
+ if (err >= 0) {
+ rtnl_net_notifyid(net, RTM_NEWNSID, err);
err = 0;
+ }
out:
put_net(peer);
return err;
@@ -538,14 +589,10 @@ static int rtnl_net_get_size(void)
}
static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
- int cmd, struct net *net, struct net *peer,
- int nsid)
+ int cmd, struct net *net, int nsid)
{
struct nlmsghdr *nlh;
struct rtgenmsg *rth;
- int id;
-
- ASSERT_RTNL();
nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
if (!nlh)
@@ -554,14 +601,7 @@ static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
rth = nlmsg_data(nlh);
rth->rtgen_family = AF_UNSPEC;
- if (nsid >= 0) {
- id = nsid;
- } else {
- id = __peernet2id(net, peer, false);
- if (id < 0)
- id = NETNSA_NSID_NOT_ASSIGNED;
- }
- if (nla_put_s32(skb, NETNSA_NSID, id))
+ if (nla_put_s32(skb, NETNSA_NSID, nsid))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -578,7 +618,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlattr *tb[NETNSA_MAX + 1];
struct sk_buff *msg;
struct net *peer;
- int err;
+ int err, id;
err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
rtnl_net_policy);
@@ -600,8 +640,9 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
goto out;
}
+ id = peernet2id(net, peer);
err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
- RTM_NEWNSID, net, peer, -1);
+ RTM_NEWNSID, net, id);
if (err < 0)
goto err_out;
@@ -633,7 +674,7 @@ static int rtnl_net_dumpid_one(int id, void *peer, void *data)
ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid,
net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWNSID, net_cb->net, peer, id);
+ RTM_NEWNSID, net_cb->net, id);
if (ret < 0)
return ret;
@@ -652,17 +693,17 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
.idx = 0,
.s_idx = cb->args[0],
};
+ unsigned long flags;
- ASSERT_RTNL();
-
+ spin_lock_irqsave(&net->nsid_lock, flags);
idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
+ spin_unlock_irqrestore(&net->nsid_lock, flags);
cb->args[0] = net_cb.idx;
return skb->len;
}
-static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd,
- int id)
+static void rtnl_net_notifyid(struct net *net, int cmd, int id)
{
struct sk_buff *msg;
int err = -ENOMEM;
@@ -671,7 +712,7 @@ static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd,
if (!msg)
goto out;
- err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, peer, id);
+ err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id);
if (err < 0)
goto err_out;
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 1f2a126f4..6441f47b1 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -23,7 +23,8 @@ static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state
struct cgroup_cls_state *task_cls_state(struct task_struct *p)
{
- return css_cls_state(task_css(p, net_cls_cgrp_id));
+ return css_cls_state(task_css_check(p, net_cls_cgrp_id,
+ rcu_read_lock_bh_held()));
}
EXPORT_SYMBOL_GPL(task_cls_state);
diff --git a/net/core/netevent.c b/net/core/netevent.c
index f17ccd291..8b3bc4fac 100644
--- a/net/core/netevent.c
+++ b/net/core/netevent.c
@@ -31,10 +31,7 @@ static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain);
*/
int register_netevent_notifier(struct notifier_block *nb)
{
- int err;
-
- err = atomic_notifier_chain_register(&netevent_notif_chain, nb);
- return err;
+ return atomic_notifier_chain_register(&netevent_notif_chain, nb);
}
EXPORT_SYMBOL_GPL(register_netevent_notifier);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 508155b28..1cbd20919 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -177,7 +177,7 @@
#include <asm/dma.h>
#include <asm/div64.h> /* do_div */
-#define VERSION "2.74"
+#define VERSION "2.75"
#define IP_NAME_SZ 32
#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
#define MPLS_STACK_BOTTOM htonl(0x00000100)
@@ -210,6 +210,10 @@
#define T_REMDEVALL (1<<2) /* Remove all devs */
#define T_REMDEV (1<<3) /* Remove one dev */
+/* Xmit modes */
+#define M_START_XMIT 0 /* Default normal TX */
+#define M_NETIF_RECEIVE 1 /* Inject packets into stack */
+
/* If lock -- protects updating of if_list */
#define if_lock(t) spin_lock(&(t->if_lock));
#define if_unlock(t) spin_unlock(&(t->if_lock));
@@ -251,13 +255,14 @@ struct pktgen_dev {
* we will do a random selection from within the range.
*/
__u32 flags;
- int removal_mark; /* non-zero => the device is marked for
- * removal by worker thread */
-
+ int xmit_mode;
int min_pkt_size;
int max_pkt_size;
int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */
int nfrags;
+ int removal_mark; /* non-zero => the device is marked for
+ * removal by worker thread */
+
struct page *page;
u64 delay; /* nano-seconds */
@@ -507,7 +512,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
pktgen_reset_all_threads(pn);
else
- pr_warn("Unknown command: %s\n", data);
+ return -EINVAL;
return count;
}
@@ -567,7 +572,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
" dst_min: %s dst_max: %s\n",
pkt_dev->dst_min, pkt_dev->dst_max);
seq_printf(seq,
- " src_min: %s src_max: %s\n",
+ " src_min: %s src_max: %s\n",
pkt_dev->src_min, pkt_dev->src_max);
}
@@ -620,6 +625,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->node >= 0)
seq_printf(seq, " node: %d\n", pkt_dev->node);
+ if (pkt_dev->xmit_mode == M_NETIF_RECEIVE)
+ seq_puts(seq, " xmit_mode: netif_receive\n");
+
seq_puts(seq, " Flags: ");
if (pkt_dev->flags & F_IPV6)
@@ -1081,7 +1089,8 @@ static ssize_t pktgen_if_write(struct file *file,
if (len < 0)
return len;
if ((value > 0) &&
- (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
+ ((pkt_dev->xmit_mode == M_NETIF_RECEIVE) ||
+ !(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
return -ENOTSUPP;
i += len;
pkt_dev->clone_skb = value;
@@ -1134,7 +1143,7 @@ static ssize_t pktgen_if_write(struct file *file,
return len;
i += len;
- if ((value > 1) &&
+ if ((value > 1) && (pkt_dev->xmit_mode == M_START_XMIT) &&
(!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
return -ENOTSUPP;
pkt_dev->burst = value < 1 ? 1 : value;
@@ -1160,6 +1169,45 @@ static ssize_t pktgen_if_write(struct file *file,
sprintf(pg_result, "ERROR: node not possible");
return count;
}
+ if (!strcmp(name, "xmit_mode")) {
+ char f[32];
+
+ memset(f, 0, 32);
+ len = strn_len(&user_buffer[i], sizeof(f) - 1);
+ if (len < 0)
+ return len;
+
+ if (copy_from_user(f, &user_buffer[i], len))
+ return -EFAULT;
+ i += len;
+
+ if (strcmp(f, "start_xmit") == 0) {
+ pkt_dev->xmit_mode = M_START_XMIT;
+ } else if (strcmp(f, "netif_receive") == 0) {
+ /* clone_skb set earlier, not supported in this mode */
+ if (pkt_dev->clone_skb > 0)
+ return -ENOTSUPP;
+
+ pkt_dev->xmit_mode = M_NETIF_RECEIVE;
+
+ /* make sure new packet is allocated every time
+ * pktgen_xmit() is called
+ */
+ pkt_dev->last_ok = 1;
+
+ /* override clone_skb if user passed default value
+ * at module loading time
+ */
+ pkt_dev->clone_skb = 0;
+ } else {
+ sprintf(pg_result,
+ "xmit_mode -:%s:- unknown\nAvailable modes: %s",
+ f, "start_xmit, netif_receive\n");
+ return count;
+ }
+ sprintf(pg_result, "OK: xmit_mode=%s", f);
+ return count;
+ }
if (!strcmp(name, "flag")) {
char f[32];
memset(f, 0, 32);
@@ -1267,6 +1315,9 @@ static ssize_t pktgen_if_write(struct file *file,
else if (strcmp(f, "NO_TIMESTAMP") == 0)
pkt_dev->flags |= F_NO_TIMESTAMP;
+ else if (strcmp(f, "!NO_TIMESTAMP") == 0)
+ pkt_dev->flags &= ~F_NO_TIMESTAMP;
+
else {
sprintf(pg_result,
"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
@@ -2212,8 +2263,6 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
do {
set_current_state(TASK_INTERRUPTIBLE);
hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
- if (!hrtimer_active(&t.timer))
- t.task = NULL;
if (likely(t.task))
schedule();
@@ -2594,9 +2643,9 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x;
int nhead = 0;
if (x) {
- int ret;
- __u8 *eth;
+ struct ethhdr *eth;
struct iphdr *iph;
+ int ret;
nhead = x->props.header_len - skb_headroom(skb);
if (nhead > 0) {
@@ -2616,9 +2665,9 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
goto err;
}
/* restore ll */
- eth = (__u8 *) skb_push(skb, ETH_HLEN);
- memcpy(eth, pkt_dev->hh, 12);
- *(u16 *) &eth[12] = protocol;
+ eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);
+ memcpy(eth, pkt_dev->hh, 2 * ETH_ALEN);
+ eth->h_proto = protocol;
/* Update IPv4 header len as well as checksum value */
iph = ip_hdr(skb);
@@ -3317,6 +3366,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
unsigned int burst = ACCESS_ONCE(pkt_dev->burst);
struct net_device *odev = pkt_dev->odev;
struct netdev_queue *txq;
+ struct sk_buff *skb;
int ret;
/* If device is offline, then don't send */
@@ -3354,6 +3404,37 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
if (pkt_dev->delay && pkt_dev->last_ok)
spin(pkt_dev, pkt_dev->next_tx);
+ if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) {
+ skb = pkt_dev->skb;
+ skb->protocol = eth_type_trans(skb, skb->dev);
+ atomic_add(burst, &skb->users);
+ local_bh_disable();
+ do {
+ ret = netif_receive_skb(skb);
+ if (ret == NET_RX_DROP)
+ pkt_dev->errors++;
+ pkt_dev->sofar++;
+ pkt_dev->seq_num++;
+ if (atomic_read(&skb->users) != burst) {
+ /* skb was queued by rps/rfs or taps,
+ * so cannot reuse this skb
+ */
+ atomic_sub(burst - 1, &skb->users);
+ /* get out of the loop and wait
+ * until skb is consumed
+ */
+ break;
+ }
+ /* skb was 'freed' by stack, so clean few
+ * bits and reuse it
+ */
+#ifdef CONFIG_NET_CLS_ACT
+ skb->tc_verd = 0; /* reset reclass/redir ttl */
+#endif
+ } while (--burst > 0);
+ goto out; /* Skips xmit_mode M_START_XMIT */
+ }
+
txq = skb_get_tx_queue(odev, pkt_dev->skb);
local_bh_disable();
@@ -3401,6 +3482,7 @@ xmit_more:
unlock:
HARD_TX_UNLOCK(odev, txq);
+out:
local_bh_enable();
/* If pkt_dev->count is zero, then run forever */
@@ -3432,8 +3514,6 @@ static int pktgen_thread_worker(void *arg)
set_freezable();
- __set_current_state(TASK_RUNNING);
-
while (!kthread_should_stop()) {
pkt_dev = next_to_run(t);
@@ -3478,7 +3558,6 @@ static int pktgen_thread_worker(void *arg)
try_to_freeze();
}
- set_current_state(TASK_INTERRUPTIBLE);
pr_debug("%s stopping all device\n", t->tsk->comm);
pktgen_stop(t);
@@ -3489,13 +3568,6 @@ static int pktgen_thread_worker(void *arg)
pr_debug("%s removing thread\n", t->tsk->comm);
pktgen_rem_thread(t);
- /* Wait for kthread_stop */
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- }
- __set_current_state(TASK_RUNNING);
-
return 0;
}
@@ -3687,6 +3759,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn)
}
t->net = pn;
+ get_task_struct(p);
wake_up_process(p);
wait_for_completion(&t->start_done);
@@ -3809,6 +3882,7 @@ static void __net_exit pg_net_exit(struct net *net)
t = list_entry(q, struct pktgen_thread, th_list);
list_del(&t->th_list);
kthread_stop(t->tsk);
+ put_task_struct(t->tsk);
kfree(t);
}
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 87b22c0bc..b42f0e26f 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -103,10 +103,16 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
spin_lock_bh(&queue->syn_wait_lock);
while ((req = lopt->syn_table[i]) != NULL) {
lopt->syn_table[i] = req->dl_next;
+ /* Because of following del_timer_sync(),
+ * we must release the spinlock here
+ * or risk a dead lock.
+ */
+ spin_unlock_bh(&queue->syn_wait_lock);
atomic_inc(&lopt->qlen_dec);
- if (del_timer(&req->rsk_timer))
+ if (del_timer_sync(&req->rsk_timer))
reqsk_put(req);
reqsk_put(req);
+ spin_lock_bh(&queue->syn_wait_lock);
}
spin_unlock_bh(&queue->syn_wait_lock);
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8de368240..dc004b1e1 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -819,7 +819,19 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
nla_total_size(sizeof(struct ifla_vf_rate)) +
nla_total_size(sizeof(struct ifla_vf_link_state)) +
- nla_total_size(sizeof(struct ifla_vf_rss_query_en)));
+ nla_total_size(sizeof(struct ifla_vf_rss_query_en)) +
+ /* IFLA_VF_STATS_RX_PACKETS */
+ nla_total_size(sizeof(__u64)) +
+ /* IFLA_VF_STATS_TX_PACKETS */
+ nla_total_size(sizeof(__u64)) +
+ /* IFLA_VF_STATS_RX_BYTES */
+ nla_total_size(sizeof(__u64)) +
+ /* IFLA_VF_STATS_TX_BYTES */
+ nla_total_size(sizeof(__u64)) +
+ /* IFLA_VF_STATS_BROADCAST */
+ nla_total_size(sizeof(__u64)) +
+ /* IFLA_VF_STATS_MULTICAST */
+ nla_total_size(sizeof(__u64)));
return size;
} else
return 0;
@@ -1004,16 +1016,20 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev)
static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
{
int err;
- struct netdev_phys_item_id psid;
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ .flags = SWITCHDEV_F_NO_RECURSE,
+ };
- err = netdev_switch_parent_id_get(dev, &psid);
+ err = switchdev_port_attr_get(dev, &attr);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
return err;
}
- if (nla_put(skb, IFLA_PHYS_SWITCH_ID, psid.id_len, psid.id))
+ if (nla_put(skb, IFLA_PHYS_SWITCH_ID, attr.u.ppid.id_len,
+ attr.u.ppid.id))
return -EMSGSIZE;
return 0;
@@ -1119,7 +1135,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
&& (ext_filter_mask & RTEXT_FILTER_VF)) {
int i;
- struct nlattr *vfinfo, *vf;
+ struct nlattr *vfinfo, *vf, *vfstats;
int num_vfs = dev_num_vf(dev->dev.parent);
vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
@@ -1134,6 +1150,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
struct ifla_vf_spoofchk vf_spoofchk;
struct ifla_vf_link_state vf_linkstate;
struct ifla_vf_rss_query_en vf_rss_query_en;
+ struct ifla_vf_stats vf_stats;
/*
* Not all SR-IOV capable drivers support the
@@ -1186,6 +1203,30 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
sizeof(vf_rss_query_en),
&vf_rss_query_en))
goto nla_put_failure;
+ memset(&vf_stats, 0, sizeof(vf_stats));
+ if (dev->netdev_ops->ndo_get_vf_stats)
+ dev->netdev_ops->ndo_get_vf_stats(dev, i,
+ &vf_stats);
+ vfstats = nla_nest_start(skb, IFLA_VF_STATS);
+ if (!vfstats) {
+ nla_nest_cancel(skb, vf);
+ nla_nest_cancel(skb, vfinfo);
+ goto nla_put_failure;
+ }
+ if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
+ vf_stats.rx_packets) ||
+ nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
+ vf_stats.tx_packets) ||
+ nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
+ vf_stats.rx_bytes) ||
+ nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
+ vf_stats.tx_bytes) ||
+ nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
+ vf_stats.broadcast) ||
+ nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
+ vf_stats.multicast))
+ goto nla_put_failure;
+ nla_nest_end(skb, vfstats);
nla_nest_end(skb, vf);
}
nla_nest_end(skb, vfinfo);
@@ -1204,7 +1245,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
if (!net_eq(dev_net(dev), link_net)) {
- int id = peernet2id(dev_net(dev), link_net);
+ int id = peernet2id_alloc(dev_net(dev), link_net);
if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
goto nla_put_failure;
@@ -1287,10 +1328,6 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
[IFLA_INFO_SLAVE_DATA] = { .type = NLA_NESTED },
};
-static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
- [IFLA_VF_INFO] = { .type = NLA_NESTED },
-};
-
static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
[IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) },
[IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) },
@@ -1299,6 +1336,16 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
[IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) },
[IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) },
[IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) },
+ [IFLA_VF_STATS] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = {
+ [IFLA_VF_STATS_RX_PACKETS] = { .type = NLA_U64 },
+ [IFLA_VF_STATS_TX_PACKETS] = { .type = NLA_U64 },
+ [IFLA_VF_STATS_RX_BYTES] = { .type = NLA_U64 },
+ [IFLA_VF_STATS_TX_BYTES] = { .type = NLA_U64 },
+ [IFLA_VF_STATS_BROADCAST] = { .type = NLA_U64 },
+ [IFLA_VF_STATS_MULTICAST] = { .type = NLA_U64 },
};
static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1437,96 +1484,98 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
return 0;
}
-static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
+static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
{
- int rem, err = -EINVAL;
- struct nlattr *vf;
const struct net_device_ops *ops = dev->netdev_ops;
+ int err = -EINVAL;
- nla_for_each_nested(vf, attr, rem) {
- switch (nla_type(vf)) {
- case IFLA_VF_MAC: {
- struct ifla_vf_mac *ivm;
- ivm = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_mac)
- err = ops->ndo_set_vf_mac(dev, ivm->vf,
- ivm->mac);
- break;
- }
- case IFLA_VF_VLAN: {
- struct ifla_vf_vlan *ivv;
- ivv = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_vlan)
- err = ops->ndo_set_vf_vlan(dev, ivv->vf,
- ivv->vlan,
- ivv->qos);
- break;
- }
- case IFLA_VF_TX_RATE: {
- struct ifla_vf_tx_rate *ivt;
- struct ifla_vf_info ivf;
- ivt = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_get_vf_config)
- err = ops->ndo_get_vf_config(dev, ivt->vf,
- &ivf);
- if (err)
- break;
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_rate)
- err = ops->ndo_set_vf_rate(dev, ivt->vf,
- ivf.min_tx_rate,
- ivt->rate);
- break;
- }
- case IFLA_VF_RATE: {
- struct ifla_vf_rate *ivt;
- ivt = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_rate)
- err = ops->ndo_set_vf_rate(dev, ivt->vf,
- ivt->min_tx_rate,
- ivt->max_tx_rate);
- break;
- }
- case IFLA_VF_SPOOFCHK: {
- struct ifla_vf_spoofchk *ivs;
- ivs = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_spoofchk)
- err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
- ivs->setting);
- break;
- }
- case IFLA_VF_LINK_STATE: {
- struct ifla_vf_link_state *ivl;
- ivl = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_link_state)
- err = ops->ndo_set_vf_link_state(dev, ivl->vf,
- ivl->link_state);
- break;
- }
- case IFLA_VF_RSS_QUERY_EN: {
- struct ifla_vf_rss_query_en *ivrssq_en;
+ if (tb[IFLA_VF_MAC]) {
+ struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]);
- ivrssq_en = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_rss_query_en)
- err = ops->ndo_set_vf_rss_query_en(dev,
- ivrssq_en->vf,
- ivrssq_en->setting);
- break;
- }
- default:
- err = -EINVAL;
- break;
- }
- if (err)
- break;
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_mac)
+ err = ops->ndo_set_vf_mac(dev, ivm->vf,
+ ivm->mac);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[IFLA_VF_VLAN]) {
+ struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]);
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_vlan)
+ err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan,
+ ivv->qos);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[IFLA_VF_TX_RATE]) {
+ struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]);
+ struct ifla_vf_info ivf;
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_get_vf_config)
+ err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf);
+ if (err < 0)
+ return err;
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_rate)
+ err = ops->ndo_set_vf_rate(dev, ivt->vf,
+ ivf.min_tx_rate,
+ ivt->rate);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[IFLA_VF_RATE]) {
+ struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]);
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_rate)
+ err = ops->ndo_set_vf_rate(dev, ivt->vf,
+ ivt->min_tx_rate,
+ ivt->max_tx_rate);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[IFLA_VF_SPOOFCHK]) {
+ struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]);
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_spoofchk)
+ err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
+ ivs->setting);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[IFLA_VF_LINK_STATE]) {
+ struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]);
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_link_state)
+ err = ops->ndo_set_vf_link_state(dev, ivl->vf,
+ ivl->link_state);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[IFLA_VF_RSS_QUERY_EN]) {
+ struct ifla_vf_rss_query_en *ivrssq_en;
+
+ err = -EOPNOTSUPP;
+ ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]);
+ if (ops->ndo_set_vf_rss_query_en)
+ err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf,
+ ivrssq_en->setting);
+ if (err < 0)
+ return err;
}
+
return err;
}
@@ -1722,14 +1771,21 @@ static int do_setlink(const struct sk_buff *skb,
}
if (tb[IFLA_VFINFO_LIST]) {
+ struct nlattr *vfinfo[IFLA_VF_MAX + 1];
struct nlattr *attr;
int rem;
+
nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
- if (nla_type(attr) != IFLA_VF_INFO) {
+ if (nla_type(attr) != IFLA_VF_INFO ||
+ nla_len(attr) < NLA_HDRLEN) {
err = -EINVAL;
goto errout;
}
- err = do_setvfinfo(dev, attr);
+ err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr,
+ ifla_vf_policy);
+ if (err < 0)
+ goto errout;
+ err = do_setvfinfo(dev, vfinfo);
if (err < 0)
goto errout;
status |= DO_SETLINK_NOTIFY;
@@ -1748,10 +1804,13 @@ static int do_setlink(const struct sk_buff *skb,
goto errout;
nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) {
- if (nla_type(attr) != IFLA_VF_PORT)
- continue;
- err = nla_parse_nested(port, IFLA_PORT_MAX,
- attr, ifla_port_policy);
+ if (nla_type(attr) != IFLA_VF_PORT ||
+ nla_len(attr) < NLA_HDRLEN) {
+ err = -EINVAL;
+ goto errout;
+ }
+ err = nla_parse_nested(port, IFLA_PORT_MAX, attr,
+ ifla_port_policy);
if (err < 0)
goto errout;
if (!port[IFLA_PORT_VF]) {
@@ -2857,7 +2916,11 @@ static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask,
int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct net_device *dev, u16 mode,
- u32 flags, u32 mask, int nlflags)
+ u32 flags, u32 mask, int nlflags,
+ u32 filter_mask,
+ int (*vlan_fill)(struct sk_buff *skb,
+ struct net_device *dev,
+ u32 filter_mask))
{
struct nlmsghdr *nlh;
struct ifinfomsg *ifm;
@@ -2865,6 +2928,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct nlattr *protinfo;
u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
+ int err = 0;
nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), nlflags);
if (nlh == NULL)
@@ -2905,6 +2969,13 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
goto nla_put_failure;
}
}
+ if (vlan_fill) {
+ err = vlan_fill(skb, dev, filter_mask);
+ if (err) {
+ nla_nest_cancel(skb, br_afspec);
+ goto nla_put_failure;
+ }
+ }
nla_nest_end(skb, br_afspec);
protinfo = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED);
@@ -2938,9 +3009,9 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
- return -EMSGSIZE;
+ return err ? err : -EMSGSIZE;
}
-EXPORT_SYMBOL(ndo_dflt_bridge_getlink);
+EXPORT_SYMBOL_GPL(ndo_dflt_bridge_getlink);
static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
{
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index d0c430921..2d49ceeea 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -254,7 +254,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
- secret[i] = net_secret[i] + daddr[i];
+ secret[i] = net_secret[i] + (__force u32)daddr[i];
secret[4] = net_secret[4] +
(((__force u16)sport << 16) + (__force u16)dport);
for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 41ec02242..7b84330e5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -340,101 +340,25 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
if (skb && frag_size) {
skb->head_frag = 1;
- if (virt_to_head_page(data)->pfmemalloc)
+ if (page_is_pfmemalloc(virt_to_head_page(data)))
skb->pfmemalloc = 1;
}
return skb;
}
EXPORT_SYMBOL(build_skb);
-struct netdev_alloc_cache {
- struct page_frag frag;
- /* we maintain a pagecount bias, so that we dont dirty cache line
- * containing page->_count every time we allocate a fragment.
- */
- unsigned int pagecnt_bias;
-};
-static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
-static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache);
-
-static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
- gfp_t gfp_mask)
-{
- const unsigned int order = NETDEV_FRAG_PAGE_MAX_ORDER;
- struct page *page = NULL;
- gfp_t gfp = gfp_mask;
-
- if (order) {
- gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
- __GFP_NOMEMALLOC;
- page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
- nc->frag.size = PAGE_SIZE << (page ? order : 0);
- }
-
- if (unlikely(!page))
- page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
-
- nc->frag.page = page;
-
- return page;
-}
-
-static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache,
- unsigned int fragsz, gfp_t gfp_mask)
-{
- struct netdev_alloc_cache *nc = this_cpu_ptr(cache);
- struct page *page = nc->frag.page;
- unsigned int size;
- int offset;
-
- if (unlikely(!page)) {
-refill:
- page = __page_frag_refill(nc, gfp_mask);
- if (!page)
- return NULL;
-
- /* if size can vary use frag.size else just use PAGE_SIZE */
- size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
-
- /* Even if we own the page, we do not use atomic_set().
- * This would break get_page_unless_zero() users.
- */
- atomic_add(size - 1, &page->_count);
-
- /* reset page count bias and offset to start of new frag */
- nc->pagecnt_bias = size;
- nc->frag.offset = size;
- }
-
- offset = nc->frag.offset - fragsz;
- if (unlikely(offset < 0)) {
- if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count))
- goto refill;
-
- /* if size can vary use frag.size else just use PAGE_SIZE */
- size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
-
- /* OK, page count is 0, we can safely set it */
- atomic_set(&page->_count, size);
-
- /* reset page count bias and offset to start of new frag */
- nc->pagecnt_bias = size;
- offset = size - fragsz;
- }
-
- nc->pagecnt_bias--;
- nc->frag.offset = offset;
-
- return page_address(page) + offset;
-}
+static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
+static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache);
static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
+ struct page_frag_cache *nc;
unsigned long flags;
void *data;
local_irq_save(flags);
- data = __alloc_page_frag(&netdev_alloc_cache, fragsz, gfp_mask);
+ nc = this_cpu_ptr(&netdev_alloc_cache);
+ data = __alloc_page_frag(nc, fragsz, gfp_mask);
local_irq_restore(flags);
return data;
}
@@ -454,7 +378,9 @@ EXPORT_SYMBOL(netdev_alloc_frag);
static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
- return __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask);
+ struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+
+ return __alloc_page_frag(nc, fragsz, gfp_mask);
}
void *napi_alloc_frag(unsigned int fragsz)
@@ -464,76 +390,70 @@ void *napi_alloc_frag(unsigned int fragsz)
EXPORT_SYMBOL(napi_alloc_frag);
/**
- * __alloc_rx_skb - allocate an skbuff for rx
+ * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
+ * @dev: network device to receive on
* @length: length to allocate
* @gfp_mask: get_free_pages mask, passed to alloc_skb
- * @flags: If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
- * allocations in case we have to fallback to __alloc_skb()
- * If SKB_ALLOC_NAPI is set, page fragment will be allocated
- * from napi_cache instead of netdev_cache.
*
* Allocate a new &sk_buff and assign it a usage count of one. The
- * buffer has unspecified headroom built in. Users should allocate
+ * buffer has NET_SKB_PAD headroom built in. Users should allocate
* the headroom they think they need without accounting for the
* built in space. The built in space is used for optimisations.
*
* %NULL is returned if there is no free memory.
*/
-static struct sk_buff *__alloc_rx_skb(unsigned int length, gfp_t gfp_mask,
- int flags)
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
+ gfp_t gfp_mask)
{
- struct sk_buff *skb = NULL;
- unsigned int fragsz = SKB_DATA_ALIGN(length) +
- SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ struct page_frag_cache *nc;
+ unsigned long flags;
+ struct sk_buff *skb;
+ bool pfmemalloc;
+ void *data;
- if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
- void *data;
+ len += NET_SKB_PAD;
- if (sk_memalloc_socks())
- gfp_mask |= __GFP_MEMALLOC;
+ if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+ (gfp_mask & (__GFP_WAIT | GFP_DMA))) {
+ skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
+ if (!skb)
+ goto skb_fail;
+ goto skb_success;
+ }
- data = (flags & SKB_ALLOC_NAPI) ?
- __napi_alloc_frag(fragsz, gfp_mask) :
- __netdev_alloc_frag(fragsz, gfp_mask);
+ len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ len = SKB_DATA_ALIGN(len);
- if (likely(data)) {
- skb = build_skb(data, fragsz);
- if (unlikely(!skb))
- put_page(virt_to_head_page(data));
- }
- } else {
- skb = __alloc_skb(length, gfp_mask,
- SKB_ALLOC_RX, NUMA_NO_NODE);
- }
- return skb;
-}
+ if (sk_memalloc_socks())
+ gfp_mask |= __GFP_MEMALLOC;
-/**
- * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
- * @dev: network device to receive on
- * @length: length to allocate
- * @gfp_mask: get_free_pages mask, passed to alloc_skb
- *
- * Allocate a new &sk_buff and assign it a usage count of one. The
- * buffer has NET_SKB_PAD headroom built in. Users should allocate
- * the headroom they think they need without accounting for the
- * built in space. The built in space is used for optimisations.
- *
- * %NULL is returned if there is no free memory.
- */
-struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
- unsigned int length, gfp_t gfp_mask)
-{
- struct sk_buff *skb;
+ local_irq_save(flags);
+
+ nc = this_cpu_ptr(&netdev_alloc_cache);
+ data = __alloc_page_frag(nc, len, gfp_mask);
+ pfmemalloc = nc->pfmemalloc;
- length += NET_SKB_PAD;
- skb = __alloc_rx_skb(length, gfp_mask, 0);
+ local_irq_restore(flags);
+
+ if (unlikely(!data))
+ return NULL;
- if (likely(skb)) {
- skb_reserve(skb, NET_SKB_PAD);
- skb->dev = dev;
+ skb = __build_skb(data, len);
+ if (unlikely(!skb)) {
+ skb_free_frag(data);
+ return NULL;
}
+ /* use OR instead of assignment to avoid clearing of bits in mask */
+ if (pfmemalloc)
+ skb->pfmemalloc = 1;
+ skb->head_frag = 1;
+
+skb_success:
+ skb_reserve(skb, NET_SKB_PAD);
+ skb->dev = dev;
+
+skb_fail:
return skb;
}
EXPORT_SYMBOL(__netdev_alloc_skb);
@@ -551,19 +471,49 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
*
* %NULL is returned if there is no free memory.
*/
-struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
- unsigned int length, gfp_t gfp_mask)
+struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
+ gfp_t gfp_mask)
{
+ struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
struct sk_buff *skb;
+ void *data;
- length += NET_SKB_PAD + NET_IP_ALIGN;
- skb = __alloc_rx_skb(length, gfp_mask, SKB_ALLOC_NAPI);
+ len += NET_SKB_PAD + NET_IP_ALIGN;
- if (likely(skb)) {
- skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
- skb->dev = napi->dev;
+ if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+ (gfp_mask & (__GFP_WAIT | GFP_DMA))) {
+ skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
+ if (!skb)
+ goto skb_fail;
+ goto skb_success;
}
+ len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ len = SKB_DATA_ALIGN(len);
+
+ if (sk_memalloc_socks())
+ gfp_mask |= __GFP_MEMALLOC;
+
+ data = __alloc_page_frag(nc, len, gfp_mask);
+ if (unlikely(!data))
+ return NULL;
+
+ skb = __build_skb(data, len);
+ if (unlikely(!skb)) {
+ skb_free_frag(data);
+ return NULL;
+ }
+
+ /* use OR instead of assignment to avoid clearing of bits in mask */
+ if (nc->pfmemalloc)
+ skb->pfmemalloc = 1;
+ skb->head_frag = 1;
+
+skb_success:
+ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+ skb->dev = napi->dev;
+
+skb_fail:
return skb;
}
EXPORT_SYMBOL(__napi_alloc_skb);
@@ -611,10 +561,12 @@ static void skb_clone_fraglist(struct sk_buff *skb)
static void skb_free_head(struct sk_buff *skb)
{
+ unsigned char *head = skb->head;
+
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ skb_free_frag(head);
else
- kfree(skb->head);
+ kfree(head);
}
static void skb_release_data(struct sk_buff *skb)
@@ -1918,15 +1870,39 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
return false;
}
+ssize_t skb_socket_splice(struct sock *sk,
+ struct pipe_inode_info *pipe,
+ struct splice_pipe_desc *spd)
+{
+ int ret;
+
+ /* Drop the socket lock, otherwise we have reverse
+ * locking dependencies between sk_lock and i_mutex
+ * here as compared to sendfile(). We enter here
+ * with the socket lock held, and splice_to_pipe() will
+ * grab the pipe inode lock. For sendfile() emulation,
+ * we call into ->sendpage() with the i_mutex lock held
+ * and networking will grab the socket lock.
+ */
+ release_sock(sk);
+ ret = splice_to_pipe(pipe, spd);
+ lock_sock(sk);
+
+ return ret;
+}
+
/*
* Map data from the skb to a pipe. Should handle both the linear part,
* the fragments, and the frag list. It does NOT handle frag lists within
* the frag list, if such a thing exists. We'd probably need to recurse to
* handle that cleanly.
*/
-int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
+int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
struct pipe_inode_info *pipe, unsigned int tlen,
- unsigned int flags)
+ unsigned int flags,
+ ssize_t (*splice_cb)(struct sock *,
+ struct pipe_inode_info *,
+ struct splice_pipe_desc *))
{
struct partial_page partial[MAX_SKB_FRAGS];
struct page *pages[MAX_SKB_FRAGS];
@@ -1939,7 +1915,6 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
.spd_release = sock_spd_release,
};
struct sk_buff *frag_iter;
- struct sock *sk = skb->sk;
int ret = 0;
/*
@@ -1962,23 +1937,12 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
}
done:
- if (spd.nr_pages) {
- /*
- * Drop the socket lock, otherwise we have reverse
- * locking dependencies between sk_lock and i_mutex
- * here as compared to sendfile(). We enter here
- * with the socket lock held, and splice_to_pipe() will
- * grab the pipe inode lock. For sendfile() emulation,
- * we call into ->sendpage() with the i_mutex lock held
- * and networking will grab the socket lock.
- */
- release_sock(sk);
- ret = splice_to_pipe(pipe, &spd);
- lock_sock(sk);
- }
+ if (spd.nr_pages)
+ ret = splice_cb(sk, pipe, &spd);
return ret;
}
+EXPORT_SYMBOL_GPL(skb_splice_bits);
/**
* skb_store_bits - store bits from kernel buffer to skb
@@ -2963,6 +2927,24 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
}
EXPORT_SYMBOL(skb_append_datato_frags);
+int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
+ int offset, size_t size)
+{
+ int i = skb_shinfo(skb)->nr_frags;
+
+ if (skb_can_coalesce(skb, i, page, offset)) {
+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
+ } else if (i < MAX_SKB_FRAGS) {
+ get_page(page);
+ skb_fill_page_desc(skb, i, page, offset, size);
+ } else {
+ return -EMSGSIZE;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(skb_append_pagefrags);
+
/**
* skb_pull_rcsum - pull skb and update receive checksum
* @skb: buffer to update
@@ -4030,6 +4012,92 @@ int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
}
EXPORT_SYMBOL(skb_checksum_setup);
+/**
+ * skb_checksum_maybe_trim - maybe trims the given skb
+ * @skb: the skb to check
+ * @transport_len: the data length beyond the network header
+ *
+ * Checks whether the given skb has data beyond the given transport length.
+ * If so, returns a cloned skb trimmed to this transport length.
+ * Otherwise returns the provided skb. Returns NULL in error cases
+ * (e.g. transport_len exceeds skb length or out-of-memory).
+ *
+ * Caller needs to set the skb transport header and free any returned skb if it
+ * differs from the provided skb.
+ */
+static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb,
+ unsigned int transport_len)
+{
+ struct sk_buff *skb_chk;
+ unsigned int len = skb_transport_offset(skb) + transport_len;
+ int ret;
+
+ if (skb->len < len)
+ return NULL;
+ else if (skb->len == len)
+ return skb;
+
+ skb_chk = skb_clone(skb, GFP_ATOMIC);
+ if (!skb_chk)
+ return NULL;
+
+ ret = pskb_trim_rcsum(skb_chk, len);
+ if (ret) {
+ kfree_skb(skb_chk);
+ return NULL;
+ }
+
+ return skb_chk;
+}
+
+/**
+ * skb_checksum_trimmed - validate checksum of an skb
+ * @skb: the skb to check
+ * @transport_len: the data length beyond the network header
+ * @skb_chkf: checksum function to use
+ *
+ * Applies the given checksum function skb_chkf to the provided skb.
+ * Returns a checked and maybe trimmed skb. Returns NULL on error.
+ *
+ * If the skb has data beyond the given transport length, then a
+ * trimmed & cloned skb is checked and returned.
+ *
+ * Caller needs to set the skb transport header and free any returned skb if it
+ * differs from the provided skb.
+ */
+struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
+ unsigned int transport_len,
+ __sum16(*skb_chkf)(struct sk_buff *skb))
+{
+ struct sk_buff *skb_chk;
+ unsigned int offset = skb_transport_offset(skb);
+ __sum16 ret;
+
+ skb_chk = skb_checksum_maybe_trim(skb, transport_len);
+ if (!skb_chk)
+ goto err;
+
+ if (!pskb_may_pull(skb_chk, offset))
+ goto err;
+
+ __skb_pull(skb_chk, offset);
+ ret = skb_chkf(skb_chk);
+ __skb_push(skb_chk, offset);
+
+ if (ret)
+ goto err;
+
+ return skb_chk;
+
+err:
+ if (skb_chk && skb_chk != skb)
+ kfree_skb(skb_chk);
+
+ return NULL;
+
+}
+EXPORT_SYMBOL(skb_checksum_trimmed);
+
void __skb_warn_lro_forwarding(const struct sk_buff *skb)
{
net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n",
diff --git a/net/core/sock.c b/net/core/sock.c
index dc30dc5bb..193901d09 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -131,6 +131,7 @@
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>
#include <net/netprio_cgroup.h>
+#include <linux/sock_diag.h>
#include <linux/filter.h>
@@ -1393,9 +1394,10 @@ EXPORT_SYMBOL_GPL(sock_update_netprioidx);
* @family: protocol family
* @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
* @prot: struct proto associated with this new sock instance
+ * @kern: is this to be a kernel socket?
*/
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
- struct proto *prot)
+ struct proto *prot, int kern)
{
struct sock *sk;
@@ -1408,7 +1410,10 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
*/
sk->sk_prot = sk->sk_prot_creator = prot;
sock_lock_init(sk);
- sock_net_set(sk, get_net(net));
+ sk->sk_net_refcnt = kern ? 0 : 1;
+ if (likely(sk->sk_net_refcnt))
+ get_net(net);
+ sock_net_set(sk, net);
atomic_set(&sk->sk_wmem_alloc, 1);
sock_update_classid(sk);
@@ -1419,7 +1424,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
}
EXPORT_SYMBOL(sk_alloc);
-static void __sk_free(struct sock *sk)
+void sk_destruct(struct sock *sk)
{
struct sk_filter *filter;
@@ -1442,10 +1447,19 @@ static void __sk_free(struct sock *sk)
if (sk->sk_peer_cred)
put_cred(sk->sk_peer_cred);
put_pid(sk->sk_peer_pid);
- put_net(sock_net(sk));
+ if (likely(sk->sk_net_refcnt))
+ put_net(sock_net(sk));
sk_prot_free(sk->sk_prot_creator, sk);
}
+static void __sk_free(struct sock *sk)
+{
+ if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
+ sock_diag_broadcast_destroy(sk);
+ else
+ sk_destruct(sk);
+}
+
void sk_free(struct sock *sk)
{
/*
@@ -1458,25 +1472,6 @@ void sk_free(struct sock *sk)
}
EXPORT_SYMBOL(sk_free);
-/*
- * Last sock_put should drop reference to sk->sk_net. It has already
- * been dropped in sk_change_net. Taking reference to stopping namespace
- * is not an option.
- * Take reference to a socket to remove it from hash _alive_ and after that
- * destroy it in the context of init_net.
- */
-void sk_release_kernel(struct sock *sk)
-{
- if (sk == NULL || sk->sk_socket == NULL)
- return;
-
- sock_hold(sk);
- sock_release(sk->sk_socket);
- sock_net_set(sk, get_net(&init_net));
- sock_put(sk);
-}
-EXPORT_SYMBOL(sk_release_kernel);
-
static void sk_update_clone(const struct sock *sk, struct sock *newsk)
{
if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
@@ -1502,7 +1497,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
sock_copy(newsk, sk);
/* SANITY */
- get_net(sock_net(newsk));
+ if (likely(newsk->sk_net_refcnt))
+ get_net(sock_net(newsk));
sk_node_init(&newsk->sk_node);
sock_lock_init(newsk);
bh_lock_sock(newsk);
@@ -1592,6 +1588,8 @@ EXPORT_SYMBOL_GPL(sk_clone_lock);
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
+ u32 max_segs = 1;
+
__sk_dst_set(sk, dst);
sk->sk_route_caps = dst->dev->features;
if (sk->sk_route_caps & NETIF_F_GSO)
@@ -1603,9 +1601,10 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
} else {
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
sk->sk_gso_max_size = dst->dev->gso_max_size;
- sk->sk_gso_max_segs = dst->dev->gso_max_segs;
+ max_segs = max_t(u32, dst->dev->gso_max_segs, 1);
}
}
+ sk->sk_gso_max_segs = max_segs;
}
EXPORT_SYMBOL_GPL(sk_setup_caps);
@@ -1969,20 +1968,21 @@ static void __release_sock(struct sock *sk)
* sk_wait_data - wait for data to arrive at sk_receive_queue
* @sk: sock to wait on
* @timeo: for how long
+ * @skb: last skb seen on sk_receive_queue
*
* Now socket state including sk->sk_err is changed only under lock,
* hence we may omit checks after joining wait queue.
* We check receive queue before schedule() only as optimization;
* it is very likely that release_sock() added new data.
*/
-int sk_wait_data(struct sock *sk, long *timeo)
+int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
{
int rc;
DEFINE_WAIT(wait);
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
- rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
+ rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb);
clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
finish_wait(sk_sleep(sk), &wait);
return rc;
@@ -2080,12 +2080,13 @@ EXPORT_SYMBOL(__sk_mem_schedule);
/**
* __sk_reclaim - reclaim memory_allocated
* @sk: socket
+ * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
*/
-void __sk_mem_reclaim(struct sock *sk)
+void __sk_mem_reclaim(struct sock *sk, int amount)
{
- sk_memory_allocated_sub(sk,
- sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
- sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
+ amount >>= SK_MEM_QUANTUM_SHIFT;
+ sk_memory_allocated_sub(sk, amount);
+ sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
if (sk_under_memory_pressure(sk) &&
(sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
@@ -2270,7 +2271,6 @@ static void sock_def_write_space(struct sock *sk)
static void sock_def_destruct(struct sock *sk)
{
- kfree(sk->sk_protinfo);
}
void sk_send_sigurg(struct sock *sk)
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 74dddf84a..d79866c5f 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -5,6 +5,9 @@
#include <net/net_namespace.h>
#include <linux/module.h>
#include <net/sock.h>
+#include <linux/kernel.h>
+#include <linux/tcp.h>
+#include <linux/workqueue.h>
#include <linux/inet_diag.h>
#include <linux/sock_diag.h>
@@ -12,6 +15,7 @@
static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
static DEFINE_MUTEX(sock_diag_table_mutex);
+static struct workqueue_struct *broadcast_wq;
static u64 sock_gen_cookie(struct sock *sk)
{
@@ -101,6 +105,62 @@ out:
}
EXPORT_SYMBOL(sock_diag_put_filterinfo);
+struct broadcast_sk {
+ struct sock *sk;
+ struct work_struct work;
+};
+
+static size_t sock_diag_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct inet_diag_msg)
+ + nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */
+ + nla_total_size(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
+}
+
+static void sock_diag_broadcast_destroy_work(struct work_struct *work)
+{
+ struct broadcast_sk *bsk =
+ container_of(work, struct broadcast_sk, work);
+ struct sock *sk = bsk->sk;
+ const struct sock_diag_handler *hndl;
+ struct sk_buff *skb;
+ const enum sknetlink_groups group = sock_diag_destroy_group(sk);
+ int err = -1;
+
+ WARN_ON(group == SKNLGRP_NONE);
+
+ skb = nlmsg_new(sock_diag_nlmsg_size(), GFP_KERNEL);
+ if (!skb)
+ goto out;
+
+ mutex_lock(&sock_diag_table_mutex);
+ hndl = sock_diag_handlers[sk->sk_family];
+ if (hndl && hndl->get_info)
+ err = hndl->get_info(skb, sk);
+ mutex_unlock(&sock_diag_table_mutex);
+
+ if (!err)
+ nlmsg_multicast(sock_net(sk)->diag_nlsk, skb, 0, group,
+ GFP_KERNEL);
+ else
+ kfree_skb(skb);
+out:
+ sk_destruct(sk);
+ kfree(bsk);
+}
+
+void sock_diag_broadcast_destroy(struct sock *sk)
+{
+ /* Note, this function is often called from an interrupt context. */
+ struct broadcast_sk *bsk =
+ kmalloc(sizeof(struct broadcast_sk), GFP_ATOMIC);
+ if (!bsk)
+ return sk_destruct(sk);
+ bsk->sk = sk;
+ INIT_WORK(&bsk->work, sock_diag_broadcast_destroy_work);
+ queue_work(broadcast_wq, &bsk->work);
+}
+
void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
{
mutex_lock(&sock_diag_table_mutex);
@@ -211,10 +271,32 @@ static void sock_diag_rcv(struct sk_buff *skb)
mutex_unlock(&sock_diag_mutex);
}
+static int sock_diag_bind(struct net *net, int group)
+{
+ switch (group) {
+ case SKNLGRP_INET_TCP_DESTROY:
+ case SKNLGRP_INET_UDP_DESTROY:
+ if (!sock_diag_handlers[AF_INET])
+ request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
+ NETLINK_SOCK_DIAG, AF_INET);
+ break;
+ case SKNLGRP_INET6_TCP_DESTROY:
+ case SKNLGRP_INET6_UDP_DESTROY:
+ if (!sock_diag_handlers[AF_INET6])
+ request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
+ NETLINK_SOCK_DIAG, AF_INET);
+ break;
+ }
+ return 0;
+}
+
static int __net_init diag_net_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
+ .groups = SKNLGRP_MAX,
.input = sock_diag_rcv,
+ .bind = sock_diag_bind,
+ .flags = NL_CFG_F_NONROOT_RECV,
};
net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg);
@@ -234,12 +316,15 @@ static struct pernet_operations diag_net_ops = {
static int __init sock_diag_init(void)
{
+ broadcast_wq = alloc_workqueue("sock_diag_events", 0, 0);
+ BUG_ON(!broadcast_wq);
return register_pernet_subsys(&diag_net_ops);
}
static void __exit sock_diag_exit(void)
{
unregister_pernet_subsys(&diag_net_ops);
+ destroy_workqueue(broadcast_wq);
}
module_init(sock_diag_init);
diff --git a/net/core/stream.c b/net/core/stream.c
index 301c05f26..d70f77a0c 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -119,6 +119,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
int err = 0;
long vm_wait = 0;
long current_timeo = *timeo_p;
+ bool noblock = (*timeo_p ? false : true);
DEFINE_WAIT(wait);
if (sk_stream_memory_free(sk))
@@ -131,8 +132,11 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
- if (!*timeo_p)
+ if (!*timeo_p) {
+ if (noblock)
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
goto do_nonblock;
+ }
if (signal_pending(current))
goto do_interrupted;
clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
diff --git a/net/core/utils.c b/net/core/utils.c
index 7b803884c..a7732a068 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -304,13 +304,15 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
__be32 from, __be32 to, int pseudohdr)
{
if (skb->ip_summed != CHECKSUM_PARTIAL) {
- *sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), from),
- to));
+ csum_replace4(sum, from, to);
if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
- skb->csum = ~csum_add(csum_sub(~(skb->csum), from), to);
+ skb->csum = ~csum_add(csum_sub(~(skb->csum),
+ (__force __wsum)from),
+ (__force __wsum)to);
} else if (pseudohdr)
- *sum = ~csum_fold(csum_add(csum_sub(csum_unfold(*sum), from),
- to));
+ *sum = ~csum_fold(csum_add(csum_sub(csum_unfold(*sum),
+ (__force __wsum)from),
+ (__force __wsum)to));
}
EXPORT_SYMBOL(inet_proto_csum_replace4);
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index 5a45f8de5..2d84303ea 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -66,6 +66,7 @@ static const struct inet_diag_handler dccp_diag_handler = {
.dump_one = dccp_diag_dump_one,
.idiag_get_info = dccp_diag_get_info,
.idiag_type = IPPROTO_DCCP,
+ .idiag_info_size = sizeof(struct tcp_info),
};
static int __init dccp_diag_init(void)
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 52a940165..b5cf13a28 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -886,7 +886,7 @@ verify_sock_status:
break;
}
- sk_wait_data(sk, &timeo);
+ sk_wait_data(sk, &timeo, NULL);
continue;
found_ok_skb:
if (len > skb->len)
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 754484b3c..675cf94e0 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -468,10 +468,10 @@ static struct proto dn_proto = {
.obj_size = sizeof(struct dn_sock),
};
-static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp)
+static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp, int kern)
{
struct dn_scp *scp;
- struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto);
+ struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, kern);
if (!sk)
goto out;
@@ -693,7 +693,7 @@ static int dn_create(struct net *net, struct socket *sock, int protocol,
}
- if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL)) == NULL)
+ if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL, kern)) == NULL)
return -ENOBUFS;
sk->sk_protocol = protocol;
@@ -1096,7 +1096,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags)
cb = DN_SKB_CB(skb);
sk->sk_ack_backlog--;
- newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation);
+ newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, 0);
if (newsk == NULL) {
release_sock(sk);
kfree_skb(skb);
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 392e29a02..b445d492c 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -630,7 +630,7 @@ static int dsa_of_probe(struct device *dev)
continue;
cd->sw_addr = be32_to_cpup(sw_addr);
- if (cd->sw_addr > PHY_MAX_ADDR)
+ if (cd->sw_addr >= PHY_MAX_ADDR)
continue;
if (!of_property_read_u32(child, "eeprom-length", &eeprom_len))
@@ -642,6 +642,8 @@ static int dsa_of_probe(struct device *dev)
continue;
port_index = be32_to_cpup(port_reg);
+ if (port_index >= DSA_MAX_PORTS)
+ break;
port_name = of_get_property(port, "label", NULL);
if (!port_name)
@@ -666,8 +668,6 @@ static int dsa_of_probe(struct device *dev)
goto out_free_chip;
}
- if (port_index == DSA_MAX_PORTS)
- break;
}
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 827cda560..35c47ddd0 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -112,7 +112,7 @@ static int dsa_slave_open(struct net_device *dev)
clear_promisc:
if (dev->flags & IFF_PROMISC)
- dev_set_promiscuity(master, 0);
+ dev_set_promiscuity(master, -1);
clear_allmulti:
if (dev->flags & IFF_ALLMULTI)
dev_set_allmulti(master, -1);
@@ -345,6 +345,24 @@ static int dsa_slave_stp_update(struct net_device *dev, u8 state)
return ret;
}
+static int dsa_slave_port_attr_set(struct net_device *dev,
+ struct switchdev_attr *attr)
+{
+ int ret = 0;
+
+ switch (attr->id) {
+ case SWITCHDEV_ATTR_PORT_STP_STATE:
+ if (attr->trans == SWITCHDEV_TRANS_COMMIT)
+ ret = dsa_slave_stp_update(dev, attr->u.stp_state);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ return ret;
+}
+
static int dsa_slave_bridge_port_join(struct net_device *dev,
struct net_device *br)
{
@@ -382,14 +400,20 @@ static int dsa_slave_bridge_port_leave(struct net_device *dev)
return ret;
}
-static int dsa_slave_parent_id_get(struct net_device *dev,
- struct netdev_phys_item_id *psid)
+static int dsa_slave_port_attr_get(struct net_device *dev,
+ struct switchdev_attr *attr)
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- psid->id_len = sizeof(ds->index);
- memcpy(&psid->id, &ds->index, psid->id_len);
+ switch (attr->id) {
+ case SWITCHDEV_ATTR_PORT_PARENT_ID:
+ attr->u.ppid.id_len = sizeof(ds->index);
+ memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
return 0;
}
@@ -675,9 +699,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_get_iflink = dsa_slave_get_iflink,
};
-static const struct swdev_ops dsa_slave_swdev_ops = {
- .swdev_parent_id_get = dsa_slave_parent_id_get,
- .swdev_port_stp_update = dsa_slave_stp_update,
+static const struct switchdev_ops dsa_slave_switchdev_ops = {
+ .switchdev_port_attr_get = dsa_slave_port_attr_get,
+ .switchdev_port_attr_set = dsa_slave_port_attr_set,
};
static void dsa_slave_adjust_link(struct net_device *dev)
@@ -732,7 +756,8 @@ static int dsa_slave_phy_connect(struct dsa_slave_priv *p,
return -ENODEV;
/* Use already configured phy mode */
- p->phy_interface = p->phy->interface;
+ if (p->phy_interface == PHY_INTERFACE_MODE_NA)
+ p->phy_interface = p->phy->interface;
phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link,
p->phy_interface);
@@ -810,12 +835,19 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
return 0;
}
+static struct lock_class_key dsa_slave_netdev_xmit_lock_key;
+static void dsa_slave_set_lockdep_class_one(struct net_device *dev,
+ struct netdev_queue *txq,
+ void *_unused)
+{
+ lockdep_set_class(&txq->_xmit_lock,
+ &dsa_slave_netdev_xmit_lock_key);
+}
+
int dsa_slave_suspend(struct net_device *slave_dev)
{
struct dsa_slave_priv *p = netdev_priv(slave_dev);
- netif_device_detach(slave_dev);
-
if (p->phy) {
phy_stop(p->phy);
p->old_pause = -1;
@@ -859,7 +891,10 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
eth_hw_addr_inherit(slave_dev, master);
slave_dev->tx_queue_len = 0;
slave_dev->netdev_ops = &dsa_slave_netdev_ops;
- slave_dev->swdev_ops = &dsa_slave_swdev_ops;
+ slave_dev->switchdev_ops = &dsa_slave_switchdev_ops;
+
+ netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
+ NULL);
SET_NETDEV_DEV(slave_dev, parent);
slave_dev->dev.of_node = ds->pd->port_dn[port];
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index d8aa0a221..5ee0be64b 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -58,6 +58,7 @@
#include <net/ipv6.h>
#include <net/ip.h>
#include <net/dsa.h>
+#include <net/flow_dissector.h>
#include <linux/uaccess.h>
__setup("ether=", netdev_boot_setup);
@@ -130,9 +131,9 @@ u32 eth_get_headlen(void *data, unsigned int len)
return len;
/* parse any remaining L2/L3 headers, check for L4 */
- if (!__skb_flow_dissect(NULL, &keys, data,
- eth->h_proto, sizeof(*eth), len))
- return max_t(u32, keys.thoff, sizeof(*eth));
+ if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto,
+ sizeof(*eth), len))
+ return max_t(u32, keys.control.thoff, sizeof(*eth));
/* parse for any L4 headers */
return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len);
@@ -156,10 +157,11 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
skb->dev = dev;
skb_reset_mac_header(skb);
+
+ eth = (struct ethhdr *)skb->data;
skb_pull_inline(skb, ETH_HLEN);
- eth = eth_hdr(skb);
- if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
+ if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) {
if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
skb->pkt_type = PACKET_BROADCAST;
else
@@ -178,7 +180,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
if (unlikely(netdev_uses_dsa(dev)))
return htons(ETH_P_XDSA);
- if (likely(ntohs(eth->h_proto) >= ETH_P_802_3_MIN))
+ if (likely(eth_proto_is_802_3(eth->h_proto)))
return eth->h_proto;
/*
@@ -468,6 +470,7 @@ EXPORT_SYMBOL(eth_gro_complete);
static struct packet_offload eth_packet_offload __read_mostly = {
.type = cpu_to_be16(ETH_P_TEB),
+ .priority = 10,
.callbacks = {
.gro_receive = eth_gro_receive,
.gro_complete = eth_gro_complete,
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 0ae5822ef..f20a387a1 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -55,27 +55,6 @@
LIST_HEAD(lowpan_devices);
static int lowpan_open_count;
-static __le16 lowpan_get_pan_id(const struct net_device *dev)
-{
- struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
-
- return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev);
-}
-
-static __le16 lowpan_get_short_addr(const struct net_device *dev)
-{
- struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
-
- return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev);
-}
-
-static u8 lowpan_get_dsn(const struct net_device *dev)
-{
- struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
-
- return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev);
-}
-
static struct header_ops lowpan_header_ops = {
.create = lowpan_header_create,
};
@@ -103,12 +82,6 @@ static const struct net_device_ops lowpan_netdev_ops = {
.ndo_start_xmit = lowpan_xmit,
};
-static struct ieee802154_mlme_ops lowpan_mlme = {
- .get_pan_id = lowpan_get_pan_id,
- .get_short_addr = lowpan_get_short_addr,
- .get_dsn = lowpan_get_dsn,
-};
-
static void lowpan_setup(struct net_device *dev)
{
dev->addr_len = IEEE802154_ADDR_LEN;
@@ -124,7 +97,6 @@ static void lowpan_setup(struct net_device *dev)
dev->netdev_ops = &lowpan_netdev_ops;
dev->header_ops = &lowpan_header_ops;
- dev->ml_priv = &lowpan_mlme;
dev->destructor = free_netdev;
dev->features |= NETIF_F_NETNS_LOCAL;
}
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index f46e4d130..214d44aef 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -207,7 +207,7 @@ found:
} else {
fq->q.meat += skb->len;
}
- add_frag_mem_limit(&fq->q, skb->truesize);
+ add_frag_mem_limit(fq->q.net, skb->truesize);
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
@@ -287,7 +287,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
clone->data_len = clone->len;
head->data_len -= clone->len;
head->len -= clone->len;
- add_frag_mem_limit(&fq->q, clone->truesize);
+ add_frag_mem_limit(fq->q.net, clone->truesize);
}
WARN_ON(head == NULL);
@@ -310,7 +310,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
}
fp = next;
}
- sub_frag_mem_limit(&fq->q, sum_truesize);
+ sub_frag_mem_limit(fq->q.net, sum_truesize);
head->next = NULL;
head->dev = dev;
diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index 2349070bd..2597abbf7 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -190,6 +190,7 @@ err:
static int lowpan_header(struct sk_buff *skb, struct net_device *dev)
{
+ struct wpan_dev *wpan_dev = lowpan_dev_info(dev)->real_dev->ieee802154_ptr;
struct ieee802154_addr sa, da;
struct ieee802154_mac_cb *cb = mac_cb_init(skb);
struct lowpan_addr_info info;
@@ -207,7 +208,7 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *dev)
/* prepare wpan address data */
sa.mode = IEEE802154_ADDR_LONG;
- sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
+ sa.pan_id = wpan_dev->pan_id;
sa.extended_addr = ieee802154_devaddr_from_raw(saddr);
/* intra-PAN communications */
@@ -223,7 +224,7 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *dev)
} else {
da.mode = IEEE802154_ADDR_LONG;
da.extended_addr = ieee802154_devaddr_from_raw(daddr);
- cb->ackreq = true;
+ cb->ackreq = wpan_dev->frame_retries >= 0;
}
return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev,
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index 2ee00e8a0..b0248e934 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -121,8 +121,6 @@ wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size)
/* atomic_inc_return makes it start at 1, make it start at 0 */
rdev->wpan_phy_idx--;
- mutex_init(&rdev->wpan_phy.pib_lock);
-
INIT_LIST_HEAD(&rdev->wpan_dev_list);
device_initialize(&rdev->wpan_phy.dev);
dev_set_name(&rdev->wpan_phy.dev, PHY_NAME "%d", rdev->wpan_phy_idx);
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index 2b4955d7a..3503c3895 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -97,8 +97,10 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid,
BUG_ON(!phy);
get_device(&phy->dev);
- short_addr = ops->get_short_addr(dev);
- pan_id = ops->get_pan_id(dev);
+ rtnl_lock();
+ short_addr = dev->ieee802154_ptr->short_addr;
+ pan_id = dev->ieee802154_ptr->pan_id;
+ rtnl_unlock();
if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) ||
@@ -117,12 +119,12 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid,
rtnl_unlock();
if (nla_put_s8(msg, IEEE802154_ATTR_TXPOWER,
- params.transmit_power) ||
+ params.transmit_power / 100) ||
nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, params.lbt) ||
nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE,
params.cca.mode) ||
nla_put_s32(msg, IEEE802154_ATTR_CCA_ED_LEVEL,
- params.cca_ed_level) ||
+ params.cca_ed_level / 100) ||
nla_put_u8(msg, IEEE802154_ATTR_CSMA_RETRIES,
params.csma_retries) ||
nla_put_u8(msg, IEEE802154_ATTR_CSMA_MIN_BE,
@@ -166,10 +168,7 @@ static struct net_device *ieee802154_nl_get_dev(struct genl_info *info)
if (!dev)
return NULL;
- /* Check on mtu is currently a hacked solution because lowpan
- * and wpan have the same ARPHRD type.
- */
- if (dev->type != ARPHRD_IEEE802154 || dev->mtu != IEEE802154_MTU) {
+ if (dev->type != ARPHRD_IEEE802154) {
dev_put(dev);
return NULL;
}
@@ -244,7 +243,9 @@ int ieee802154_associate_resp(struct sk_buff *skb, struct genl_info *info)
addr.mode = IEEE802154_ADDR_LONG;
addr.extended_addr = nla_get_hwaddr(
info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]);
- addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
+ rtnl_lock();
+ addr.pan_id = dev->ieee802154_ptr->pan_id;
+ rtnl_unlock();
ret = ieee802154_mlme_ops(dev)->assoc_resp(dev, &addr,
nla_get_shortaddr(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]),
@@ -281,7 +282,9 @@ int ieee802154_disassociate_req(struct sk_buff *skb, struct genl_info *info)
addr.short_addr = nla_get_shortaddr(
info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]);
}
- addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
+ rtnl_lock();
+ addr.pan_id = dev->ieee802154_ptr->pan_id;
+ rtnl_unlock();
ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr,
nla_get_u8(info->attrs[IEEE802154_ATTR_REASON]));
@@ -449,11 +452,7 @@ int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb)
idx = 0;
for_each_netdev(net, dev) {
- /* Check on mtu is currently a hacked solution because lowpan
- * and wpan have the same ARPHRD type.
- */
- if (idx < s_idx || dev->type != ARPHRD_IEEE802154 ||
- dev->mtu != IEEE802154_MTU)
+ if (idx < s_idx || dev->type != ARPHRD_IEEE802154)
goto cont;
if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).portid,
@@ -510,7 +509,7 @@ int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info)
ops->get_mac_params(dev, &params);
if (info->attrs[IEEE802154_ATTR_TXPOWER])
- params.transmit_power = nla_get_s8(info->attrs[IEEE802154_ATTR_TXPOWER]);
+ params.transmit_power = nla_get_s8(info->attrs[IEEE802154_ATTR_TXPOWER]) * 100;
if (info->attrs[IEEE802154_ATTR_LBT_ENABLED])
params.lbt = nla_get_u8(info->attrs[IEEE802154_ATTR_LBT_ENABLED]);
@@ -519,7 +518,7 @@ int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info)
params.cca.mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]);
if (info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL])
- params.cca_ed_level = nla_get_s32(info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]);
+ params.cca_ed_level = nla_get_s32(info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) * 100;
if (info->attrs[IEEE802154_ATTR_CSMA_RETRIES])
params.csma_retries = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_RETRIES]);
@@ -783,11 +782,7 @@ ieee802154_llsec_dump_table(struct sk_buff *skb, struct netlink_callback *cb,
int rc;
for_each_netdev(net, dev) {
- /* Check on mtu is currently a hacked solution because lowpan
- * and wpan have the same ARPHRD type.
- */
- if (idx < first_dev || dev->type != ARPHRD_IEEE802154 ||
- dev->mtu != IEEE802154_MTU)
+ if (idx < first_dev || dev->type != ARPHRD_IEEE802154)
goto skip;
data.ops = ieee802154_mlme_ops(dev);
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index 346c6665d..77d73014b 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -50,26 +50,26 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid,
if (!hdr)
goto out;
- mutex_lock(&phy->pib_lock);
+ rtnl_lock();
if (nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) ||
nla_put_u8(msg, IEEE802154_ATTR_PAGE, phy->current_page) ||
nla_put_u8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel))
goto nla_put_failure;
for (i = 0; i < 32; i++) {
- if (phy->channels_supported[i])
- buf[pages++] = phy->channels_supported[i] | (i << 27);
+ if (phy->supported.channels[i])
+ buf[pages++] = phy->supported.channels[i] | (i << 27);
}
if (pages &&
nla_put(msg, IEEE802154_ATTR_CHANNEL_PAGE_LIST,
pages * sizeof(uint32_t), buf))
goto nla_put_failure;
- mutex_unlock(&phy->pib_lock);
+ rtnl_unlock();
kfree(buf);
genlmsg_end(msg, hdr);
return 0;
nla_put_failure:
- mutex_unlock(&phy->pib_lock);
+ rtnl_unlock();
genlmsg_cancel(msg, hdr);
out:
kfree(buf);
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index f3c12f6a4..68f240168 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -207,10 +207,11 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = {
[NL802154_ATTR_PAGE] = { .type = NLA_U8, },
[NL802154_ATTR_CHANNEL] = { .type = NLA_U8, },
- [NL802154_ATTR_TX_POWER] = { .type = NLA_S8, },
+ [NL802154_ATTR_TX_POWER] = { .type = NLA_S32, },
[NL802154_ATTR_CCA_MODE] = { .type = NLA_U32, },
[NL802154_ATTR_CCA_OPT] = { .type = NLA_U32, },
+ [NL802154_ATTR_CCA_ED_LEVEL] = { .type = NLA_S32, },
[NL802154_ATTR_SUPPORTED_CHANNEL] = { .type = NLA_U32, },
@@ -225,6 +226,10 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = {
[NL802154_ATTR_MAX_FRAME_RETRIES] = { .type = NLA_S8, },
[NL802154_ATTR_LBT_MODE] = { .type = NLA_U8, },
+
+ [NL802154_ATTR_WPAN_PHY_CAPS] = { .type = NLA_NESTED },
+
+ [NL802154_ATTR_SUPPORTED_COMMANDS] = { .type = NLA_NESTED },
};
/* message building helper */
@@ -236,6 +241,28 @@ static inline void *nl802154hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
}
static int
+nl802154_put_flags(struct sk_buff *msg, int attr, u32 mask)
+{
+ struct nlattr *nl_flags = nla_nest_start(msg, attr);
+ int i;
+
+ if (!nl_flags)
+ return -ENOBUFS;
+
+ i = 0;
+ while (mask) {
+ if ((mask & 1) && nla_put_flag(msg, i))
+ return -ENOBUFS;
+
+ mask >>= 1;
+ i++;
+ }
+
+ nla_nest_end(msg, nl_flags);
+ return 0;
+}
+
+static int
nl802154_send_wpan_phy_channels(struct cfg802154_registered_device *rdev,
struct sk_buff *msg)
{
@@ -248,7 +275,7 @@ nl802154_send_wpan_phy_channels(struct cfg802154_registered_device *rdev,
for (page = 0; page <= IEEE802154_MAX_PAGE; page++) {
if (nla_put_u32(msg, NL802154_ATTR_SUPPORTED_CHANNEL,
- rdev->wpan_phy.channels_supported[page]))
+ rdev->wpan_phy.supported.channels[page]))
return -ENOBUFS;
}
nla_nest_end(msg, nl_page);
@@ -256,12 +283,100 @@ nl802154_send_wpan_phy_channels(struct cfg802154_registered_device *rdev,
return 0;
}
+static int
+nl802154_put_capabilities(struct sk_buff *msg,
+ struct cfg802154_registered_device *rdev)
+{
+ const struct wpan_phy_supported *caps = &rdev->wpan_phy.supported;
+ struct nlattr *nl_caps, *nl_channels;
+ int i;
+
+ nl_caps = nla_nest_start(msg, NL802154_ATTR_WPAN_PHY_CAPS);
+ if (!nl_caps)
+ return -ENOBUFS;
+
+ nl_channels = nla_nest_start(msg, NL802154_CAP_ATTR_CHANNELS);
+ if (!nl_channels)
+ return -ENOBUFS;
+
+ for (i = 0; i <= IEEE802154_MAX_PAGE; i++) {
+ if (caps->channels[i]) {
+ if (nl802154_put_flags(msg, i, caps->channels[i]))
+ return -ENOBUFS;
+ }
+ }
+
+ nla_nest_end(msg, nl_channels);
+
+ if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_ED_LEVEL) {
+ struct nlattr *nl_ed_lvls;
+
+ nl_ed_lvls = nla_nest_start(msg,
+ NL802154_CAP_ATTR_CCA_ED_LEVELS);
+ if (!nl_ed_lvls)
+ return -ENOBUFS;
+
+ for (i = 0; i < caps->cca_ed_levels_size; i++) {
+ if (nla_put_s32(msg, i, caps->cca_ed_levels[i]))
+ return -ENOBUFS;
+ }
+
+ nla_nest_end(msg, nl_ed_lvls);
+ }
+
+ if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER) {
+ struct nlattr *nl_tx_pwrs;
+
+ nl_tx_pwrs = nla_nest_start(msg, NL802154_CAP_ATTR_TX_POWERS);
+ if (!nl_tx_pwrs)
+ return -ENOBUFS;
+
+ for (i = 0; i < caps->tx_powers_size; i++) {
+ if (nla_put_s32(msg, i, caps->tx_powers[i]))
+ return -ENOBUFS;
+ }
+
+ nla_nest_end(msg, nl_tx_pwrs);
+ }
+
+ if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_MODE) {
+ if (nl802154_put_flags(msg, NL802154_CAP_ATTR_CCA_MODES,
+ caps->cca_modes) ||
+ nl802154_put_flags(msg, NL802154_CAP_ATTR_CCA_OPTS,
+ caps->cca_opts))
+ return -ENOBUFS;
+ }
+
+ if (nla_put_u8(msg, NL802154_CAP_ATTR_MIN_MINBE, caps->min_minbe) ||
+ nla_put_u8(msg, NL802154_CAP_ATTR_MAX_MINBE, caps->max_minbe) ||
+ nla_put_u8(msg, NL802154_CAP_ATTR_MIN_MAXBE, caps->min_maxbe) ||
+ nla_put_u8(msg, NL802154_CAP_ATTR_MAX_MAXBE, caps->max_maxbe) ||
+ nla_put_u8(msg, NL802154_CAP_ATTR_MIN_CSMA_BACKOFFS,
+ caps->min_csma_backoffs) ||
+ nla_put_u8(msg, NL802154_CAP_ATTR_MAX_CSMA_BACKOFFS,
+ caps->max_csma_backoffs) ||
+ nla_put_s8(msg, NL802154_CAP_ATTR_MIN_FRAME_RETRIES,
+ caps->min_frame_retries) ||
+ nla_put_s8(msg, NL802154_CAP_ATTR_MAX_FRAME_RETRIES,
+ caps->max_frame_retries) ||
+ nl802154_put_flags(msg, NL802154_CAP_ATTR_IFTYPES,
+ caps->iftypes) ||
+ nla_put_u32(msg, NL802154_CAP_ATTR_LBT, caps->lbt))
+ return -ENOBUFS;
+
+ nla_nest_end(msg, nl_caps);
+
+ return 0;
+}
+
static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev,
enum nl802154_commands cmd,
struct sk_buff *msg, u32 portid, u32 seq,
int flags)
{
+ struct nlattr *nl_cmds;
void *hdr;
+ int i;
hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
@@ -286,25 +401,76 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev,
rdev->wpan_phy.current_channel))
goto nla_put_failure;
- /* supported channels array */
+ /* TODO remove this behaviour, we still keep support it for a while
+ * so users can change the behaviour to the new one.
+ */
if (nl802154_send_wpan_phy_channels(rdev, msg))
goto nla_put_failure;
/* cca mode */
- if (nla_put_u32(msg, NL802154_ATTR_CCA_MODE,
- rdev->wpan_phy.cca.mode))
- goto nla_put_failure;
+ if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_MODE) {
+ if (nla_put_u32(msg, NL802154_ATTR_CCA_MODE,
+ rdev->wpan_phy.cca.mode))
+ goto nla_put_failure;
+
+ if (rdev->wpan_phy.cca.mode == NL802154_CCA_ENERGY_CARRIER) {
+ if (nla_put_u32(msg, NL802154_ATTR_CCA_OPT,
+ rdev->wpan_phy.cca.opt))
+ goto nla_put_failure;
+ }
+ }
+
+ if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER) {
+ if (nla_put_s32(msg, NL802154_ATTR_TX_POWER,
+ rdev->wpan_phy.transmit_power))
+ goto nla_put_failure;
+ }
- if (rdev->wpan_phy.cca.mode == NL802154_CCA_ENERGY_CARRIER) {
- if (nla_put_u32(msg, NL802154_ATTR_CCA_OPT,
- rdev->wpan_phy.cca.opt))
+ if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_ED_LEVEL) {
+ if (nla_put_s32(msg, NL802154_ATTR_CCA_ED_LEVEL,
+ rdev->wpan_phy.cca_ed_level))
goto nla_put_failure;
}
- if (nla_put_s8(msg, NL802154_ATTR_TX_POWER,
- rdev->wpan_phy.transmit_power))
+ if (nl802154_put_capabilities(msg, rdev))
+ goto nla_put_failure;
+
+ nl_cmds = nla_nest_start(msg, NL802154_ATTR_SUPPORTED_COMMANDS);
+ if (!nl_cmds)
goto nla_put_failure;
+ i = 0;
+#define CMD(op, n) \
+ do { \
+ if (rdev->ops->op) { \
+ i++; \
+ if (nla_put_u32(msg, i, NL802154_CMD_ ## n)) \
+ goto nla_put_failure; \
+ } \
+ } while (0)
+
+ CMD(add_virtual_intf, NEW_INTERFACE);
+ CMD(del_virtual_intf, DEL_INTERFACE);
+ CMD(set_channel, SET_CHANNEL);
+ CMD(set_pan_id, SET_PAN_ID);
+ CMD(set_short_addr, SET_SHORT_ADDR);
+ CMD(set_backoff_exponent, SET_BACKOFF_EXPONENT);
+ CMD(set_max_csma_backoffs, SET_MAX_CSMA_BACKOFFS);
+ CMD(set_max_frame_retries, SET_MAX_FRAME_RETRIES);
+ CMD(set_lbt_mode, SET_LBT_MODE);
+
+ if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER)
+ CMD(set_tx_power, SET_TX_POWER);
+
+ if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_ED_LEVEL)
+ CMD(set_cca_ed_level, SET_CCA_ED_LEVEL);
+
+ if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_MODE)
+ CMD(set_cca_mode, SET_CCA_MODE);
+
+#undef CMD
+ nla_nest_end(msg, nl_cmds);
+
finish:
genlmsg_end(msg, hdr);
return 0;
@@ -575,7 +741,8 @@ static int nl802154_new_interface(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL802154_ATTR_IFTYPE]) {
type = nla_get_u32(info->attrs[NL802154_ATTR_IFTYPE]);
- if (type > NL802154_IFTYPE_MAX)
+ if (type > NL802154_IFTYPE_MAX ||
+ !(rdev->wpan_phy.supported.iftypes & BIT(type)))
return -EINVAL;
}
@@ -625,7 +792,8 @@ static int nl802154_set_channel(struct sk_buff *skb, struct genl_info *info)
channel = nla_get_u8(info->attrs[NL802154_ATTR_CHANNEL]);
/* check 802.15.4 constraints */
- if (page > IEEE802154_MAX_PAGE || channel > IEEE802154_MAX_CHANNEL)
+ if (page > IEEE802154_MAX_PAGE || channel > IEEE802154_MAX_CHANNEL ||
+ !(rdev->wpan_phy.supported.channels[page] & BIT(channel)))
return -EINVAL;
return rdev_set_channel(rdev, page, channel);
@@ -636,12 +804,17 @@ static int nl802154_set_cca_mode(struct sk_buff *skb, struct genl_info *info)
struct cfg802154_registered_device *rdev = info->user_ptr[0];
struct wpan_phy_cca cca;
+ if (!(rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_MODE))
+ return -EOPNOTSUPP;
+
if (!info->attrs[NL802154_ATTR_CCA_MODE])
return -EINVAL;
cca.mode = nla_get_u32(info->attrs[NL802154_ATTR_CCA_MODE]);
/* checking 802.15.4 constraints */
- if (cca.mode < NL802154_CCA_ENERGY || cca.mode > NL802154_CCA_ATTR_MAX)
+ if (cca.mode < NL802154_CCA_ENERGY ||
+ cca.mode > NL802154_CCA_ATTR_MAX ||
+ !(rdev->wpan_phy.supported.cca_modes & BIT(cca.mode)))
return -EINVAL;
if (cca.mode == NL802154_CCA_ENERGY_CARRIER) {
@@ -649,13 +822,58 @@ static int nl802154_set_cca_mode(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
cca.opt = nla_get_u32(info->attrs[NL802154_ATTR_CCA_OPT]);
- if (cca.opt > NL802154_CCA_OPT_ATTR_MAX)
+ if (cca.opt > NL802154_CCA_OPT_ATTR_MAX ||
+ !(rdev->wpan_phy.supported.cca_opts & BIT(cca.opt)))
return -EINVAL;
}
return rdev_set_cca_mode(rdev, &cca);
}
+static int nl802154_set_cca_ed_level(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg802154_registered_device *rdev = info->user_ptr[0];
+ s32 ed_level;
+ int i;
+
+ if (!(rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_ED_LEVEL))
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[NL802154_ATTR_CCA_ED_LEVEL])
+ return -EINVAL;
+
+ ed_level = nla_get_s32(info->attrs[NL802154_ATTR_CCA_ED_LEVEL]);
+
+ for (i = 0; i < rdev->wpan_phy.supported.cca_ed_levels_size; i++) {
+ if (ed_level == rdev->wpan_phy.supported.cca_ed_levels[i])
+ return rdev_set_cca_ed_level(rdev, ed_level);
+ }
+
+ return -EINVAL;
+}
+
+static int nl802154_set_tx_power(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg802154_registered_device *rdev = info->user_ptr[0];
+ s32 power;
+ int i;
+
+ if (!(rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER))
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[NL802154_ATTR_TX_POWER])
+ return -EINVAL;
+
+ power = nla_get_s32(info->attrs[NL802154_ATTR_TX_POWER]);
+
+ for (i = 0; i < rdev->wpan_phy.supported.tx_powers_size; i++) {
+ if (power == rdev->wpan_phy.supported.tx_powers[i])
+ return rdev_set_tx_power(rdev, power);
+ }
+
+ return -EINVAL;
+}
+
static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info)
{
struct cfg802154_registered_device *rdev = info->user_ptr[0];
@@ -668,14 +886,22 @@ static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info)
return -EBUSY;
/* don't change address fields on monitor */
- if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR)
- return -EINVAL;
-
- if (!info->attrs[NL802154_ATTR_PAN_ID])
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR ||
+ !info->attrs[NL802154_ATTR_PAN_ID])
return -EINVAL;
pan_id = nla_get_le16(info->attrs[NL802154_ATTR_PAN_ID]);
+ /* TODO
+ * I am not sure about to check here on broadcast pan_id.
+ * Broadcast is a valid setting, comment from 802.15.4:
+ * If this value is 0xffff, the device is not associated.
+ *
+ * This could useful to simple deassociate an device.
+ */
+ if (pan_id == cpu_to_le16(IEEE802154_PAN_ID_BROADCAST))
+ return -EINVAL;
+
return rdev_set_pan_id(rdev, wpan_dev, pan_id);
}
@@ -691,14 +917,27 @@ static int nl802154_set_short_addr(struct sk_buff *skb, struct genl_info *info)
return -EBUSY;
/* don't change address fields on monitor */
- if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR)
- return -EINVAL;
-
- if (!info->attrs[NL802154_ATTR_SHORT_ADDR])
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR ||
+ !info->attrs[NL802154_ATTR_SHORT_ADDR])
return -EINVAL;
short_addr = nla_get_le16(info->attrs[NL802154_ATTR_SHORT_ADDR]);
+ /* TODO
+ * I am not sure about to check here on broadcast short_addr.
+ * Broadcast is a valid setting, comment from 802.15.4:
+ * A value of 0xfffe indicates that the device has
+ * associated but has not been allocated an address. A
+ * value of 0xffff indicates that the device does not
+ * have a short address.
+ *
+ * I think we should allow to set these settings but
+ * don't allow to allow socket communication with it.
+ */
+ if (short_addr == cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC) ||
+ short_addr == cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST))
+ return -EINVAL;
+
return rdev_set_short_addr(rdev, wpan_dev, short_addr);
}
@@ -722,7 +961,11 @@ nl802154_set_backoff_exponent(struct sk_buff *skb, struct genl_info *info)
max_be = nla_get_u8(info->attrs[NL802154_ATTR_MAX_BE]);
/* check 802.15.4 constraints */
- if (max_be < 3 || max_be > 8 || min_be > max_be)
+ if (min_be < rdev->wpan_phy.supported.min_minbe ||
+ min_be > rdev->wpan_phy.supported.max_minbe ||
+ max_be < rdev->wpan_phy.supported.min_maxbe ||
+ max_be > rdev->wpan_phy.supported.max_maxbe ||
+ min_be > max_be)
return -EINVAL;
return rdev_set_backoff_exponent(rdev, wpan_dev, min_be, max_be);
@@ -747,7 +990,8 @@ nl802154_set_max_csma_backoffs(struct sk_buff *skb, struct genl_info *info)
info->attrs[NL802154_ATTR_MAX_CSMA_BACKOFFS]);
/* check 802.15.4 constraints */
- if (max_csma_backoffs > 5)
+ if (max_csma_backoffs < rdev->wpan_phy.supported.min_csma_backoffs ||
+ max_csma_backoffs > rdev->wpan_phy.supported.max_csma_backoffs)
return -EINVAL;
return rdev_set_max_csma_backoffs(rdev, wpan_dev, max_csma_backoffs);
@@ -771,7 +1015,8 @@ nl802154_set_max_frame_retries(struct sk_buff *skb, struct genl_info *info)
info->attrs[NL802154_ATTR_MAX_FRAME_RETRIES]);
/* check 802.15.4 constraints */
- if (max_frame_retries < -1 || max_frame_retries > 7)
+ if (max_frame_retries < rdev->wpan_phy.supported.min_frame_retries ||
+ max_frame_retries > rdev->wpan_phy.supported.max_frame_retries)
return -EINVAL;
return rdev_set_max_frame_retries(rdev, wpan_dev, max_frame_retries);
@@ -791,6 +1036,9 @@ static int nl802154_set_lbt_mode(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
mode = !!nla_get_u8(info->attrs[NL802154_ATTR_LBT_MODE]);
+ if (!wpan_phy_supported_bool(mode, rdev->wpan_phy.supported.lbt))
+ return -EINVAL;
+
return rdev_set_lbt_mode(rdev, wpan_dev, mode);
}
@@ -937,6 +1185,22 @@ static const struct genl_ops nl802154_ops[] = {
NL802154_FLAG_NEED_RTNL,
},
{
+ .cmd = NL802154_CMD_SET_CCA_ED_LEVEL,
+ .doit = nl802154_set_cca_ed_level,
+ .policy = nl802154_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_WPAN_PHY |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_SET_TX_POWER,
+ .doit = nl802154_set_tx_power,
+ .policy = nl802154_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_WPAN_PHY |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
.cmd = NL802154_CMD_SET_PAN_ID,
.doit = nl802154_set_pan_id,
.policy = nl802154_policy,
diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h
index 7b5a9dd94..b2155a123 100644
--- a/net/ieee802154/rdev-ops.h
+++ b/net/ieee802154/rdev-ops.h
@@ -75,6 +75,29 @@ rdev_set_cca_mode(struct cfg802154_registered_device *rdev,
}
static inline int
+rdev_set_cca_ed_level(struct cfg802154_registered_device *rdev, s32 ed_level)
+{
+ int ret;
+
+ trace_802154_rdev_set_cca_ed_level(&rdev->wpan_phy, ed_level);
+ ret = rdev->ops->set_cca_ed_level(&rdev->wpan_phy, ed_level);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
+}
+
+static inline int
+rdev_set_tx_power(struct cfg802154_registered_device *rdev,
+ s32 power)
+{
+ int ret;
+
+ trace_802154_rdev_set_tx_power(&rdev->wpan_phy, power);
+ ret = rdev->ops->set_tx_power(&rdev->wpan_phy, power);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
+}
+
+static inline int
rdev_set_pan_id(struct cfg802154_registered_device *rdev,
struct wpan_dev *wpan_dev, __le16 pan_id)
{
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index 627a25376..b6eacf30e 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -64,10 +64,8 @@ ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr)
if (tmp->type != ARPHRD_IEEE802154)
continue;
- pan_id = ieee802154_mlme_ops(tmp)->get_pan_id(tmp);
- short_addr =
- ieee802154_mlme_ops(tmp)->get_short_addr(tmp);
-
+ pan_id = tmp->ieee802154_ptr->pan_id;
+ short_addr = tmp->ieee802154_ptr->short_addr;
if (pan_id == addr->pan_id &&
short_addr == addr->short_addr) {
dev = tmp;
@@ -228,15 +226,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len)
goto out;
}
- if (dev->type != ARPHRD_IEEE802154) {
- err = -ENODEV;
- goto out_put;
- }
-
sk->sk_bound_dev_if = dev->ifindex;
sk_dst_reset(sk);
-out_put:
dev_put(dev);
out:
release_sock(sk);
@@ -286,7 +278,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
if (size > mtu) {
pr_debug("size = %Zu, mtu = %u\n", size, mtu);
- err = -EINVAL;
+ err = -EMSGSIZE;
goto out_dev;
}
@@ -803,9 +795,9 @@ static int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb)
/* Data frame processing */
BUG_ON(dev->type != ARPHRD_IEEE802154);
- pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
- short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev);
- hw_addr = ieee802154_devaddr_from_raw(dev->dev_addr);
+ pan_id = dev->ieee802154_ptr->pan_id;
+ short_addr = dev->ieee802154_ptr->short_addr;
+ hw_addr = dev->ieee802154_ptr->extended_addr;
read_lock(&dgram_lock);
sk_for_each(sk, &dgram_head) {
@@ -1020,7 +1012,7 @@ static int ieee802154_create(struct net *net, struct socket *sock,
}
rc = -ENOMEM;
- sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto);
+ sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto, kern);
if (!sk)
goto out;
rc = 0;
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h
index 5ac25eb6e..9b5f0eb36 100644
--- a/net/ieee802154/trace.h
+++ b/net/ieee802154/trace.h
@@ -1,4 +1,4 @@
-/* Based on net/wireless/tracing.h */
+/* Based on net/wireless/trace.h */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM cfg802154
@@ -56,7 +56,7 @@ TRACE_EVENT(802154_rdev_add_virtual_intf,
__entry->type = type;
__entry->extended_addr = extended_addr;
),
- TP_printk(WPAN_PHY_PR_FMT ", virtual intf name: %s, type: %d, ea %llx",
+ TP_printk(WPAN_PHY_PR_FMT ", virtual intf name: %s, type: %d, extended addr: 0x%llx",
WPAN_PHY_PR_ARG, __get_str(vir_intf_name), __entry->type,
__le64_to_cpu(__entry->extended_addr))
);
@@ -93,6 +93,21 @@ TRACE_EVENT(802154_rdev_set_channel,
__entry->page, __entry->channel)
);
+TRACE_EVENT(802154_rdev_set_tx_power,
+ TP_PROTO(struct wpan_phy *wpan_phy, s32 power),
+ TP_ARGS(wpan_phy, power),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ __field(s32, power)
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ __entry->power = power;
+ ),
+ TP_printk(WPAN_PHY_PR_FMT ", mbm: %d", WPAN_PHY_PR_ARG,
+ __entry->power)
+);
+
TRACE_EVENT(802154_rdev_set_cca_mode,
TP_PROTO(struct wpan_phy *wpan_phy, const struct wpan_phy_cca *cca),
TP_ARGS(wpan_phy, cca),
@@ -108,6 +123,21 @@ TRACE_EVENT(802154_rdev_set_cca_mode,
WPAN_CCA_PR_ARG)
);
+TRACE_EVENT(802154_rdev_set_cca_ed_level,
+ TP_PROTO(struct wpan_phy *wpan_phy, s32 ed_level),
+ TP_ARGS(wpan_phy, ed_level),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ __field(s32, ed_level)
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ __entry->ed_level = ed_level;
+ ),
+ TP_printk(WPAN_PHY_PR_FMT ", ed level: %d", WPAN_PHY_PR_ARG,
+ __entry->ed_level)
+);
+
DECLARE_EVENT_CLASS(802154_le16_template,
TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
__le16 le16arg),
@@ -137,7 +167,7 @@ DEFINE_EVENT_PRINT(802154_le16_template, 802154_rdev_set_short_addr,
TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
__le16 le16arg),
TP_ARGS(wpan_phy, wpan_dev, le16arg),
- TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", sa: 0x%04x",
+ TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", short addr: 0x%04x",
WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG,
__le16_to_cpu(__entry->le16arg))
);
@@ -160,7 +190,7 @@ TRACE_EVENT(802154_rdev_set_backoff_exponent,
),
TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT
- ", min be: %d, max_be: %d", WPAN_PHY_PR_ARG,
+ ", min be: %d, max be: %d", WPAN_PHY_PR_ARG,
WPAN_DEV_PR_ARG, __entry->min_be, __entry->max_be)
);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index b295af069..23431321c 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -331,8 +331,8 @@ config NET_FOU_IP_TUNNELS
When this option is enabled IP tunnels can be configured to use
FOU or GUE encapsulation.
-config GENEVE
- tristate "Generic Network Virtualization Encapsulation (Geneve)"
+config GENEVE_CORE
+ tristate "Generic Network Virtualization Encapsulation library"
depends on INET
select NET_UDP_TUNNEL
---help---
@@ -615,6 +615,22 @@ config TCP_CONG_DCTCP
For further details see:
http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp-final.pdf
+config TCP_CONG_CDG
+ tristate "CAIA Delay-Gradient (CDG)"
+ default n
+ ---help---
+ CAIA Delay-Gradient (CDG) is a TCP congestion control that modifies
+ the TCP sender in order to:
+
+ o Use the delay gradient as a congestion signal.
+ o Back off with an average probability that is independent of the RTT.
+ o Coexist with flows that use loss-based congestion control.
+ o Tolerate packet loss unrelated to congestion.
+
+ For further details see:
+ D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using
+ delay gradients." In Networking 2011. Preprint: http://goo.gl/No3vdg
+
choice
prompt "Default TCP congestion control"
default DEFAULT_CUBIC
@@ -649,6 +665,9 @@ choice
config DEFAULT_DCTCP
bool "DCTCP" if TCP_CONG_DCTCP=y
+ config DEFAULT_CDG
+ bool "CDG" if TCP_CONG_CDG=y
+
config DEFAULT_RENO
bool "Reno"
endchoice
@@ -672,6 +691,7 @@ config DEFAULT_TCP_CONG
default "veno" if DEFAULT_VENO
default "reno" if DEFAULT_RENO
default "dctcp" if DEFAULT_DCTCP
+ default "cdg" if DEFAULT_CDG
default "cubic"
config TCP_MD5SIG
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 518c04ed6..efc43f300 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
+obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o
obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o
obj-$(CONFIG_TCP_CONG_DCTCP) += tcp_dctcp.o
obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o
@@ -56,7 +57,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
-obj-$(CONFIG_GENEVE) += geneve.o
+obj-$(CONFIG_GENEVE_CORE) += geneve_core.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o xfrm4_protocol.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index a5aa54ea6..9532ee871 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -319,7 +319,7 @@ lookup_protocol:
WARN_ON(!answer_prot->slab);
err = -ENOBUFS;
- sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);
+ sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern);
if (!sk)
goto out;
@@ -490,7 +490,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
inet->inet_saddr = 0; /* Use device */
/* Make sure we are allowed to bind here. */
- if (sk->sk_prot->get_port(sk, snum)) {
+ if ((snum || !inet->bind_address_no_port) &&
+ sk->sk_prot->get_port(sk, snum)) {
inet->inet_saddr = inet->inet_rcv_saddr = 0;
err = -EADDRINUSE;
goto out_release_sock;
@@ -1432,7 +1433,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
struct net *net)
{
struct socket *sock;
- int rc = sock_create_kern(family, type, protocol, &sock);
+ int rc = sock_create_kern(net, family, type, protocol, &sock);
if (rc == 0) {
*sk = sock->sk;
@@ -1442,8 +1443,6 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
* we do not wish this socket to see incoming packets.
*/
(*sk)->sk_prot->unhash(*sk);
-
- sk_change_net(*sk, net);
}
return rc;
}
@@ -1599,7 +1598,7 @@ static __net_init int inet_init_net(struct net *net)
*/
seqlock_init(&net->ipv4.ip_local_ports.lock);
net->ipv4.ip_local_ports.range[0] = 32768;
- net->ipv4.ip_local_ports.range[1] = 61000;
+ net->ipv4.ip_local_ports.range[1] = 60999;
seqlock_init(&net->ipv4.ping_group_range.lock);
/*
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 933a92820..6c8b1fbaf 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1017,14 +1017,16 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
neigh = neigh_lookup(&arp_tbl, &ip, dev);
if (neigh) {
- read_lock_bh(&neigh->lock);
- memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len);
- r->arp_flags = arp_state_to_flags(neigh);
- read_unlock_bh(&neigh->lock);
- r->arp_ha.sa_family = dev->type;
- strlcpy(r->arp_dev, dev->name, sizeof(r->arp_dev));
+ if (!(neigh->nud_state & NUD_NOARP)) {
+ read_lock_bh(&neigh->lock);
+ memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len);
+ r->arp_flags = arp_state_to_flags(neigh);
+ read_unlock_bh(&neigh->lock);
+ r->arp_ha.sa_family = dev->type;
+ strlcpy(r->arp_dev, dev->name, sizeof(r->arp_dev));
+ err = 0;
+ }
neigh_release(neigh);
- err = 0;
}
return err;
}
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 90c0e8386..574fad9cc 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -20,7 +20,7 @@
#include <net/route.h>
#include <net/tcp_states.h>
-int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct inet_sock *inet = inet_sk(sk);
struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
@@ -39,8 +39,6 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
sk_dst_reset(sk);
- lock_sock(sk);
-
oif = sk->sk_bound_dev_if;
saddr = inet->inet_saddr;
if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
@@ -82,9 +80,19 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
sk_dst_set(sk, &rt->dst);
err = 0;
out:
- release_sock(sk);
return err;
}
+EXPORT_SYMBOL(__ip4_datagram_connect);
+
+int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ int res;
+
+ lock_sock(sk);
+ res = __ip4_datagram_connect(sk, uaddr, addr_len);
+ release_sock(sk);
+ return res;
+}
EXPORT_SYMBOL(ip4_datagram_connect);
/* Because UDP xmit path can manipulate sk_dst_cache without holding
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 419d23c53..2d9cb1748 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -882,7 +882,6 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
queue_delayed_work(system_power_efficient_wq,
&check_lifetime_work, 0);
rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
- blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
}
return 0;
}
@@ -1740,6 +1739,8 @@ static int inet_netconf_msgsize_devconf(int type)
size += nla_total_size(4);
if (type == -1 || type == NETCONFA_PROXY_NEIGH)
size += nla_total_size(4);
+ if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
+ size += nla_total_size(4);
return size;
}
@@ -1780,6 +1781,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
goto nla_put_failure;
+ if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
+ nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+ IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
+ goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
@@ -1819,6 +1824,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
[NETCONFA_FORWARDING] = { .len = sizeof(int) },
[NETCONFA_RP_FILTER] = { .len = sizeof(int) },
[NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) },
+ [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) },
};
static int inet_netconf_get_devconf(struct sk_buff *in_skb,
@@ -2048,6 +2054,12 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write,
inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
ifindex, cnf);
}
+ if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
+ new_value != old_value) {
+ ifindex = devinet_conf_ifindex(net, cnf);
+ inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+ ifindex, cnf);
+ }
}
return ret;
@@ -2169,6 +2181,8 @@ static struct devinet_sysctl_table {
"igmpv2_unsolicited_report_interval"),
DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
"igmpv3_unsolicited_report_interval"),
+ DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
+ "ignore_routes_with_linkdown"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 30b544f02..477937465 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -49,7 +49,7 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen)
len = ALIGN(len, crypto_tfm_ctx_alignment());
}
- len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead);
+ len += sizeof(struct aead_request) + crypto_aead_reqsize(aead);
len = ALIGN(len, __alignof__(struct scatterlist));
len += sizeof(struct scatterlist) * nfrags;
@@ -68,17 +68,6 @@ static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
crypto_aead_alignmask(aead) + 1) : tmp + seqhilen;
}
-static inline struct aead_givcrypt_request *esp_tmp_givreq(
- struct crypto_aead *aead, u8 *iv)
-{
- struct aead_givcrypt_request *req;
-
- req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
- crypto_tfm_ctx_alignment());
- aead_givcrypt_set_tfm(req, aead);
- return req;
-}
-
static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv)
{
struct aead_request *req;
@@ -97,14 +86,6 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
__alignof__(struct scatterlist));
}
-static inline struct scatterlist *esp_givreq_sg(
- struct crypto_aead *aead, struct aead_givcrypt_request *req)
-{
- return (void *)ALIGN((unsigned long)(req + 1) +
- crypto_aead_reqsize(aead),
- __alignof__(struct scatterlist));
-}
-
static void esp_output_done(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
@@ -113,14 +94,37 @@ static void esp_output_done(struct crypto_async_request *base, int err)
xfrm_output_resume(skb, err);
}
+/* Move ESP header back into place. */
+static void esp_restore_header(struct sk_buff *skb, unsigned int offset)
+{
+ struct ip_esp_hdr *esph = (void *)(skb->data + offset);
+ void *tmp = ESP_SKB_CB(skb)->tmp;
+ __be32 *seqhi = esp_tmp_seqhi(tmp);
+
+ esph->seq_no = esph->spi;
+ esph->spi = *seqhi;
+}
+
+static void esp_output_restore_header(struct sk_buff *skb)
+{
+ esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32));
+}
+
+static void esp_output_done_esn(struct crypto_async_request *base, int err)
+{
+ struct sk_buff *skb = base->data;
+
+ esp_output_restore_header(skb);
+ esp_output_done(base, err);
+}
+
static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
{
int err;
struct ip_esp_hdr *esph;
struct crypto_aead *aead;
- struct aead_givcrypt_request *req;
+ struct aead_request *req;
struct scatterlist *sg;
- struct scatterlist *asg;
struct sk_buff *trailer;
void *tmp;
u8 *iv;
@@ -129,17 +133,19 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
int clen;
int alen;
int plen;
+ int ivlen;
int tfclen;
int nfrags;
int assoclen;
- int sglists;
int seqhilen;
__be32 *seqhi;
+ __be64 seqno;
/* skb is pure payload to encrypt */
aead = x->data;
alen = crypto_aead_authsize(aead);
+ ivlen = crypto_aead_ivsize(aead);
tfclen = 0;
if (x->tfcpad) {
@@ -160,16 +166,14 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
nfrags = err;
assoclen = sizeof(*esph);
- sglists = 1;
seqhilen = 0;
if (x->props.flags & XFRM_STATE_ESN) {
- sglists += 2;
seqhilen += sizeof(__be32);
assoclen += seqhilen;
}
- tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
+ tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
if (!tmp) {
err = -ENOMEM;
goto error;
@@ -177,9 +181,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
seqhi = esp_tmp_seqhi(tmp);
iv = esp_tmp_iv(aead, tmp, seqhilen);
- req = esp_tmp_givreq(aead, iv);
- asg = esp_givreq_sg(aead, req);
- sg = asg + sglists;
+ req = esp_tmp_req(aead, iv);
+ sg = esp_req_sg(aead, req);
/* Fill padding... */
tail = skb_tail_pointer(trailer);
@@ -235,37 +238,53 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
*skb_mac_header(skb) = IPPROTO_UDP;
}
- esph->spi = x->id.spi;
esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+ aead_request_set_callback(req, 0, esp_output_done, skb);
+
+ /* For ESN we move the header forward by 4 bytes to
+ * accomodate the high bits. We will move it back after
+ * encryption.
+ */
+ if ((x->props.flags & XFRM_STATE_ESN)) {
+ esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
+ *seqhi = esph->spi;
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ aead_request_set_callback(req, 0, esp_output_done_esn, skb);
+ }
+
+ esph->spi = x->id.spi;
+
sg_init_table(sg, nfrags);
skb_to_sgvec(skb, sg,
- esph->enc_data + crypto_aead_ivsize(aead) - skb->data,
- clen + alen);
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + clen + alen);
- if ((x->props.flags & XFRM_STATE_ESN)) {
- sg_init_table(asg, 3);
- sg_set_buf(asg, &esph->spi, sizeof(__be32));
- *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
- sg_set_buf(asg + 1, seqhi, seqhilen);
- sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
- } else
- sg_init_one(asg, esph, sizeof(*esph));
-
- aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
- aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
- aead_givcrypt_set_assoc(req, asg, assoclen);
- aead_givcrypt_set_giv(req, esph->enc_data,
- XFRM_SKB_CB(skb)->seq.output.low +
- ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
+ aead_request_set_crypt(req, sg, sg, ivlen + clen, iv);
+ aead_request_set_ad(req, assoclen);
+
+ seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
+ ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
+
+ memset(iv, 0, ivlen);
+ memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&seqno + 8 - min(ivlen, 8),
+ min(ivlen, 8));
ESP_SKB_CB(skb)->tmp = tmp;
- err = crypto_aead_givencrypt(req);
- if (err == -EINPROGRESS)
+ err = crypto_aead_encrypt(req);
+
+ switch (err) {
+ case -EINPROGRESS:
goto error;
- if (err == -EBUSY)
+ case -EBUSY:
err = NET_XMIT_DROP;
+ break;
+
+ case 0:
+ if ((x->props.flags & XFRM_STATE_ESN))
+ esp_output_restore_header(skb);
+ }
kfree(tmp);
@@ -364,6 +383,20 @@ static void esp_input_done(struct crypto_async_request *base, int err)
xfrm_input_resume(skb, esp_input_done2(skb, err));
}
+static void esp_input_restore_header(struct sk_buff *skb)
+{
+ esp_restore_header(skb, 0);
+ __skb_pull(skb, 4);
+}
+
+static void esp_input_done_esn(struct crypto_async_request *base, int err)
+{
+ struct sk_buff *skb = base->data;
+
+ esp_input_restore_header(skb);
+ esp_input_done(base, err);
+}
+
/*
* Note: detecting truncated vs. non-truncated authentication data is very
* expensive, so we only support truncated data, which is the recommended
@@ -375,19 +408,18 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
struct crypto_aead *aead = x->data;
struct aead_request *req;
struct sk_buff *trailer;
- int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
+ int ivlen = crypto_aead_ivsize(aead);
+ int elen = skb->len - sizeof(*esph) - ivlen;
int nfrags;
int assoclen;
- int sglists;
int seqhilen;
__be32 *seqhi;
void *tmp;
u8 *iv;
struct scatterlist *sg;
- struct scatterlist *asg;
int err = -EINVAL;
- if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)))
+ if (!pskb_may_pull(skb, sizeof(*esph) + ivlen))
goto out;
if (elen <= 0)
@@ -400,17 +432,15 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
nfrags = err;
assoclen = sizeof(*esph);
- sglists = 1;
seqhilen = 0;
if (x->props.flags & XFRM_STATE_ESN) {
- sglists += 2;
seqhilen += sizeof(__be32);
assoclen += seqhilen;
}
err = -ENOMEM;
- tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
+ tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
if (!tmp)
goto out;
@@ -418,36 +448,39 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
seqhi = esp_tmp_seqhi(tmp);
iv = esp_tmp_iv(aead, tmp, seqhilen);
req = esp_tmp_req(aead, iv);
- asg = esp_req_sg(aead, req);
- sg = asg + sglists;
+ sg = esp_req_sg(aead, req);
skb->ip_summed = CHECKSUM_NONE;
esph = (struct ip_esp_hdr *)skb->data;
- /* Get ivec. This can be wrong, check against another impls. */
- iv = esph->enc_data;
-
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
+ aead_request_set_callback(req, 0, esp_input_done, skb);
+ /* For ESN we move the header forward by 4 bytes to
+ * accomodate the high bits. We will move it back after
+ * decryption.
+ */
if ((x->props.flags & XFRM_STATE_ESN)) {
- sg_init_table(asg, 3);
- sg_set_buf(asg, &esph->spi, sizeof(__be32));
- *seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
- sg_set_buf(asg + 1, seqhi, seqhilen);
- sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
- } else
- sg_init_one(asg, esph, sizeof(*esph));
+ esph = (void *)skb_push(skb, 4);
+ *seqhi = esph->spi;
+ esph->spi = esph->seq_no;
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi);
+ aead_request_set_callback(req, 0, esp_input_done_esn, skb);
+ }
- aead_request_set_callback(req, 0, esp_input_done, skb);
- aead_request_set_crypt(req, sg, sg, elen, iv);
- aead_request_set_assoc(req, asg, assoclen);
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg, 0, skb->len);
+
+ aead_request_set_crypt(req, sg, sg, elen + ivlen, iv);
+ aead_request_set_ad(req, assoclen);
err = crypto_aead_decrypt(req);
if (err == -EINPROGRESS)
goto out;
+ if ((x->props.flags & XFRM_STATE_ESN))
+ esp_input_restore_header(skb);
+
err = esp_input_done2(skb, err);
out:
@@ -519,10 +552,16 @@ static void esp_destroy(struct xfrm_state *x)
static int esp_init_aead(struct xfrm_state *x)
{
+ char aead_name[CRYPTO_MAX_ALG_NAME];
struct crypto_aead *aead;
int err;
- aead = crypto_alloc_aead(x->aead->alg_name, 0, 0);
+ err = -ENAMETOOLONG;
+ if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
+ x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
+ goto error;
+
+ aead = crypto_alloc_aead(aead_name, 0, 0);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
@@ -561,15 +600,19 @@ static int esp_init_authenc(struct xfrm_state *x)
if ((x->props.flags & XFRM_STATE_ESN)) {
if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
- "authencesn(%s,%s)",
+ "%s%sauthencesn(%s,%s)%s",
+ x->geniv ?: "", x->geniv ? "(" : "",
x->aalg ? x->aalg->alg_name : "digest_null",
- x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+ x->ealg->alg_name,
+ x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME)
goto error;
} else {
if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
- "authenc(%s,%s)",
+ "%s%sauthenc(%s,%s)%s",
+ x->geniv ?: "", x->geniv ? "(" : "",
x->aalg ? x->aalg->alg_name : "digest_null",
- x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+ x->ealg->alg_name,
+ x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME)
goto error;
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 872494e6e..6bbc54940 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -280,7 +280,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
fl4.flowi4_scope = scope;
fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
- if (!fib_lookup(net, &fl4, &res))
+ if (!fib_lookup(net, &fl4, &res, 0))
return FIB_RES_PREFSRC(net, res);
} else {
scope = RT_SCOPE_LINK;
@@ -319,7 +319,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0;
net = dev_net(dev);
- if (fib_lookup(net, &fl4, &res))
+ if (fib_lookup(net, &fl4, &res, 0))
goto last_resort;
if (res.type != RTN_UNICAST &&
(res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
@@ -354,7 +354,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
fl4.flowi4_oif = dev->ifindex;
ret = 0;
- if (fib_lookup(net, &fl4, &res) == 0) {
+ if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) {
if (res.type == RTN_UNICAST)
ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
}
@@ -1063,9 +1063,9 @@ static void nl_fib_lookup_exit(struct net *net)
net->ipv4.fibnl = NULL;
}
-static void fib_disable_ip(struct net_device *dev, int force)
+static void fib_disable_ip(struct net_device *dev, unsigned long event)
{
- if (fib_sync_down_dev(dev, force))
+ if (fib_sync_down_dev(dev, event))
fib_flush(dev_net(dev));
rt_cache_flush(dev_net(dev));
arp_ifdown(dev);
@@ -1081,7 +1081,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
case NETDEV_UP:
fib_add_ifaddr(ifa);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- fib_sync_up(dev);
+ fib_sync_up(dev, RTNH_F_DEAD);
#endif
atomic_inc(&net->ipv4.dev_addr_genid);
rt_cache_flush(dev_net(dev));
@@ -1093,7 +1093,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
/* Last address was deleted from this interface.
* Disable IP.
*/
- fib_disable_ip(dev, 1);
+ fib_disable_ip(dev, event);
} else {
rt_cache_flush(dev_net(dev));
}
@@ -1107,9 +1107,10 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct in_device *in_dev;
struct net *net = dev_net(dev);
+ unsigned int flags;
if (event == NETDEV_UNREGISTER) {
- fib_disable_ip(dev, 2);
+ fib_disable_ip(dev, event);
rt_flush_dev(dev);
return NOTIFY_DONE;
}
@@ -1124,16 +1125,22 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
fib_add_ifaddr(ifa);
} endfor_ifa(in_dev);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- fib_sync_up(dev);
+ fib_sync_up(dev, RTNH_F_DEAD);
#endif
atomic_inc(&net->ipv4.dev_addr_genid);
rt_cache_flush(net);
break;
case NETDEV_DOWN:
- fib_disable_ip(dev, 0);
+ fib_disable_ip(dev, event);
break;
- case NETDEV_CHANGEMTU:
case NETDEV_CHANGE:
+ flags = dev_get_flags(dev);
+ if (flags & (IFF_RUNNING | IFF_LOWER_UP))
+ fib_sync_up(dev, RTNH_F_LINKDOWN);
+ else
+ fib_sync_down_dev(dev, event);
+ /* fall through */
+ case NETDEV_CHANGEMTU:
rt_cache_flush(net);
break;
}
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index c6211ed60..9c0292072 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -13,6 +13,7 @@ struct fib_alias {
u8 fa_state;
u8 fa_slen;
u32 tb_id;
+ s16 fa_default;
struct rcu_head rcu;
};
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 56151982f..18123d50f 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -47,11 +47,12 @@ struct fib4_rule {
#endif
};
-int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res)
+int __fib_lookup(struct net *net, struct flowi4 *flp,
+ struct fib_result *res, unsigned int flags)
{
struct fib_lookup_arg arg = {
.result = res,
- .flags = FIB_LOOKUP_NOREF,
+ .flags = flags,
};
int err;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 8d695b665..3a06586b1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -266,7 +266,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
#ifdef CONFIG_IP_ROUTE_CLASSID
nh->nh_tclassid != onh->nh_tclassid ||
#endif
- ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
+ ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK))
return -1;
onh++;
} endfor_nexthops(fi);
@@ -318,7 +318,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
nfi->fib_type == fi->fib_type &&
memcmp(nfi->fib_metrics, fi->fib_metrics,
sizeof(u32) * RTAX_MAX) == 0 &&
- ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
+ !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
(nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
return fi;
}
@@ -604,6 +604,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
return -ENODEV;
if (!(dev->flags & IFF_UP))
return -ENETDOWN;
+ if (!netif_carrier_ok(dev))
+ nh->nh_flags |= RTNH_F_LINKDOWN;
nh->nh_dev = dev;
dev_hold(dev);
nh->nh_scope = RT_SCOPE_LINK;
@@ -621,7 +623,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
/* It is not necessary, but requires a bit of thinking */
if (fl4.flowi4_scope < RT_SCOPE_LINK)
fl4.flowi4_scope = RT_SCOPE_LINK;
- err = fib_lookup(net, &fl4, &res);
+ err = fib_lookup(net, &fl4, &res,
+ FIB_LOOKUP_IGNORE_LINKSTATE);
if (err) {
rcu_read_unlock();
return err;
@@ -636,6 +639,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
if (!dev)
goto out;
dev_hold(dev);
+ if (!netif_carrier_ok(dev))
+ nh->nh_flags |= RTNH_F_LINKDOWN;
err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
} else {
struct in_device *in_dev;
@@ -654,6 +659,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
nh->nh_dev = in_dev->dev;
dev_hold(nh->nh_dev);
nh->nh_scope = RT_SCOPE_HOST;
+ if (!netif_carrier_ok(nh->nh_dev))
+ nh->nh_flags |= RTNH_F_LINKDOWN;
err = 0;
}
out:
@@ -713,8 +720,6 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
struct hlist_head *dest;
unsigned int new_hash;
- hlist_del(&fi->fib_hash);
-
new_hash = fib_info_hashfn(fi);
dest = &new_info_hash[new_hash];
hlist_add_head(&fi->fib_hash, dest);
@@ -731,8 +736,6 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
struct hlist_head *ldest;
unsigned int new_hash;
- hlist_del(&fi->fib_lhash);
-
new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
ldest = &new_laddrhash[new_hash];
hlist_add_head(&fi->fib_lhash, ldest);
@@ -924,11 +927,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
if (!nh->nh_dev)
goto failure;
} else {
+ int linkdown = 0;
+
change_nexthops(fi) {
err = fib_check_nh(cfg, fi, nexthop_nh);
if (err != 0)
goto failure;
+ if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
+ linkdown++;
} endfor_nexthops(fi)
+ if (linkdown == fi->fib_nhs)
+ fi->fib_flags |= RTNH_F_LINKDOWN;
}
if (fi->fib_prefsrc) {
@@ -1027,12 +1036,20 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
goto nla_put_failure;
if (fi->fib_nhs == 1) {
+ struct in_device *in_dev;
+
if (fi->fib_nh->nh_gw &&
nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
goto nla_put_failure;
if (fi->fib_nh->nh_oif &&
nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
goto nla_put_failure;
+ if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
+ in_dev = __in_dev_get_rtnl(fi->fib_nh->nh_dev);
+ if (in_dev &&
+ IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
+ rtm->rtm_flags |= RTNH_F_DEAD;
+ }
#ifdef CONFIG_IP_ROUTE_CLASSID
if (fi->fib_nh[0].nh_tclassid &&
nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
@@ -1049,11 +1066,19 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
goto nla_put_failure;
for_nexthops(fi) {
+ struct in_device *in_dev;
+
rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
if (!rtnh)
goto nla_put_failure;
rtnh->rtnh_flags = nh->nh_flags & 0xFF;
+ if (nh->nh_flags & RTNH_F_LINKDOWN) {
+ in_dev = __in_dev_get_rtnl(nh->nh_dev);
+ if (in_dev &&
+ IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
+ rtnh->rtnh_flags |= RTNH_F_DEAD;
+ }
rtnh->rtnh_hops = nh->nh_weight - 1;
rtnh->rtnh_ifindex = nh->nh_oif;
@@ -1107,7 +1132,7 @@ int fib_sync_down_addr(struct net *net, __be32 local)
return ret;
}
-int fib_sync_down_dev(struct net_device *dev, int force)
+int fib_sync_down_dev(struct net_device *dev, unsigned long event)
{
int ret = 0;
int scope = RT_SCOPE_NOWHERE;
@@ -1116,7 +1141,8 @@ int fib_sync_down_dev(struct net_device *dev, int force)
struct hlist_head *head = &fib_info_devhash[hash];
struct fib_nh *nh;
- if (force)
+ if (event == NETDEV_UNREGISTER ||
+ event == NETDEV_DOWN)
scope = -1;
hlist_for_each_entry(nh, head, nh_hash) {
@@ -1133,7 +1159,15 @@ int fib_sync_down_dev(struct net_device *dev, int force)
dead++;
else if (nexthop_nh->nh_dev == dev &&
nexthop_nh->nh_scope != scope) {
- nexthop_nh->nh_flags |= RTNH_F_DEAD;
+ switch (event) {
+ case NETDEV_DOWN:
+ case NETDEV_UNREGISTER:
+ nexthop_nh->nh_flags |= RTNH_F_DEAD;
+ /* fall through */
+ case NETDEV_CHANGE:
+ nexthop_nh->nh_flags |= RTNH_F_LINKDOWN;
+ break;
+ }
#ifdef CONFIG_IP_ROUTE_MULTIPATH
spin_lock_bh(&fib_multipath_lock);
fi->fib_power -= nexthop_nh->nh_power;
@@ -1143,14 +1177,23 @@ int fib_sync_down_dev(struct net_device *dev, int force)
dead++;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (force > 1 && nexthop_nh->nh_dev == dev) {
+ if (event == NETDEV_UNREGISTER &&
+ nexthop_nh->nh_dev == dev) {
dead = fi->fib_nhs;
break;
}
#endif
} endfor_nexthops(fi)
if (dead == fi->fib_nhs) {
- fi->fib_flags |= RTNH_F_DEAD;
+ switch (event) {
+ case NETDEV_DOWN:
+ case NETDEV_UNREGISTER:
+ fi->fib_flags |= RTNH_F_DEAD;
+ /* fall through */
+ case NETDEV_CHANGE:
+ fi->fib_flags |= RTNH_F_LINKDOWN;
+ break;
+ }
ret++;
}
}
@@ -1159,23 +1202,40 @@ int fib_sync_down_dev(struct net_device *dev, int force)
}
/* Must be invoked inside of an RCU protected region. */
-void fib_select_default(struct fib_result *res)
+void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
{
struct fib_info *fi = NULL, *last_resort = NULL;
struct hlist_head *fa_head = res->fa_head;
struct fib_table *tb = res->table;
+ u8 slen = 32 - res->prefixlen;
int order = -1, last_idx = -1;
- struct fib_alias *fa;
+ struct fib_alias *fa, *fa1 = NULL;
+ u32 last_prio = res->fi->fib_priority;
+ u8 last_tos = 0;
hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
struct fib_info *next_fi = fa->fa_info;
+ if (fa->fa_slen != slen)
+ continue;
+ if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
+ continue;
+ if (fa->tb_id != tb->tb_id)
+ continue;
+ if (next_fi->fib_priority > last_prio &&
+ fa->fa_tos == last_tos) {
+ if (last_tos)
+ continue;
+ break;
+ }
+ if (next_fi->fib_flags & RTNH_F_DEAD)
+ continue;
+ last_tos = fa->fa_tos;
+ last_prio = next_fi->fib_priority;
+
if (next_fi->fib_scope != res->scope ||
fa->fa_type != RTN_UNICAST)
continue;
-
- if (next_fi->fib_priority > res->fi->fib_priority)
- break;
if (!next_fi->fib_nh[0].nh_gw ||
next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
continue;
@@ -1185,10 +1245,11 @@ void fib_select_default(struct fib_result *res)
if (!fi) {
if (next_fi != res->fi)
break;
+ fa1 = fa;
} else if (!fib_detect_death(fi, order, &last_resort,
- &last_idx, tb->tb_default)) {
+ &last_idx, fa1->fa_default)) {
fib_result_assign(res, fi);
- tb->tb_default = order;
+ fa1->fa_default = order;
goto out;
}
fi = next_fi;
@@ -1196,31 +1257,30 @@ void fib_select_default(struct fib_result *res)
}
if (order <= 0 || !fi) {
- tb->tb_default = -1;
+ if (fa1)
+ fa1->fa_default = -1;
goto out;
}
if (!fib_detect_death(fi, order, &last_resort, &last_idx,
- tb->tb_default)) {
+ fa1->fa_default)) {
fib_result_assign(res, fi);
- tb->tb_default = order;
+ fa1->fa_default = order;
goto out;
}
if (last_idx >= 0)
fib_result_assign(res, last_resort);
- tb->tb_default = last_idx;
+ fa1->fa_default = last_idx;
out:
return;
}
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-
/*
* Dead device goes up. We wake up dead nexthops.
* It takes sense only on multipath routes.
*/
-int fib_sync_up(struct net_device *dev)
+int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
{
struct fib_info *prev_fi;
unsigned int hash;
@@ -1247,7 +1307,7 @@ int fib_sync_up(struct net_device *dev)
prev_fi = fi;
alive = 0;
change_nexthops(fi) {
- if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
+ if (!(nexthop_nh->nh_flags & nh_flags)) {
alive++;
continue;
}
@@ -1258,14 +1318,18 @@ int fib_sync_up(struct net_device *dev)
!__in_dev_get_rtnl(dev))
continue;
alive++;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
spin_lock_bh(&fib_multipath_lock);
nexthop_nh->nh_power = 0;
- nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
+ nexthop_nh->nh_flags &= ~nh_flags;
spin_unlock_bh(&fib_multipath_lock);
+#else
+ nexthop_nh->nh_flags &= ~nh_flags;
+#endif
} endfor_nexthops(fi)
if (alive > 0) {
- fi->fib_flags &= ~RTNH_F_DEAD;
+ fi->fib_flags &= ~nh_flags;
ret++;
}
}
@@ -1273,6 +1337,8 @@ int fib_sync_up(struct net_device *dev)
return ret;
}
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+
/*
* The algorithm is suboptimal, but it provides really
* fair weighted route distribution.
@@ -1280,16 +1346,22 @@ int fib_sync_up(struct net_device *dev)
void fib_select_multipath(struct fib_result *res)
{
struct fib_info *fi = res->fi;
+ struct in_device *in_dev;
int w;
spin_lock_bh(&fib_multipath_lock);
if (fi->fib_power <= 0) {
int power = 0;
change_nexthops(fi) {
- if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
- power += nexthop_nh->nh_weight;
- nexthop_nh->nh_power = nexthop_nh->nh_weight;
- }
+ in_dev = __in_dev_get_rcu(nexthop_nh->nh_dev);
+ if (nexthop_nh->nh_flags & RTNH_F_DEAD)
+ continue;
+ if (in_dev &&
+ IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+ nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
+ continue;
+ power += nexthop_nh->nh_weight;
+ nexthop_nh->nh_power = nexthop_nh->nh_weight;
} endfor_nexthops(fi);
fi->fib_power = power;
if (power <= 0) {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 09b62e17d..b0c6258ff 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -72,6 +72,7 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
@@ -324,13 +325,15 @@ static inline void empty_child_dec(struct key_vector *n)
static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
{
- struct tnode *kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
- struct key_vector *l = kv->kv;
+ struct key_vector *l;
+ struct tnode *kv;
+ kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
if (!kv)
return NULL;
/* initialize key vector */
+ l = kv->kv;
l->key = key;
l->pos = 0;
l->bits = 0;
@@ -345,24 +348,26 @@ static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
static struct key_vector *tnode_new(t_key key, int pos, int bits)
{
- struct tnode *tnode = tnode_alloc(bits);
unsigned int shift = pos + bits;
- struct key_vector *tn = tnode->kv;
+ struct key_vector *tn;
+ struct tnode *tnode;
/* verify bits and pos their msb bits clear and values are valid */
BUG_ON(!bits || (shift > KEYLENGTH));
- pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0),
- sizeof(struct key_vector *) << bits);
-
+ tnode = tnode_alloc(bits);
if (!tnode)
return NULL;
+ pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0),
+ sizeof(struct key_vector *) << bits);
+
if (bits == KEYLENGTH)
tnode->full_children = 1;
else
tnode->empty_children = 1ul << bits;
+ tn = tnode->kv;
tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
tn->pos = pos;
tn->bits = bits;
@@ -1077,6 +1082,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
struct trie *t = (struct trie *)tb->tb_data;
struct fib_alias *fa, *new_fa;
struct key_vector *l, *tp;
+ unsigned int nlflags = 0;
struct fib_info *fi;
u8 plen = cfg->fc_dst_len;
u8 slen = KEYLENGTH - plen;
@@ -1165,14 +1171,15 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
new_fa->fa_state = state & ~FA_S_ACCESSED;
new_fa->fa_slen = fa->fa_slen;
new_fa->tb_id = tb->tb_id;
+ new_fa->fa_default = -1;
- err = netdev_switch_fib_ipv4_add(key, plen, fi,
- new_fa->fa_tos,
- cfg->fc_type,
- cfg->fc_nlflags,
- tb->tb_id);
+ err = switchdev_fib_ipv4_add(key, plen, fi,
+ new_fa->fa_tos,
+ cfg->fc_type,
+ cfg->fc_nlflags,
+ tb->tb_id);
if (err) {
- netdev_switch_fib_ipv4_abort(fi);
+ switchdev_fib_ipv4_abort(fi);
kmem_cache_free(fn_alias_kmem, new_fa);
goto out;
}
@@ -1196,7 +1203,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
if (fa_match)
goto out;
- if (!(cfg->fc_nlflags & NLM_F_APPEND))
+ if (cfg->fc_nlflags & NLM_F_APPEND)
+ nlflags = NLM_F_APPEND;
+ else
fa = fa_first;
}
err = -ENOENT;
@@ -1214,14 +1223,13 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
new_fa->fa_state = 0;
new_fa->fa_slen = slen;
new_fa->tb_id = tb->tb_id;
+ new_fa->fa_default = -1;
/* (Optionally) offload fib entry to switch hardware. */
- err = netdev_switch_fib_ipv4_add(key, plen, fi, tos,
- cfg->fc_type,
- cfg->fc_nlflags,
- tb->tb_id);
+ err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type,
+ cfg->fc_nlflags, tb->tb_id);
if (err) {
- netdev_switch_fib_ipv4_abort(fi);
+ switchdev_fib_ipv4_abort(fi);
goto out_free_new_fa;
}
@@ -1235,12 +1243,12 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
rt_cache_flush(cfg->fc_nlinfo.nl_net);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
- &cfg->fc_nlinfo, 0);
+ &cfg->fc_nlinfo, nlflags);
succeeded:
return 0;
out_sw_fib_del:
- netdev_switch_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id);
+ switchdev_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id);
out_free_new_fa:
kmem_cache_free(fn_alias_kmem, new_fa);
out:
@@ -1406,9 +1414,15 @@ found:
continue;
for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
const struct fib_nh *nh = &fi->fib_nh[nhsel];
+ struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev);
if (nh->nh_flags & RTNH_F_DEAD)
continue;
+ if (in_dev &&
+ IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+ nh->nh_flags & RTNH_F_LINKDOWN &&
+ !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
+ continue;
if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
continue;
@@ -1518,8 +1532,8 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
if (!fa_to_delete)
return -ESRCH;
- netdev_switch_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos,
- cfg->fc_type, tb->tb_id);
+ switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos,
+ cfg->fc_type, tb->tb_id);
rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
&cfg->fc_nlinfo, 0);
@@ -1768,10 +1782,9 @@ void fib_table_flush_external(struct fib_table *tb)
if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD))
continue;
- netdev_switch_fib_ipv4_del(n->key,
- KEYLENGTH - fa->fa_slen,
- fi, fa->fa_tos,
- fa->fa_type, tb->tb_id);
+ switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen,
+ fi, fa->fa_tos, fa->fa_type,
+ tb->tb_id);
}
/* update leaf slen */
@@ -1780,8 +1793,6 @@ void fib_table_flush_external(struct fib_table *tb)
if (hlist_empty(&n->leaf)) {
put_child_root(pn, n->key, NULL);
node_free(n);
- } else {
- leaf_pull_suffix(pn, n);
}
}
}
@@ -1836,10 +1847,9 @@ int fib_table_flush(struct fib_table *tb)
continue;
}
- netdev_switch_fib_ipv4_del(n->key,
- KEYLENGTH - fa->fa_slen,
- fi, fa->fa_tos,
- fa->fa_type, tb->tb_id);
+ switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen,
+ fi, fa->fa_tos, fa->fa_type,
+ tb->tb_id);
hlist_del_rcu(&fa->fa_list);
fib_release_info(fa->fa_info);
alias_free_mem_rcu(fa);
@@ -1852,8 +1862,6 @@ int fib_table_flush(struct fib_table *tb)
if (hlist_empty(&n->leaf)) {
put_child_root(pn, n->key, NULL);
node_free(n);
- } else {
- leaf_pull_suffix(pn, n);
}
}
@@ -1980,7 +1988,6 @@ struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
return NULL;
tb->tb_id = id;
- tb->tb_default = -1;
tb->tb_num_default = 0;
tb->tb_data = (alias ? alias->__data : tb->__data);
@@ -2057,11 +2064,12 @@ static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter)
static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter,
struct trie *t)
{
- struct key_vector *n, *pn = t->kv;
+ struct key_vector *n, *pn;
if (!t)
return NULL;
+ pn = t->kv;
n = rcu_dereference(pn->tnode[0]);
if (!n)
return NULL;
@@ -2457,7 +2465,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
key = l->key + 1;
iter->pos++;
- if (pos-- <= 0)
+ if (--pos <= 0)
break;
l = NULL;
diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve_core.c
index 8986e63f3..311a4ba69 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve_core.c
@@ -60,11 +60,6 @@ struct geneve_net {
static int geneve_net_id;
-static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
-{
- return (struct genevehdr *)(udp_hdr(skb) + 1);
-}
-
static struct geneve_sock *geneve_find_sock(struct net *net,
sa_family_t family, __be16 port)
{
@@ -435,7 +430,7 @@ static int __init geneve_init_module(void)
if (rc)
return rc;
- pr_info("Geneve driver\n");
+ pr_info("Geneve core logic\n");
return 0;
}
@@ -449,5 +444,4 @@ module_exit(geneve_cleanup_module);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jesse Gross <jesse@nicira.com>");
-MODULE_DESCRIPTION("Driver for GENEVE encapsulated traffic");
-MODULE_ALIAS_RTNL_LINK("geneve");
+MODULE_DESCRIPTION("Driver library for GENEVE encapsulated traffic");
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index a3a697f5f..9fdfd9dea 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1339,6 +1339,171 @@ out:
}
EXPORT_SYMBOL(ip_mc_inc_group);
+static int ip_mc_check_iphdr(struct sk_buff *skb)
+{
+ const struct iphdr *iph;
+ unsigned int len;
+ unsigned int offset = skb_network_offset(skb) + sizeof(*iph);
+
+ if (!pskb_may_pull(skb, offset))
+ return -EINVAL;
+
+ iph = ip_hdr(skb);
+
+ if (iph->version != 4 || ip_hdrlen(skb) < sizeof(*iph))
+ return -EINVAL;
+
+ offset += ip_hdrlen(skb) - sizeof(*iph);
+
+ if (!pskb_may_pull(skb, offset))
+ return -EINVAL;
+
+ iph = ip_hdr(skb);
+
+ if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
+ return -EINVAL;
+
+ len = skb_network_offset(skb) + ntohs(iph->tot_len);
+ if (skb->len < len || len < offset)
+ return -EINVAL;
+
+ skb_set_transport_header(skb, offset);
+
+ return 0;
+}
+
+static int ip_mc_check_igmp_reportv3(struct sk_buff *skb)
+{
+ unsigned int len = skb_transport_offset(skb);
+
+ len += sizeof(struct igmpv3_report);
+
+ return pskb_may_pull(skb, len) ? 0 : -EINVAL;
+}
+
+static int ip_mc_check_igmp_query(struct sk_buff *skb)
+{
+ unsigned int len = skb_transport_offset(skb);
+
+ len += sizeof(struct igmphdr);
+ if (skb->len < len)
+ return -EINVAL;
+
+ /* IGMPv{1,2}? */
+ if (skb->len != len) {
+ /* or IGMPv3? */
+ len += sizeof(struct igmpv3_query) - sizeof(struct igmphdr);
+ if (skb->len < len || !pskb_may_pull(skb, len))
+ return -EINVAL;
+ }
+
+ /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer
+ * all-systems destination addresses (224.0.0.1) for general queries
+ */
+ if (!igmp_hdr(skb)->group &&
+ ip_hdr(skb)->daddr != htonl(INADDR_ALLHOSTS_GROUP))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ip_mc_check_igmp_msg(struct sk_buff *skb)
+{
+ switch (igmp_hdr(skb)->type) {
+ case IGMP_HOST_LEAVE_MESSAGE:
+ case IGMP_HOST_MEMBERSHIP_REPORT:
+ case IGMPV2_HOST_MEMBERSHIP_REPORT:
+ /* fall through */
+ return 0;
+ case IGMPV3_HOST_MEMBERSHIP_REPORT:
+ return ip_mc_check_igmp_reportv3(skb);
+ case IGMP_HOST_MEMBERSHIP_QUERY:
+ return ip_mc_check_igmp_query(skb);
+ default:
+ return -ENOMSG;
+ }
+}
+
+static inline __sum16 ip_mc_validate_checksum(struct sk_buff *skb)
+{
+ return skb_checksum_simple_validate(skb);
+}
+
+static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed)
+
+{
+ struct sk_buff *skb_chk;
+ unsigned int transport_len;
+ unsigned int len = skb_transport_offset(skb) + sizeof(struct igmphdr);
+ int ret = -EINVAL;
+
+ transport_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb);
+
+ skb_chk = skb_checksum_trimmed(skb, transport_len,
+ ip_mc_validate_checksum);
+ if (!skb_chk)
+ goto err;
+
+ if (!pskb_may_pull(skb_chk, len))
+ goto err;
+
+ ret = ip_mc_check_igmp_msg(skb_chk);
+ if (ret)
+ goto err;
+
+ if (skb_trimmed)
+ *skb_trimmed = skb_chk;
+ /* free now unneeded clone */
+ else if (skb_chk != skb)
+ kfree_skb(skb_chk);
+
+ ret = 0;
+
+err:
+ if (ret && skb_chk && skb_chk != skb)
+ kfree_skb(skb_chk);
+
+ return ret;
+}
+
+/**
+ * ip_mc_check_igmp - checks whether this is a sane IGMP packet
+ * @skb: the skb to validate
+ * @skb_trimmed: to store an skb pointer trimmed to IPv4 packet tail (optional)
+ *
+ * Checks whether an IPv4 packet is a valid IGMP packet. If so sets
+ * skb transport header accordingly and returns zero.
+ *
+ * -EINVAL: A broken packet was detected, i.e. it violates some internet
+ * standard
+ * -ENOMSG: IP header validation succeeded but it is not an IGMP packet.
+ * -ENOMEM: A memory allocation failure happened.
+ *
+ * Optionally, an skb pointer might be provided via skb_trimmed (or set it
+ * to NULL): After parsing an IGMP packet successfully it will point to
+ * an skb which has its tail aligned to the IP packet end. This might
+ * either be the originally provided skb or a trimmed, cloned version if
+ * the skb frame had data beyond the IP packet. A cloned skb allows us
+ * to leave the original skb and its full frame unchanged (which might be
+ * desirable for layer 2 frame jugglers).
+ *
+ * Caller needs to set the skb network header and free any returned skb if it
+ * differs from the provided skb.
+ */
+int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed)
+{
+ int ret = ip_mc_check_iphdr(skb);
+
+ if (ret < 0)
+ return ret;
+
+ if (ip_hdr(skb)->protocol != IPPROTO_IGMP)
+ return -ENOMSG;
+
+ return __ip_mc_check_igmp(skb, skb_trimmed);
+}
+EXPORT_SYMBOL(ip_mc_check_igmp);
+
/*
* Resend IGMP JOIN report; used by netdev notifier.
*/
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8976ca423..134957159 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -99,6 +99,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
struct net *net = sock_net(sk);
int smallest_size = -1, smallest_rover;
kuid_t uid = sock_i_uid(sk);
+ int attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
local_bh_disable();
if (!snum) {
@@ -106,6 +107,14 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
again:
inet_get_local_port_range(net, &low, &high);
+ if (attempt_half) {
+ int half = low + ((high - low) >> 1);
+
+ if (attempt_half == 1)
+ high = half;
+ else
+ low = half;
+ }
remaining = (high - low) + 1;
smallest_rover = rover = prandom_u32() % remaining + low;
@@ -127,11 +136,6 @@ again:
(tb->num_owners < smallest_size || smallest_size == -1)) {
smallest_size = tb->num_owners;
smallest_rover = rover;
- if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 &&
- !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
- snum = smallest_rover;
- goto tb_found;
- }
}
if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
snum = rover;
@@ -159,6 +163,11 @@ again:
snum = smallest_rover;
goto have_snum;
}
+ if (attempt_half == 1) {
+ /* OK we now try the upper half of the range */
+ attempt_half = 2;
+ goto again;
+ }
goto fail;
}
/* OK, here is the one we will use. HEAD is
@@ -584,7 +593,7 @@ static bool reqsk_queue_unlink(struct request_sock_queue *queue,
}
spin_unlock(&queue->syn_wait_lock);
- if (del_timer(&req->rsk_timer))
+ if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
reqsk_put(req);
return found;
}
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 4d32262c7..c3b1f3a0f 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -151,6 +151,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
if (nla_put_u8(skb, INET_DIAG_TCLASS,
inet6_sk(sk)->tclass) < 0)
goto errout;
+
+ if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
+ nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk)))
+ goto errout;
}
#endif
@@ -200,9 +204,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
}
#undef EXPIRES_IN_MS
- if (ext & (1 << (INET_DIAG_INFO - 1))) {
+ if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
attr = nla_reserve(skb, INET_DIAG_INFO,
- sizeof(struct tcp_info));
+ handler->idiag_info_size);
if (!attr)
goto errout;
@@ -746,7 +750,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
entry.family = sk->sk_family;
- spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+ spin_lock(&icsk->icsk_accept_queue.syn_wait_lock);
lopt = icsk->icsk_accept_queue.listen_opt;
if (!lopt || !listen_sock_qlen(lopt))
@@ -794,7 +798,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
}
out:
- spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+ spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
return err;
}
@@ -1078,14 +1082,62 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
return inet_diag_get_exact(skb, h, nlmsg_data(h));
}
+static
+int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
+{
+ const struct inet_diag_handler *handler;
+ struct nlmsghdr *nlh;
+ struct nlattr *attr;
+ struct inet_diag_msg *r;
+ void *info = NULL;
+ int err = 0;
+
+ nlh = nlmsg_put(skb, 0, 0, SOCK_DIAG_BY_FAMILY, sizeof(*r), 0);
+ if (!nlh)
+ return -ENOMEM;
+
+ r = nlmsg_data(nlh);
+ memset(r, 0, sizeof(*r));
+ inet_diag_msg_common_fill(r, sk);
+ if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_STREAM)
+ r->id.idiag_sport = inet_sk(sk)->inet_sport;
+ r->idiag_state = sk->sk_state;
+
+ if ((err = nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))) {
+ nlmsg_cancel(skb, nlh);
+ return err;
+ }
+
+ handler = inet_diag_lock_handler(sk->sk_protocol);
+ if (IS_ERR(handler)) {
+ inet_diag_unlock_handler(handler);
+ nlmsg_cancel(skb, nlh);
+ return PTR_ERR(handler);
+ }
+
+ attr = handler->idiag_info_size
+ ? nla_reserve(skb, INET_DIAG_INFO, handler->idiag_info_size)
+ : NULL;
+ if (attr)
+ info = nla_data(attr);
+
+ handler->idiag_get_info(sk, r, info);
+ inet_diag_unlock_handler(handler);
+
+ nlmsg_end(skb, nlh);
+ return 0;
+}
+
static const struct sock_diag_handler inet_diag_handler = {
.family = AF_INET,
.dump = inet_diag_handler_dump,
+ .get_info = inet_diag_handler_get_info,
};
static const struct sock_diag_handler inet6_diag_handler = {
.family = AF_INET6,
.dump = inet_diag_handler_dump,
+ .get_info = inet_diag_handler_get_info,
};
int inet_diag_register(const struct inet_diag_handler *h)
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 5e346a082..d0a7c0319 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -131,34 +131,22 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
unsigned int evicted = 0;
HLIST_HEAD(expired);
-evict_again:
spin_lock(&hb->chain_lock);
hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
if (!inet_fragq_should_evict(fq))
continue;
- if (!del_timer(&fq->timer)) {
- /* q expiring right now thus increment its refcount so
- * it won't be freed under us and wait until the timer
- * has finished executing then destroy it
- */
- atomic_inc(&fq->refcnt);
- spin_unlock(&hb->chain_lock);
- del_timer_sync(&fq->timer);
- inet_frag_put(fq, f);
- goto evict_again;
- }
+ if (!del_timer(&fq->timer))
+ continue;
- fq->flags |= INET_FRAG_EVICTED;
- hlist_del(&fq->list);
- hlist_add_head(&fq->list, &expired);
+ hlist_add_head(&fq->list_evictor, &expired);
++evicted;
}
spin_unlock(&hb->chain_lock);
- hlist_for_each_entry_safe(fq, n, &expired, list)
+ hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
f->frag_expire((unsigned long) fq);
return evicted;
@@ -240,18 +228,20 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
int i;
nf->low_thresh = 0;
- local_bh_disable();
evict_again:
+ local_bh_disable();
seq = read_seqbegin(&f->rnd_seqlock);
for (i = 0; i < INETFRAGS_HASHSZ ; i++)
inet_evict_bucket(f, &f->hash[i]);
- if (read_seqretry(&f->rnd_seqlock, seq))
- goto evict_again;
-
local_bh_enable();
+ cond_resched();
+
+ if (read_seqretry(&f->rnd_seqlock, seq) ||
+ percpu_counter_sum(&nf->mem))
+ goto evict_again;
percpu_counter_destroy(&nf->mem);
}
@@ -284,8 +274,8 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
struct inet_frag_bucket *hb;
hb = get_frag_bucket_locked(fq, f);
- if (!(fq->flags & INET_FRAG_EVICTED))
- hlist_del(&fq->list);
+ hlist_del(&fq->list);
+ fq->flags |= INET_FRAG_COMPLETE;
spin_unlock(&hb->chain_lock);
}
@@ -297,7 +287,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
if (!(fq->flags & INET_FRAG_COMPLETE)) {
fq_unlink(fq, f);
atomic_dec(&fq->refcnt);
- fq->flags |= INET_FRAG_COMPLETE;
}
}
EXPORT_SYMBOL(inet_frag_kill);
@@ -330,11 +319,12 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
fp = xp;
}
sum = sum_truesize + f->qsize;
- sub_frag_mem_limit(q, sum);
if (f->destructor)
f->destructor(q);
kmem_cache_free(f->frags_cachep, q);
+
+ sub_frag_mem_limit(nf, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);
@@ -390,7 +380,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
q->net = nf;
f->constructor(q, arg);
- add_frag_mem_limit(q, f->qsize);
+ add_frag_mem_limit(nf, f->qsize);
setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
spin_lock_init(&q->lock);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index c6fb80bd5..0cb916542 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -18,6 +18,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/wait.h>
+#include <linux/vmalloc.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
@@ -90,10 +91,6 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
const unsigned short snum)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-
- atomic_inc(&hashinfo->bsockets);
-
inet_sk(sk)->inet_num = snum;
sk_add_bind_node(sk, &tb->owners);
tb->num_owners++;
@@ -111,8 +108,6 @@ static void __inet_put_port(struct sock *sk)
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
struct inet_bind_bucket *tb;
- atomic_dec(&hashinfo->bsockets);
-
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
__sk_del_bind_node(sk);
@@ -399,9 +394,10 @@ not_unique:
return -EADDRNOTAVAIL;
}
-static inline u32 inet_sk_port_offset(const struct sock *sk)
+static u32 inet_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
+
return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr,
inet->inet_daddr,
inet->inet_dport);
@@ -507,8 +503,14 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
+ /* By starting with offset being an even number,
+ * we tend to leave about 50% of ports for other uses,
+ * like bind(0).
+ */
+ offset &= ~1;
+
local_bh_disable();
- for (i = 1; i <= remaining; i++) {
+ for (i = 0; i < remaining; i++) {
port = low + (i + offset) % remaining;
if (inet_is_local_reserved_port(net, port))
continue;
@@ -552,7 +554,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
return -EADDRNOTAVAIL;
ok:
- hint += i;
+ hint += (i + 2) & ~1;
/* Head lock still held and bh's disabled */
inet_bind_hash(sk, tb, port);
@@ -599,7 +601,11 @@ out:
int inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
- return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),
+ u32 port_offset = 0;
+
+ if (!inet_sk(sk)->inet_num)
+ port_offset = inet_sk_port_offset(sk);
+ return __inet_hash_connect(death_row, sk, port_offset,
__inet_check_established);
}
EXPORT_SYMBOL_GPL(inet_hash_connect);
@@ -608,7 +614,6 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
{
int i;
- atomic_set(&h->bsockets, 0);
for (i = 0; i < INET_LHTABLE_SIZE; i++) {
spin_lock_init(&h->listening_hash[i].lock);
INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head,
@@ -616,3 +621,32 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
}
}
EXPORT_SYMBOL_GPL(inet_hashinfo_init);
+
+int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
+{
+ unsigned int locksz = sizeof(spinlock_t);
+ unsigned int i, nblocks = 1;
+
+ if (locksz != 0) {
+ /* allocate 2 cache lines or at least one spinlock per cpu */
+ nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U);
+ nblocks = roundup_pow_of_two(nblocks * num_possible_cpus());
+
+ /* no more locks than number of hash buckets */
+ nblocks = min(nblocks, hashinfo->ehash_mask + 1);
+
+ hashinfo->ehash_locks = kmalloc_array(nblocks, locksz,
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!hashinfo->ehash_locks)
+ hashinfo->ehash_locks = vmalloc(nblocks * locksz);
+
+ if (!hashinfo->ehash_locks)
+ return -ENOMEM;
+
+ for (i = 0; i < nblocks; i++)
+ spin_lock_init(&hashinfo->ehash_locks[i]);
+ }
+ hashinfo->ehash_locks_mask = nblocks - 1;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 00ec8d5d7..2ffbd16b7 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -170,7 +170,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
}
EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
-void tw_timer_handler(unsigned long data)
+static void tw_timer_handler(unsigned long data)
{
struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data;
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 367448494..2d3aa408f 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -39,17 +39,21 @@
#include <net/route.h>
#include <net/xfrm.h>
-static bool ip_may_fragment(const struct sk_buff *skb)
-{
- return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
- skb->ignore_df;
-}
-
static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
if (skb->len <= mtu)
return false;
+ if (unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0))
+ return false;
+
+ /* original fragment exceeds mtu and DF is set */
+ if (unlikely(IPCB(skb)->frag_max_size > mtu))
+ return true;
+
+ if (skb->ignore_df)
+ return false;
+
if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
return false;
@@ -114,7 +118,7 @@ int ip_forward(struct sk_buff *skb)
IPCB(skb)->flags |= IPSKB_FORWARDED;
mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
- if (!ip_may_fragment(skb) && ip_exceeds_mtu(skb, mtu)) {
+ if (ip_exceeds_mtu(skb, mtu)) {
IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cc1da6d9c..921138f6c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -75,6 +75,7 @@ struct ipq {
__be16 id;
u8 protocol;
u8 ecn; /* RFC3168 support */
+ u16 max_df_size; /* largest frag with DF set seen */
int iif;
unsigned int rid;
struct inet_peer *peer;
@@ -173,6 +174,15 @@ static void ipq_kill(struct ipq *ipq)
inet_frag_kill(&ipq->q, &ip4_frags);
}
+static bool frag_expire_skip_icmp(u32 user)
+{
+ return user == IP_DEFRAG_AF_PACKET ||
+ ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_IN,
+ __IP_DEFRAG_CONNTRACK_IN_END) ||
+ ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_BRIDGE_IN,
+ __IP_DEFRAG_CONNTRACK_BRIDGE_IN);
+}
+
/*
* Oops, a fragment queue timed out. Kill it and send an ICMP reply.
*/
@@ -192,7 +202,7 @@ static void ip_expire(unsigned long arg)
ipq_kill(qp);
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
- if (!(qp->q.flags & INET_FRAG_EVICTED)) {
+ if (!inet_frag_evicting(&qp->q)) {
struct sk_buff *head = qp->q.fragments;
const struct iphdr *iph;
int err;
@@ -217,10 +227,8 @@ static void ip_expire(unsigned long arg)
/* Only an end host needs to send an ICMP
* "Fragment Reassembly Timeout" message, per RFC792.
*/
- if (qp->user == IP_DEFRAG_AF_PACKET ||
- ((qp->user >= IP_DEFRAG_CONNTRACK_IN) &&
- (qp->user <= __IP_DEFRAG_CONNTRACK_IN_END) &&
- (skb_rtable(head)->rt_type != RTN_LOCAL)))
+ if (frag_expire_skip_icmp(qp->user) &&
+ (skb_rtable(head)->rt_type != RTN_LOCAL))
goto out_rcu_unlock;
/* Send an ICMP "Fragment Reassembly Timeout" message. */
@@ -301,7 +309,7 @@ static int ip_frag_reinit(struct ipq *qp)
kfree_skb(fp);
fp = xp;
} while (fp);
- sub_frag_mem_limit(&qp->q, sum_truesize);
+ sub_frag_mem_limit(qp->q.net, sum_truesize);
qp->q.flags = 0;
qp->q.len = 0;
@@ -319,6 +327,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
struct sk_buff *prev, *next;
struct net_device *dev;
+ unsigned int fragsize;
int flags, offset;
int ihl, end;
int err = -ENOENT;
@@ -342,7 +351,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
ihl = ip_hdrlen(skb);
/* Determine the position of this fragment. */
- end = offset + skb->len - ihl;
+ end = offset + skb->len - skb_network_offset(skb) - ihl;
err = -EINVAL;
/* Is this the final fragment? */
@@ -372,7 +381,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
goto err;
err = -ENOMEM;
- if (!pskb_pull(skb, ihl))
+ if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
goto err;
err = pskb_trim_rcsum(skb, end - offset);
@@ -446,7 +455,7 @@ found:
qp->q.fragments = next;
qp->q.meat -= free_it->len;
- sub_frag_mem_limit(&qp->q, free_it->truesize);
+ sub_frag_mem_limit(qp->q.net, free_it->truesize);
kfree_skb(free_it);
}
}
@@ -470,13 +479,18 @@ found:
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
qp->ecn |= ecn;
- add_frag_mem_limit(&qp->q, skb->truesize);
+ add_frag_mem_limit(qp->q.net, skb->truesize);
if (offset == 0)
qp->q.flags |= INET_FRAG_FIRST_IN;
+ fragsize = skb->len + ihl;
+
+ if (fragsize > qp->q.max_size)
+ qp->q.max_size = fragsize;
+
if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
- skb->len + ihl > qp->q.max_size)
- qp->q.max_size = skb->len + ihl;
+ fragsize > qp->max_df_size)
+ qp->max_df_size = fragsize;
if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
qp->q.meat == qp->q.len) {
@@ -573,7 +587,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- add_frag_mem_limit(&qp->q, clone->truesize);
+ add_frag_mem_limit(qp->q.net, clone->truesize);
}
skb_push(head, head->data - skb_network_header(head));
@@ -601,18 +615,34 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
}
fp = next;
}
- sub_frag_mem_limit(&qp->q, sum_truesize);
+ sub_frag_mem_limit(qp->q.net, sum_truesize);
head->next = NULL;
head->dev = dev;
head->tstamp = qp->q.stamp;
- IPCB(head)->frag_max_size = qp->q.max_size;
+ IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
iph = ip_hdr(head);
- /* max_size != 0 implies at least one fragment had IP_DF set */
- iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0;
iph->tot_len = htons(len);
iph->tos |= ecn;
+
+ /* When we set IP_DF on a refragmented skb we must also force a
+ * call to ip_fragment to avoid forwarding a DF-skb of size s while
+ * original sender only sent fragments of size f (where f < s).
+ *
+ * We only set DF/IPSKB_FRAG_PMTU if such DF fragment was the largest
+ * frag seen to avoid sending tiny DF-fragments in case skb was built
+ * from one very small df-fragment and one large non-df frag.
+ */
+ if (qp->max_df_size == qp->q.max_size) {
+ IPCB(head)->flags |= IPSKB_FRAG_PMTU;
+ iph->frag_off = htons(IP_DF);
+ } else {
+ iph->frag_off = 0;
+ }
+
+ ip_send_check(iph);
+
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
qp->q.fragments = NULL;
qp->q.fragments_tail = NULL;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c65b93a7b..6bf89a631 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -83,6 +83,10 @@
int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
EXPORT_SYMBOL(sysctl_ip_default_ttl);
+static int ip_fragment(struct sock *sk, struct sk_buff *skb,
+ unsigned int mtu,
+ int (*output)(struct sock *, struct sk_buff *));
+
/* Generate a checksum for an outgoing IP datagram. */
void ip_send_check(struct iphdr *iph)
{
@@ -91,7 +95,7 @@ void ip_send_check(struct iphdr *iph)
}
EXPORT_SYMBOL(ip_send_check);
-int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
+static int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
{
struct iphdr *iph = ip_hdr(skb);
@@ -168,7 +172,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
}
EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
-static inline int ip_finish_output2(struct sock *sk, struct sk_buff *skb)
+static int ip_finish_output2(struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct rtable *rt = (struct rtable *)dst;
@@ -216,7 +220,8 @@ static inline int ip_finish_output2(struct sock *sk, struct sk_buff *skb)
return -EINVAL;
}
-static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb)
+static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb,
+ unsigned int mtu)
{
netdev_features_t features;
struct sk_buff *segs;
@@ -224,7 +229,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb)
/* common case: locally created skb or seglen is <= mtu */
if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
- skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb))
+ skb_gso_network_seglen(skb) <= mtu)
return ip_finish_output2(sk, skb);
/* Slowpath - GSO segment length is exceeding the dst MTU.
@@ -248,7 +253,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb)
int err;
segs->next = NULL;
- err = ip_fragment(sk, segs, ip_finish_output2);
+ err = ip_fragment(sk, segs, mtu, ip_finish_output2);
if (err && ret == 0)
ret = err;
@@ -260,6 +265,8 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb)
static int ip_finish_output(struct sock *sk, struct sk_buff *skb)
{
+ unsigned int mtu;
+
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
if (skb_dst(skb)->xfrm) {
@@ -267,11 +274,12 @@ static int ip_finish_output(struct sock *sk, struct sk_buff *skb)
return dst_output_sk(sk, skb);
}
#endif
+ mtu = ip_skb_dst_mtu(skb);
if (skb_is_gso(skb))
- return ip_finish_output_gso(sk, skb);
+ return ip_finish_output_gso(sk, skb, mtu);
- if (skb->len > ip_skb_dst_mtu(skb))
- return ip_fragment(sk, skb, ip_finish_output2);
+ if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU))
+ return ip_fragment(sk, skb, mtu, ip_finish_output2);
return ip_finish_output2(sk, skb);
}
@@ -478,6 +486,31 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from);
}
+static int ip_fragment(struct sock *sk, struct sk_buff *skb,
+ unsigned int mtu,
+ int (*output)(struct sock *, struct sk_buff *))
+{
+ struct iphdr *iph = ip_hdr(skb);
+
+ if ((iph->frag_off & htons(IP_DF)) == 0)
+ return ip_do_fragment(sk, skb, output);
+
+ if (unlikely(!skb->ignore_df ||
+ (IPCB(skb)->frag_max_size &&
+ IPCB(skb)->frag_max_size > mtu))) {
+ struct rtable *rt = skb_rtable(skb);
+ struct net_device *dev = rt->dst.dev;
+
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ return ip_do_fragment(sk, skb, output);
+}
+
/*
* This IP datagram is too large to be sent in one piece. Break it up into
* smaller pieces (each of size equal to IP header plus
@@ -485,8 +518,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
* single device frame, and queue such a frame for sending.
*/
-int ip_fragment(struct sock *sk, struct sk_buff *skb,
- int (*output)(struct sock *, struct sk_buff *))
+int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct sock *, struct sk_buff *))
{
struct iphdr *iph;
int ptr;
@@ -507,15 +540,8 @@ int ip_fragment(struct sock *sk, struct sk_buff *skb,
iph = ip_hdr(skb);
mtu = ip_skb_dst_mtu(skb);
- if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
- (IPCB(skb)->frag_max_size &&
- IPCB(skb)->frag_max_size > mtu))) {
- IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(mtu));
- kfree_skb(skb);
- return -EMSGSIZE;
- }
+ if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu)
+ mtu = IPCB(skb)->frag_max_size;
/*
* Setup starting values.
@@ -523,10 +549,6 @@ int ip_fragment(struct sock *sk, struct sk_buff *skb,
hlen = iph->ihl * 4;
mtu = mtu - hlen; /* Size of data space */
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (skb->nf_bridge)
- mtu -= nf_bridge_mtu_reduction(skb);
-#endif
IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
/* When frag_list is given, use it. First, check its validity:
@@ -711,6 +733,9 @@ slow_path:
iph = ip_hdr(skb2);
iph->frag_off = htons((offset >> 3));
+ if (IPCB(skb)->flags & IPSKB_FRAG_PMTU)
+ iph->frag_off |= htons(IP_DF);
+
/* ANK: dirty, but effective trick. Upgrade options only if
* the segment to be fragmented was THE FIRST (otherwise,
* options are already fixed) and make it ONCE
@@ -751,7 +776,7 @@ fail:
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
return err;
}
-EXPORT_SYMBOL(ip_fragment);
+EXPORT_SYMBOL(ip_do_fragment);
int
ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
@@ -1217,11 +1242,9 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
}
while (size > 0) {
- int i;
-
- if (skb_is_gso(skb))
+ if (skb_is_gso(skb)) {
len = size;
- else {
+ } else {
/* Check if the remaining data fits into current packet. */
len = mtu - skb->len;
@@ -1273,15 +1296,10 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
continue;
}
- i = skb_shinfo(skb)->nr_frags;
if (len > size)
len = size;
- if (skb_can_coalesce(skb, i, page, offset)) {
- skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
- } else if (i < MAX_SKB_FRAGS) {
- get_page(page);
- skb_fill_page_desc(skb, i, page, offset, len);
- } else {
+
+ if (skb_append_pagefrags(skb, page, offset, len)) {
err = -EMSGSIZE;
goto error;
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 6ddde8999..c3c359ad6 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -591,6 +591,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
case IP_TRANSPARENT:
case IP_MINTTL:
case IP_NODEFRAG:
+ case IP_BIND_ADDRESS_NO_PORT:
case IP_UNICAST_IF:
case IP_MULTICAST_TTL:
case IP_MULTICAST_ALL:
@@ -741,6 +742,9 @@ static int do_ip_setsockopt(struct sock *sk, int level,
}
inet->nodefrag = val ? 1 : 0;
break;
+ case IP_BIND_ADDRESS_NO_PORT:
+ inet->bind_address_no_port = val ? 1 : 0;
+ break;
case IP_MTU_DISCOVER:
if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
goto e_inval;
@@ -1333,6 +1337,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_NODEFRAG:
val = inet->nodefrag;
break;
+ case IP_BIND_ADDRESS_NO_PORT:
+ val = inet->bind_address_no_port;
+ break;
case IP_MTU_DISCOVER:
val = inet->pmtudisc;
break;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 4c2c3ba4b..626d9e56a 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -586,7 +586,8 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
EXPORT_SYMBOL(ip_tunnel_encap);
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
- struct rtable *rt, __be16 df)
+ struct rtable *rt, __be16 df,
+ const struct iphdr *inner_iph)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
@@ -603,7 +604,8 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
if (skb->protocol == htons(ETH_P_IP)) {
if (!skb_is_gso(skb) &&
- (df & htons(IP_DF)) && mtu < pkt_size) {
+ (inner_iph->frag_off & htons(IP_DF)) &&
+ mtu < pkt_size) {
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
return -E2BIG;
@@ -737,7 +739,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
- if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
+ if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
ip_rt_put(rt);
goto tx_error;
}
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index ce63ab21b..6a51a71a6 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -98,7 +98,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
return -ENOMEM;
eh = (struct ethhdr *)skb->data;
- if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN))
+ if (likely(eth_proto_is_802_3(eh->h_proto)))
skb->protocol = eh->h_proto;
else
skb->protocol = htons(ETH_P_802_2);
@@ -165,6 +165,8 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
{
int i;
+ netdev_stats_to_stats64(tot, &dev->stats);
+
for_each_possible_cpu(i) {
const struct pcpu_sw_netstats *tstats =
per_cpu_ptr(dev->tstats, i);
@@ -185,22 +187,6 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
tot->tx_bytes += tx_bytes;
}
- tot->multicast = dev->stats.multicast;
-
- tot->rx_crc_errors = dev->stats.rx_crc_errors;
- tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
- tot->rx_length_errors = dev->stats.rx_length_errors;
- tot->rx_frame_errors = dev->stats.rx_frame_errors;
- tot->rx_errors = dev->stats.rx_errors;
-
- tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
- tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
- tot->tx_dropped = dev->stats.tx_dropped;
- tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
- tot->tx_errors = dev->stats.tx_errors;
-
- tot->collisions = dev->stats.collisions;
-
return tot;
}
EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index ff96396eb..254238daf 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -251,7 +251,8 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return -EINVAL;
}
- p.i_key = p.o_key = p.i_flags = p.o_flags = 0;
+ p.i_key = p.o_key = 0;
+ p.i_flags = p.o_flags = 0;
if (p.iph.ttl)
p.iph.frag_off |= htons(IP_DF);
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 65de0684e..61eafc9b4 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -197,11 +197,4 @@ static int __init ipv4_netfilter_init(void)
{
return nf_register_afinfo(&nf_ip_afinfo);
}
-
-static void __exit ipv4_netfilter_fini(void)
-{
- nf_unregister_afinfo(&nf_ip_afinfo);
-}
-
-module_init(ipv4_netfilter_init);
-module_exit(ipv4_netfilter_fini);
+subsys_initcall(ipv4_netfilter_init);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index fb20f3631..2199a5db2 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -195,7 +195,8 @@ config IP_NF_MATCH_ECN
config IP_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
- depends on NETFILTER_ADVANCED && (IP_NF_MANGLE || IP_NF_RAW)
+ depends on NETFILTER_ADVANCED
+ depends on IP_NF_MANGLE || IP_NF_RAW
---help---
This option allows you to match packets whose replies would
go out via the interface the packet came in.
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index a61200754..92305a1a0 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -254,9 +254,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
unsigned int verdict = NF_DROP;
const struct arphdr *arp;
- struct arpt_entry *e, *back;
+ struct arpt_entry *e, **jumpstack;
const char *indev, *outdev;
- void *table_base;
+ const void *table_base;
+ unsigned int cpu, stackidx = 0;
const struct xt_table_info *private;
struct xt_action_param acpar;
unsigned int addend;
@@ -270,15 +271,16 @@ unsigned int arpt_do_table(struct sk_buff *skb,
local_bh_disable();
addend = xt_write_recseq_begin();
private = table->private;
+ cpu = smp_processor_id();
/*
* Ensure we load private-> members after we've fetched the base
* pointer.
*/
smp_read_barrier_depends();
- table_base = private->entries[smp_processor_id()];
+ table_base = private->entries;
+ jumpstack = (struct arpt_entry **)private->jumpstack[cpu];
e = get_entry(table_base, private->hook_entry[hook]);
- back = get_entry(table_base, private->underflow[hook]);
acpar.in = state->in;
acpar.out = state->out;
@@ -289,13 +291,15 @@ unsigned int arpt_do_table(struct sk_buff *skb,
arp = arp_hdr(skb);
do {
const struct xt_entry_target *t;
+ struct xt_counters *counter;
if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
e = arpt_next_entry(e);
continue;
}
- ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1);
+ counter = xt_get_this_cpu_counter(&e->counters);
+ ADD_COUNTER(*counter, arp_hdr_len(skb->dev), 1);
t = arpt_get_target_c(e);
@@ -310,18 +314,23 @@ unsigned int arpt_do_table(struct sk_buff *skb,
verdict = (unsigned int)(-v) - 1;
break;
}
- e = back;
- back = get_entry(table_base, back->comefrom);
+ if (stackidx == 0) {
+ e = get_entry(table_base,
+ private->underflow[hook]);
+ } else {
+ e = jumpstack[--stackidx];
+ e = arpt_next_entry(e);
+ }
continue;
}
if (table_base + v
!= arpt_next_entry(e)) {
- /* Save old back ptr in next entry */
- struct arpt_entry *next = arpt_next_entry(e);
- next->comefrom = (void *)back - table_base;
- /* set back pointer to next entry */
- back = next;
+ if (stackidx >= private->stacksize) {
+ verdict = NF_DROP;
+ break;
+ }
+ jumpstack[stackidx++] = e;
}
e = get_entry(table_base, v);
@@ -521,6 +530,10 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
if (ret)
return ret;
+ e->counters.pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(e->counters.pcnt))
+ return -ENOMEM;
+
t = arpt_get_target(e);
target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
t->u.user.revision);
@@ -538,6 +551,8 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
err:
module_put(t->u.kernel.target->me);
out:
+ xt_percpu_counter_free(e->counters.pcnt);
+
return ret;
}
@@ -614,6 +629,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
if (par.target->destroy != NULL)
par.target->destroy(&par);
module_put(par.target->me);
+ xt_percpu_counter_free(e->counters.pcnt);
}
/* Checks and translates the user-supplied table segment (held in
@@ -702,12 +718,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
return ret;
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i) {
- if (newinfo->entries[i] && newinfo->entries[i] != entry0)
- memcpy(newinfo->entries[i], entry0, newinfo->size);
- }
-
return ret;
}
@@ -722,14 +732,16 @@ static void get_counters(const struct xt_table_info *t,
seqcount_t *s = &per_cpu(xt_recseq, cpu);
i = 0;
- xt_entry_foreach(iter, t->entries[cpu], t->size) {
+ xt_entry_foreach(iter, t->entries, t->size) {
+ struct xt_counters *tmp;
u64 bcnt, pcnt;
unsigned int start;
+ tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
do {
start = read_seqcount_begin(s);
- bcnt = iter->counters.bcnt;
- pcnt = iter->counters.pcnt;
+ bcnt = tmp->bcnt;
+ pcnt = tmp->pcnt;
} while (read_seqcount_retry(s, start));
ADD_COUNTER(counters[i], bcnt, pcnt);
@@ -774,7 +786,7 @@ static int copy_entries_to_user(unsigned int total_size,
if (IS_ERR(counters))
return PTR_ERR(counters);
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ loc_cpu_entry = private->entries;
/* ... then copy entire thing ... */
if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
ret = -EFAULT;
@@ -863,16 +875,16 @@ static int compat_table_info(const struct xt_table_info *info,
struct xt_table_info *newinfo)
{
struct arpt_entry *iter;
- void *loc_cpu_entry;
+ const void *loc_cpu_entry;
int ret;
if (!newinfo || !info)
return -EINVAL;
- /* we dont care about newinfo->entries[] */
+ /* we dont care about newinfo->entries */
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
- loc_cpu_entry = info->entries[raw_smp_processor_id()];
+ loc_cpu_entry = info->entries;
xt_compat_init_offsets(NFPROTO_ARP, info->number);
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
@@ -1037,7 +1049,7 @@ static int __do_replace(struct net *net, const char *name,
get_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
- loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+ loc_cpu_old_entry = oldinfo->entries;
xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
cleanup_entry(iter);
@@ -1084,8 +1096,7 @@ static int do_replace(struct net *net, const void __user *user,
if (!newinfo)
return -ENOMEM;
- /* choose the copy that is on our node/cpu */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
tmp.size) != 0) {
ret = -EFAULT;
@@ -1115,7 +1126,7 @@ static int do_replace(struct net *net, const void __user *user,
static int do_add_counters(struct net *net, const void __user *user,
unsigned int len, int compat)
{
- unsigned int i, curcpu;
+ unsigned int i;
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
@@ -1125,7 +1136,6 @@ static int do_add_counters(struct net *net, const void __user *user,
struct xt_table *t;
const struct xt_table_info *private;
int ret = 0;
- void *loc_cpu_entry;
struct arpt_entry *iter;
unsigned int addend;
#ifdef CONFIG_COMPAT
@@ -1181,12 +1191,13 @@ static int do_add_counters(struct net *net, const void __user *user,
}
i = 0;
- /* Choose the copy that is on our node */
- curcpu = smp_processor_id();
- loc_cpu_entry = private->entries[curcpu];
+
addend = xt_write_recseq_begin();
- xt_entry_foreach(iter, loc_cpu_entry, private->size) {
- ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
+ xt_entry_foreach(iter, private->entries, private->size) {
+ struct xt_counters *tmp;
+
+ tmp = xt_get_this_cpu_counter(&iter->counters);
+ ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
++i;
}
xt_write_recseq_end(addend);
@@ -1396,7 +1407,7 @@ static int translate_compat_table(const char *name,
newinfo->hook_entry[i] = info->hook_entry[i];
newinfo->underflow[i] = info->underflow[i];
}
- entry1 = newinfo->entries[raw_smp_processor_id()];
+ entry1 = newinfo->entries;
pos = entry1;
size = total_size;
xt_entry_foreach(iter0, entry0, total_size) {
@@ -1416,9 +1427,17 @@ static int translate_compat_table(const char *name,
i = 0;
xt_entry_foreach(iter1, entry1, newinfo->size) {
+ iter1->counters.pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(iter1->counters.pcnt)) {
+ ret = -ENOMEM;
+ break;
+ }
+
ret = check_target(iter1, name);
- if (ret != 0)
+ if (ret != 0) {
+ xt_percpu_counter_free(iter1->counters.pcnt);
break;
+ }
++i;
if (strcmp(arpt_get_target(iter1)->u.user.name,
XT_ERROR_TARGET) == 0)
@@ -1448,11 +1467,6 @@ static int translate_compat_table(const char *name,
return ret;
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i)
- if (newinfo->entries[i] && newinfo->entries[i] != entry1)
- memcpy(newinfo->entries[i], entry1, newinfo->size);
-
*pinfo = newinfo;
*pentry0 = entry1;
xt_free_table_info(info);
@@ -1511,8 +1525,7 @@ static int compat_do_replace(struct net *net, void __user *user,
if (!newinfo)
return -ENOMEM;
- /* choose the copy that is on our node/cpu */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) {
ret = -EFAULT;
goto free_newinfo;
@@ -1609,7 +1622,6 @@ static int compat_copy_entries_to_user(unsigned int total_size,
void __user *pos;
unsigned int size;
int ret = 0;
- void *loc_cpu_entry;
unsigned int i = 0;
struct arpt_entry *iter;
@@ -1617,11 +1629,9 @@ static int compat_copy_entries_to_user(unsigned int total_size,
if (IS_ERR(counters))
return PTR_ERR(counters);
- /* choose the copy on our node/cpu */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
pos = userptr;
size = total_size;
- xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+ xt_entry_foreach(iter, private->entries, total_size) {
ret = compat_copy_entry_to_user(iter, &pos,
&size, counters, i++);
if (ret != 0)
@@ -1790,8 +1800,7 @@ struct xt_table *arpt_register_table(struct net *net,
goto out;
}
- /* choose the copy on our node/cpu */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
memcpy(loc_cpu_entry, repl->entries, repl->size);
ret = translate_table(newinfo, loc_cpu_entry, repl);
@@ -1822,7 +1831,7 @@ void arpt_unregister_table(struct xt_table *table)
private = xt_unregister_table(table);
/* Decrease module usage counts and free resources */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ loc_cpu_entry = private->entries;
xt_entry_foreach(iter, loc_cpu_entry, private->size)
cleanup_entry(iter);
if (private->number > private->initial_entries)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2d0e265fe..6c72fbb7b 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -254,15 +254,13 @@ static void trace_packet(const struct sk_buff *skb,
const struct xt_table_info *private,
const struct ipt_entry *e)
{
- const void *table_base;
const struct ipt_entry *root;
const char *hookname, *chainname, *comment;
const struct ipt_entry *iter;
unsigned int rulenum = 0;
struct net *net = dev_net(in ? in : out);
- table_base = private->entries[smp_processor_id()];
- root = get_entry(table_base, private->hook_entry[hook]);
+ root = get_entry(private->entries, private->hook_entry[hook]);
hookname = chainname = hooknames[hook];
comment = comments[NF_IP_TRACE_COMMENT_RULE];
@@ -331,7 +329,7 @@ ipt_do_table(struct sk_buff *skb,
* pointer.
*/
smp_read_barrier_depends();
- table_base = private->entries[cpu];
+ table_base = private->entries;
jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
stackptr = per_cpu_ptr(private->stackptr, cpu);
origptr = *stackptr;
@@ -345,6 +343,7 @@ ipt_do_table(struct sk_buff *skb,
do {
const struct xt_entry_target *t;
const struct xt_entry_match *ematch;
+ struct xt_counters *counter;
IP_NF_ASSERT(e);
if (!ip_packet_match(ip, indev, outdev,
@@ -361,7 +360,8 @@ ipt_do_table(struct sk_buff *skb,
goto no_match;
}
- ADD_COUNTER(e->counters, skb->len, 1);
+ counter = xt_get_this_cpu_counter(&e->counters);
+ ADD_COUNTER(*counter, skb->len, 1);
t = ipt_get_target(e);
IP_NF_ASSERT(t->u.kernel.target);
@@ -665,6 +665,10 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
if (ret)
return ret;
+ e->counters.pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(e->counters.pcnt))
+ return -ENOMEM;
+
j = 0;
mtpar.net = net;
mtpar.table = name;
@@ -691,6 +695,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
ret = check_target(e, net, name);
if (ret)
goto err;
+
return 0;
err:
module_put(t->u.kernel.target->me);
@@ -700,6 +705,9 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
break;
cleanup_match(ematch, net);
}
+
+ xt_percpu_counter_free(e->counters.pcnt);
+
return ret;
}
@@ -784,6 +792,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net)
if (par.target->destroy != NULL)
par.target->destroy(&par);
module_put(par.target->me);
+ xt_percpu_counter_free(e->counters.pcnt);
}
/* Checks and translates the user-supplied table segment (held in
@@ -866,12 +875,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
return ret;
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i) {
- if (newinfo->entries[i] && newinfo->entries[i] != entry0)
- memcpy(newinfo->entries[i], entry0, newinfo->size);
- }
-
return ret;
}
@@ -887,14 +890,16 @@ get_counters(const struct xt_table_info *t,
seqcount_t *s = &per_cpu(xt_recseq, cpu);
i = 0;
- xt_entry_foreach(iter, t->entries[cpu], t->size) {
+ xt_entry_foreach(iter, t->entries, t->size) {
+ struct xt_counters *tmp;
u64 bcnt, pcnt;
unsigned int start;
+ tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
do {
start = read_seqcount_begin(s);
- bcnt = iter->counters.bcnt;
- pcnt = iter->counters.pcnt;
+ bcnt = tmp->bcnt;
+ pcnt = tmp->pcnt;
} while (read_seqcount_retry(s, start));
ADD_COUNTER(counters[i], bcnt, pcnt);
@@ -939,11 +944,7 @@ copy_entries_to_user(unsigned int total_size,
if (IS_ERR(counters))
return PTR_ERR(counters);
- /* choose the copy that is on our node/cpu, ...
- * This choice is lazy (because current thread is
- * allowed to migrate to another cpu)
- */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ loc_cpu_entry = private->entries;
if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
ret = -EFAULT;
goto free_counters;
@@ -1051,16 +1052,16 @@ static int compat_table_info(const struct xt_table_info *info,
struct xt_table_info *newinfo)
{
struct ipt_entry *iter;
- void *loc_cpu_entry;
+ const void *loc_cpu_entry;
int ret;
if (!newinfo || !info)
return -EINVAL;
- /* we dont care about newinfo->entries[] */
+ /* we dont care about newinfo->entries */
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
- loc_cpu_entry = info->entries[raw_smp_processor_id()];
+ loc_cpu_entry = info->entries;
xt_compat_init_offsets(AF_INET, info->number);
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
@@ -1181,7 +1182,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct xt_table *t;
struct xt_table_info *oldinfo;
struct xt_counters *counters;
- void *loc_cpu_old_entry;
struct ipt_entry *iter;
ret = 0;
@@ -1224,8 +1224,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
get_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
- loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
- xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
+ xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
cleanup_entry(iter, net);
xt_free_table_info(oldinfo);
@@ -1271,8 +1270,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
if (!newinfo)
return -ENOMEM;
- /* choose the copy that is on our node/cpu */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
tmp.size) != 0) {
ret = -EFAULT;
@@ -1303,7 +1301,7 @@ static int
do_add_counters(struct net *net, const void __user *user,
unsigned int len, int compat)
{
- unsigned int i, curcpu;
+ unsigned int i;
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
@@ -1313,7 +1311,6 @@ do_add_counters(struct net *net, const void __user *user,
struct xt_table *t;
const struct xt_table_info *private;
int ret = 0;
- void *loc_cpu_entry;
struct ipt_entry *iter;
unsigned int addend;
#ifdef CONFIG_COMPAT
@@ -1369,12 +1366,12 @@ do_add_counters(struct net *net, const void __user *user,
}
i = 0;
- /* Choose the copy that is on our node */
- curcpu = smp_processor_id();
- loc_cpu_entry = private->entries[curcpu];
addend = xt_write_recseq_begin();
- xt_entry_foreach(iter, loc_cpu_entry, private->size) {
- ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
+ xt_entry_foreach(iter, private->entries, private->size) {
+ struct xt_counters *tmp;
+
+ tmp = xt_get_this_cpu_counter(&iter->counters);
+ ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
++i;
}
xt_write_recseq_end(addend);
@@ -1444,7 +1441,6 @@ static int
compat_find_calc_match(struct xt_entry_match *m,
const char *name,
const struct ipt_ip *ip,
- unsigned int hookmask,
int *size)
{
struct xt_match *match;
@@ -1513,8 +1509,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
entry_offset = (void *)e - (void *)base;
j = 0;
xt_ematch_foreach(ematch, e) {
- ret = compat_find_calc_match(ematch, name,
- &e->ip, e->comefrom, &off);
+ ret = compat_find_calc_match(ematch, name, &e->ip, &off);
if (ret != 0)
goto release_matches;
++j;
@@ -1610,6 +1605,10 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
unsigned int j;
int ret = 0;
+ e->counters.pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(e->counters.pcnt))
+ return -ENOMEM;
+
j = 0;
mtpar.net = net;
mtpar.table = name;
@@ -1634,6 +1633,9 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
break;
cleanup_match(ematch, net);
}
+
+ xt_percpu_counter_free(e->counters.pcnt);
+
return ret;
}
@@ -1718,7 +1720,7 @@ translate_compat_table(struct net *net,
newinfo->hook_entry[i] = info->hook_entry[i];
newinfo->underflow[i] = info->underflow[i];
}
- entry1 = newinfo->entries[raw_smp_processor_id()];
+ entry1 = newinfo->entries;
pos = entry1;
size = total_size;
xt_entry_foreach(iter0, entry0, total_size) {
@@ -1770,11 +1772,6 @@ translate_compat_table(struct net *net,
return ret;
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i)
- if (newinfo->entries[i] && newinfo->entries[i] != entry1)
- memcpy(newinfo->entries[i], entry1, newinfo->size);
-
*pinfo = newinfo;
*pentry0 = entry1;
xt_free_table_info(info);
@@ -1821,8 +1818,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
if (!newinfo)
return -ENOMEM;
- /* choose the copy that is on our node/cpu */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
tmp.size) != 0) {
ret = -EFAULT;
@@ -1893,7 +1889,6 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
void __user *pos;
unsigned int size;
int ret = 0;
- const void *loc_cpu_entry;
unsigned int i = 0;
struct ipt_entry *iter;
@@ -1901,14 +1896,9 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
if (IS_ERR(counters))
return PTR_ERR(counters);
- /* choose the copy that is on our node/cpu, ...
- * This choice is lazy (because current thread is
- * allowed to migrate to another cpu)
- */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
pos = userptr;
size = total_size;
- xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+ xt_entry_foreach(iter, private->entries, total_size) {
ret = compat_copy_entry_to_user(iter, &pos,
&size, counters, i++);
if (ret != 0)
@@ -2083,8 +2073,7 @@ struct xt_table *ipt_register_table(struct net *net,
goto out;
}
- /* choose the copy on our node/cpu, but dont care about preemption */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
memcpy(loc_cpu_entry, repl->entries, repl->size);
ret = translate_table(net, newinfo, loc_cpu_entry, repl);
@@ -2115,7 +2104,7 @@ void ipt_unregister_table(struct net *net, struct xt_table *table)
private = xt_unregister_table(table);
/* Decrease module usage counts and free resources */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ loc_cpu_entry = private->entries;
xt_entry_foreach(iter, loc_cpu_entry, private->size)
cleanup_entry(iter, net);
if (private->number > private->initial_entries)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 771ab3d01..45cb16a6a 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -367,6 +367,11 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
struct clusterip_config *config;
int ret;
+ if (par->nft_compat) {
+ pr_err("cannot use CLUSTERIP target from nftables compat\n");
+ return -EOPNOTSUPP;
+ }
+
if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index e9e677930..95ea633e8 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -18,7 +18,7 @@
#include <net/netfilter/nf_conntrack_synproxy.h>
static struct iphdr *
-synproxy_build_ip(struct sk_buff *skb, u32 saddr, u32 daddr)
+synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
struct iphdr *iph;
@@ -220,13 +220,14 @@ synproxy_send_client_ack(const struct synproxy_net *snet,
nth->ack_seq = th->ack_seq;
tcp_flag_word(nth) = TCP_FLAG_ACK;
nth->doff = tcp_hdr_size / 4;
- nth->window = ntohs(htons(th->window) >> opts->wscale);
+ nth->window = htons(ntohs(th->window) >> opts->wscale);
nth->check = 0;
nth->urg_ptr = 0;
synproxy_build_options(nth, opts);
- synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+ synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+ niph, nth, tcp_hdr_size);
}
static bool
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 4bfaedf9b..8618fd150 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -40,7 +40,7 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
struct net *net = dev_net(dev);
int ret __maybe_unused;
- if (fib_lookup(net, fl4, &res))
+ if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE))
return false;
if (res.type != RTN_UNICAST) {
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index e1f3b911d..da5d483e2 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -298,6 +298,8 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2),
SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT),
SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE),
+ SNMP_MIB_ITEM("TCPWinProbe", LINUX_MIB_TCPWINPROBE),
+ SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f45f2a12f..e681b852c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -457,12 +457,9 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
}
#define IP_IDENTS_SZ 2048u
-struct ip_ident_bucket {
- atomic_t id;
- u32 stamp32;
-};
-static struct ip_ident_bucket *ip_idents __read_mostly;
+static atomic_t *ip_idents __read_mostly;
+static u32 *ip_tstamps __read_mostly;
/* In order to protect privacy, we add a perturbation to identifiers
* if one generator is seldom used. This makes hard for an attacker
@@ -470,15 +467,16 @@ static struct ip_ident_bucket *ip_idents __read_mostly;
*/
u32 ip_idents_reserve(u32 hash, int segs)
{
- struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ;
- u32 old = ACCESS_ONCE(bucket->stamp32);
+ u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
+ atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
+ u32 old = ACCESS_ONCE(*p_tstamp);
u32 now = (u32)jiffies;
u32 delta = 0;
- if (old != now && cmpxchg(&bucket->stamp32, old, now) == old)
+ if (old != now && cmpxchg(p_tstamp, old, now) == old)
delta = prandom_u32_max(now - old);
- return atomic_add_return(segs + delta, &bucket->id) - segs;
+ return atomic_add_return(segs + delta, p_id) - segs;
}
EXPORT_SYMBOL(ip_idents_reserve);
@@ -749,7 +747,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
if (!(n->nud_state & NUD_VALID)) {
neigh_event_send(n, NULL);
} else {
- if (fib_lookup(net, fl4, &res) == 0) {
+ if (fib_lookup(net, fl4, &res, 0) == 0) {
struct fib_nh *nh = &FIB_RES_NH(res);
update_or_create_fnhe(nh, fl4->daddr, new_gw,
@@ -977,7 +975,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
return;
rcu_read_lock();
- if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
+ if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
struct fib_nh *nh = &FIB_RES_NH(res);
update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
@@ -1188,7 +1186,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
fl4.flowi4_mark = skb->mark;
rcu_read_lock();
- if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
+ if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
else
src = inet_select_addr(rt->dst.dev,
@@ -1718,7 +1716,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.daddr = daddr;
fl4.saddr = saddr;
- err = fib_lookup(net, &fl4, &res);
+ err = fib_lookup(net, &fl4, &res, 0);
if (err != 0) {
if (!IN_DEV_FORWARD(in_dev))
err = -EHOSTUNREACH;
@@ -2097,7 +2095,8 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
goto out;
}
if (ipv4_is_local_multicast(fl4->daddr) ||
- ipv4_is_lbcast(fl4->daddr)) {
+ ipv4_is_lbcast(fl4->daddr) ||
+ fl4->flowi4_proto == IPPROTO_IGMP) {
if (!fl4->saddr)
fl4->saddr = inet_select_addr(dev_out, 0,
RT_SCOPE_LINK);
@@ -2124,7 +2123,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
goto make_route;
}
- if (fib_lookup(net, fl4, &res)) {
+ if (fib_lookup(net, fl4, &res, 0)) {
res.fi = NULL;
res.table = NULL;
if (fl4->flowi4_oif) {
@@ -2177,7 +2176,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
if (!res.prefixlen &&
res.table->tb_num_default > 1 &&
res.type == RTN_UNICAST && !fl4->flowi4_oif)
- fib_select_default(&res);
+ fib_select_default(fl4, &res);
if (!fl4->saddr)
fl4->saddr = FIB_RES_PREFSRC(net, res);
@@ -2742,6 +2741,10 @@ int __init ip_rt_init(void)
prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
+ ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
+ if (!ip_tstamps)
+ panic("IP: failed to allocate ip_tstamps\n");
+
for_each_possible_cpu(cpu) {
struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index df849e5a1..d70b1f603 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -219,9 +219,9 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
}
EXPORT_SYMBOL_GPL(__cookie_v4_check);
-static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req,
- struct dst_entry *dst)
+struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct dst_entry *dst)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct sock *child;
@@ -235,7 +235,7 @@ static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
}
return child;
}
-
+EXPORT_SYMBOL(tcp_get_cookie_sock);
/*
* when syncookies are in effect and tcp timestamps are enabled we stored
@@ -391,7 +391,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->rcv_wscale = rcv_wscale;
ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst);
- ret = get_cookie_sock(sk, skb, req, &rt->dst);
+ ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst);
/* ip_queue_xmit() depends on our flow being setup
* Normal sockets get it right from inet_csk_route_child_sock()
*/
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index c3852a7ff..0330ab2e2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -45,7 +45,13 @@ static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
/* Update system visible IP port range */
static void set_local_port_range(struct net *net, int range[2])
{
+ bool same_parity = !((range[0] ^ range[1]) & 1);
+
write_seqlock(&net->ipv4.ip_local_ports.lock);
+ if (same_parity && !net->ipv4.ip_local_ports.warned) {
+ net->ipv4.ip_local_ports.warned = true;
+ pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
+ }
net->ipv4.ip_local_ports.range[0] = range[0];
net->ipv4.ip_local_ports.range[1] = range[1];
write_sequnlock(&net->ipv4.ip_local_ports.lock);
@@ -702,7 +708,7 @@ static struct ctl_table ipv4_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = &one,
.extra2 = &gso_max_segs,
},
{
@@ -821,6 +827,13 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "tcp_ecn_fallback",
+ .data = &init_net.ipv4.sysctl_tcp_ecn_fallback,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "ip_local_port_range",
.maxlen = sizeof(init_net.ipv4.ip_local_ports.range),
.data = &init_net.ipv4.ip_local_ports.range,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ca6faeb44..6fa7e2ebc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -695,8 +695,9 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
struct tcp_splice_state *tss = rd_desc->arg.data;
int ret;
- ret = skb_splice_bits(skb, offset, tss->pipe, min(rd_desc->count, len),
- tss->flags);
+ ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe,
+ min(rd_desc->count, len), tss->flags,
+ skb_socket_splice);
if (ret > 0)
rd_desc->count -= ret;
return ret;
@@ -779,7 +780,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
ret = -EAGAIN;
break;
}
- sk_wait_data(sk, &timeo);
+ sk_wait_data(sk, &timeo, NULL);
if (signal_pending(current)) {
ret = sock_intr_errno(timeo);
break;
@@ -809,16 +810,28 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
}
EXPORT_SYMBOL(tcp_splice_read);
-struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
+struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
+ bool force_schedule)
{
struct sk_buff *skb;
/* The TCP header must be at least 32-bit aligned. */
size = ALIGN(size, 4);
+ if (unlikely(tcp_under_memory_pressure(sk)))
+ sk_mem_reclaim_partial(sk);
+
skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
- if (skb) {
- if (sk_wmem_schedule(sk, skb->truesize)) {
+ if (likely(skb)) {
+ bool mem_scheduled;
+
+ if (force_schedule) {
+ mem_scheduled = true;
+ sk_forced_mem_schedule(sk, skb->truesize);
+ } else {
+ mem_scheduled = sk_wmem_schedule(sk, skb->truesize);
+ }
+ if (likely(mem_scheduled)) {
skb_reserve(skb, sk->sk_prot->max_header);
/*
* Make sure that we have exactly size bytes
@@ -908,7 +921,8 @@ new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
- skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
+ skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
+ skb_queue_empty(&sk->sk_write_queue));
if (!skb)
goto wait_for_memory;
@@ -987,6 +1001,9 @@ do_error:
if (copied)
goto out;
out_err:
+ /* make sure we wake any epoll edge trigger waiter */
+ if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+ sk->sk_write_space(sk);
return sk_stream_error(sk, flags, err);
}
@@ -1144,7 +1161,8 @@ new_segment:
skb = sk_stream_alloc_skb(sk,
select_size(sk, sg),
- sk->sk_allocation);
+ sk->sk_allocation,
+ skb_queue_empty(&sk->sk_write_queue));
if (!skb)
goto wait_for_memory;
@@ -1275,6 +1293,9 @@ do_error:
goto out;
out_err:
err = sk_stream_error(sk, flags, err);
+ /* make sure we wake any epoll edge trigger waiter */
+ if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+ sk->sk_write_space(sk);
release_sock(sk);
return err;
}
@@ -1554,7 +1575,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int target; /* Read at least this many bytes */
long timeo;
struct task_struct *user_recv = NULL;
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
u32 urg_hole = 0;
if (unlikely(flags & MSG_ERRQUEUE))
@@ -1614,7 +1635,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
/* Next get a buffer. */
+ last = skb_peek_tail(&sk->sk_receive_queue);
skb_queue_walk(&sk->sk_receive_queue, skb) {
+ last = skb;
/* Now that we have two receive queues this
* shouldn't happen.
*/
@@ -1733,8 +1756,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
/* Do not sleep, just process backlog. */
release_sock(sk);
lock_sock(sk);
- } else
- sk_wait_data(sk, &timeo);
+ } else {
+ sk_wait_data(sk, &timeo, last);
+ }
if (user_recv) {
int chunk;
@@ -2580,6 +2604,13 @@ stealth_integrity_out_1:
icsk->icsk_syn_retries = val;
break;
+ case TCP_SAVE_SYN:
+ if (val < 0 || val > 1)
+ err = -EINVAL;
+ else
+ tp->save_syn = val;
+ break;
+
case TCP_LINGER2:
if (val < 0)
tp->linger2 = -1;
@@ -2708,13 +2739,15 @@ EXPORT_SYMBOL(compat_tcp_setsockopt);
/* Return information about state of tcp endpoint in API format. */
void tcp_get_info(struct sock *sk, struct tcp_info *info)
{
- const struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 now = tcp_time_stamp;
unsigned int start;
u32 rate;
memset(info, 0, sizeof(*info));
+ if (sk->sk_type != SOCK_STREAM)
+ return;
info->tcpi_state = sk->sk_state;
info->tcpi_ca_state = icsk->icsk_ca_state;
@@ -2784,6 +2817,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_bytes_acked = tp->bytes_acked;
info->tcpi_bytes_received = tp->bytes_received;
} while (u64_stats_fetch_retry_irq(&tp->syncp, start));
+ info->tcpi_segs_out = tp->segs_out;
+ info->tcpi_segs_in = tp->segs_in;
}
EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -2933,6 +2968,42 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
case TCP_NOTSENT_LOWAT:
val = tp->notsent_lowat;
break;
+ case TCP_SAVE_SYN:
+ val = tp->save_syn;
+ break;
+ case TCP_SAVED_SYN: {
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ lock_sock(sk);
+ if (tp->saved_syn) {
+ if (len < tp->saved_syn[0]) {
+ if (put_user(tp->saved_syn[0], optlen)) {
+ release_sock(sk);
+ return -EFAULT;
+ }
+ release_sock(sk);
+ return -EINVAL;
+ }
+ len = tp->saved_syn[0];
+ if (put_user(len, optlen)) {
+ release_sock(sk);
+ return -EFAULT;
+ }
+ if (copy_to_user(optval, tp->saved_syn + 1, len)) {
+ release_sock(sk);
+ return -EFAULT;
+ }
+ tcp_saved_syn_free(tp);
+ release_sock(sk);
+ } else {
+ release_sock(sk);
+ len = 0;
+ if (put_user(len, optlen))
+ return -EFAULT;
+ }
+ return 0;
+ }
default:
return -ENOPROTOOPT;
}
@@ -3137,11 +3208,12 @@ __setup("thash_entries=", set_thash_entries);
static void __init tcp_init_mem(void)
{
- unsigned long limit = nr_free_buffer_pages() / 8;
+ unsigned long limit = nr_free_buffer_pages() / 16;
+
limit = max(limit, 128UL);
- sysctl_tcp_mem[0] = limit / 4 * 3;
- sysctl_tcp_mem[1] = limit;
- sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
+ sysctl_tcp_mem[0] = limit / 4 * 3; /* 4.68 % */
+ sysctl_tcp_mem[1] = limit; /* 6.25 % */
+ sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; /* 9.37 % */
}
void __init tcp_init(void)
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
new file mode 100644
index 000000000..8c6fd3d5e
--- /dev/null
+++ b/net/ipv4/tcp_cdg.c
@@ -0,0 +1,433 @@
+/*
+ * CAIA Delay-Gradient (CDG) congestion control
+ *
+ * This implementation is based on the paper:
+ * D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using
+ * delay gradients." In IFIP Networking, pages 328-341. Springer, 2011.
+ *
+ * Scavenger traffic (Less-than-Best-Effort) should disable coexistence
+ * heuristics using parameters use_shadow=0 and use_ineff=0.
+ *
+ * Parameters window, backoff_beta, and backoff_factor are crucial for
+ * throughput and delay. Future work is needed to determine better defaults,
+ * and to provide guidelines for use in different environments/contexts.
+ *
+ * Except for window, knobs are configured via /sys/module/tcp_cdg/parameters/.
+ * Parameter window is only configurable when loading tcp_cdg as a module.
+ *
+ * Notable differences from paper/FreeBSD:
+ * o Using Hybrid Slow start and Proportional Rate Reduction.
+ * o Add toggle for shadow window mechanism. Suggested by David Hayes.
+ * o Add toggle for non-congestion loss tolerance.
+ * o Scaling parameter G is changed to a backoff factor;
+ * conversion is given by: backoff_factor = 1000/(G * window).
+ * o Limit shadow window to 2 * cwnd, or to cwnd when application limited.
+ * o More accurate e^-x.
+ */
+#include <linux/kernel.h>
+#include <linux/random.h>
+#include <linux/module.h>
+#include <net/tcp.h>
+
+#define HYSTART_ACK_TRAIN 1
+#define HYSTART_DELAY 2
+
+static int window __read_mostly = 8;
+static unsigned int backoff_beta __read_mostly = 0.7071 * 1024; /* sqrt 0.5 */
+static unsigned int backoff_factor __read_mostly = 42;
+static unsigned int hystart_detect __read_mostly = 3;
+static unsigned int use_ineff __read_mostly = 5;
+static bool use_shadow __read_mostly = true;
+static bool use_tolerance __read_mostly;
+
+module_param(window, int, 0444);
+MODULE_PARM_DESC(window, "gradient window size (power of two <= 256)");
+module_param(backoff_beta, uint, 0644);
+MODULE_PARM_DESC(backoff_beta, "backoff beta (0-1024)");
+module_param(backoff_factor, uint, 0644);
+MODULE_PARM_DESC(backoff_factor, "backoff probability scale factor");
+module_param(hystart_detect, uint, 0644);
+MODULE_PARM_DESC(hystart_detect, "use Hybrid Slow start "
+ "(0: disabled, 1: ACK train, 2: delay threshold, 3: both)");
+module_param(use_ineff, uint, 0644);
+MODULE_PARM_DESC(use_ineff, "use ineffectual backoff detection (threshold)");
+module_param(use_shadow, bool, 0644);
+MODULE_PARM_DESC(use_shadow, "use shadow window heuristic");
+module_param(use_tolerance, bool, 0644);
+MODULE_PARM_DESC(use_tolerance, "use loss tolerance heuristic");
+
+struct minmax {
+ union {
+ struct {
+ s32 min;
+ s32 max;
+ };
+ u64 v64;
+ };
+};
+
+enum cdg_state {
+ CDG_UNKNOWN = 0,
+ CDG_NONFULL = 1,
+ CDG_FULL = 2,
+ CDG_BACKOFF = 3,
+};
+
+struct cdg {
+ struct minmax rtt;
+ struct minmax rtt_prev;
+ struct minmax *gradients;
+ struct minmax gsum;
+ bool gfilled;
+ u8 tail;
+ u8 state;
+ u8 delack;
+ u32 rtt_seq;
+ u32 undo_cwnd;
+ u32 shadow_wnd;
+ u16 backoff_cnt;
+ u16 sample_cnt;
+ s32 delay_min;
+ u32 last_ack;
+ u32 round_start;
+};
+
+/**
+ * nexp_u32 - negative base-e exponential
+ * @ux: x in units of micro
+ *
+ * Returns exp(ux * -1e-6) * U32_MAX.
+ */
+static u32 __pure nexp_u32(u32 ux)
+{
+ static const u16 v[] = {
+ /* exp(-x)*65536-1 for x = 0, 0.000256, 0.000512, ... */
+ 65535,
+ 65518, 65501, 65468, 65401, 65267, 65001, 64470, 63422,
+ 61378, 57484, 50423, 38795, 22965, 8047, 987, 14,
+ };
+ u32 msb = ux >> 8;
+ u32 res;
+ int i;
+
+ /* Cut off when ux >= 2^24 (actual result is <= 222/U32_MAX). */
+ if (msb > U16_MAX)
+ return 0;
+
+ /* Scale first eight bits linearly: */
+ res = U32_MAX - (ux & 0xff) * (U32_MAX / 1000000);
+
+ /* Obtain e^(x + y + ...) by computing e^x * e^y * ...: */
+ for (i = 1; msb; i++, msb >>= 1) {
+ u32 y = v[i & -(msb & 1)] + U32_C(1);
+
+ res = ((u64)res * y) >> 16;
+ }
+
+ return res;
+}
+
+/* Based on the HyStart algorithm (by Ha et al.) that is implemented in
+ * tcp_cubic. Differences/experimental changes:
+ * o Using Hayes' delayed ACK filter.
+ * o Using a usec clock for the ACK train.
+ * o Reset ACK train when application limited.
+ * o Invoked at any cwnd (i.e. also when cwnd < 16).
+ * o Invoked only when cwnd < ssthresh (i.e. not when cwnd == ssthresh).
+ */
+static void tcp_cdg_hystart_update(struct sock *sk)
+{
+ struct cdg *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ ca->delay_min = min_not_zero(ca->delay_min, ca->rtt.min);
+ if (ca->delay_min == 0)
+ return;
+
+ if (hystart_detect & HYSTART_ACK_TRAIN) {
+ u32 now_us = div_u64(local_clock(), NSEC_PER_USEC);
+
+ if (ca->last_ack == 0 || !tcp_is_cwnd_limited(sk)) {
+ ca->last_ack = now_us;
+ ca->round_start = now_us;
+ } else if (before(now_us, ca->last_ack + 3000)) {
+ u32 base_owd = max(ca->delay_min / 2U, 125U);
+
+ ca->last_ack = now_us;
+ if (after(now_us, ca->round_start + base_owd)) {
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINDETECT);
+ NET_ADD_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINCWND,
+ tp->snd_cwnd);
+ tp->snd_ssthresh = tp->snd_cwnd;
+ return;
+ }
+ }
+ }
+
+ if (hystart_detect & HYSTART_DELAY) {
+ if (ca->sample_cnt < 8) {
+ ca->sample_cnt++;
+ } else {
+ s32 thresh = max(ca->delay_min + ca->delay_min / 8U,
+ 125U);
+
+ if (ca->rtt.min > thresh) {
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTDELAYDETECT);
+ NET_ADD_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTDELAYCWND,
+ tp->snd_cwnd);
+ tp->snd_ssthresh = tp->snd_cwnd;
+ }
+ }
+ }
+}
+
+static s32 tcp_cdg_grad(struct cdg *ca)
+{
+ s32 gmin = ca->rtt.min - ca->rtt_prev.min;
+ s32 gmax = ca->rtt.max - ca->rtt_prev.max;
+ s32 grad;
+
+ if (ca->gradients) {
+ ca->gsum.min += gmin - ca->gradients[ca->tail].min;
+ ca->gsum.max += gmax - ca->gradients[ca->tail].max;
+ ca->gradients[ca->tail].min = gmin;
+ ca->gradients[ca->tail].max = gmax;
+ ca->tail = (ca->tail + 1) & (window - 1);
+ gmin = ca->gsum.min;
+ gmax = ca->gsum.max;
+ }
+
+ /* We keep sums to ignore gradients during cwnd reductions;
+ * the paper's smoothed gradients otherwise simplify to:
+ * (rtt_latest - rtt_oldest) / window.
+ *
+ * We also drop division by window here.
+ */
+ grad = gmin > 0 ? gmin : gmax;
+
+ /* Extrapolate missing values in gradient window: */
+ if (!ca->gfilled) {
+ if (!ca->gradients && window > 1)
+ grad *= window; /* Memory allocation failed. */
+ else if (ca->tail == 0)
+ ca->gfilled = true;
+ else
+ grad = (grad * window) / (int)ca->tail;
+ }
+
+ /* Backoff was effectual: */
+ if (gmin <= -32 || gmax <= -32)
+ ca->backoff_cnt = 0;
+
+ if (use_tolerance) {
+ /* Reduce small variations to zero: */
+ gmin = DIV_ROUND_CLOSEST(gmin, 64);
+ gmax = DIV_ROUND_CLOSEST(gmax, 64);
+
+ if (gmin > 0 && gmax <= 0)
+ ca->state = CDG_FULL;
+ else if ((gmin > 0 && gmax > 0) || gmax < 0)
+ ca->state = CDG_NONFULL;
+ }
+ return grad;
+}
+
+static bool tcp_cdg_backoff(struct sock *sk, u32 grad)
+{
+ struct cdg *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (prandom_u32() <= nexp_u32(grad * backoff_factor))
+ return false;
+
+ if (use_ineff) {
+ ca->backoff_cnt++;
+ if (ca->backoff_cnt > use_ineff)
+ return false;
+ }
+
+ ca->shadow_wnd = max(ca->shadow_wnd, tp->snd_cwnd);
+ ca->state = CDG_BACKOFF;
+ tcp_enter_cwr(sk);
+ return true;
+}
+
+/* Not called in CWR or Recovery state. */
+static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+{
+ struct cdg *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 prior_snd_cwnd;
+ u32 incr;
+
+ if (tp->snd_cwnd < tp->snd_ssthresh && hystart_detect)
+ tcp_cdg_hystart_update(sk);
+
+ if (after(ack, ca->rtt_seq) && ca->rtt.v64) {
+ s32 grad = 0;
+
+ if (ca->rtt_prev.v64)
+ grad = tcp_cdg_grad(ca);
+ ca->rtt_seq = tp->snd_nxt;
+ ca->rtt_prev = ca->rtt;
+ ca->rtt.v64 = 0;
+ ca->last_ack = 0;
+ ca->sample_cnt = 0;
+
+ if (grad > 0 && tcp_cdg_backoff(sk, grad))
+ return;
+ }
+
+ if (!tcp_is_cwnd_limited(sk)) {
+ ca->shadow_wnd = min(ca->shadow_wnd, tp->snd_cwnd);
+ return;
+ }
+
+ prior_snd_cwnd = tp->snd_cwnd;
+ tcp_reno_cong_avoid(sk, ack, acked);
+
+ incr = tp->snd_cwnd - prior_snd_cwnd;
+ ca->shadow_wnd = max(ca->shadow_wnd, ca->shadow_wnd + incr);
+}
+
+static void tcp_cdg_acked(struct sock *sk, u32 num_acked, s32 rtt_us)
+{
+ struct cdg *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (rtt_us <= 0)
+ return;
+
+ /* A heuristic for filtering delayed ACKs, adapted from:
+ * D.A. Hayes. "Timing enhancements to the FreeBSD kernel to support
+ * delay and rate based TCP mechanisms." TR 100219A. CAIA, 2010.
+ */
+ if (tp->sacked_out == 0) {
+ if (num_acked == 1 && ca->delack) {
+ /* A delayed ACK is only used for the minimum if it is
+ * provenly lower than an existing non-zero minimum.
+ */
+ ca->rtt.min = min(ca->rtt.min, rtt_us);
+ ca->delack--;
+ return;
+ } else if (num_acked > 1 && ca->delack < 5) {
+ ca->delack++;
+ }
+ }
+
+ ca->rtt.min = min_not_zero(ca->rtt.min, rtt_us);
+ ca->rtt.max = max(ca->rtt.max, rtt_us);
+}
+
+static u32 tcp_cdg_ssthresh(struct sock *sk)
+{
+ struct cdg *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ ca->undo_cwnd = tp->snd_cwnd;
+
+ if (ca->state == CDG_BACKOFF)
+ return max(2U, (tp->snd_cwnd * min(1024U, backoff_beta)) >> 10);
+
+ if (ca->state == CDG_NONFULL && use_tolerance)
+ return tp->snd_cwnd;
+
+ ca->shadow_wnd = min(ca->shadow_wnd >> 1, tp->snd_cwnd);
+ if (use_shadow)
+ return max3(2U, ca->shadow_wnd, tp->snd_cwnd >> 1);
+ return max(2U, tp->snd_cwnd >> 1);
+}
+
+static u32 tcp_cdg_undo_cwnd(struct sock *sk)
+{
+ struct cdg *ca = inet_csk_ca(sk);
+
+ return max(tcp_sk(sk)->snd_cwnd, ca->undo_cwnd);
+}
+
+static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
+{
+ struct cdg *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct minmax *gradients;
+
+ switch (ev) {
+ case CA_EVENT_CWND_RESTART:
+ gradients = ca->gradients;
+ if (gradients)
+ memset(gradients, 0, window * sizeof(gradients[0]));
+ memset(ca, 0, sizeof(*ca));
+
+ ca->gradients = gradients;
+ ca->rtt_seq = tp->snd_nxt;
+ ca->shadow_wnd = tp->snd_cwnd;
+ break;
+ case CA_EVENT_COMPLETE_CWR:
+ ca->state = CDG_UNKNOWN;
+ ca->rtt_seq = tp->snd_nxt;
+ ca->rtt_prev = ca->rtt;
+ ca->rtt.v64 = 0;
+ break;
+ default:
+ break;
+ }
+}
+
+static void tcp_cdg_init(struct sock *sk)
+{
+ struct cdg *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ /* We silently fall back to window = 1 if allocation fails. */
+ if (window > 1)
+ ca->gradients = kcalloc(window, sizeof(ca->gradients[0]),
+ GFP_NOWAIT | __GFP_NOWARN);
+ ca->rtt_seq = tp->snd_nxt;
+ ca->shadow_wnd = tp->snd_cwnd;
+}
+
+static void tcp_cdg_release(struct sock *sk)
+{
+ struct cdg *ca = inet_csk_ca(sk);
+
+ kfree(ca->gradients);
+}
+
+struct tcp_congestion_ops tcp_cdg __read_mostly = {
+ .cong_avoid = tcp_cdg_cong_avoid,
+ .cwnd_event = tcp_cdg_cwnd_event,
+ .pkts_acked = tcp_cdg_acked,
+ .undo_cwnd = tcp_cdg_undo_cwnd,
+ .ssthresh = tcp_cdg_ssthresh,
+ .release = tcp_cdg_release,
+ .init = tcp_cdg_init,
+ .owner = THIS_MODULE,
+ .name = "cdg",
+};
+
+static int __init tcp_cdg_register(void)
+{
+ if (backoff_beta > 1024 || window < 1 || window > 256)
+ return -ERANGE;
+ if (!is_power_of_2(window))
+ return -EINVAL;
+
+ BUILD_BUG_ON(sizeof(struct cdg) > ICSK_CA_PRIV_SIZE);
+ tcp_register_congestion_control(&tcp_cdg);
+ return 0;
+}
+
+static void __exit tcp_cdg_unregister(void)
+{
+ tcp_unregister_congestion_control(&tcp_cdg);
+}
+
+module_init(tcp_cdg_register);
+module_exit(tcp_cdg_unregister);
+MODULE_AUTHOR("Kenneth Klette Jonassen");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("TCP CDG");
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 4c41c1287..7092a61c4 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -204,20 +204,26 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags)
/* Expired RTT */
if (!before(tp->snd_una, ca->next_seq)) {
- /* For avoiding denominator == 1. */
- if (ca->acked_bytes_total == 0)
- ca->acked_bytes_total = 1;
+ u64 bytes_ecn = ca->acked_bytes_ecn;
+ u32 alpha = ca->dctcp_alpha;
/* alpha = (1 - g) * alpha + g * F */
- ca->dctcp_alpha = ca->dctcp_alpha -
- (ca->dctcp_alpha >> dctcp_shift_g) +
- (ca->acked_bytes_ecn << (10U - dctcp_shift_g)) /
- ca->acked_bytes_total;
- if (ca->dctcp_alpha > DCTCP_MAX_ALPHA)
- /* Clamp dctcp_alpha to max. */
- ca->dctcp_alpha = DCTCP_MAX_ALPHA;
+ alpha -= alpha >> dctcp_shift_g;
+ if (bytes_ecn) {
+ /* If dctcp_shift_g == 1, a 32bit value would overflow
+ * after 8 Mbytes.
+ */
+ bytes_ecn <<= (10 - dctcp_shift_g);
+ do_div(bytes_ecn, max(1U, ca->acked_bytes_total));
+ alpha = min(alpha + (u32)bytes_ecn, DCTCP_MAX_ALPHA);
+ }
+ /* dctcp_alpha can be read from dctcp_get_info() without
+ * synchro, so we ask compiler to not use dctcp_alpha
+ * as a temporary variable in prior operations.
+ */
+ WRITE_ONCE(ca->dctcp_alpha, alpha);
dctcp_reset(tp, ca);
}
}
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 79b34a0f4..479f34946 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -19,13 +19,14 @@
static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
void *_info)
{
- const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_info *info = _info;
if (sk->sk_state == TCP_LISTEN) {
r->idiag_rqueue = sk->sk_ack_backlog;
r->idiag_wqueue = sk->sk_max_ack_backlog;
- } else {
+ } else if (sk->sk_type == SOCK_STREAM) {
+ const struct tcp_sock *tp = tcp_sk(sk);
+
r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
r->idiag_wqueue = tp->write_seq - tp->snd_una;
}
@@ -50,6 +51,7 @@ static const struct inet_diag_handler tcp_diag_handler = {
.dump_one = tcp_diag_dump_one,
.idiag_get_info = tcp_diag_get_info,
.idiag_type = IPPROTO_TCP,
+ .idiag_info_size = sizeof(struct tcp_info),
};
static int __init tcp_diag_init(void)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8e5d1bcbd..bf0636da8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -362,7 +362,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
/* Check #1 */
if (tp->rcv_ssthresh < tp->window_clamp &&
(int)tp->rcv_ssthresh < tcp_space(sk) &&
- !sk_under_memory_pressure(sk)) {
+ !tcp_under_memory_pressure(sk)) {
int incr;
/* Check #2. Increase window, if skb with such overhead
@@ -449,7 +449,7 @@ static void tcp_clamp_window(struct sock *sk)
if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
- !sk_under_memory_pressure(sk) &&
+ !tcp_under_memory_pressure(sk) &&
sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
sysctl_tcp_rmem[2]);
@@ -1133,7 +1133,12 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
struct tcp_sacktag_state {
int reord;
int fack_count;
- long rtt_us; /* RTT measured by SACKing never-retransmitted data */
+ /* Timestamps for earliest and latest never-retransmitted segment
+ * that was SACKed. RTO needs the earliest RTT to stay conservative,
+ * but congestion control should still get an accurate delay signal.
+ */
+ struct skb_mstamp first_sackt;
+ struct skb_mstamp last_sackt;
int flag;
};
@@ -1236,14 +1241,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
state->reord);
if (!after(end_seq, tp->high_seq))
state->flag |= FLAG_ORIG_SACK_ACKED;
- /* Pick the earliest sequence sacked for RTT */
- if (state->rtt_us < 0) {
- struct skb_mstamp now;
-
- skb_mstamp_get(&now);
- state->rtt_us = skb_mstamp_us_delta(&now,
- xmit_time);
- }
+ if (state->first_sackt.v64 == 0)
+ state->first_sackt = *xmit_time;
+ state->last_sackt = *xmit_time;
}
if (sacked & TCPCB_LOST) {
@@ -1319,16 +1319,12 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
* code can come after this skb later on it's better to keep
* setting gso_size to something.
*/
- if (!skb_shinfo(prev)->gso_size) {
- skb_shinfo(prev)->gso_size = mss;
- skb_shinfo(prev)->gso_type = sk->sk_gso_type;
- }
+ if (!TCP_SKB_CB(prev)->tcp_gso_size)
+ TCP_SKB_CB(prev)->tcp_gso_size = mss;
/* CHECKME: To clear or not to clear? Mimics normal skb currently */
- if (tcp_skb_pcount(skb) <= 1) {
- skb_shinfo(skb)->gso_size = 0;
- skb_shinfo(skb)->gso_type = 0;
- }
+ if (tcp_skb_pcount(skb) <= 1)
+ TCP_SKB_CB(skb)->tcp_gso_size = 0;
/* Difference in this won't matter, both ACKed by the same cumul. ACK */
TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
@@ -1637,7 +1633,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
static int
tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
- u32 prior_snd_una, long *sack_rtt_us)
+ u32 prior_snd_una, struct tcp_sacktag_state *state)
{
struct tcp_sock *tp = tcp_sk(sk);
const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1645,7 +1641,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
struct tcp_sack_block sp[TCP_NUM_SACKS];
struct tcp_sack_block *cache;
- struct tcp_sacktag_state state;
struct sk_buff *skb;
int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
int used_sacks;
@@ -1653,9 +1648,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
int i, j;
int first_sack_index;
- state.flag = 0;
- state.reord = tp->packets_out;
- state.rtt_us = -1L;
+ state->flag = 0;
+ state->reord = tp->packets_out;
if (!tp->sacked_out) {
if (WARN_ON(tp->fackets_out))
@@ -1666,7 +1660,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
num_sacks, prior_snd_una);
if (found_dup_sack)
- state.flag |= FLAG_DSACKING_ACK;
+ state->flag |= FLAG_DSACKING_ACK;
/* Eliminate too old ACKs, but take into
* account more or less fresh ones, they can
@@ -1731,7 +1725,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
}
skb = tcp_write_queue_head(sk);
- state.fack_count = 0;
+ state->fack_count = 0;
i = 0;
if (!tp->sacked_out) {
@@ -1765,10 +1759,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
/* Head todo? */
if (before(start_seq, cache->start_seq)) {
- skb = tcp_sacktag_skip(skb, sk, &state,
+ skb = tcp_sacktag_skip(skb, sk, state,
start_seq);
skb = tcp_sacktag_walk(skb, sk, next_dup,
- &state,
+ state,
start_seq,
cache->start_seq,
dup_sack);
@@ -1779,7 +1773,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
goto advance_sp;
skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
- &state,
+ state,
cache->end_seq);
/* ...tail remains todo... */
@@ -1788,12 +1782,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
skb = tcp_highest_sack(sk);
if (!skb)
break;
- state.fack_count = tp->fackets_out;
+ state->fack_count = tp->fackets_out;
cache++;
goto walk;
}
- skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
+ skb = tcp_sacktag_skip(skb, sk, state, cache->end_seq);
/* Check overlap against next cached too (past this one already) */
cache++;
continue;
@@ -1803,12 +1797,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
skb = tcp_highest_sack(sk);
if (!skb)
break;
- state.fack_count = tp->fackets_out;
+ state->fack_count = tp->fackets_out;
}
- skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
+ skb = tcp_sacktag_skip(skb, sk, state, start_seq);
walk:
- skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
+ skb = tcp_sacktag_walk(skb, sk, next_dup, state,
start_seq, end_seq, dup_sack);
advance_sp:
@@ -1823,9 +1817,9 @@ advance_sp:
for (j = 0; j < used_sacks; j++)
tp->recv_sack_cache[i++] = sp[j];
- if ((state.reord < tp->fackets_out) &&
+ if ((state->reord < tp->fackets_out) &&
((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
- tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
+ tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
tcp_mark_lost_retrans(sk);
tcp_verify_left_out(tp);
@@ -1837,8 +1831,7 @@ out:
WARN_ON((int)tp->retrans_out < 0);
WARN_ON((int)tcp_packets_in_flight(tp) < 0);
#endif
- *sack_rtt_us = state.rtt_us;
- return state.flag;
+ return state->flag;
}
/* Limits sacked_out so that sum with lost_out isn't ever larger than
@@ -1927,14 +1920,13 @@ void tcp_enter_loss(struct sock *sk)
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- bool new_recovery = false;
+ bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
bool is_reneg; /* is receiver reneging on SACKs? */
/* Reduce ssthresh if it has not yet been made inside this window. */
if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
!after(tp->high_seq, tp->snd_una) ||
(icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
- new_recovery = true;
tp->prior_ssthresh = tcp_current_ssthresh(sk);
tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tcp_ca_event(sk, CA_EVENT_LOSS);
@@ -2258,7 +2250,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
(oldcnt >= packets))
break;
- mss = skb_shinfo(skb)->gso_size;
+ mss = tcp_skb_mss(skb);
err = tcp_fragment(sk, skb, (packets - oldcnt) * mss,
mss, GFP_ATOMIC);
if (err < 0)
@@ -2558,6 +2550,7 @@ void tcp_enter_cwr(struct sock *sk)
tcp_set_ca_state(sk, TCP_CA_CWR);
}
}
+EXPORT_SYMBOL(tcp_enter_cwr);
static void tcp_try_keep_open(struct sock *sk)
{
@@ -3058,7 +3051,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
* arrived at the other end.
*/
static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
- u32 prior_snd_una, long sack_rtt_us)
+ u32 prior_snd_una,
+ struct tcp_sacktag_state *sack)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct skb_mstamp first_ackt, last_ackt, now;
@@ -3066,8 +3060,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
u32 prior_sacked = tp->sacked_out;
u32 reord = tp->packets_out;
bool fully_acked = true;
- long ca_seq_rtt_us = -1L;
+ long sack_rtt_us = -1L;
long seq_rtt_us = -1L;
+ long ca_rtt_us = -1L;
struct sk_buff *skb;
u32 pkts_acked = 0;
bool rtt_update;
@@ -3156,15 +3151,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
skb_mstamp_get(&now);
if (likely(first_ackt.v64)) {
seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
- ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+ ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+ }
+ if (sack->first_sackt.v64) {
+ sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt);
+ ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
}
rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
if (flag & FLAG_ACKED) {
- const struct tcp_congestion_ops *ca_ops
- = inet_csk(sk)->icsk_ca_ops;
-
tcp_rearm_rto(sk);
if (unlikely(icsk->icsk_mtup.probe_size &&
!after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
@@ -3187,11 +3183,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->fackets_out -= min(pkts_acked, tp->fackets_out);
- if (ca_ops->pkts_acked) {
- long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us);
- ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
- }
-
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
/* Do not re-arm RTO if the sack RTT is measured from data sent
@@ -3201,6 +3192,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tcp_rearm_rto(sk);
}
+ if (icsk->icsk_ca_ops->pkts_acked)
+ icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us);
+
#if FASTRETRANS_DEBUG > 0
WARN_ON((int)tp->sacked_out < 0);
WARN_ON((int)tp->lost_out < 0);
@@ -3241,7 +3235,7 @@ static void tcp_ack_probe(struct sock *sk)
* This function is not for random using!
*/
} else {
- unsigned long when = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
+ unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
when, TCP_RTO_MAX);
@@ -3469,6 +3463,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_sacktag_state sack_state;
u32 prior_snd_una = tp->snd_una;
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
@@ -3477,7 +3472,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
int prior_packets = tp->packets_out;
const int prior_unsacked = tp->packets_out - tp->sacked_out;
int acked = 0; /* Number of packets newly acked */
- long sack_rtt_us = -1L;
+
+ sack_state.first_sackt.v64 = 0;
/* We very likely will need to access write queue head. */
prefetchw(sk->sk_write_queue.next);
@@ -3541,7 +3537,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (TCP_SKB_CB(skb)->sacked)
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
- &sack_rtt_us);
+ &sack_state);
if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
flag |= FLAG_ECE;
@@ -3566,7 +3562,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
/* See if we can take anything off of the retransmit queue. */
acked = tp->packets_out;
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
- sack_rtt_us);
+ &sack_state);
acked -= tp->packets_out;
/* Advance cwnd if state allows */
@@ -3618,7 +3614,7 @@ old_ack:
*/
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
- &sack_rtt_us);
+ &sack_state);
tcp_fastretrans_alert(sk, acked, prior_unsacked,
is_dupack, flag);
}
@@ -4591,10 +4587,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
if (eaten <= 0) {
queue_and_out:
- if (eaten < 0 &&
- tcp_try_rmem_schedule(sk, skb, skb->truesize))
- goto drop;
-
+ if (eaten < 0) {
+ if (skb_queue_len(&sk->sk_receive_queue) == 0)
+ sk_forced_mem_schedule(sk, skb->truesize);
+ else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+ goto drop;
+ }
eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
}
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
@@ -4865,7 +4863,7 @@ static int tcp_prune_queue(struct sock *sk)
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
tcp_clamp_window(sk);
- else if (sk_under_memory_pressure(sk))
+ else if (tcp_under_memory_pressure(sk))
tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
tcp_collapse_ofo_queue(sk);
@@ -4909,7 +4907,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk)
return false;
/* If we are under global TCP memory pressure, do not expand. */
- if (sk_under_memory_pressure(sk))
+ if (tcp_under_memory_pressure(sk))
return false;
/* If we are under soft global TCP memory pressure, do not expand. */
@@ -6153,6 +6151,23 @@ static bool tcp_syn_flood_action(struct sock *sk,
return want_cookie;
}
+static void tcp_reqsk_record_syn(const struct sock *sk,
+ struct request_sock *req,
+ const struct sk_buff *skb)
+{
+ if (tcp_sk(sk)->save_syn) {
+ u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
+ u32 *copy;
+
+ copy = kmalloc(len + sizeof(u32), GFP_ATOMIC);
+ if (copy) {
+ copy[0] = len;
+ memcpy(&copy[1], skb_network_header(skb), len);
+ req->saved_syn = copy;
+ }
+ }
+}
+
int tcp_conn_request(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct sk_buff *skb)
@@ -6285,6 +6300,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_rsk(req)->tfo_listener = false;
af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
}
+ tcp_reqsk_record_syn(sk, req, skb);
return 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1e0ce4d7b..1eef463f8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1364,7 +1364,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr);
if (req) {
nsk = tcp_check_req(sk, skb, req, false);
- if (!nsk)
+ if (!nsk || nsk == sk)
reqsk_put(req);
return nsk;
}
@@ -1418,7 +1418,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
- if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
+ if (tcp_checksum_complete(skb))
goto csum_err;
#ifdef CONFIG_TCP_STEALTH
@@ -1653,6 +1653,7 @@ process:
skb->dev = NULL;
bh_lock_sock_nested(sk);
+ tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
ret = 0;
if (!sock_owned_by_user(sk)) {
if (!tcp_prequeue(sk, skb))
@@ -1673,7 +1674,7 @@ no_tcp_socket:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;
- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
+ if (tcp_checksum_complete(skb)) {
csum_error:
TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
bad_packet:
@@ -1697,10 +1698,6 @@ do_time_wait:
goto discard_it;
}
- if (skb->len < (th->doff << 2)) {
- inet_twsk_put(inet_twsk(sk));
- goto bad_packet;
- }
if (tcp_checksum_complete(skb)) {
inet_twsk_put(inet_twsk(sk));
goto csum_error;
@@ -1829,6 +1826,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
/* If socket is aborted during connect operation */
tcp_free_fastopen_req(tp);
+ tcp_saved_syn_free(tp);
sk_sockets_allocated_dec(sk);
sock_release_memcg(sk);
@@ -2437,12 +2435,15 @@ static int __net_init tcp_sk_init(struct net *net)
goto fail;
*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
}
+
net->ipv4.sysctl_tcp_ecn = 2;
+ net->ipv4.sysctl_tcp_ecn_fallback = 1;
+
net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
- return 0;
+ return 0;
fail:
tcp_sk_exit(net);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 17e7339ee..4bc00cb79 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -451,6 +451,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->rcv_wup = newtp->copied_seq =
newtp->rcv_nxt = treq->rcv_isn + 1;
+ newtp->segs_in = 0;
newtp->snd_sml = newtp->snd_una =
newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
@@ -539,6 +540,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->fastopen_rsk = NULL;
newtp->syn_data_acked = 0;
+ newtp->saved_syn = req->saved_syn;
+ req->saved_syn = NULL;
+
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
}
return newsk;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 3f7c2fca5..9864a2dba 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -77,7 +77,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
oldlen = (u16)~skb->len;
__skb_pull(skb, thlen);
- mss = tcp_skb_mss(skb);
+ mss = skb_shinfo(skb)->gso_size;
if (unlikely(skb->len <= mss))
goto out;
@@ -242,7 +242,7 @@ found:
flush |= *(u32 *)((u8 *)th + i) ^
*(u32 *)((u8 *)th2 + i);
- mss = tcp_skb_mss(p);
+ mss = skb_shinfo(p)->gso_size;
flush |= (len - 1) >= mss;
flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d80faa151..bb07d2257 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -50,8 +50,8 @@ int sysctl_tcp_retrans_collapse __read_mostly = 1;
*/
int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
-/* Default TSQ limit of two TSO segments */
-int sysctl_tcp_limit_output_bytes __read_mostly = 131072;
+/* Default TSQ limit of four TSO segments */
+int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
/* This limits the percentage of the congestion window which we
* will allow a single TSO frame to consume. Building TSO frames
@@ -350,6 +350,15 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
}
}
+static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
+{
+ if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)
+ /* tp->ecn_flags are cleared at a later point in time when
+ * SYN ACK is ultimatively being received.
+ */
+ TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR);
+}
+
static void
tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th,
struct sock *sk)
@@ -393,8 +402,6 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
*/
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
{
- struct skb_shared_info *shinfo = skb_shinfo(skb);
-
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
@@ -402,8 +409,6 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
TCP_SKB_CB(skb)->sacked = 0;
tcp_skb_pcount_set(skb, 1);
- shinfo->gso_size = 0;
- shinfo->gso_type = 0;
TCP_SKB_CB(skb)->seq = seq;
if (flags & (TCPHDR_SYN | TCPHDR_FIN))
@@ -1001,6 +1006,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
}
tcp_options_write((__be32 *)(th + 1), tp, &opts);
+ skb_shinfo(skb)->gso_type = sk->sk_gso_type;
if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
tcp_ecn_send(sk, skb, tcp_header_size);
@@ -1025,8 +1031,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
tcp_skb_pcount(skb));
- /* OK, its time to fill skb_shinfo(skb)->gso_segs */
+ tp->segs_out += tcp_skb_pcount(skb);
+ /* OK, its time to fill skb_shinfo(skb)->gso_{segs|size} */
skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
+ skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
/* Our usage of tstamp should remain private */
skb->tstamp.tv64 = 0;
@@ -1063,25 +1071,17 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
}
/* Initialize TSO segments for a packet. */
-static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
- unsigned int mss_now)
+static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
{
- struct skb_shared_info *shinfo = skb_shinfo(skb);
-
- /* Make sure we own this skb before messing gso_size/gso_segs */
- WARN_ON_ONCE(skb_cloned(skb));
-
if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
/* Avoid the costly divide in the normal
* non-TSO case.
*/
tcp_skb_pcount_set(skb, 1);
- shinfo->gso_size = 0;
- shinfo->gso_type = 0;
+ TCP_SKB_CB(skb)->tcp_gso_size = 0;
} else {
tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now));
- shinfo->gso_size = mss_now;
- shinfo->gso_type = sk->sk_gso_type;
+ TCP_SKB_CB(skb)->tcp_gso_size = mss_now;
}
}
@@ -1170,7 +1170,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
return -ENOMEM;
/* Get a new skb... force flag on. */
- buff = sk_stream_alloc_skb(sk, nsize, gfp);
+ buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
if (!buff)
return -ENOMEM; /* We'll just try again later. */
@@ -1213,8 +1213,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
old_factor = tcp_skb_pcount(skb);
/* Fix up tso_factor for both original and new SKB. */
- tcp_set_skb_tso_segs(sk, skb, mss_now);
- tcp_set_skb_tso_segs(sk, buff, mss_now);
+ tcp_set_skb_tso_segs(skb, mss_now);
+ tcp_set_skb_tso_segs(buff, mss_now);
/* If this packet has been sent out already, we must
* adjust the various packet counters.
@@ -1294,7 +1294,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
/* Any change of skb->len requires recalculation of tso factor. */
if (tcp_skb_pcount(skb) > 1)
- tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
+ tcp_set_skb_tso_segs(skb, tcp_skb_mss(skb));
return 0;
}
@@ -1626,13 +1626,12 @@ static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
* This must be invoked the first time we consider transmitting
* SKB onto the wire.
*/
-static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
- unsigned int mss_now)
+static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
{
int tso_segs = tcp_skb_pcount(skb);
if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
- tcp_set_skb_tso_segs(sk, skb, mss_now);
+ tcp_set_skb_tso_segs(skb, mss_now);
tso_segs = tcp_skb_pcount(skb);
}
return tso_segs;
@@ -1687,7 +1686,7 @@ static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
const struct tcp_sock *tp = tcp_sk(sk);
unsigned int cwnd_quota;
- tcp_init_tso_segs(sk, skb, cur_mss);
+ tcp_init_tso_segs(skb, cur_mss);
if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
return 0;
@@ -1729,7 +1728,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
if (skb->len != skb->data_len)
return tcp_fragment(sk, skb, len, mss_now, gfp);
- buff = sk_stream_alloc_skb(sk, 0, gfp);
+ buff = sk_stream_alloc_skb(sk, 0, gfp, true);
if (unlikely(!buff))
return -ENOMEM;
@@ -1756,8 +1755,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
tcp_fragment_tstamp(skb, buff);
/* Fix up tso_factor for both original and new SKB. */
- tcp_set_skb_tso_segs(sk, skb, mss_now);
- tcp_set_skb_tso_segs(sk, buff, mss_now);
+ tcp_set_skb_tso_segs(skb, mss_now);
+ tcp_set_skb_tso_segs(buff, mss_now);
/* Link BUFF into the send queue. */
__skb_header_release(buff);
@@ -1948,7 +1947,7 @@ static int tcp_mtu_probe(struct sock *sk)
}
/* We're allowed to probe. Build it now. */
- nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC);
+ nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
if (!nskb)
return -1;
sk->sk_wmem_queued += nskb->truesize;
@@ -1991,7 +1990,7 @@ static int tcp_mtu_probe(struct sock *sk)
skb->len, 0);
} else {
__pskb_trim_head(skb, copy);
- tcp_set_skb_tso_segs(sk, skb, mss_now);
+ tcp_set_skb_tso_segs(skb, mss_now);
}
TCP_SKB_CB(skb)->seq += copy;
}
@@ -2001,7 +2000,7 @@ static int tcp_mtu_probe(struct sock *sk)
if (len >= probe_size)
break;
}
- tcp_init_tso_segs(sk, nskb, nskb->len);
+ tcp_init_tso_segs(nskb, nskb->len);
/* We're ready to send. If this fails, the probe will
* be resegmented into mss-sized pieces by tcp_write_xmit().
@@ -2063,7 +2062,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
- tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
+ tso_segs = tcp_init_tso_segs(skb, mss_now);
BUG_ON(!tso_segs);
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
@@ -2085,7 +2084,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
break;
- if (tso_segs == 1 || !max_segs) {
+ if (tso_segs == 1) {
if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
(tcp_skb_is_last(sk, skb) ?
nonagle : TCP_NAGLE_PUSH))))
@@ -2098,7 +2097,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
}
limit = mss_now;
- if (tso_segs > 1 && max_segs && !tcp_urg_mode(tp))
+ if (tso_segs > 1 && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
min_t(unsigned int,
cwnd_quota,
@@ -2399,7 +2398,7 @@ u32 __tcp_select_window(struct sock *sk)
if (free_space < (full_space >> 1)) {
icsk->icsk_ack.quick = 0;
- if (sk_under_memory_pressure(sk))
+ if (tcp_under_memory_pressure(sk))
tp->rcv_ssthresh = min(tp->rcv_ssthresh,
4U * tp->advmss);
@@ -2617,11 +2616,15 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
if (unlikely(oldpcount > 1)) {
if (skb_unclone(skb, GFP_ATOMIC))
return -ENOMEM;
- tcp_init_tso_segs(sk, skb, cur_mss);
+ tcp_init_tso_segs(skb, cur_mss);
tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
}
}
+ /* RFC3168, section 6.1.1.1. ECN fallback */
+ if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN)
+ tcp_ecn_clear_syn(sk, skb);
+
tcp_retrans_try_collapse(sk, skb, cur_mss);
/* Make a copy, if the first transmission SKB clone we made
@@ -2823,8 +2826,10 @@ begin_fwd:
* connection tear down and (memory) recovery.
* Otherwise tcp_send_fin() could be tempted to either delay FIN
* or even be forced to close flow without any FIN.
+ * In general, we want to allow one skb per socket to avoid hangs
+ * with edge trigger epoll()
*/
-static void sk_forced_wmem_schedule(struct sock *sk, int size)
+void sk_forced_mem_schedule(struct sock *sk, int size)
{
int amt, status;
@@ -2848,7 +2853,7 @@ void tcp_send_fin(struct sock *sk)
* Note: in the latter case, FIN packet will be sent after a timeout,
* as TCP stack thinks it has already been transmitted.
*/
- if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) {
+ if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) {
coalesce:
TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
TCP_SKB_CB(tskb)->end_seq++;
@@ -2871,7 +2876,7 @@ coalesce:
return;
}
skb_reserve(skb, MAX_TCP_HEADER);
- sk_forced_wmem_schedule(sk, skb->truesize);
+ sk_forced_mem_schedule(sk, skb->truesize);
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
tcp_init_nondata_skb(skb, tp->write_seq,
TCPHDR_ACK | TCPHDR_FIN);
@@ -3182,7 +3187,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
/* limit to order-0 allocations */
space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
- syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation);
+ syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation, false);
if (!syn_data)
goto fallback;
syn_data->ip_summed = CHECKSUM_PARTIAL;
@@ -3248,7 +3253,7 @@ int tcp_connect(struct sock *sk)
return 0;
}
- buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
+ buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
if (unlikely(!buff))
return -ENOBUFS;
@@ -3397,7 +3402,7 @@ EXPORT_SYMBOL_GPL(tcp_send_ack);
* one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
* out-of-date with SND.UNA-1 to probe window.
*/
-static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
+static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
@@ -3415,6 +3420,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
*/
tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
skb_mstamp_get(&skb->skb_mstamp);
+ NET_INC_STATS_BH(sock_net(sk), mib);
return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
}
@@ -3422,12 +3428,12 @@ void tcp_send_window_probe(struct sock *sk)
{
if (sk->sk_state == TCP_ESTABLISHED) {
tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
- tcp_xmit_probe_skb(sk, 0);
+ tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE);
}
}
/* Initiate keepalive or window probe from timer. */
-int tcp_write_wakeup(struct sock *sk)
+int tcp_write_wakeup(struct sock *sk, int mib)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
@@ -3455,7 +3461,7 @@ int tcp_write_wakeup(struct sock *sk)
if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
return -1;
} else if (!tcp_skb_pcount(skb))
- tcp_set_skb_tso_segs(sk, skb, mss);
+ tcp_set_skb_tso_segs(skb, mss);
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
@@ -3464,8 +3470,8 @@ int tcp_write_wakeup(struct sock *sk)
return err;
} else {
if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
- tcp_xmit_probe_skb(sk, 1);
- return tcp_xmit_probe_skb(sk, 0);
+ tcp_xmit_probe_skb(sk, 1, mib);
+ return tcp_xmit_probe_skb(sk, 0, mib);
}
}
@@ -3479,7 +3485,7 @@ void tcp_send_probe0(struct sock *sk)
unsigned long probe_max;
int err;
- err = tcp_write_wakeup(sk);
+ err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
if (tp->packets_out || !tcp_send_head(sk)) {
/* Cancel probe timer, if it is not required. */
@@ -3505,7 +3511,7 @@ void tcp_send_probe0(struct sock *sk)
probe_max = TCP_RESOURCE_PROBE_INTERVAL;
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
- inet_csk_rto_backoff(icsk, probe_max),
+ tcp_probe0_when(sk, probe_max),
TCP_RTO_MAX);
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 8c65dc147..5b752f58a 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -247,7 +247,7 @@ void tcp_delack_timer_handler(struct sock *sk)
}
out:
- if (sk_under_memory_pressure(sk))
+ if (tcp_under_memory_pressure(sk))
sk_mem_reclaim(sk);
}
@@ -616,7 +616,7 @@ static void tcp_keepalive_timer (unsigned long data)
tcp_write_err(sk);
goto out;
}
- if (tcp_write_wakeup(sk) <= 0) {
+ if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
icsk->icsk_probes_out++;
elapsed = keepalive_intvl_when(tp);
} else {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 83aa604f9..1b8c5ba7d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1995,12 +1995,19 @@ void udp_v4_early_demux(struct sk_buff *skb)
skb->sk = sk;
skb->destructor = sock_efree;
- dst = sk->sk_rx_dst;
+ dst = READ_ONCE(sk->sk_rx_dst);
if (dst)
dst = dst_check(dst, 0);
- if (dst)
- skb_dst_set_noref(skb, dst);
+ if (dst) {
+ /* DST_NOCACHE can not be used without taking a reference */
+ if (dst->flags & DST_NOCACHE) {
+ if (likely(atomic_inc_not_zero(&dst->__refcnt)))
+ skb_dst_set(skb, dst);
+ } else {
+ skb_dst_set_noref(skb, dst);
+ }
+ }
}
int udp_rcv(struct sk_buff *skb)
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index b763c39ae..6116604bf 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -170,6 +170,7 @@ static const struct inet_diag_handler udp_diag_handler = {
.dump_one = udp_diag_dump_one,
.idiag_get_info = udp_diag_get_info,
.idiag_type = IPPROTO_UDP,
+ .idiag_info_size = 0,
};
static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
@@ -190,6 +191,7 @@ static const struct inet_diag_handler udplite_diag_handler = {
.dump_one = udplite_diag_dump_one,
.idiag_get_info = udp_diag_get_info,
.idiag_type = IPPROTO_UDPLITE,
+ .idiag_info_size = 0,
};
static int __init udp_diag_init(void)
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 6bb98cc19..933ea903f 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -15,12 +15,10 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
struct socket *sock = NULL;
struct sockaddr_in udp_addr;
- err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock);
+ err = sock_create_kern(net, AF_INET, SOCK_DGRAM, 0, &sock);
if (err < 0)
goto error;
- sk_change_net(sock->sk, net);
-
udp_addr.sin_family = AF_INET;
udp_addr.sin_addr = cfg->local_ip;
udp_addr.sin_port = cfg->local_udp_port;
@@ -47,7 +45,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
error:
if (sock) {
kernel_sock_shutdown(sock, SHUT_RDWR);
- sk_release_kernel(sock->sk);
+ sock_release(sock);
}
*sockp = NULL;
return err;
@@ -101,7 +99,7 @@ void udp_tunnel_sock_release(struct socket *sock)
{
rcu_assign_sk_user_data(sock->sk, NULL);
kernel_sock_shutdown(sock, SHUT_RDWR);
- sk_release_kernel(sock->sk);
+ sock_release(sock);
}
EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 2e8c06108..0f3f19997 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -48,4 +48,5 @@ obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
ifneq ($(CONFIG_IPV6),)
obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o
+obj-y += mcast_snoop.o
endif
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 37b70e82b..21c2c818d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2121,6 +2121,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
if (!fn)
goto out;
+
+ noflags |= RTF_CACHE;
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
if (rt->dst.dev->ifindex != dev->ifindex)
continue;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index eef63b394..7de52b651 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -167,7 +167,7 @@ lookup_protocol:
WARN_ON(!answer_prot->slab);
err = -ENOBUFS;
- sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot);
+ sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern);
if (!sk)
goto out;
@@ -362,7 +362,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
np->saddr = addr->sin6_addr;
/* Make sure we are allowed to bind here. */
- if (sk->sk_prot->get_port(sk, snum)) {
+ if ((snum || !inet->bind_address_no_port) &&
+ sk->sk_prot->get_port(sk, snum)) {
inet_reset_saddr(sk);
err = -EADDRINUSE;
goto out;
@@ -768,6 +769,7 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.auto_flowlabels = 0;
net->ipv6.sysctl.idgen_retries = 3;
net->ipv6.sysctl.idgen_delay = 1 * HZ;
+ net->ipv6.sysctl.flowlabel_state_ranges = 1;
atomic_set(&net->ipv6.fib6_sernum, 1);
err = ipv6_init_mibs(net);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 62d908e64..b10a88986 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -40,7 +40,7 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a)
return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0);
}
-int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
struct inet_sock *inet = inet_sk(sk);
@@ -56,7 +56,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (usin->sin6_family == AF_INET) {
if (__ipv6_only_sock(sk))
return -EAFNOSUPPORT;
- err = ip4_datagram_connect(sk, uaddr, addr_len);
+ err = __ip4_datagram_connect(sk, uaddr, addr_len);
goto ipv4_connected;
}
@@ -98,9 +98,9 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
sin.sin_addr.s_addr = daddr->s6_addr32[3];
sin.sin_port = usin->sin6_port;
- err = ip4_datagram_connect(sk,
- (struct sockaddr *) &sin,
- sizeof(sin));
+ err = __ip4_datagram_connect(sk,
+ (struct sockaddr *) &sin,
+ sizeof(sin));
ipv4_connected:
if (err)
@@ -204,6 +204,16 @@ out:
fl6_sock_release(flowlabel);
return err;
}
+
+int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ int res;
+
+ lock_sock(sk);
+ res = __ip6_datagram_connect(sk, uaddr, addr_len);
+ release_sock(sk);
+ return res;
+}
EXPORT_SYMBOL_GPL(ip6_datagram_connect);
int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 7c07ce36a..060a60b2f 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -76,7 +76,7 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen)
len = ALIGN(len, crypto_tfm_ctx_alignment());
}
- len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead);
+ len += sizeof(struct aead_request) + crypto_aead_reqsize(aead);
len = ALIGN(len, __alignof__(struct scatterlist));
len += sizeof(struct scatterlist) * nfrags;
@@ -96,17 +96,6 @@ static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
crypto_aead_alignmask(aead) + 1) : tmp + seqhilen;
}
-static inline struct aead_givcrypt_request *esp_tmp_givreq(
- struct crypto_aead *aead, u8 *iv)
-{
- struct aead_givcrypt_request *req;
-
- req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
- crypto_tfm_ctx_alignment());
- aead_givcrypt_set_tfm(req, aead);
- return req;
-}
-
static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv)
{
struct aead_request *req;
@@ -125,14 +114,6 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
__alignof__(struct scatterlist));
}
-static inline struct scatterlist *esp_givreq_sg(
- struct crypto_aead *aead, struct aead_givcrypt_request *req)
-{
- return (void *)ALIGN((unsigned long)(req + 1) +
- crypto_aead_reqsize(aead),
- __alignof__(struct scatterlist));
-}
-
static void esp_output_done(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
@@ -141,32 +122,57 @@ static void esp_output_done(struct crypto_async_request *base, int err)
xfrm_output_resume(skb, err);
}
+/* Move ESP header back into place. */
+static void esp_restore_header(struct sk_buff *skb, unsigned int offset)
+{
+ struct ip_esp_hdr *esph = (void *)(skb->data + offset);
+ void *tmp = ESP_SKB_CB(skb)->tmp;
+ __be32 *seqhi = esp_tmp_seqhi(tmp);
+
+ esph->seq_no = esph->spi;
+ esph->spi = *seqhi;
+}
+
+static void esp_output_restore_header(struct sk_buff *skb)
+{
+ esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32));
+}
+
+static void esp_output_done_esn(struct crypto_async_request *base, int err)
+{
+ struct sk_buff *skb = base->data;
+
+ esp_output_restore_header(skb);
+ esp_output_done(base, err);
+}
+
static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
{
int err;
struct ip_esp_hdr *esph;
struct crypto_aead *aead;
- struct aead_givcrypt_request *req;
+ struct aead_request *req;
struct scatterlist *sg;
- struct scatterlist *asg;
struct sk_buff *trailer;
void *tmp;
int blksize;
int clen;
int alen;
int plen;
+ int ivlen;
int tfclen;
int nfrags;
int assoclen;
- int sglists;
int seqhilen;
u8 *iv;
u8 *tail;
__be32 *seqhi;
+ __be64 seqno;
/* skb is pure payload to encrypt */
aead = x->data;
alen = crypto_aead_authsize(aead);
+ ivlen = crypto_aead_ivsize(aead);
tfclen = 0;
if (x->tfcpad) {
@@ -187,16 +193,14 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
nfrags = err;
assoclen = sizeof(*esph);
- sglists = 1;
seqhilen = 0;
if (x->props.flags & XFRM_STATE_ESN) {
- sglists += 2;
seqhilen += sizeof(__be32);
assoclen += seqhilen;
}
- tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
+ tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
if (!tmp) {
err = -ENOMEM;
goto error;
@@ -204,9 +208,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
seqhi = esp_tmp_seqhi(tmp);
iv = esp_tmp_iv(aead, tmp, seqhilen);
- req = esp_tmp_givreq(aead, iv);
- asg = esp_givreq_sg(aead, req);
- sg = asg + sglists;
+ req = esp_tmp_req(aead, iv);
+ sg = esp_req_sg(aead, req);
/* Fill padding... */
tail = skb_tail_pointer(trailer);
@@ -227,37 +230,53 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
esph = ip_esp_hdr(skb);
*skb_mac_header(skb) = IPPROTO_ESP;
- esph->spi = x->id.spi;
esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+ aead_request_set_callback(req, 0, esp_output_done, skb);
+
+ /* For ESN we move the header forward by 4 bytes to
+ * accomodate the high bits. We will move it back after
+ * encryption.
+ */
+ if ((x->props.flags & XFRM_STATE_ESN)) {
+ esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
+ *seqhi = esph->spi;
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ aead_request_set_callback(req, 0, esp_output_done_esn, skb);
+ }
+
+ esph->spi = x->id.spi;
+
sg_init_table(sg, nfrags);
skb_to_sgvec(skb, sg,
- esph->enc_data + crypto_aead_ivsize(aead) - skb->data,
- clen + alen);
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + clen + alen);
- if ((x->props.flags & XFRM_STATE_ESN)) {
- sg_init_table(asg, 3);
- sg_set_buf(asg, &esph->spi, sizeof(__be32));
- *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
- sg_set_buf(asg + 1, seqhi, seqhilen);
- sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
- } else
- sg_init_one(asg, esph, sizeof(*esph));
-
- aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
- aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
- aead_givcrypt_set_assoc(req, asg, assoclen);
- aead_givcrypt_set_giv(req, esph->enc_data,
- XFRM_SKB_CB(skb)->seq.output.low +
- ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
+ aead_request_set_crypt(req, sg, sg, ivlen + clen, iv);
+ aead_request_set_ad(req, assoclen);
+
+ seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
+ ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
+
+ memset(iv, 0, ivlen);
+ memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&seqno + 8 - min(ivlen, 8),
+ min(ivlen, 8));
ESP_SKB_CB(skb)->tmp = tmp;
- err = crypto_aead_givencrypt(req);
- if (err == -EINPROGRESS)
+ err = crypto_aead_encrypt(req);
+
+ switch (err) {
+ case -EINPROGRESS:
goto error;
- if (err == -EBUSY)
+ case -EBUSY:
err = NET_XMIT_DROP;
+ break;
+
+ case 0:
+ if ((x->props.flags & XFRM_STATE_ESN))
+ esp_output_restore_header(skb);
+ }
kfree(tmp);
@@ -318,25 +337,38 @@ static void esp_input_done(struct crypto_async_request *base, int err)
xfrm_input_resume(skb, esp_input_done2(skb, err));
}
+static void esp_input_restore_header(struct sk_buff *skb)
+{
+ esp_restore_header(skb, 0);
+ __skb_pull(skb, 4);
+}
+
+static void esp_input_done_esn(struct crypto_async_request *base, int err)
+{
+ struct sk_buff *skb = base->data;
+
+ esp_input_restore_header(skb);
+ esp_input_done(base, err);
+}
+
static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
{
struct ip_esp_hdr *esph;
struct crypto_aead *aead = x->data;
struct aead_request *req;
struct sk_buff *trailer;
- int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
+ int ivlen = crypto_aead_ivsize(aead);
+ int elen = skb->len - sizeof(*esph) - ivlen;
int nfrags;
int assoclen;
- int sglists;
int seqhilen;
int ret = 0;
void *tmp;
__be32 *seqhi;
u8 *iv;
struct scatterlist *sg;
- struct scatterlist *asg;
- if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) {
+ if (!pskb_may_pull(skb, sizeof(*esph) + ivlen)) {
ret = -EINVAL;
goto out;
}
@@ -355,16 +387,14 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
ret = -ENOMEM;
assoclen = sizeof(*esph);
- sglists = 1;
seqhilen = 0;
if (x->props.flags & XFRM_STATE_ESN) {
- sglists += 2;
seqhilen += sizeof(__be32);
assoclen += seqhilen;
}
- tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
+ tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
if (!tmp)
goto out;
@@ -372,36 +402,39 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
seqhi = esp_tmp_seqhi(tmp);
iv = esp_tmp_iv(aead, tmp, seqhilen);
req = esp_tmp_req(aead, iv);
- asg = esp_req_sg(aead, req);
- sg = asg + sglists;
+ sg = esp_req_sg(aead, req);
skb->ip_summed = CHECKSUM_NONE;
esph = (struct ip_esp_hdr *)skb->data;
- /* Get ivec. This can be wrong, check against another impls. */
- iv = esph->enc_data;
-
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
+ aead_request_set_callback(req, 0, esp_input_done, skb);
+ /* For ESN we move the header forward by 4 bytes to
+ * accomodate the high bits. We will move it back after
+ * decryption.
+ */
if ((x->props.flags & XFRM_STATE_ESN)) {
- sg_init_table(asg, 3);
- sg_set_buf(asg, &esph->spi, sizeof(__be32));
- *seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
- sg_set_buf(asg + 1, seqhi, seqhilen);
- sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
- } else
- sg_init_one(asg, esph, sizeof(*esph));
+ esph = (void *)skb_push(skb, 4);
+ *seqhi = esph->spi;
+ esph->spi = esph->seq_no;
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi);
+ aead_request_set_callback(req, 0, esp_input_done_esn, skb);
+ }
- aead_request_set_callback(req, 0, esp_input_done, skb);
- aead_request_set_crypt(req, sg, sg, elen, iv);
- aead_request_set_assoc(req, asg, assoclen);
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg, 0, skb->len);
+
+ aead_request_set_crypt(req, sg, sg, elen + ivlen, iv);
+ aead_request_set_ad(req, assoclen);
ret = crypto_aead_decrypt(req);
if (ret == -EINPROGRESS)
goto out;
+ if ((x->props.flags & XFRM_STATE_ESN))
+ esp_input_restore_header(skb);
+
ret = esp_input_done2(skb, ret);
out:
@@ -461,10 +494,16 @@ static void esp6_destroy(struct xfrm_state *x)
static int esp_init_aead(struct xfrm_state *x)
{
+ char aead_name[CRYPTO_MAX_ALG_NAME];
struct crypto_aead *aead;
int err;
- aead = crypto_alloc_aead(x->aead->alg_name, 0, 0);
+ err = -ENAMETOOLONG;
+ if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
+ x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
+ goto error;
+
+ aead = crypto_alloc_aead(aead_name, 0, 0);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
@@ -503,15 +542,19 @@ static int esp_init_authenc(struct xfrm_state *x)
if ((x->props.flags & XFRM_STATE_ESN)) {
if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
- "authencesn(%s,%s)",
+ "%s%sauthencesn(%s,%s)%s",
+ x->geniv ?: "", x->geniv ? "(" : "",
x->aalg ? x->aalg->alg_name : "digest_null",
- x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+ x->ealg->alg_name,
+ x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME)
goto error;
} else {
if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
- "authenc(%s,%s)",
+ "%s%sauthenc(%s,%s)%s",
+ x->geniv ?: "", x->geniv ? "(" : "",
x->aalg ? x->aalg->alg_name : "digest_null",
- x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+ x->ealg->alg_name,
+ x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME)
goto error;
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 2c2b5d51f..713d7434c 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -207,7 +207,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
struct inet_peer *peer;
peer = inet_getpeer_v6(net->ipv6.peers,
- &rt->rt6i_dst.addr, 1);
+ &fl6->daddr, 1);
res = inet_peer_xrlim_allow(peer, tmo);
if (peer)
inet_putpeer(peer);
@@ -337,7 +337,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net,
* We won't send icmp if the destination is known
* anycast.
*/
- if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
+ if (ipv6_anycast_destination(dst, &fl6->daddr)) {
net_dbg_ratelimited("icmp6_send: acast source\n");
dst_release(dst);
return ERR_PTR(-EINVAL);
@@ -564,7 +564,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
if (!ipv6_unicast_destination(skb) &&
!(net->ipv6.sysctl.anycast_src_echo_reply &&
- ipv6_anycast_destination(skb)))
+ ipv6_anycast_destination(skb_dst(skb), saddr)))
saddr = NULL;
memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 871641bc1..b4fd96de9 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -257,7 +257,7 @@ not_unique:
return -EADDRNOTAVAIL;
}
-static inline u32 inet6_sk_port_offset(const struct sock *sk)
+static u32 inet6_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -269,7 +269,11 @@ static inline u32 inet6_sk_port_offset(const struct sock *sk)
int inet6_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
- return __inet_hash_connect(death_row, sk, inet6_sk_port_offset(sk),
+ u32 port_offset = 0;
+
+ if (!inet_sk(sk)->inet_num)
+ port_offset = inet6_sk_port_offset(sk);
+ return __inet_hash_connect(death_row, sk, port_offset,
__inet6_check_established);
}
EXPORT_SYMBOL_GPL(inet6_hash_connect);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index bde57b113..548c6237b 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -154,10 +154,34 @@ static void node_free(struct fib6_node *fn)
kmem_cache_free(fib6_node_kmem, fn);
}
+static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
+{
+ int cpu;
+
+ if (!non_pcpu_rt->rt6i_pcpu)
+ return;
+
+ for_each_possible_cpu(cpu) {
+ struct rt6_info **ppcpu_rt;
+ struct rt6_info *pcpu_rt;
+
+ ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu);
+ pcpu_rt = *ppcpu_rt;
+ if (pcpu_rt) {
+ dst_free(&pcpu_rt->dst);
+ *ppcpu_rt = NULL;
+ }
+ }
+
+ non_pcpu_rt->rt6i_pcpu = NULL;
+}
+
static void rt6_release(struct rt6_info *rt)
{
- if (atomic_dec_and_test(&rt->rt6i_ref))
+ if (atomic_dec_and_test(&rt->rt6i_ref)) {
+ rt6_free_pcpu(rt);
dst_free(&rt->dst);
+ }
}
static void fib6_link_table(struct net *net, struct fib6_table *tb)
@@ -738,6 +762,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
rt6_clean_expires(iter);
else
rt6_set_expires(iter, rt->dst.expires);
+ iter->rt6i_pmtu = rt->rt6i_pmtu;
return -EEXIST;
}
/* If we have the same destination and the same metric,
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index d49112501..1f9ebe3cb 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -595,6 +595,10 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
return -EINVAL;
+ if (net->ipv6.sysctl.flowlabel_state_ranges &&
+ (freq.flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
+ return -ERANGE;
+
fl = fl_create(net, sk, &freq, optval, optlen, &err);
if (!fl)
return err;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index a38d3ac0f..69f4f689f 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -361,6 +361,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
ip6gre_tunnel_unlink(ign, t);
+ ip6_tnl_dst_reset(t);
dev_put(dev);
}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index f2e464eba..57990c929 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -331,10 +331,10 @@ int ip6_mc_input(struct sk_buff *skb)
if (offset < 0)
goto out;
- if (!ipv6_is_mld(skb, nexthdr, offset))
- goto out;
+ if (ipv6_is_mld(skb, nexthdr, offset))
+ deliver = true;
- deliver = true;
+ goto out;
}
/* unknown RA - process it normally */
}
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index e893cd186..08b62047c 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -292,8 +292,6 @@ static struct packet_offload ipv6_packet_offload __read_mostly = {
static const struct net_offload sit_offload = {
.callbacks = {
.gso_segment = ipv6_gso_segment,
- .gro_receive = ipv6_gro_receive,
- .gro_complete = ipv6_gro_complete,
},
};
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index bc09cb97b..d5f771666 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
}
rcu_read_lock_bh();
- nexthop = rt6_nexthop((struct rt6_info *)dst);
+ nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
if (unlikely(!neigh))
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
@@ -459,7 +459,7 @@ int ip6_forward(struct sk_buff *skb)
else
target = &hdr->daddr;
- peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+ peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
/* Limit redirects both by destination (here)
and by source (inside ndisc_send_redirect)
@@ -551,7 +551,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
struct frag_hdr *fh;
unsigned int mtu, hlen, left, len;
int hroom, troom;
- __be32 frag_id = 0;
+ __be32 frag_id;
int ptr, offset = 0, err = 0;
u8 *prevhdr, nexthdr = 0;
struct net *net = dev_net(skb_dst(skb)->dev);
@@ -564,18 +564,17 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
/* We must not fragment if the socket is set to force MTU discovery
* or if the skb it not generated by a local socket.
*/
- if (unlikely(!skb->ignore_df && skb->len > mtu) ||
- (IP6CB(skb)->frag_max_size &&
- IP6CB(skb)->frag_max_size > mtu)) {
- if (skb->sk && dst_allfrag(skb_dst(skb)))
- sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
+ if (unlikely(!skb->ignore_df && skb->len > mtu))
+ goto fail_toobig;
- skb->dev = skb_dst(skb)->dev;
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_FRAGFAILS);
- kfree_skb(skb);
- return -EMSGSIZE;
+ if (IP6CB(skb)->frag_max_size) {
+ if (IP6CB(skb)->frag_max_size > mtu)
+ goto fail_toobig;
+
+ /* don't send fragments larger than what we received */
+ mtu = IP6CB(skb)->frag_max_size;
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
}
if (np && np->frag_size < mtu) {
@@ -584,6 +583,9 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
}
mtu -= hlen + sizeof(struct frag_hdr);
+ frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
+ &ipv6_hdr(skb)->saddr);
+
if (skb_has_frag_list(skb)) {
int first_len = skb_pagelen(skb);
struct sk_buff *frag2;
@@ -632,11 +634,10 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
skb_reset_network_header(skb);
memcpy(skb_network_header(skb), tmp_hdr, hlen);
- ipv6_select_ident(net, fh, rt);
fh->nexthdr = nexthdr;
fh->reserved = 0;
fh->frag_off = htons(IP6_MF);
- frag_id = fh->identification;
+ fh->identification = frag_id;
first_len = skb_pagelen(skb);
skb->data_len = first_len - skb_headlen(skb);
@@ -778,11 +779,7 @@ slow_path:
*/
fh->nexthdr = nexthdr;
fh->reserved = 0;
- if (!frag_id) {
- ipv6_select_ident(net, fh, rt);
- frag_id = fh->identification;
- } else
- fh->identification = frag_id;
+ fh->identification = frag_id;
/*
* Copy a block of the IP datagram.
@@ -815,6 +812,14 @@ slow_path:
consume_skb(skb);
return err;
+fail_toobig:
+ if (skb->sk && dst_allfrag(skb_dst(skb)))
+ sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
+
+ skb->dev = skb_dst(skb)->dev;
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ err = -EMSGSIZE;
+
fail:
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_FRAGFAILS);
@@ -936,7 +941,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
*/
rt = (struct rt6_info *) *dst;
rcu_read_lock_bh();
- n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
+ n = __ipv6_neigh_lookup_noref(rt->dst.dev,
+ rt6_nexthop(rt, &fl6->daddr));
err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
rcu_read_unlock_bh();
@@ -1060,11 +1066,10 @@ static inline int ip6_ufo_append_data(struct sock *sk,
int odd, struct sk_buff *skb),
void *from, int length, int hh_len, int fragheaderlen,
int transhdrlen, int mtu, unsigned int flags,
- struct rt6_info *rt)
+ const struct flowi6 *fl6)
{
struct sk_buff *skb;
- struct frag_hdr fhdr;
int err;
/* There is support for UDP large send offload by network
@@ -1106,8 +1111,9 @@ static inline int ip6_ufo_append_data(struct sock *sk,
skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
sizeof(struct frag_hdr)) & ~7;
skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
- ipv6_select_ident(sock_net(sk), &fhdr, rt);
- skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
+ skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
+ &fl6->daddr,
+ &fl6->saddr);
append:
return skb_append_datato_frags(sk, skb, getfrag, from,
@@ -1332,7 +1338,7 @@ emsgsize:
(sk->sk_type == SOCK_DGRAM)) {
err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen,
- transhdrlen, mtu, flags, rt);
+ transhdrlen, mtu, flags, fl6);
if (err)
goto error;
return 0;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5cafd92c2..2e67b6601 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -151,7 +151,7 @@ EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *) dst;
- t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
+ t->dst_cookie = rt6_get_cookie(rt);
dst_release(t->dst_cache);
t->dst_cache = dst;
}
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index bba8903e8..e1a1136bd 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -19,12 +19,10 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
int err;
struct socket *sock = NULL;
- err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
+ err = sock_create_kern(net, AF_INET6, SOCK_DGRAM, 0, &sock);
if (err < 0)
goto error;
- sk_change_net(sock->sk, net);
-
udp6_addr.sin6_family = AF_INET6;
memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
sizeof(udp6_addr.sin6_addr));
@@ -55,7 +53,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
error:
if (sock) {
kernel_sock_shutdown(sock, SHUT_RDWR);
- sk_release_kernel(sock->sk);
+ sock_release(sock);
}
*sockp = NULL;
return err;
diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c
new file mode 100644
index 000000000..9405b04ee
--- /dev/null
+++ b/net/ipv6/mcast_snoop.c
@@ -0,0 +1,216 @@
+/* Copyright (C) 2010: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+ * Copyright (C) 2015: Linus Lüssing <linus.luessing@c0d3.blue>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Based on the MLD support added to br_multicast.c by YOSHIFUJI Hideaki.
+ */
+
+#include <linux/skbuff.h>
+#include <net/ipv6.h>
+#include <net/mld.h>
+#include <net/addrconf.h>
+#include <net/ip6_checksum.h>
+
+static int ipv6_mc_check_ip6hdr(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ip6h;
+ unsigned int len;
+ unsigned int offset = skb_network_offset(skb) + sizeof(*ip6h);
+
+ if (!pskb_may_pull(skb, offset))
+ return -EINVAL;
+
+ ip6h = ipv6_hdr(skb);
+
+ if (ip6h->version != 6)
+ return -EINVAL;
+
+ len = offset + ntohs(ip6h->payload_len);
+ if (skb->len < len || len <= offset)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ipv6_mc_check_exthdrs(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ip6h;
+ int offset;
+ u8 nexthdr;
+ __be16 frag_off;
+
+ ip6h = ipv6_hdr(skb);
+
+ if (ip6h->nexthdr != IPPROTO_HOPOPTS)
+ return -ENOMSG;
+
+ nexthdr = ip6h->nexthdr;
+ offset = skb_network_offset(skb) + sizeof(*ip6h);
+ offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off);
+
+ if (offset < 0)
+ return -EINVAL;
+
+ if (nexthdr != IPPROTO_ICMPV6)
+ return -ENOMSG;
+
+ skb_set_transport_header(skb, offset);
+
+ return 0;
+}
+
+static int ipv6_mc_check_mld_reportv2(struct sk_buff *skb)
+{
+ unsigned int len = skb_transport_offset(skb);
+
+ len += sizeof(struct mld2_report);
+
+ return pskb_may_pull(skb, len) ? 0 : -EINVAL;
+}
+
+static int ipv6_mc_check_mld_query(struct sk_buff *skb)
+{
+ struct mld_msg *mld;
+ unsigned int len = skb_transport_offset(skb);
+
+ /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */
+ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
+ return -EINVAL;
+
+ len += sizeof(struct mld_msg);
+ if (skb->len < len)
+ return -EINVAL;
+
+ /* MLDv1? */
+ if (skb->len != len) {
+ /* or MLDv2? */
+ len += sizeof(struct mld2_query) - sizeof(struct mld_msg);
+ if (skb->len < len || !pskb_may_pull(skb, len))
+ return -EINVAL;
+ }
+
+ mld = (struct mld_msg *)skb_transport_header(skb);
+
+ /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer
+ * all-nodes destination address (ff02::1) for general queries
+ */
+ if (ipv6_addr_any(&mld->mld_mca) &&
+ !ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ipv6_mc_check_mld_msg(struct sk_buff *skb)
+{
+ struct mld_msg *mld = (struct mld_msg *)skb_transport_header(skb);
+
+ switch (mld->mld_type) {
+ case ICMPV6_MGM_REDUCTION:
+ case ICMPV6_MGM_REPORT:
+ /* fall through */
+ return 0;
+ case ICMPV6_MLD2_REPORT:
+ return ipv6_mc_check_mld_reportv2(skb);
+ case ICMPV6_MGM_QUERY:
+ return ipv6_mc_check_mld_query(skb);
+ default:
+ return -ENOMSG;
+ }
+}
+
+static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb)
+{
+ return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo);
+}
+
+static int __ipv6_mc_check_mld(struct sk_buff *skb,
+ struct sk_buff **skb_trimmed)
+
+{
+ struct sk_buff *skb_chk = NULL;
+ unsigned int transport_len;
+ unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg);
+ int ret = -EINVAL;
+
+ transport_len = ntohs(ipv6_hdr(skb)->payload_len);
+ transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr);
+
+ skb_chk = skb_checksum_trimmed(skb, transport_len,
+ ipv6_mc_validate_checksum);
+ if (!skb_chk)
+ goto err;
+
+ if (!pskb_may_pull(skb_chk, len))
+ goto err;
+
+ ret = ipv6_mc_check_mld_msg(skb_chk);
+ if (ret)
+ goto err;
+
+ if (skb_trimmed)
+ *skb_trimmed = skb_chk;
+ /* free now unneeded clone */
+ else if (skb_chk != skb)
+ kfree_skb(skb_chk);
+
+ ret = 0;
+
+err:
+ if (ret && skb_chk && skb_chk != skb)
+ kfree_skb(skb_chk);
+
+ return ret;
+}
+
+/**
+ * ipv6_mc_check_mld - checks whether this is a sane MLD packet
+ * @skb: the skb to validate
+ * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional)
+ *
+ * Checks whether an IPv6 packet is a valid MLD packet. If so sets
+ * skb transport header accordingly and returns zero.
+ *
+ * -EINVAL: A broken packet was detected, i.e. it violates some internet
+ * standard
+ * -ENOMSG: IP header validation succeeded but it is not an MLD packet.
+ * -ENOMEM: A memory allocation failure happened.
+ *
+ * Optionally, an skb pointer might be provided via skb_trimmed (or set it
+ * to NULL): After parsing an MLD packet successfully it will point to
+ * an skb which has its tail aligned to the IP packet end. This might
+ * either be the originally provided skb or a trimmed, cloned version if
+ * the skb frame had data beyond the IP packet. A cloned skb allows us
+ * to leave the original skb and its full frame unchanged (which might be
+ * desirable for layer 2 frame jugglers).
+ *
+ * Caller needs to set the skb network header and free any returned skb if it
+ * differs from the provided skb.
+ */
+int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed)
+{
+ int ret;
+
+ ret = ipv6_mc_check_ip6hdr(skb);
+ if (ret < 0)
+ return ret;
+
+ ret = ipv6_mc_check_exthdrs(skb);
+ if (ret < 0)
+ return ret;
+
+ return __ipv6_mc_check_mld(skb, skb_trimmed);
+}
+EXPORT_SYMBOL(ipv6_mc_check_mld);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 96f153c08..c53331cfe 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1506,7 +1506,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
"Redirect: destination is not a neighbour\n");
goto release;
}
- peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+ peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1);
ret = inet_peer_xrlim_allow(peer, 1*HZ);
if (peer)
inet_putpeer(peer);
@@ -1650,6 +1650,7 @@ int ndisc_rcv(struct sk_buff *skb)
static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_change_info *change_info;
struct net *net = dev_net(dev);
struct inet6_dev *idev;
@@ -1664,6 +1665,11 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
ndisc_send_unsol_na(dev);
in6_dev_put(idev);
break;
+ case NETDEV_CHANGE:
+ change_info = ptr;
+ if (change_info->flags_changed & IFF_NOARP)
+ neigh_changeaddr(&nd_tbl, dev);
+ break;
case NETDEV_DOWN:
neigh_ifdown(&nd_tbl, dev);
fib6_run_gc(0, net, false);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index d958718b5..b4de08a83 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -191,6 +191,8 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
static const struct nf_ipv6_ops ipv6ops = {
.chk_addr = ipv6_chk_addr,
+ .route_input = ip6_route_input,
+ .fragment = ip6_fragment
};
static const struct nf_afinfo nf_ip6_afinfo = {
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index ca6998345..b552cf0d6 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -186,7 +186,8 @@ config IP6_NF_MATCH_MH
config IP6_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
- depends on NETFILTER_ADVANCED && (IP6_NF_MANGLE || IP6_NF_RAW)
+ depends on NETFILTER_ADVANCED
+ depends on IP6_NF_MANGLE || IP6_NF_RAW
---help---
This option allows you to match packets whose replies would
go out via the interface the packet came in.
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 62f5b0d0b..3c35ced39 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -283,15 +283,13 @@ static void trace_packet(const struct sk_buff *skb,
const struct xt_table_info *private,
const struct ip6t_entry *e)
{
- const void *table_base;
const struct ip6t_entry *root;
const char *hookname, *chainname, *comment;
const struct ip6t_entry *iter;
unsigned int rulenum = 0;
struct net *net = dev_net(in ? in : out);
- table_base = private->entries[smp_processor_id()];
- root = get_entry(table_base, private->hook_entry[hook]);
+ root = get_entry(private->entries, private->hook_entry[hook]);
hookname = chainname = hooknames[hook];
comment = comments[NF_IP6_TRACE_COMMENT_RULE];
@@ -357,7 +355,7 @@ ip6t_do_table(struct sk_buff *skb,
*/
smp_read_barrier_depends();
cpu = smp_processor_id();
- table_base = private->entries[cpu];
+ table_base = private->entries;
jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
stackptr = per_cpu_ptr(private->stackptr, cpu);
origptr = *stackptr;
@@ -367,6 +365,7 @@ ip6t_do_table(struct sk_buff *skb,
do {
const struct xt_entry_target *t;
const struct xt_entry_match *ematch;
+ struct xt_counters *counter;
IP_NF_ASSERT(e);
acpar.thoff = 0;
@@ -384,7 +383,8 @@ ip6t_do_table(struct sk_buff *skb,
goto no_match;
}
- ADD_COUNTER(e->counters, skb->len, 1);
+ counter = xt_get_this_cpu_counter(&e->counters);
+ ADD_COUNTER(*counter, skb->len, 1);
t = ip6t_get_target_c(e);
IP_NF_ASSERT(t->u.kernel.target);
@@ -679,6 +679,10 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
if (ret)
return ret;
+ e->counters.pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(e->counters.pcnt))
+ return -ENOMEM;
+
j = 0;
mtpar.net = net;
mtpar.table = name;
@@ -714,6 +718,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
break;
cleanup_match(ematch, net);
}
+
+ xt_percpu_counter_free(e->counters.pcnt);
+
return ret;
}
@@ -797,6 +804,8 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net)
if (par.target->destroy != NULL)
par.target->destroy(&par);
module_put(par.target->me);
+
+ xt_percpu_counter_free(e->counters.pcnt);
}
/* Checks and translates the user-supplied table segment (held in
@@ -879,12 +888,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
return ret;
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i) {
- if (newinfo->entries[i] && newinfo->entries[i] != entry0)
- memcpy(newinfo->entries[i], entry0, newinfo->size);
- }
-
return ret;
}
@@ -900,14 +903,16 @@ get_counters(const struct xt_table_info *t,
seqcount_t *s = &per_cpu(xt_recseq, cpu);
i = 0;
- xt_entry_foreach(iter, t->entries[cpu], t->size) {
+ xt_entry_foreach(iter, t->entries, t->size) {
+ struct xt_counters *tmp;
u64 bcnt, pcnt;
unsigned int start;
+ tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
do {
start = read_seqcount_begin(s);
- bcnt = iter->counters.bcnt;
- pcnt = iter->counters.pcnt;
+ bcnt = tmp->bcnt;
+ pcnt = tmp->pcnt;
} while (read_seqcount_retry(s, start));
ADD_COUNTER(counters[i], bcnt, pcnt);
@@ -952,11 +957,7 @@ copy_entries_to_user(unsigned int total_size,
if (IS_ERR(counters))
return PTR_ERR(counters);
- /* choose the copy that is on our node/cpu, ...
- * This choice is lazy (because current thread is
- * allowed to migrate to another cpu)
- */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ loc_cpu_entry = private->entries;
if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
ret = -EFAULT;
goto free_counters;
@@ -1064,16 +1065,16 @@ static int compat_table_info(const struct xt_table_info *info,
struct xt_table_info *newinfo)
{
struct ip6t_entry *iter;
- void *loc_cpu_entry;
+ const void *loc_cpu_entry;
int ret;
if (!newinfo || !info)
return -EINVAL;
- /* we dont care about newinfo->entries[] */
+ /* we dont care about newinfo->entries */
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
- loc_cpu_entry = info->entries[raw_smp_processor_id()];
+ loc_cpu_entry = info->entries;
xt_compat_init_offsets(AF_INET6, info->number);
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
@@ -1194,7 +1195,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct xt_table *t;
struct xt_table_info *oldinfo;
struct xt_counters *counters;
- const void *loc_cpu_old_entry;
struct ip6t_entry *iter;
ret = 0;
@@ -1237,8 +1237,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
get_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
- loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
- xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
+ xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
cleanup_entry(iter, net);
xt_free_table_info(oldinfo);
@@ -1284,8 +1283,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
if (!newinfo)
return -ENOMEM;
- /* choose the copy that is on our node/cpu */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
tmp.size) != 0) {
ret = -EFAULT;
@@ -1316,7 +1314,7 @@ static int
do_add_counters(struct net *net, const void __user *user, unsigned int len,
int compat)
{
- unsigned int i, curcpu;
+ unsigned int i;
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
@@ -1326,7 +1324,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
struct xt_table *t;
const struct xt_table_info *private;
int ret = 0;
- const void *loc_cpu_entry;
struct ip6t_entry *iter;
unsigned int addend;
#ifdef CONFIG_COMPAT
@@ -1374,7 +1371,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
goto free;
}
-
local_bh_disable();
private = t->private;
if (private->number != num_counters) {
@@ -1383,16 +1379,15 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
}
i = 0;
- /* Choose the copy that is on our node */
- curcpu = smp_processor_id();
addend = xt_write_recseq_begin();
- loc_cpu_entry = private->entries[curcpu];
- xt_entry_foreach(iter, loc_cpu_entry, private->size) {
- ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
+ xt_entry_foreach(iter, private->entries, private->size) {
+ struct xt_counters *tmp;
+
+ tmp = xt_get_this_cpu_counter(&iter->counters);
+ ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
++i;
}
xt_write_recseq_end(addend);
-
unlock_up_free:
local_bh_enable();
xt_table_unlock(t);
@@ -1459,7 +1454,6 @@ static int
compat_find_calc_match(struct xt_entry_match *m,
const char *name,
const struct ip6t_ip6 *ipv6,
- unsigned int hookmask,
int *size)
{
struct xt_match *match;
@@ -1528,8 +1522,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
entry_offset = (void *)e - (void *)base;
j = 0;
xt_ematch_foreach(ematch, e) {
- ret = compat_find_calc_match(ematch, name,
- &e->ipv6, e->comefrom, &off);
+ ret = compat_find_calc_match(ematch, name, &e->ipv6, &off);
if (ret != 0)
goto release_matches;
++j;
@@ -1623,6 +1616,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net,
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
+ e->counters.pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(e->counters.pcnt))
+ return -ENOMEM;
j = 0;
mtpar.net = net;
mtpar.table = name;
@@ -1647,6 +1643,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net,
break;
cleanup_match(ematch, net);
}
+
+ xt_percpu_counter_free(e->counters.pcnt);
+
return ret;
}
@@ -1731,7 +1730,7 @@ translate_compat_table(struct net *net,
newinfo->hook_entry[i] = info->hook_entry[i];
newinfo->underflow[i] = info->underflow[i];
}
- entry1 = newinfo->entries[raw_smp_processor_id()];
+ entry1 = newinfo->entries;
pos = entry1;
size = total_size;
xt_entry_foreach(iter0, entry0, total_size) {
@@ -1783,11 +1782,6 @@ translate_compat_table(struct net *net,
return ret;
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i)
- if (newinfo->entries[i] && newinfo->entries[i] != entry1)
- memcpy(newinfo->entries[i], entry1, newinfo->size);
-
*pinfo = newinfo;
*pentry0 = entry1;
xt_free_table_info(info);
@@ -1834,8 +1828,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
if (!newinfo)
return -ENOMEM;
- /* choose the copy that is on our node/cpu */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
tmp.size) != 0) {
ret = -EFAULT;
@@ -1906,7 +1899,6 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
void __user *pos;
unsigned int size;
int ret = 0;
- const void *loc_cpu_entry;
unsigned int i = 0;
struct ip6t_entry *iter;
@@ -1914,14 +1906,9 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
if (IS_ERR(counters))
return PTR_ERR(counters);
- /* choose the copy that is on our node/cpu, ...
- * This choice is lazy (because current thread is
- * allowed to migrate to another cpu)
- */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
pos = userptr;
size = total_size;
- xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+ xt_entry_foreach(iter, private->entries, total_size) {
ret = compat_copy_entry_to_user(iter, &pos,
&size, counters, i++);
if (ret != 0)
@@ -2096,8 +2083,7 @@ struct xt_table *ip6t_register_table(struct net *net,
goto out;
}
- /* choose the copy on our node/cpu, but dont care about preemption */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
memcpy(loc_cpu_entry, repl->entries, repl->size);
ret = translate_table(net, newinfo, loc_cpu_entry, repl);
@@ -2127,7 +2113,7 @@ void ip6t_unregister_table(struct net *net, struct xt_table *table)
private = xt_unregister_table(table);
/* Decrease module usage counts and free resources */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ loc_cpu_entry = private->entries;
xt_entry_foreach(iter, loc_cpu_entry, private->size)
cleanup_entry(iter, net);
if (private->number > private->initial_entries)
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 6edb7b106..ebbb754c2 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -37,12 +37,13 @@ synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr,
}
static void
-synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
+synproxy_send_tcp(const struct synproxy_net *snet,
+ const struct sk_buff *skb, struct sk_buff *nskb,
struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
struct ipv6hdr *niph, struct tcphdr *nth,
unsigned int tcp_hdr_size)
{
- struct net *net = nf_ct_net((struct nf_conn *)nfct);
+ struct net *net = nf_ct_net(snet->tmpl);
struct dst_entry *dst;
struct flowi6 fl6;
@@ -83,7 +84,8 @@ free_nskb:
}
static void
-synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
+synproxy_send_client_synack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
const struct synproxy_options *opts)
{
struct sk_buff *nskb;
@@ -119,7 +121,7 @@ synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
synproxy_build_options(nth, opts);
- synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+ synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
niph, nth, tcp_hdr_size);
}
@@ -163,7 +165,7 @@ synproxy_send_server_syn(const struct synproxy_net *snet,
synproxy_build_options(nth, opts);
- synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+ synproxy_send_tcp(snet, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
niph, nth, tcp_hdr_size);
}
@@ -203,7 +205,7 @@ synproxy_send_server_ack(const struct synproxy_net *snet,
synproxy_build_options(nth, opts);
- synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+ synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
}
static void
@@ -241,7 +243,8 @@ synproxy_send_client_ack(const struct synproxy_net *snet,
synproxy_build_options(nth, opts);
- synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+ synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+ niph, nth, tcp_hdr_size);
}
static bool
@@ -301,7 +304,7 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
XT_SYNPROXY_OPT_SACK_PERM |
XT_SYNPROXY_OPT_ECN);
- synproxy_send_client_synack(skb, th, &opts);
+ synproxy_send_client_synack(snet, skb, th, &opts);
return NF_DROP;
} else if (th->ack && !(th->fin || th->rst || th->syn)) {
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 6f187c8d8..6d0249817 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -348,7 +348,7 @@ found:
fq->ecn |= ecn;
if (payload_len > fq->q.max_size)
fq->q.max_size = payload_len;
- add_frag_mem_limit(&fq->q, skb->truesize);
+ add_frag_mem_limit(fq->q.net, skb->truesize);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
@@ -430,7 +430,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
clone->ip_summed = head->ip_summed;
NFCT_FRAG6_CB(clone)->orig = NULL;
- add_frag_mem_limit(&fq->q, clone->truesize);
+ add_frag_mem_limit(fq->q.net, clone->truesize);
}
/* We have to remove fragment header from datagram and to relocate
@@ -454,7 +454,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
}
- sub_frag_mem_limit(&fq->q, head->truesize);
+ sub_frag_mem_limit(fq->q.net, head->truesize);
head->ignore_df = 1;
head->next = NULL;
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 85892af57..928a0fb0b 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -8,9 +8,11 @@
#include <net/ip6_fib.h>
#include <net/addrconf.h>
#include <net/secure_seq.h>
+#include <linux/netfilter.h>
static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
- struct in6_addr *dst, struct in6_addr *src)
+ const struct in6_addr *dst,
+ const struct in6_addr *src)
{
u32 hash, id;
@@ -60,17 +62,17 @@ void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
-void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr,
- struct rt6_info *rt)
+__be32 ipv6_select_ident(struct net *net,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
static u32 ip6_idents_hashrnd __read_mostly;
u32 id;
net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
- id = __ipv6_select_ident(net, ip6_idents_hashrnd, &rt->rt6i_dst.addr,
- &rt->rt6i_src.addr);
- fhdr->identification = htonl(id);
+ id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr);
+ return htonl(id);
}
EXPORT_SYMBOL(ipv6_select_ident);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8072bd413..ca4700cb2 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -865,6 +865,9 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_oif = np->ucast_oif;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ if (inet->hdrincl)
+ fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH;
+
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
@@ -1324,13 +1327,7 @@ static struct inet_protosw rawv6_protosw = {
int __init rawv6_init(void)
{
- int ret;
-
- ret = inet6_register_protosw(&rawv6_protosw);
- if (ret)
- goto out;
-out:
- return ret;
+ return inet6_register_protosw(&rawv6_protosw);
}
void rawv6_exit(void)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 8ffa2c8cc..f1159bb76 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -144,7 +144,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
- if (fq->q.flags & INET_FRAG_EVICTED)
+ if (inet_frag_evicting(&fq->q))
goto out_rcu_unlock;
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
@@ -330,7 +330,7 @@ found:
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
fq->ecn |= ecn;
- add_frag_mem_limit(&fq->q, skb->truesize);
+ add_frag_mem_limit(fq->q.net, skb->truesize);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
@@ -443,7 +443,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- add_frag_mem_limit(&fq->q, clone->truesize);
+ add_frag_mem_limit(fq->q.net, clone->truesize);
}
/* We have to remove fragment header from datagram and to relocate
@@ -481,7 +481,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
}
fp = next;
}
- sub_frag_mem_limit(&fq->q, sum_truesize);
+ sub_frag_mem_limit(fq->q.net, sum_truesize);
head->next = NULL;
head->dev = dev;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c73ae5039..d15586490 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -72,8 +72,7 @@ enum rt6_nud_state {
RT6_NUD_SUCCEED = 1
};
-static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
- const struct in6_addr *dest);
+static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
static unsigned int ip6_mtu(const struct dst_entry *dst);
@@ -92,6 +91,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
+static void rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -104,65 +104,82 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
const struct in6_addr *gwaddr, int ifindex);
#endif
-static void rt6_bind_peer(struct rt6_info *rt, int create)
+struct uncached_list {
+ spinlock_t lock;
+ struct list_head head;
+};
+
+static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
+
+static void rt6_uncached_list_add(struct rt6_info *rt)
{
- struct inet_peer_base *base;
- struct inet_peer *peer;
+ struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
- base = inetpeer_base_ptr(rt->_rt6i_peer);
- if (!base)
- return;
+ rt->dst.flags |= DST_NOCACHE;
+ rt->rt6i_uncached_list = ul;
- peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
- if (peer) {
- if (!rt6_set_peer(rt, peer))
- inet_putpeer(peer);
+ spin_lock_bh(&ul->lock);
+ list_add_tail(&rt->rt6i_uncached, &ul->head);
+ spin_unlock_bh(&ul->lock);
+}
+
+static void rt6_uncached_list_del(struct rt6_info *rt)
+{
+ if (!list_empty(&rt->rt6i_uncached)) {
+ struct uncached_list *ul = rt->rt6i_uncached_list;
+
+ spin_lock_bh(&ul->lock);
+ list_del(&rt->rt6i_uncached);
+ spin_unlock_bh(&ul->lock);
}
}
-static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
+static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
- if (rt6_has_peer(rt))
- return rt6_peer_ptr(rt);
+ struct net_device *loopback_dev = net->loopback_dev;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
+ struct rt6_info *rt;
+
+ spin_lock_bh(&ul->lock);
+ list_for_each_entry(rt, &ul->head, rt6i_uncached) {
+ struct inet6_dev *rt_idev = rt->rt6i_idev;
+ struct net_device *rt_dev = rt->dst.dev;
- rt6_bind_peer(rt, create);
- return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
+ if (rt_idev && (rt_idev->dev == dev || !dev) &&
+ rt_idev->dev != loopback_dev) {
+ rt->rt6i_idev = in6_dev_get(loopback_dev);
+ in6_dev_put(rt_idev);
+ }
+
+ if (rt_dev && (rt_dev == dev || !dev) &&
+ rt_dev != loopback_dev) {
+ rt->dst.dev = loopback_dev;
+ dev_hold(rt->dst.dev);
+ dev_put(rt_dev);
+ }
+ }
+ spin_unlock_bh(&ul->lock);
+ }
}
-static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
+static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
- return __rt6_get_peer(rt, 1);
+ return dst_metrics_write_ptr(rt->dst.from);
}
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
- struct rt6_info *rt = (struct rt6_info *) dst;
- struct inet_peer *peer;
- u32 *p = NULL;
+ struct rt6_info *rt = (struct rt6_info *)dst;
- if (!(rt->dst.flags & DST_HOST))
+ if (rt->rt6i_flags & RTF_PCPU)
+ return rt6_pcpu_cow_metrics(rt);
+ else if (rt->rt6i_flags & RTF_CACHE)
+ return NULL;
+ else
return dst_cow_metrics_generic(dst, old);
-
- peer = rt6_get_peer_create(rt);
- if (peer) {
- u32 *old_p = __DST_METRICS_PTR(old);
- unsigned long prev, new;
-
- p = peer->metrics;
- if (inet_metrics_new(peer) ||
- (old & DST_METRICS_FORCE_OVERWRITE))
- memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
-
- new = (unsigned long) p;
- prev = cmpxchg(&dst->_metrics, old, new);
-
- if (prev != old) {
- p = __DST_METRICS_PTR(prev);
- if (prev & DST_METRICS_READ_ONLY)
- p = NULL;
- }
- }
- return p;
}
static inline const void *choose_neigh_daddr(struct rt6_info *rt,
@@ -299,10 +316,9 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
#endif
/* allocate dst with ip6_dst_ops */
-static inline struct rt6_info *ip6_dst_alloc(struct net *net,
- struct net_device *dev,
- int flags,
- struct fib6_table *table)
+static struct rt6_info *__ip6_dst_alloc(struct net *net,
+ struct net_device *dev,
+ int flags)
{
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
0, DST_OBSOLETE_FORCE_CHK, flags);
@@ -311,21 +327,50 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
struct dst_entry *dst = &rt->dst;
memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
- rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
INIT_LIST_HEAD(&rt->rt6i_siblings);
+ INIT_LIST_HEAD(&rt->rt6i_uncached);
+ }
+ return rt;
+}
+
+static struct rt6_info *ip6_dst_alloc(struct net *net,
+ struct net_device *dev,
+ int flags)
+{
+ struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
+
+ if (rt) {
+ rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
+ if (rt->rt6i_pcpu) {
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct rt6_info **p;
+
+ p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
+ /* no one shares rt */
+ *p = NULL;
+ }
+ } else {
+ dst_destroy((struct dst_entry *)rt);
+ return NULL;
+ }
}
+
return rt;
}
static void ip6_dst_destroy(struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *)dst;
- struct inet6_dev *idev = rt->rt6i_idev;
struct dst_entry *from = dst->from;
+ struct inet6_dev *idev;
- if (!(rt->dst.flags & DST_HOST))
- dst_destroy_metrics_generic(dst);
+ dst_destroy_metrics_generic(dst);
+ free_percpu(rt->rt6i_pcpu);
+ rt6_uncached_list_del(rt);
+ idev = rt->rt6i_idev;
if (idev) {
rt->rt6i_idev = NULL;
in6_dev_put(idev);
@@ -333,11 +378,6 @@ static void ip6_dst_destroy(struct dst_entry *dst)
dst->from = NULL;
dst_release(from);
-
- if (rt6_has_peer(rt)) {
- struct inet_peer *peer = rt6_peer_ptr(rt);
- inet_putpeer(peer);
- }
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -652,15 +692,33 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
u32 metric, int oif, int strict,
bool *do_rr)
{
- struct rt6_info *rt, *match;
+ struct rt6_info *rt, *match, *cont;
int mpri = -1;
match = NULL;
- for (rt = rr_head; rt && rt->rt6i_metric == metric;
- rt = rt->dst.rt6_next)
+ cont = NULL;
+ for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
+ if (rt->rt6i_metric != metric) {
+ cont = rt;
+ break;
+ }
+
+ match = find_match(rt, oif, strict, &mpri, match, do_rr);
+ }
+
+ for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
+ if (rt->rt6i_metric != metric) {
+ cont = rt;
+ break;
+ }
+
match = find_match(rt, oif, strict, &mpri, match, do_rr);
- for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
- rt = rt->dst.rt6_next)
+ }
+
+ if (match || !cont)
+ return match;
+
+ for (rt = cont; rt; rt = rt->dst.rt6_next)
match = find_match(rt, oif, strict, &mpri, match, do_rr);
return match;
@@ -694,6 +752,11 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
return match ? match : net->ipv6.ip6_null_entry;
}
+static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
+{
+ return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
+}
+
#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
const struct in6_addr *gwaddr)
@@ -872,9 +935,9 @@ int ip6_ins_rt(struct rt6_info *rt)
return __ip6_ins_rt(rt, &info, &mxc);
}
-static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr)
+static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
struct rt6_info *rt;
@@ -882,15 +945,25 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
* Clone the route.
*/
- rt = ip6_rt_copy(ort, daddr);
+ if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
+ ort = (struct rt6_info *)ort->dst.from;
- if (rt) {
+ rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
+
+ if (!rt)
+ return NULL;
+
+ ip6_rt_copy_init(rt, ort);
+ rt->rt6i_flags |= RTF_CACHE;
+ rt->rt6i_metric = 0;
+ rt->dst.flags |= DST_HOST;
+ rt->rt6i_dst.addr = *daddr;
+ rt->rt6i_dst.plen = 128;
+
+ if (!rt6_is_gw_or_nonexthop(ort)) {
if (ort->rt6i_dst.plen != 128 &&
ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
rt->rt6i_flags |= RTF_ANYCAST;
-
- rt->rt6i_flags |= RTF_CACHE;
-
#ifdef CONFIG_IPV6_SUBTREES
if (rt->rt6i_src.plen && saddr) {
rt->rt6i_src.addr = *saddr;
@@ -902,30 +975,85 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
return rt;
}
-static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
- const struct in6_addr *daddr)
+static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
- struct rt6_info *rt = ip6_rt_copy(ort, daddr);
+ struct rt6_info *pcpu_rt;
- if (rt)
- rt->rt6i_flags |= RTF_CACHE;
- return rt;
+ pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
+ rt->dst.dev, rt->dst.flags);
+
+ if (!pcpu_rt)
+ return NULL;
+ ip6_rt_copy_init(pcpu_rt, rt);
+ pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
+ pcpu_rt->rt6i_flags |= RTF_PCPU;
+ return pcpu_rt;
+}
+
+/* It should be called with read_lock_bh(&tb6_lock) acquired */
+static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
+{
+ struct rt6_info *pcpu_rt, **p;
+
+ p = this_cpu_ptr(rt->rt6i_pcpu);
+ pcpu_rt = *p;
+
+ if (pcpu_rt) {
+ dst_hold(&pcpu_rt->dst);
+ rt6_dst_from_metrics_check(pcpu_rt);
+ }
+ return pcpu_rt;
+}
+
+static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
+{
+ struct fib6_table *table = rt->rt6i_table;
+ struct rt6_info *pcpu_rt, *prev, **p;
+
+ pcpu_rt = ip6_rt_pcpu_alloc(rt);
+ if (!pcpu_rt) {
+ struct net *net = dev_net(rt->dst.dev);
+
+ dst_hold(&net->ipv6.ip6_null_entry->dst);
+ return net->ipv6.ip6_null_entry;
+ }
+
+ read_lock_bh(&table->tb6_lock);
+ if (rt->rt6i_pcpu) {
+ p = this_cpu_ptr(rt->rt6i_pcpu);
+ prev = cmpxchg(p, NULL, pcpu_rt);
+ if (prev) {
+ /* If someone did it before us, return prev instead */
+ dst_destroy(&pcpu_rt->dst);
+ pcpu_rt = prev;
+ }
+ } else {
+ /* rt has been removed from the fib6 tree
+ * before we have a chance to acquire the read_lock.
+ * In this case, don't brother to create a pcpu rt
+ * since rt is going away anyway. The next
+ * dst_check() will trigger a re-lookup.
+ */
+ dst_destroy(&pcpu_rt->dst);
+ pcpu_rt = rt;
+ }
+ dst_hold(&pcpu_rt->dst);
+ rt6_dst_from_metrics_check(pcpu_rt);
+ read_unlock_bh(&table->tb6_lock);
+ return pcpu_rt;
}
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
struct flowi6 *fl6, int flags)
{
struct fib6_node *fn, *saved_fn;
- struct rt6_info *rt, *nrt;
+ struct rt6_info *rt;
int strict = 0;
- int attempts = 3;
- int err;
strict |= flags & RT6_LOOKUP_F_IFACE;
if (net->ipv6.devconf_all->forwarding == 0)
strict |= RT6_LOOKUP_F_REACHABLE;
-redo_fib6_lookup_lock:
read_lock_bh(&table->tb6_lock);
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
@@ -944,51 +1072,65 @@ redo_rt6_select:
strict &= ~RT6_LOOKUP_F_REACHABLE;
fn = saved_fn;
goto redo_rt6_select;
- } else {
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
- goto out2;
}
}
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
- if (rt->rt6i_flags & RTF_CACHE)
- goto out2;
+ if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
+ dst_use(&rt->dst, jiffies);
+ read_unlock_bh(&table->tb6_lock);
- if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
- nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
- else if (!(rt->dst.flags & DST_HOST))
- nrt = rt6_alloc_clone(rt, &fl6->daddr);
- else
- goto out2;
+ rt6_dst_from_metrics_check(rt);
+ return rt;
+ } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
+ !(rt->rt6i_flags & RTF_GATEWAY))) {
+ /* Create a RTF_CACHE clone which will not be
+ * owned by the fib6 tree. It is for the special case where
+ * the daddr in the skb during the neighbor look-up is different
+ * from the fl6->daddr used to look-up route here.
+ */
- ip6_rt_put(rt);
- rt = nrt ? : net->ipv6.ip6_null_entry;
+ struct rt6_info *uncached_rt;
- dst_hold(&rt->dst);
- if (nrt) {
- err = ip6_ins_rt(nrt);
- if (!err)
- goto out2;
- }
+ dst_use(&rt->dst, jiffies);
+ read_unlock_bh(&table->tb6_lock);
- if (--attempts <= 0)
- goto out2;
+ uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
+ dst_release(&rt->dst);
- /*
- * Race condition! In the gap, when table->tb6_lock was
- * released someone could insert this route. Relookup.
- */
- ip6_rt_put(rt);
- goto redo_fib6_lookup_lock;
+ if (uncached_rt)
+ rt6_uncached_list_add(uncached_rt);
+ else
+ uncached_rt = net->ipv6.ip6_null_entry;
-out2:
- rt->dst.lastuse = jiffies;
- rt->dst.__use++;
+ dst_hold(&uncached_rt->dst);
+ return uncached_rt;
- return rt;
+ } else {
+ /* Get a percpu copy */
+
+ struct rt6_info *pcpu_rt;
+
+ rt->dst.lastuse = jiffies;
+ rt->dst.__use++;
+ pcpu_rt = rt6_get_pcpu_route(rt);
+
+ if (pcpu_rt) {
+ read_unlock_bh(&table->tb6_lock);
+ } else {
+ /* We have to do the read_unlock first
+ * because rt6_make_pcpu_route() may trigger
+ * ip6_dst_gc() which will take the write_lock.
+ */
+ dst_hold(&rt->dst);
+ read_unlock_bh(&table->tb6_lock);
+ pcpu_rt = rt6_make_pcpu_route(rt);
+ dst_release(&rt->dst);
+ }
+
+ return pcpu_rt;
+
+ }
}
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
@@ -1059,7 +1201,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
new = &rt->dst;
memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
- rt6_init_peer(rt, net->ipv6.peers);
new->__use = 1;
new->input = dst_discard;
@@ -1093,6 +1234,33 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
* Destination cache support functions
*/
+static void rt6_dst_from_metrics_check(struct rt6_info *rt)
+{
+ if (rt->dst.from &&
+ dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
+ dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
+}
+
+static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
+{
+ if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
+ return NULL;
+
+ if (rt6_check_expired(rt))
+ return NULL;
+
+ return &rt->dst;
+}
+
+static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
+{
+ if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
+ rt6_check((struct rt6_info *)(rt->dst.from), cookie))
+ return &rt->dst;
+ else
+ return NULL;
+}
+
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
struct rt6_info *rt;
@@ -1103,13 +1271,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
* DST_OBSOLETE_FORCE_CHK which forces validation calls down
* into this function always.
*/
- if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
- return NULL;
- if (rt6_check_expired(rt))
- return NULL;
+ rt6_dst_from_metrics_check(rt);
- return dst;
+ if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
+ return rt6_dst_from_check(rt, cookie);
+ else
+ return rt6_check(rt, cookie);
}
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
@@ -1148,24 +1316,63 @@ static void ip6_link_failure(struct sk_buff *skb)
}
}
-static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu)
+static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
+{
+ struct net *net = dev_net(rt->dst.dev);
+
+ rt->rt6i_flags |= RTF_MODIFIED;
+ rt->rt6i_pmtu = mtu;
+ rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
+}
+
+static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
+ const struct ipv6hdr *iph, u32 mtu)
{
struct rt6_info *rt6 = (struct rt6_info *)dst;
- dst_confirm(dst);
- if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
- struct net *net = dev_net(dst->dev);
+ if (rt6->rt6i_flags & RTF_LOCAL)
+ return;
- rt6->rt6i_flags |= RTF_MODIFIED;
- if (mtu < IPV6_MIN_MTU)
- mtu = IPV6_MIN_MTU;
+ dst_confirm(dst);
+ mtu = max_t(u32, mtu, IPV6_MIN_MTU);
+ if (mtu >= dst_mtu(dst))
+ return;
- dst_metric_set(dst, RTAX_MTU, mtu);
- rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
+ if (rt6->rt6i_flags & RTF_CACHE) {
+ rt6_do_update_pmtu(rt6, mtu);
+ } else {
+ const struct in6_addr *daddr, *saddr;
+ struct rt6_info *nrt6;
+
+ if (iph) {
+ daddr = &iph->daddr;
+ saddr = &iph->saddr;
+ } else if (sk) {
+ daddr = &sk->sk_v6_daddr;
+ saddr = &inet6_sk(sk)->saddr;
+ } else {
+ return;
+ }
+ nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
+ if (nrt6) {
+ rt6_do_update_pmtu(nrt6, mtu);
+
+ /* ip6_ins_rt(nrt6) will bump the
+ * rt6->rt6i_node->fn_sernum
+ * which will fail the next rt6_check() and
+ * invalidate the sk->sk_dst_cache.
+ */
+ ip6_ins_rt(nrt6);
+ }
}
}
+static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
+{
+ __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
+}
+
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
int oif, u32 mark)
{
@@ -1182,7 +1389,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
dst = ip6_route_output(net, NULL, &fl6);
if (!dst->error)
- ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
+ __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
@@ -1341,12 +1548,17 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
static unsigned int ip6_mtu(const struct dst_entry *dst)
{
+ const struct rt6_info *rt = (const struct rt6_info *)dst;
+ unsigned int mtu = rt->rt6i_pmtu;
struct inet6_dev *idev;
- unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
goto out;
+ mtu = dst_metric_raw(dst, RTAX_MTU);
+ if (mtu)
+ goto out;
+
mtu = IPV6_MIN_MTU;
rcu_read_lock();
@@ -1373,7 +1585,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
if (unlikely(!idev))
return ERR_PTR(-ENODEV);
- rt = ip6_dst_alloc(net, dev, 0, NULL);
+ rt = ip6_dst_alloc(net, dev, 0);
if (unlikely(!rt)) {
in6_dev_put(idev);
dst = ERR_PTR(-ENOMEM);
@@ -1560,7 +1772,8 @@ int ip6_route_add(struct fib6_config *cfg)
if (!table)
goto out;
- rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
+ rt = ip6_dst_alloc(net, NULL,
+ (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
if (!rt) {
err = -ENOMEM;
@@ -1590,10 +1803,8 @@ int ip6_route_add(struct fib6_config *cfg)
ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
rt->rt6i_dst.plen = cfg->fc_dst_len;
- if (rt->rt6i_dst.plen == 128) {
+ if (rt->rt6i_dst.plen == 128)
rt->dst.flags |= DST_HOST;
- dst_metrics_set_force_overwrite(&rt->dst);
- }
#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
@@ -1651,9 +1862,21 @@ int ip6_route_add(struct fib6_config *cfg)
int gwa_type;
gw_addr = &cfg->fc_gateway;
- rt->rt6i_gateway = *gw_addr;
gwa_type = ipv6_addr_type(gw_addr);
+ /* if gw_addr is local we will fail to detect this in case
+ * address is still TENTATIVE (DAD in progress). rt6_lookup()
+ * will return already-added prefix route via interface that
+ * prefix route was assigned to, which might be non-loopback.
+ */
+ err = -EINVAL;
+ if (ipv6_chk_addr_and_flags(net, gw_addr,
+ gwa_type & IPV6_ADDR_LINKLOCAL ?
+ dev : NULL, 0, 0))
+ goto out;
+
+ rt->rt6i_gateway = *gw_addr;
+
if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
struct rt6_info *grt;
@@ -1664,7 +1887,6 @@ int ip6_route_add(struct fib6_config *cfg)
(SIT, PtP, NBMA NOARP links) it is handy to allow
some exceptions. --ANK
*/
- err = -EINVAL;
if (!(gwa_type & IPV6_ADDR_UNICAST))
goto out;
@@ -1785,6 +2007,9 @@ static int ip6_route_del(struct fib6_config *cfg)
if (fn) {
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ if ((rt->rt6i_flags & RTF_CACHE) &&
+ !(cfg->fc_flags & RTF_CACHE))
+ continue;
if (cfg->fc_ifindex &&
(!rt->dst.dev ||
rt->dst.dev->ifindex != cfg->fc_ifindex))
@@ -1894,7 +2119,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
NEIGH_UPDATE_F_ISROUTER))
);
- nrt = ip6_rt_copy(rt, &msg->dest);
+ nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
if (!nrt)
goto out;
@@ -1926,42 +2151,35 @@ out:
* Misc support functions
*/
-static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
- const struct in6_addr *dest)
+static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
- struct net *net = dev_net(ort->dst.dev);
- struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
- ort->rt6i_table);
-
- if (rt) {
- rt->dst.input = ort->dst.input;
- rt->dst.output = ort->dst.output;
- rt->dst.flags |= DST_HOST;
+ BUG_ON(from->dst.from);
- rt->rt6i_dst.addr = *dest;
- rt->rt6i_dst.plen = 128;
- dst_copy_metrics(&rt->dst, &ort->dst);
- rt->dst.error = ort->dst.error;
- rt->rt6i_idev = ort->rt6i_idev;
- if (rt->rt6i_idev)
- in6_dev_hold(rt->rt6i_idev);
- rt->dst.lastuse = jiffies;
-
- if (ort->rt6i_flags & RTF_GATEWAY)
- rt->rt6i_gateway = ort->rt6i_gateway;
- else
- rt->rt6i_gateway = *dest;
- rt->rt6i_flags = ort->rt6i_flags;
- rt6_set_from(rt, ort);
- rt->rt6i_metric = 0;
+ rt->rt6i_flags &= ~RTF_EXPIRES;
+ dst_hold(&from->dst);
+ rt->dst.from = &from->dst;
+ dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
+}
+static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
+{
+ rt->dst.input = ort->dst.input;
+ rt->dst.output = ort->dst.output;
+ rt->rt6i_dst = ort->rt6i_dst;
+ rt->dst.error = ort->dst.error;
+ rt->rt6i_idev = ort->rt6i_idev;
+ if (rt->rt6i_idev)
+ in6_dev_hold(rt->rt6i_idev);
+ rt->dst.lastuse = jiffies;
+ rt->rt6i_gateway = ort->rt6i_gateway;
+ rt->rt6i_flags = ort->rt6i_flags;
+ rt6_set_from(rt, ort);
+ rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
- memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
+ rt->rt6i_src = ort->rt6i_src;
#endif
- memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
- rt->rt6i_table = ort->rt6i_table;
- }
- return rt;
+ rt->rt6i_prefsrc = ort->rt6i_prefsrc;
+ rt->rt6i_table = ort->rt6i_table;
}
#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -2212,7 +2430,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
{
struct net *net = dev_net(idev->dev);
struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
- DST_NOCOUNT, NULL);
+ DST_NOCOUNT);
if (!rt)
return ERR_PTR(-ENOMEM);
@@ -2336,6 +2554,7 @@ void rt6_ifdown(struct net *net, struct net_device *dev)
fib6_clean_all(net, fib6_ifdown, &adn);
icmp6_clean_all(fib6_ifdown, &adn);
+ rt6_uncached_list_flush_dev(net, dev);
}
struct rt6_mtu_change_arg {
@@ -2373,11 +2592,20 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
PMTU discouvery.
*/
if (rt->dst.dev == arg->dev &&
- !dst_metric_locked(&rt->dst, RTAX_MTU) &&
- (dst_mtu(&rt->dst) >= arg->mtu ||
- (dst_mtu(&rt->dst) < arg->mtu &&
- dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
- dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
+ !dst_metric_locked(&rt->dst, RTAX_MTU)) {
+ if (rt->rt6i_flags & RTF_CACHE) {
+ /* For RTF_CACHE with rt6i_pmtu == 0
+ * (i.e. a redirected route),
+ * the metrics of its rt->dst.from has already
+ * been updated.
+ */
+ if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
+ rt->rt6i_pmtu = arg->mtu;
+ } else if (dst_mtu(&rt->dst) >= arg->mtu ||
+ (dst_mtu(&rt->dst) < arg->mtu &&
+ dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
+ dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
+ }
}
return 0;
}
@@ -2434,6 +2662,9 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rtm->rtm_type == RTN_LOCAL)
cfg->fc_flags |= RTF_LOCAL;
+ if (rtm->rtm_flags & RTM_F_CLONED)
+ cfg->fc_flags |= RTF_CACHE;
+
cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
cfg->fc_nlinfo.nlh = nlh;
cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
@@ -2608,6 +2839,7 @@ static int rt6_fill_node(struct net *net,
int iif, int type, u32 portid, u32 seq,
int prefix, int nowait, unsigned int flags)
{
+ u32 metrics[RTAX_MAX];
struct rtmsg *rtm;
struct nlmsghdr *nlh;
long expires;
@@ -2721,7 +2953,10 @@ static int rt6_fill_node(struct net *net,
goto nla_put_failure;
}
- if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
+ memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
+ if (rt->rt6i_pmtu)
+ metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
+ if (rtnetlink_put_metrics(skb, metrics) < 0)
goto nla_put_failure;
if (rt->rt6i_flags & RTF_GATEWAY) {
@@ -3216,6 +3451,7 @@ static struct notifier_block ip6_route_dev_notifier = {
int __init ip6_route_init(void)
{
int ret;
+ int cpu;
ret = -ENOMEM;
ip6_dst_ops_template.kmem_cachep =
@@ -3275,6 +3511,13 @@ int __init ip6_route_init(void)
if (ret)
goto out_register_late_subsys;
+ for_each_possible_cpu(cpu) {
+ struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
+
+ INIT_LIST_HEAD(&ul->head);
+ spin_lock_init(&ul->lock);
+ }
+
out:
return ret;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 21bc2eb53..0909f4e0d 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -41,23 +41,6 @@ static __u16 const msstab[] = {
9000 - 60,
};
-static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req,
- struct dst_entry *dst)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
- struct sock *child;
-
- child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
- if (child) {
- atomic_set(&req->rsk_refcnt, 1);
- inet_csk_reqsk_queue_add(sk, req, child);
- } else {
- reqsk_free(req);
- }
- return child;
-}
-
static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
ipv6_cookie_scratch);
@@ -264,7 +247,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->rcv_wscale = rcv_wscale;
ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst);
- ret = get_cookie_sock(sk, skb, req, dst);
+ ret = tcp_get_cookie_sock(sk, skb, req, dst);
out:
return ret;
out_free:
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index abcc79f64..4e705add4 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -68,6 +68,13 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "flowlabel_state_ranges",
+ .data = &init_net.ipv6.sysctl.flowlabel_state_ranges,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{ }
};
@@ -109,6 +116,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect;
ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries;
ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay;
+ ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5193d0953..a6f28765d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -100,8 +100,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
dst_hold(dst);
sk->sk_rx_dst = dst;
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
- if (rt->rt6i_node)
- inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
+ inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
}
}
@@ -122,7 +121,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
- struct rt6_info *rt;
struct flowi6 fl6;
struct dst_entry *dst;
int addr_type;
@@ -260,10 +258,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_gso_type = SKB_GSO_TCPV6;
__ip6_dst_store(sk, dst, NULL, NULL);
- rt = (struct rt6_info *) dst;
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp &&
- ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr))
+ ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr))
tcp_fetch_timewait_stamp(sk, dst);
icsk->icsk_ext_hdr_len = 0;
@@ -962,7 +959,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
&ipv6_hdr(skb)->daddr, tcp_v6_iif(skb));
if (req) {
nsk = tcp_check_req(sk, skb, req, false);
- if (!nsk)
+ if (!nsk || nsk == sk)
reqsk_put(req);
return nsk;
}
@@ -1268,7 +1265,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
- if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
+ if (tcp_checksum_complete(skb))
goto csum_err;
#ifdef CONFIG_TCP_STEALTH
@@ -1445,6 +1442,7 @@ process:
skb->dev = NULL;
bh_lock_sock_nested(sk);
+ tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
ret = 0;
if (!sock_owned_by_user(sk)) {
if (!tcp_prequeue(sk, skb))
@@ -1466,7 +1464,7 @@ no_tcp_socket:
tcp_v6_fill_cb(skb, hdr, th);
- if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+ if (tcp_checksum_complete(skb)) {
csum_error:
TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
bad_packet:
@@ -1491,10 +1489,6 @@ do_time_wait:
tcp_v6_fill_cb(skb, hdr, th);
- if (skb->len < (th->doff<<2)) {
- inet_twsk_put(inet_twsk(sk));
- goto bad_packet;
- }
if (tcp_checksum_complete(skb)) {
inet_twsk_put(inet_twsk(sk));
goto csum_error;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index f337a908a..ed0583c1b 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -71,20 +71,12 @@ static int xfrm6_get_tos(const struct flowi *fl)
return 0;
}
-static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst)
-{
- struct rt6_info *rt = (struct rt6_info *)xdst;
-
- rt6_init_peer(rt, net->ipv6.peers);
-}
-
static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
int nfheader_len)
{
if (dst->ops->family == AF_INET6) {
struct rt6_info *rt = (struct rt6_info *)dst;
- if (rt->rt6i_node)
- path->path_cookie = rt->rt6i_node->fn_sernum;
+ path->path_cookie = rt6_get_cookie(rt);
}
path->u.rt6.rt6i_nfheader_len = nfheader_len;
@@ -106,16 +98,13 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
return -ENODEV;
}
- rt6_transfer_peer(&xdst->u.rt6, rt);
-
/* Sheit... I remember I did this right. Apparently,
* it was magically lost, so this code needs audit */
xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
RTF_LOCAL);
xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
xdst->u.rt6.rt6i_node = rt->rt6i_node;
- if (rt->rt6i_node)
- xdst->route_cookie = rt->rt6i_node->fn_sernum;
+ xdst->route_cookie = rt6_get_cookie(rt);
xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
xdst->u.rt6.rt6i_src = rt->rt6i_src;
@@ -255,10 +244,6 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
if (likely(xdst->u.rt6.rt6i_idev))
in6_dev_put(xdst->u.rt6.rt6i_idev);
dst_destroy_metrics_generic(dst);
- if (rt6_has_peer(&xdst->u.rt6)) {
- struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6);
- inet_putpeer(peer);
- }
xfrm_dst_destroy(xdst);
}
@@ -308,7 +293,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
.get_saddr = xfrm6_get_saddr,
.decode_session = _decode_session6,
.get_tos = xfrm6_get_tos,
- .init_dst = xfrm6_init_dst,
.init_path = xfrm6_init_path,
.fill_dst = xfrm6_fill_dst,
.blackhole_route = ip6_blackhole_route,
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 4ea5d7497..48d0dc89b 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1347,7 +1347,7 @@ static int ipx_create(struct net *net, struct socket *sock, int protocol,
goto out;
rc = -ENOMEM;
- sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto);
+ sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, kern);
if (!sk)
goto out;
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index ee0ea25c8..fae6822cc 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1100,7 +1100,7 @@ static int irda_create(struct net *net, struct socket *sock, int protocol,
}
/* Allocate networking socket */
- sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto);
+ sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto, kern);
if (sk == NULL)
return -ENOMEM;
diff --git a/net/irda/timer.c b/net/irda/timer.c
index 0c4c115a5..f2280f73b 100644
--- a/net/irda/timer.c
+++ b/net/irda/timer.c
@@ -60,8 +60,8 @@ void irlap_start_query_timer(struct irlap_cb *self, int S, int s)
* to avoid messing with for incoming connections requests and
* to accommodate devices that perform discovery slower than us.
* Jean II */
- timeout = ((sysctl_slot_timeout * HZ / 1000) * (S - s)
- + XIDEXTRA_TIMEOUT + SMALLBUSY_TIMEOUT);
+ timeout = msecs_to_jiffies(sysctl_slot_timeout) * (S - s)
+ + XIDEXTRA_TIMEOUT + SMALLBUSY_TIMEOUT;
/* Set or re-set the timer. We reset the timer for each received
* discovery query, which allow us to automatically adjust to
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 6daa52a18..918151c11 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -535,12 +535,12 @@ static void iucv_sock_init(struct sock *sk, struct sock *parent)
sk->sk_type = parent->sk_type;
}
-static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
+static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio, int kern)
{
struct sock *sk;
struct iucv_sock *iucv;
- sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto);
+ sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto, kern);
if (!sk)
return NULL;
iucv = iucv_sk(sk);
@@ -602,7 +602,7 @@ static int iucv_sock_create(struct net *net, struct socket *sock, int protocol,
return -ESOCKTNOSUPPORT;
}
- sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL);
+ sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL, kern);
if (!sk)
return -ENOMEM;
@@ -1723,7 +1723,7 @@ static int iucv_callback_connreq(struct iucv_path *path,
}
/* Create the new socket */
- nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC);
+ nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0);
if (!nsk) {
err = pr_iucv->path_sever(path, user_data);
iucv_path_free(path);
@@ -1933,7 +1933,7 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb)
goto out;
}
- nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC);
+ nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0);
bh_lock_sock(sk);
if ((sk->sk_state != IUCV_LISTEN) ||
sk_acceptq_is_full(sk) ||
diff --git a/net/key/af_key.c b/net/key/af_key.c
index f0d52d721..83a706887 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -149,7 +149,7 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
return -EPROTONOSUPPORT;
err = -ENOMEM;
- sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto);
+ sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, kern);
if (sk == NULL)
goto out;
@@ -219,7 +219,7 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
#define BROADCAST_ONE 1
#define BROADCAST_REGISTERED 2
#define BROADCAST_PROMISC_ONLY 4
-static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
+static int pfkey_broadcast(struct sk_buff *skb,
int broadcast_flags, struct sock *one_sk,
struct net *net)
{
@@ -244,7 +244,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
* socket.
*/
if (pfk->promisc)
- pfkey_broadcast_one(skb, &skb2, allocation, sk);
+ pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk);
/* the exact target will be processed later */
if (sk == one_sk)
@@ -259,7 +259,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
continue;
}
- err2 = pfkey_broadcast_one(skb, &skb2, allocation, sk);
+ err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk);
/* Error is cleare after succecful sending to at least one
* registered KM */
@@ -269,7 +269,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
rcu_read_unlock();
if (one_sk != NULL)
- err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);
+ err = pfkey_broadcast_one(skb, &skb2, GFP_KERNEL, one_sk);
kfree_skb(skb2);
kfree_skb(skb);
@@ -292,7 +292,7 @@ static int pfkey_do_dump(struct pfkey_sock *pfk)
hdr = (struct sadb_msg *) pfk->dump.skb->data;
hdr->sadb_msg_seq = 0;
hdr->sadb_msg_errno = rc;
- pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
+ pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE,
&pfk->sk, sock_net(&pfk->sk));
pfk->dump.skb = NULL;
}
@@ -333,7 +333,7 @@ static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk)
hdr->sadb_msg_len = (sizeof(struct sadb_msg) /
sizeof(uint64_t));
- pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk));
+ pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk));
return 0;
}
@@ -1190,6 +1190,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
memcpy(x->ealg->alg_key, key+1, keysize);
}
x->props.ealgo = sa->sadb_sa_encrypt;
+ x->geniv = a->uinfo.encr.geniv;
}
}
/* x->algo.flags = sa->sadb_sa_flags; */
@@ -1364,7 +1365,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
xfrm_state_put(x);
- pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk, net);
+ pfkey_broadcast(resp_skb, BROADCAST_ONE, sk, net);
return 0;
}
@@ -1451,7 +1452,7 @@ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c)
hdr->sadb_msg_seq = c->seq;
hdr->sadb_msg_pid = c->portid;
- pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x));
+ pfkey_broadcast(skb, BROADCAST_ALL, NULL, xs_net(x));
return 0;
}
@@ -1564,7 +1565,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg
out_hdr->sadb_msg_reserved = 0;
out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
- pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk));
+ pfkey_broadcast(out_skb, BROADCAST_ONE, sk, sock_net(sk));
return 0;
}
@@ -1669,7 +1670,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad
return -ENOBUFS;
}
- pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk, sock_net(sk));
+ pfkey_broadcast(supp_skb, BROADCAST_REGISTERED, sk, sock_net(sk));
return 0;
}
@@ -1688,7 +1689,7 @@ static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr)
hdr->sadb_msg_errno = (uint8_t) 0;
hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
- return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk));
+ return pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk));
}
static int key_notify_sa_flush(const struct km_event *c)
@@ -1709,7 +1710,7 @@ static int key_notify_sa_flush(const struct km_event *c)
hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
hdr->sadb_msg_reserved = 0;
- pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net);
+ pfkey_broadcast(skb, BROADCAST_ALL, NULL, c->net);
return 0;
}
@@ -1766,7 +1767,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr)
out_hdr->sadb_msg_pid = pfk->dump.msg_portid;
if (pfk->dump.skb)
- pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
+ pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE,
&pfk->sk, sock_net(&pfk->sk));
pfk->dump.skb = out_skb;
@@ -1846,7 +1847,7 @@ static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb
new_hdr->sadb_msg_errno = 0;
}
- pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk));
+ pfkey_broadcast(skb, BROADCAST_ALL, NULL, sock_net(sk));
return 0;
}
@@ -2180,7 +2181,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev
out_hdr->sadb_msg_errno = 0;
out_hdr->sadb_msg_seq = c->seq;
out_hdr->sadb_msg_pid = c->portid;
- pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp));
+ pfkey_broadcast(out_skb, BROADCAST_ALL, NULL, xp_net(xp));
return 0;
}
@@ -2400,7 +2401,7 @@ static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struc
out_hdr->sadb_msg_errno = 0;
out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
- pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, xp_net(xp));
+ pfkey_broadcast(out_skb, BROADCAST_ONE, sk, xp_net(xp));
err = 0;
out:
@@ -2654,7 +2655,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr)
out_hdr->sadb_msg_pid = pfk->dump.msg_portid;
if (pfk->dump.skb)
- pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
+ pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE,
&pfk->sk, sock_net(&pfk->sk));
pfk->dump.skb = out_skb;
@@ -2707,7 +2708,7 @@ static int key_notify_policy_flush(const struct km_event *c)
hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC;
hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
hdr->sadb_msg_reserved = 0;
- pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net);
+ pfkey_broadcast(skb_out, BROADCAST_ALL, NULL, c->net);
return 0;
}
@@ -2769,7 +2770,7 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb
void *ext_hdrs[SADB_EXT_MAX];
int err;
- pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
+ pfkey_broadcast(skb_clone(skb, GFP_KERNEL),
BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
memset(ext_hdrs, 0, sizeof(ext_hdrs));
@@ -2991,7 +2992,7 @@ static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c)
out_hdr->sadb_msg_seq = 0;
out_hdr->sadb_msg_pid = 0;
- pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x));
+ pfkey_broadcast(out_skb, BROADCAST_REGISTERED, NULL, xs_net(x));
return 0;
}
@@ -3181,7 +3182,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
xfrm_ctx->ctx_len);
}
- return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x));
+ return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x));
}
static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
@@ -3379,7 +3380,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
n_port->sadb_x_nat_t_port_port = sport;
n_port->sadb_x_nat_t_port_reserved = 0;
- return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x));
+ return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x));
}
#ifdef CONFIG_NET_KEY_MIGRATE
@@ -3571,7 +3572,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
}
/* broadcast migrate message to sockets */
- pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net);
+ pfkey_broadcast(skb, BROADCAST_ALL, NULL, &init_net);
return 0;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index a29a50449..f6b090df3 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1334,9 +1334,10 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
if (sock)
inet_shutdown(sock, 2);
} else {
- if (sock)
+ if (sock) {
kernel_sock_shutdown(sock, SHUT_RDWR);
- sk_release_kernel(sk);
+ sock_release(sock);
+ }
}
l2tp_tunnel_sock_put(sk);
@@ -1399,13 +1400,11 @@ static int l2tp_tunnel_sock_create(struct net *net,
if (cfg->local_ip6 && cfg->peer_ip6) {
struct sockaddr_l2tpip6 ip6_addr = {0};
- err = sock_create_kern(AF_INET6, SOCK_DGRAM,
+ err = sock_create_kern(net, AF_INET6, SOCK_DGRAM,
IPPROTO_L2TP, &sock);
if (err < 0)
goto out;
- sk_change_net(sock->sk, net);
-
ip6_addr.l2tp_family = AF_INET6;
memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6,
sizeof(ip6_addr.l2tp_addr));
@@ -1429,13 +1428,11 @@ static int l2tp_tunnel_sock_create(struct net *net,
{
struct sockaddr_l2tpip ip_addr = {0};
- err = sock_create_kern(AF_INET, SOCK_DGRAM,
+ err = sock_create_kern(net, AF_INET, SOCK_DGRAM,
IPPROTO_L2TP, &sock);
if (err < 0)
goto out;
- sk_change_net(sock->sk, net);
-
ip_addr.l2tp_family = AF_INET;
ip_addr.l2tp_addr = cfg->local_ip;
ip_addr.l2tp_conn_id = tunnel_id;
@@ -1462,7 +1459,7 @@ out:
*sockp = sock;
if ((err < 0) && sock) {
kernel_sock_shutdown(sock, SHUT_RDWR);
- sk_release_kernel(sock->sk);
+ sock_release(sock);
*sockp = NULL;
}
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index e9b0dec56..f56c9f69e 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -542,12 +542,12 @@ static int pppol2tp_backlog_recv(struct sock *sk, struct sk_buff *skb)
/* socket() handler. Initialize a new struct sock.
*/
-static int pppol2tp_create(struct net *net, struct socket *sock)
+static int pppol2tp_create(struct net *net, struct socket *sock, int kern)
{
int error = -ENOMEM;
struct sock *sk;
- sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto);
+ sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto, kern);
if (!sk)
goto out;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 17a8dff06..8dab4e569 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -168,7 +168,7 @@ static int llc_ui_create(struct net *net, struct socket *sock, int protocol,
if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) {
rc = -ENOMEM;
- sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto);
+ sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto, kern);
if (sk) {
rc = 0;
llc_ui_sk_init(sock, sk);
@@ -613,7 +613,7 @@ static int llc_wait_data(struct sock *sk, long timeo)
if (signal_pending(current))
break;
rc = 0;
- if (sk_wait_data(sk, &timeo))
+ if (sk_wait_data(sk, &timeo, NULL))
break;
}
return rc;
@@ -802,7 +802,7 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
release_sock(sk);
lock_sock(sk);
} else
- sk_wait_data(sk, &timeo);
+ sk_wait_data(sk, &timeo, NULL);
if ((flags & MSG_PEEK) && peek_seq != llc->copied_seq) {
net_dbg_ratelimited("LLC(%s:%d): Application bug, race in MSG_PEEK\n",
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 81a61fce3..3e821daf9 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -768,7 +768,7 @@ static struct sock *llc_create_incoming_sock(struct sock *sk,
struct llc_addr *daddr)
{
struct sock *newsk = llc_sk_alloc(sock_net(sk), sk->sk_family, GFP_ATOMIC,
- sk->sk_prot);
+ sk->sk_prot, 0);
struct llc_sock *newllc, *llc = llc_sk(sk);
if (!newsk)
@@ -931,9 +931,9 @@ static void llc_sk_init(struct sock *sk)
* Allocates a LLC sock and initializes it. Returns the new LLC sock
* or %NULL if there's no memory available for one
*/
-struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot)
+struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern)
{
- struct sock *sk = sk_alloc(net, family, priority, prot);
+ struct sock *sk = sk_alloc(net, family, priority, prot, kern);
if (!sk)
goto out;
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 64a012a0c..086de496a 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -302,6 +302,20 @@ config MAC80211_DEBUG_COUNTERS
---help---
Selecting this option causes mac80211 to keep additional
and very verbose statistics about TX and RX handler use
- and show them in debugfs.
+ as well as a few selected dot11 counters. These will be
+ exposed in debugfs.
+
+ Note that some of the counters are not concurrency safe
+ and may thus not always be accurate.
If unsure, say N.
+
+config MAC80211_STA_HASH_MAX_SIZE
+ int "Station hash table maximum size" if MAC80211_DEBUG_MENU
+ default 0
+ ---help---
+ Setting this option to a low value (e.g. 4) allows testing the
+ hash table with collisions relatively deterministically (just
+ connect more stations than the number selected here.)
+
+ If unsure, leave the default of 0.
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index 208df7c0b..7663c28ba 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -11,9 +11,8 @@
#include <linux/kernel.h>
#include <linux/types.h>
-#include <linux/crypto.h>
#include <linux/err.h>
-#include <crypto/aes.h>
+#include <crypto/aead.h>
#include <net/mac80211.h>
#include "key.h"
@@ -23,7 +22,7 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
u8 *data, size_t data_len, u8 *mic,
size_t mic_len)
{
- struct scatterlist assoc, pt, ct[2];
+ struct scatterlist sg[3];
char aead_req_data[sizeof(struct aead_request) +
crypto_aead_reqsize(tfm)]
@@ -32,15 +31,14 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
memset(aead_req, 0, sizeof(aead_req_data));
- sg_init_one(&pt, data, data_len);
- sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad));
- sg_init_table(ct, 2);
- sg_set_buf(&ct[0], data, data_len);
- sg_set_buf(&ct[1], mic, mic_len);
+ sg_init_table(sg, 3);
+ sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad));
+ sg_set_buf(&sg[1], data, data_len);
+ sg_set_buf(&sg[2], mic, mic_len);
aead_request_set_tfm(aead_req, tfm);
- aead_request_set_assoc(aead_req, &assoc, assoc.length);
- aead_request_set_crypt(aead_req, &pt, ct, data_len, b_0);
+ aead_request_set_crypt(aead_req, sg, sg, data_len, b_0);
+ aead_request_set_ad(aead_req, sg[0].length);
crypto_aead_encrypt(aead_req);
}
@@ -49,7 +47,7 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
u8 *data, size_t data_len, u8 *mic,
size_t mic_len)
{
- struct scatterlist assoc, pt, ct[2];
+ struct scatterlist sg[3];
char aead_req_data[sizeof(struct aead_request) +
crypto_aead_reqsize(tfm)]
__aligned(__alignof__(struct aead_request));
@@ -60,15 +58,14 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
memset(aead_req, 0, sizeof(aead_req_data));
- sg_init_one(&pt, data, data_len);
- sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad));
- sg_init_table(ct, 2);
- sg_set_buf(&ct[0], data, data_len);
- sg_set_buf(&ct[1], mic, mic_len);
+ sg_init_table(sg, 3);
+ sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad));
+ sg_set_buf(&sg[1], data, data_len);
+ sg_set_buf(&sg[2], mic, mic_len);
aead_request_set_tfm(aead_req, tfm);
- aead_request_set_assoc(aead_req, &assoc, assoc.length);
- aead_request_set_crypt(aead_req, ct, &pt, data_len + mic_len, b_0);
+ aead_request_set_crypt(aead_req, sg, sg, data_len + mic_len, b_0);
+ aead_request_set_ad(aead_req, sg[0].length);
return crypto_aead_decrypt(aead_req);
}
diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c
index fd278bbe1..3afe361fd 100644
--- a/net/mac80211/aes_gcm.c
+++ b/net/mac80211/aes_gcm.c
@@ -8,9 +8,8 @@
#include <linux/kernel.h>
#include <linux/types.h>
-#include <linux/crypto.h>
#include <linux/err.h>
-#include <crypto/aes.h>
+#include <crypto/aead.h>
#include <net/mac80211.h>
#include "key.h"
@@ -19,7 +18,7 @@
void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
u8 *data, size_t data_len, u8 *mic)
{
- struct scatterlist assoc, pt, ct[2];
+ struct scatterlist sg[3];
char aead_req_data[sizeof(struct aead_request) +
crypto_aead_reqsize(tfm)]
@@ -28,15 +27,14 @@ void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
memset(aead_req, 0, sizeof(aead_req_data));
- sg_init_one(&pt, data, data_len);
- sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad));
- sg_init_table(ct, 2);
- sg_set_buf(&ct[0], data, data_len);
- sg_set_buf(&ct[1], mic, IEEE80211_GCMP_MIC_LEN);
+ sg_init_table(sg, 3);
+ sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad));
+ sg_set_buf(&sg[1], data, data_len);
+ sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
aead_request_set_tfm(aead_req, tfm);
- aead_request_set_assoc(aead_req, &assoc, assoc.length);
- aead_request_set_crypt(aead_req, &pt, ct, data_len, j_0);
+ aead_request_set_crypt(aead_req, sg, sg, data_len, j_0);
+ aead_request_set_ad(aead_req, sg[0].length);
crypto_aead_encrypt(aead_req);
}
@@ -44,7 +42,7 @@ void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
u8 *data, size_t data_len, u8 *mic)
{
- struct scatterlist assoc, pt, ct[2];
+ struct scatterlist sg[3];
char aead_req_data[sizeof(struct aead_request) +
crypto_aead_reqsize(tfm)]
__aligned(__alignof__(struct aead_request));
@@ -55,16 +53,15 @@ int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
memset(aead_req, 0, sizeof(aead_req_data));
- sg_init_one(&pt, data, data_len);
- sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad));
- sg_init_table(ct, 2);
- sg_set_buf(&ct[0], data, data_len);
- sg_set_buf(&ct[1], mic, IEEE80211_GCMP_MIC_LEN);
+ sg_init_table(sg, 3);
+ sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad));
+ sg_set_buf(&sg[1], data, data_len);
+ sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
aead_request_set_tfm(aead_req, tfm);
- aead_request_set_assoc(aead_req, &assoc, assoc.length);
- aead_request_set_crypt(aead_req, ct, &pt,
+ aead_request_set_crypt(aead_req, sg, sg,
data_len + IEEE80211_GCMP_MIC_LEN, j_0);
+ aead_request_set_ad(aead_req, sg[0].length);
return crypto_aead_decrypt(aead_req);
}
diff --git a/net/mac80211/aes_gmac.c b/net/mac80211/aes_gmac.c
index f1321b7d6..3ddd927aa 100644
--- a/net/mac80211/aes_gmac.c
+++ b/net/mac80211/aes_gmac.c
@@ -9,8 +9,8 @@
#include <linux/kernel.h>
#include <linux/types.h>
-#include <linux/crypto.h>
#include <linux/err.h>
+#include <crypto/aead.h>
#include <crypto/aes.h>
#include <net/mac80211.h>
@@ -24,7 +24,7 @@
int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce,
const u8 *data, size_t data_len, u8 *mic)
{
- struct scatterlist sg[3], ct[1];
+ struct scatterlist sg[4];
char aead_req_data[sizeof(struct aead_request) +
crypto_aead_reqsize(tfm)]
__aligned(__alignof__(struct aead_request));
@@ -37,21 +37,19 @@ int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce,
memset(aead_req, 0, sizeof(aead_req_data));
memset(zero, 0, GMAC_MIC_LEN);
- sg_init_table(sg, 3);
+ sg_init_table(sg, 4);
sg_set_buf(&sg[0], aad, AAD_LEN);
sg_set_buf(&sg[1], data, data_len - GMAC_MIC_LEN);
sg_set_buf(&sg[2], zero, GMAC_MIC_LEN);
+ sg_set_buf(&sg[3], mic, GMAC_MIC_LEN);
memcpy(iv, nonce, GMAC_NONCE_LEN);
memset(iv + GMAC_NONCE_LEN, 0, sizeof(iv) - GMAC_NONCE_LEN);
iv[AES_BLOCK_SIZE - 1] = 0x01;
- sg_init_table(ct, 1);
- sg_set_buf(&ct[0], mic, GMAC_MIC_LEN);
-
aead_request_set_tfm(aead_req, tfm);
- aead_request_set_assoc(aead_req, sg, AAD_LEN + data_len);
- aead_request_set_crypt(aead_req, NULL, ct, 0, iv);
+ aead_request_set_crypt(aead_req, sg, sg, 0, iv);
+ aead_request_set_ad(aead_req, AAD_LEN + data_len);
crypto_aead_encrypt(aead_req);
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index cce9d425c..c8ba2e777 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -564,8 +564,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
return -EINVAL;
if ((tid >= IEEE80211_NUM_TIDS) ||
- !(local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) ||
- (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW))
+ !ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION) ||
+ ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW))
return -EINVAL;
ht_dbg(sdata, "Open BA session requested for %pM tid %u\n",
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f06d42267..bf7023f6c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2,7 +2,7 @@
* mac80211 configuration hooks for cfg80211
*
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
- * Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright 2013-2015 Intel Mobile Communications GmbH
*
* This file is GPLv2 as found in COPYING.
*/
@@ -137,6 +137,9 @@ static int ieee80211_set_noack_map(struct wiphy *wiphy,
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
sdata->noack_map = noack_map;
+
+ ieee80211_check_fast_xmit_iface(sdata);
+
return 0;
}
@@ -309,6 +312,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
u32 iv32;
u16 iv16;
int err = -ENOENT;
+ struct ieee80211_key_seq kseq = {};
sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -339,10 +343,12 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
iv32 = key->u.tkip.tx.iv32;
iv16 = key->u.tkip.tx.iv16;
- if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)
- drv_get_tkip_seq(sdata->local,
- key->conf.hw_key_idx,
- &iv32, &iv16);
+ if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
+ !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
+ drv_get_key_seq(sdata->local, key, &kseq);
+ iv32 = kseq.tkip.iv32;
+ iv16 = kseq.tkip.iv16;
+ }
seq[0] = iv16 & 0xff;
seq[1] = (iv16 >> 8) & 0xff;
@@ -355,52 +361,44 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
break;
case WLAN_CIPHER_SUITE_CCMP:
case WLAN_CIPHER_SUITE_CCMP_256:
- pn64 = atomic64_read(&key->u.ccmp.tx_pn);
- seq[0] = pn64;
- seq[1] = pn64 >> 8;
- seq[2] = pn64 >> 16;
- seq[3] = pn64 >> 24;
- seq[4] = pn64 >> 32;
- seq[5] = pn64 >> 40;
- params.seq = seq;
- params.seq_len = 6;
- break;
case WLAN_CIPHER_SUITE_AES_CMAC:
case WLAN_CIPHER_SUITE_BIP_CMAC_256:
- pn64 = atomic64_read(&key->u.aes_cmac.tx_pn);
- seq[0] = pn64;
- seq[1] = pn64 >> 8;
- seq[2] = pn64 >> 16;
- seq[3] = pn64 >> 24;
- seq[4] = pn64 >> 32;
- seq[5] = pn64 >> 40;
- params.seq = seq;
- params.seq_len = 6;
- break;
+ BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) !=
+ offsetof(typeof(kseq), aes_cmac));
case WLAN_CIPHER_SUITE_BIP_GMAC_128:
case WLAN_CIPHER_SUITE_BIP_GMAC_256:
- pn64 = atomic64_read(&key->u.aes_gmac.tx_pn);
- seq[0] = pn64;
- seq[1] = pn64 >> 8;
- seq[2] = pn64 >> 16;
- seq[3] = pn64 >> 24;
- seq[4] = pn64 >> 32;
- seq[5] = pn64 >> 40;
- params.seq = seq;
- params.seq_len = 6;
- break;
+ BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) !=
+ offsetof(typeof(kseq), aes_gmac));
case WLAN_CIPHER_SUITE_GCMP:
case WLAN_CIPHER_SUITE_GCMP_256:
- pn64 = atomic64_read(&key->u.gcmp.tx_pn);
- seq[0] = pn64;
- seq[1] = pn64 >> 8;
- seq[2] = pn64 >> 16;
- seq[3] = pn64 >> 24;
- seq[4] = pn64 >> 32;
- seq[5] = pn64 >> 40;
+ BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) !=
+ offsetof(typeof(kseq), gcmp));
+
+ if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
+ !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
+ drv_get_key_seq(sdata->local, key, &kseq);
+ memcpy(seq, kseq.ccmp.pn, 6);
+ } else {
+ pn64 = atomic64_read(&key->conf.tx_pn);
+ seq[0] = pn64;
+ seq[1] = pn64 >> 8;
+ seq[2] = pn64 >> 16;
+ seq[3] = pn64 >> 24;
+ seq[4] = pn64 >> 32;
+ seq[5] = pn64 >> 40;
+ }
params.seq = seq;
params.seq_len = 6;
break;
+ default:
+ if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
+ break;
+ if (WARN_ON(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV))
+ break;
+ drv_get_key_seq(sdata->local, key, &kseq);
+ params.seq = kseq.hw.seq;
+ params.seq_len = kseq.hw.seq_len;
+ break;
}
params.key = key->conf.key;
@@ -1372,6 +1370,7 @@ static int ieee80211_change_station(struct wiphy *wiphy,
}
sta->sdata = vlansdata;
+ ieee80211_check_fast_xmit(sta);
if (sta->sta_state == IEEE80211_STA_AUTHORIZED &&
prev_4addr != new_4addr) {
@@ -1764,7 +1763,7 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy,
/* our RSSI threshold implementation is supported only for
* devices that report signal in dBm.
*/
- if (!(sdata->local->hw.flags & IEEE80211_HW_SIGNAL_DBM))
+ if (!ieee80211_hw_check(&sdata->local->hw, SIGNAL_DBM))
return -ENOTSUPP;
conf->rssi_threshold = nconf->rssi_threshold;
}
@@ -2099,10 +2098,14 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed)
int err;
if (changed & WIPHY_PARAM_FRAG_THRESHOLD) {
+ ieee80211_check_fast_xmit_all(local);
+
err = drv_set_frag_threshold(local, wiphy->frag_threshold);
- if (err)
+ if (err) {
+ ieee80211_check_fast_xmit_all(local);
return err;
+ }
}
if ((changed & WIPHY_PARAM_COVERAGE_CLASS) ||
@@ -2404,7 +2407,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
if (sdata->vif.type != NL80211_IFTYPE_STATION)
return -EOPNOTSUPP;
- if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS))
+ if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS))
return -EOPNOTSUPP;
if (enabled == sdata->u.mgd.powersave &&
@@ -2419,7 +2422,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
__ieee80211_request_smps_mgd(sdata, sdata->u.mgd.req_smps);
sdata_unlock(sdata);
- if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)
+ if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
ieee80211_recalc_ps(local, -1);
@@ -2463,7 +2466,7 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
if (!ieee80211_sdata_running(sdata))
return -ENETDOWN;
- if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) {
+ if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
ret = drv_set_bitrate_mask(local, sdata, mask);
if (ret)
return ret;
@@ -2514,6 +2517,19 @@ static bool ieee80211_coalesce_started_roc(struct ieee80211_local *local,
return true;
}
+static u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local)
+{
+ lockdep_assert_held(&local->mtx);
+
+ local->roc_cookie_counter++;
+
+ /* wow, you wrapped 64 bits ... more likely a bug */
+ if (WARN_ON(local->roc_cookie_counter == 0))
+ local->roc_cookie_counter++;
+
+ return local->roc_cookie_counter;
+}
+
static int ieee80211_start_roc_work(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_channel *channel,
@@ -2551,7 +2567,6 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
roc->req_duration = duration;
roc->frame = txskb;
roc->type = type;
- roc->mgmt_tx_cookie = (unsigned long)txskb;
roc->sdata = sdata;
INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work);
INIT_LIST_HEAD(&roc->dependents);
@@ -2561,17 +2576,10 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
* or the SKB (for mgmt TX)
*/
if (!txskb) {
- /* local->mtx protects this */
- local->roc_cookie_counter++;
- roc->cookie = local->roc_cookie_counter;
- /* wow, you wrapped 64 bits ... more likely a bug */
- if (WARN_ON(roc->cookie == 0)) {
- roc->cookie = 1;
- local->roc_cookie_counter++;
- }
+ roc->cookie = ieee80211_mgmt_tx_cookie(local);
*cookie = roc->cookie;
} else {
- *cookie = (unsigned long)txskb;
+ roc->mgmt_tx_cookie = *cookie;
}
/* if there's one pending or we're scanning, queue this one */
@@ -3244,13 +3252,43 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
return err;
}
+static struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local,
+ struct sk_buff *skb, u64 *cookie,
+ gfp_t gfp)
+{
+ unsigned long spin_flags;
+ struct sk_buff *ack_skb;
+ int id;
+
+ ack_skb = skb_copy(skb, gfp);
+ if (!ack_skb)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_irqsave(&local->ack_status_lock, spin_flags);
+ id = idr_alloc(&local->ack_status_frames, ack_skb,
+ 1, 0x10000, GFP_ATOMIC);
+ spin_unlock_irqrestore(&local->ack_status_lock, spin_flags);
+
+ if (id < 0) {
+ kfree_skb(ack_skb);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ IEEE80211_SKB_CB(skb)->ack_frame_id = id;
+
+ *cookie = ieee80211_mgmt_tx_cookie(local);
+ IEEE80211_SKB_CB(ack_skb)->ack.cookie = *cookie;
+
+ return ack_skb;
+}
+
static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
struct cfg80211_mgmt_tx_params *params,
u64 *cookie)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
struct ieee80211_local *local = sdata->local;
- struct sk_buff *skb;
+ struct sk_buff *skb, *ack_skb;
struct sta_info *sta;
const struct ieee80211_mgmt *mgmt = (void *)params->buf;
bool need_offchan = false;
@@ -3299,8 +3337,14 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
break;
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_P2P_CLIENT:
- if (!sdata->u.mgd.associated)
+ sdata_lock(sdata);
+ if (!sdata->u.mgd.associated ||
+ (params->offchan && params->wait &&
+ local->ops->remain_on_channel &&
+ memcmp(sdata->u.mgd.associated->bssid,
+ mgmt->bssid, ETH_ALEN)))
need_offchan = true;
+ sdata_unlock(sdata);
break;
case NL80211_IFTYPE_P2P_DEVICE:
need_offchan = true;
@@ -3383,8 +3427,23 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
skb->dev = sdata->dev;
+ if (!params->dont_wait_for_ack) {
+ /* make a copy to preserve the frame contents
+ * in case of encryption.
+ */
+ ack_skb = ieee80211_make_ack_skb(local, skb, cookie,
+ GFP_KERNEL);
+ if (IS_ERR(ack_skb)) {
+ ret = PTR_ERR(ack_skb);
+ kfree_skb(skb);
+ goto out_unlock;
+ }
+ } else {
+ /* for cookie below */
+ ack_skb = skb;
+ }
+
if (!need_offchan) {
- *cookie = (unsigned long) skb;
ieee80211_tx_skb(sdata, skb);
ret = 0;
goto out_unlock;
@@ -3392,7 +3451,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN |
IEEE80211_TX_INTFL_OFFCHAN_TX_OK;
- if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)
+ if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL))
IEEE80211_SKB_CB(skb)->hw_queue =
local->hw.offchannel_tx_hw_queue;
@@ -3477,7 +3536,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
struct ieee80211_qos_hdr *nullfunc;
- struct sk_buff *skb;
+ struct sk_buff *skb, *ack_skb;
int size = sizeof(*nullfunc);
__le16 fc;
bool qos;
@@ -3485,20 +3544,24 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
struct sta_info *sta;
struct ieee80211_chanctx_conf *chanctx_conf;
enum ieee80211_band band;
+ int ret;
+
+ /* the lock is needed to assign the cookie later */
+ mutex_lock(&local->mtx);
rcu_read_lock();
chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
if (WARN_ON(!chanctx_conf)) {
- rcu_read_unlock();
- return -EINVAL;
+ ret = -EINVAL;
+ goto unlock;
}
band = chanctx_conf->def.chan->band;
sta = sta_info_get_bss(sdata, peer);
if (sta) {
qos = sta->sta.wme;
} else {
- rcu_read_unlock();
- return -ENOLINK;
+ ret = -ENOLINK;
+ goto unlock;
}
if (qos) {
@@ -3514,8 +3577,8 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
skb = dev_alloc_skb(local->hw.extra_tx_headroom + size);
if (!skb) {
- rcu_read_unlock();
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto unlock;
}
skb->dev = dev;
@@ -3541,13 +3604,23 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
if (qos)
nullfunc->qos_ctrl = cpu_to_le16(7);
+ ack_skb = ieee80211_make_ack_skb(local, skb, cookie, GFP_ATOMIC);
+ if (IS_ERR(ack_skb)) {
+ kfree_skb(skb);
+ ret = PTR_ERR(ack_skb);
+ goto unlock;
+ }
+
local_bh_disable();
ieee80211_xmit(sdata, sta, skb);
local_bh_enable();
+
+ ret = 0;
+unlock:
rcu_read_unlock();
+ mutex_unlock(&local->mtx);
- *cookie = (unsigned long) skb;
- return 0;
+ return ret;
}
static int ieee80211_cfg_get_channel(struct wiphy *wiphy,
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 5bcd4e558..f01c18a31 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -664,6 +664,8 @@ out:
ieee80211_bss_info_change_notify(sdata,
BSS_CHANGED_IDLE);
+ ieee80211_check_fast_xmit_iface(sdata);
+
return ret;
}
@@ -1008,6 +1010,8 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
if (WARN_ON(!chandef))
return -EINVAL;
+ ieee80211_change_chanctx(local, new_ctx, chandef);
+
vif_chsw[0].vif = &sdata->vif;
vif_chsw[0].old_ctx = &old_ctx->conf;
vif_chsw[0].new_ctx = &new_ctx->conf;
@@ -1030,6 +1034,8 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
if (sdata->vif.type == NL80211_IFTYPE_AP)
__ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
+ ieee80211_check_fast_xmit_iface(sdata);
+
if (ieee80211_chanctx_refcount(local, old_ctx) == 0)
ieee80211_free_chanctx(local, old_ctx);
@@ -1079,6 +1085,8 @@ ieee80211_vif_use_reserved_assign(struct ieee80211_sub_if_data *sdata)
if (WARN_ON(!chandef))
return -EINVAL;
+ ieee80211_change_chanctx(local, new_ctx, chandef);
+
list_del(&sdata->reserved_chanctx_list);
sdata->reserved_chanctx = NULL;
@@ -1376,6 +1384,8 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
__ieee80211_vif_copy_chanctx_to_vlans(sdata,
false);
+ ieee80211_check_fast_xmit_iface(sdata);
+
sdata->radar_required = sdata->reserved_radar_required;
if (sdata->vif.bss_conf.chandef.width !=
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 23813ebb3..3ea8b7de9 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -1,4 +1,3 @@
-
/*
* mac80211 debugfs for wireless PHYs
*
@@ -92,62 +91,66 @@ static const struct file_operations reset_ops = {
};
#endif
+static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = {
+#define FLAG(F) [IEEE80211_HW_##F] = #F
+ FLAG(HAS_RATE_CONTROL),
+ FLAG(RX_INCLUDES_FCS),
+ FLAG(HOST_BROADCAST_PS_BUFFERING),
+ FLAG(SIGNAL_UNSPEC),
+ FLAG(SIGNAL_DBM),
+ FLAG(NEED_DTIM_BEFORE_ASSOC),
+ FLAG(SPECTRUM_MGMT),
+ FLAG(AMPDU_AGGREGATION),
+ FLAG(SUPPORTS_PS),
+ FLAG(PS_NULLFUNC_STACK),
+ FLAG(SUPPORTS_DYNAMIC_PS),
+ FLAG(MFP_CAPABLE),
+ FLAG(WANT_MONITOR_VIF),
+ FLAG(NO_AUTO_VIF),
+ FLAG(SW_CRYPTO_CONTROL),
+ FLAG(SUPPORT_FAST_XMIT),
+ FLAG(REPORTS_TX_ACK_STATUS),
+ FLAG(CONNECTION_MONITOR),
+ FLAG(QUEUE_CONTROL),
+ FLAG(SUPPORTS_PER_STA_GTK),
+ FLAG(AP_LINK_PS),
+ FLAG(TX_AMPDU_SETUP_IN_HW),
+ FLAG(SUPPORTS_RC_TABLE),
+ FLAG(P2P_DEV_ADDR_FOR_INTF),
+ FLAG(TIMING_BEACON_ONLY),
+ FLAG(SUPPORTS_HT_CCK_RATES),
+ FLAG(CHANCTX_STA_CSA),
+ FLAG(SUPPORTS_CLONED_SKBS),
+ FLAG(SINGLE_SCAN_ON_ALL_BANDS),
+
+ /* keep last for the build bug below */
+ (void *)0x1
+#undef FLAG
+};
+
static ssize_t hwflags_read(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
struct ieee80211_local *local = file->private_data;
- int mxln = 500;
+ size_t bufsz = 30 * NUM_IEEE80211_HW_FLAGS;
+ char *buf = kzalloc(bufsz, GFP_KERNEL);
+ char *pos = buf, *end = buf + bufsz - 1;
ssize_t rv;
- char *buf = kzalloc(mxln, GFP_KERNEL);
- int sf = 0; /* how many written so far */
+ int i;
if (!buf)
- return 0;
-
- sf += scnprintf(buf, mxln - sf, "0x%x\n", local->hw.flags);
- if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)
- sf += scnprintf(buf + sf, mxln - sf, "HAS_RATE_CONTROL\n");
- if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
- sf += scnprintf(buf + sf, mxln - sf, "RX_INCLUDES_FCS\n");
- if (local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING)
- sf += scnprintf(buf + sf, mxln - sf,
- "HOST_BCAST_PS_BUFFERING\n");
- if (local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)
- sf += scnprintf(buf + sf, mxln - sf,
- "2GHZ_SHORT_SLOT_INCAPABLE\n");
- if (local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE)
- sf += scnprintf(buf + sf, mxln - sf,
- "2GHZ_SHORT_PREAMBLE_INCAPABLE\n");
- if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
- sf += scnprintf(buf + sf, mxln - sf, "SIGNAL_UNSPEC\n");
- if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
- sf += scnprintf(buf + sf, mxln - sf, "SIGNAL_DBM\n");
- if (local->hw.flags & IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC)
- sf += scnprintf(buf + sf, mxln - sf,
- "NEED_DTIM_BEFORE_ASSOC\n");
- if (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT)
- sf += scnprintf(buf + sf, mxln - sf, "SPECTRUM_MGMT\n");
- if (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)
- sf += scnprintf(buf + sf, mxln - sf, "AMPDU_AGGREGATION\n");
- if (local->hw.flags & IEEE80211_HW_SUPPORTS_PS)
- sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_PS\n");
- if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)
- sf += scnprintf(buf + sf, mxln - sf, "PS_NULLFUNC_STACK\n");
- if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)
- sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_DYNAMIC_PS\n");
- if (local->hw.flags & IEEE80211_HW_MFP_CAPABLE)
- sf += scnprintf(buf + sf, mxln - sf, "MFP_CAPABLE\n");
- if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
- sf += scnprintf(buf + sf, mxln - sf,
- "REPORTS_TX_ACK_STATUS\n");
- if (local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
- sf += scnprintf(buf + sf, mxln - sf, "CONNECTION_MONITOR\n");
- if (local->hw.flags & IEEE80211_HW_SUPPORTS_PER_STA_GTK)
- sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_PER_STA_GTK\n");
- if (local->hw.flags & IEEE80211_HW_AP_LINK_PS)
- sf += scnprintf(buf + sf, mxln - sf, "AP_LINK_PS\n");
- if (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW)
- sf += scnprintf(buf + sf, mxln - sf, "TX_AMPDU_SETUP_IN_HW\n");
+ return -ENOMEM;
+
+ /* fail compilation if somebody adds or removes
+ * a flag without updating the name array above
+ */
+ BUILD_BUG_ON(hw_flag_names[NUM_IEEE80211_HW_FLAGS] != (void *)0x1);
+
+ for (i = 0; i < NUM_IEEE80211_HW_FLAGS; i++) {
+ if (test_bit(i, local->hw.flags))
+ pos += scnprintf(pos, end - pos, "%s",
+ hw_flag_names[i]);
+ }
rv = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
kfree(buf);
@@ -219,8 +222,8 @@ static const struct file_operations stats_ ##name## _ops = { \
.llseek = generic_file_llseek, \
};
-#define DEBUGFS_STATS_ADD(name, field) \
- debugfs_create_u32(#name, 0400, statsd, (u32 *) &field);
+#define DEBUGFS_STATS_ADD(name) \
+ debugfs_create_u32(#name, 0400, statsd, &local->name);
#define DEBUGFS_DEVSTATS_ADD(name) \
debugfs_create_file(#name, 0400, statsd, local, &stats_ ##name## _ops);
@@ -255,53 +258,31 @@ void debugfs_hw_add(struct ieee80211_local *local)
if (!statsd)
return;
- DEBUGFS_STATS_ADD(transmitted_fragment_count,
- local->dot11TransmittedFragmentCount);
- DEBUGFS_STATS_ADD(multicast_transmitted_frame_count,
- local->dot11MulticastTransmittedFrameCount);
- DEBUGFS_STATS_ADD(failed_count, local->dot11FailedCount);
- DEBUGFS_STATS_ADD(retry_count, local->dot11RetryCount);
- DEBUGFS_STATS_ADD(multiple_retry_count,
- local->dot11MultipleRetryCount);
- DEBUGFS_STATS_ADD(frame_duplicate_count,
- local->dot11FrameDuplicateCount);
- DEBUGFS_STATS_ADD(received_fragment_count,
- local->dot11ReceivedFragmentCount);
- DEBUGFS_STATS_ADD(multicast_received_frame_count,
- local->dot11MulticastReceivedFrameCount);
- DEBUGFS_STATS_ADD(transmitted_frame_count,
- local->dot11TransmittedFrameCount);
#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
- DEBUGFS_STATS_ADD(tx_handlers_drop, local->tx_handlers_drop);
- DEBUGFS_STATS_ADD(tx_handlers_queued, local->tx_handlers_queued);
- DEBUGFS_STATS_ADD(tx_handlers_drop_fragment,
- local->tx_handlers_drop_fragment);
- DEBUGFS_STATS_ADD(tx_handlers_drop_wep,
- local->tx_handlers_drop_wep);
- DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc,
- local->tx_handlers_drop_not_assoc);
- DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port,
- local->tx_handlers_drop_unauth_port);
- DEBUGFS_STATS_ADD(rx_handlers_drop, local->rx_handlers_drop);
- DEBUGFS_STATS_ADD(rx_handlers_queued, local->rx_handlers_queued);
- DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc,
- local->rx_handlers_drop_nullfunc);
- DEBUGFS_STATS_ADD(rx_handlers_drop_defrag,
- local->rx_handlers_drop_defrag);
- DEBUGFS_STATS_ADD(rx_handlers_drop_short,
- local->rx_handlers_drop_short);
- DEBUGFS_STATS_ADD(tx_expand_skb_head,
- local->tx_expand_skb_head);
- DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned,
- local->tx_expand_skb_head_cloned);
- DEBUGFS_STATS_ADD(rx_expand_skb_head,
- local->rx_expand_skb_head);
- DEBUGFS_STATS_ADD(rx_expand_skb_head2,
- local->rx_expand_skb_head2);
- DEBUGFS_STATS_ADD(rx_handlers_fragments,
- local->rx_handlers_fragments);
- DEBUGFS_STATS_ADD(tx_status_drop,
- local->tx_status_drop);
+ DEBUGFS_STATS_ADD(dot11TransmittedFragmentCount);
+ DEBUGFS_STATS_ADD(dot11MulticastTransmittedFrameCount);
+ DEBUGFS_STATS_ADD(dot11FailedCount);
+ DEBUGFS_STATS_ADD(dot11RetryCount);
+ DEBUGFS_STATS_ADD(dot11MultipleRetryCount);
+ DEBUGFS_STATS_ADD(dot11FrameDuplicateCount);
+ DEBUGFS_STATS_ADD(dot11ReceivedFragmentCount);
+ DEBUGFS_STATS_ADD(dot11MulticastReceivedFrameCount);
+ DEBUGFS_STATS_ADD(dot11TransmittedFrameCount);
+ DEBUGFS_STATS_ADD(tx_handlers_drop);
+ DEBUGFS_STATS_ADD(tx_handlers_queued);
+ DEBUGFS_STATS_ADD(tx_handlers_drop_wep);
+ DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc);
+ DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port);
+ DEBUGFS_STATS_ADD(rx_handlers_drop);
+ DEBUGFS_STATS_ADD(rx_handlers_queued);
+ DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc);
+ DEBUGFS_STATS_ADD(rx_handlers_drop_defrag);
+ DEBUGFS_STATS_ADD(rx_handlers_drop_short);
+ DEBUGFS_STATS_ADD(tx_expand_skb_head);
+ DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned);
+ DEBUGFS_STATS_ADD(rx_expand_skb_head_defrag);
+ DEBUGFS_STATS_ADD(rx_handlers_fragments);
+ DEBUGFS_STATS_ADD(tx_status_drop);
#endif
DEBUGFS_DEVSTATS_ADD(dot11ACKFailureCount);
DEBUGFS_DEVSTATS_ADD(dot11RTSFailureCount);
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 71ac1b5f4..e82bf1e9d 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -95,28 +95,13 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf,
break;
case WLAN_CIPHER_SUITE_CCMP:
case WLAN_CIPHER_SUITE_CCMP_256:
- pn = atomic64_read(&key->u.ccmp.tx_pn);
- len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
- (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24),
- (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn);
- break;
case WLAN_CIPHER_SUITE_AES_CMAC:
case WLAN_CIPHER_SUITE_BIP_CMAC_256:
- pn = atomic64_read(&key->u.aes_cmac.tx_pn);
- len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
- (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24),
- (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn);
- break;
case WLAN_CIPHER_SUITE_BIP_GMAC_128:
case WLAN_CIPHER_SUITE_BIP_GMAC_256:
- pn = atomic64_read(&key->u.aes_gmac.tx_pn);
- len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
- (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24),
- (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn);
- break;
case WLAN_CIPHER_SUITE_GCMP:
case WLAN_CIPHER_SUITE_GCMP_256:
- pn = atomic64_read(&key->u.gcmp.tx_pn);
+ pn = atomic64_read(&key->conf.tx_pn);
len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
(u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24),
(u8)(pn >> 16), (u8)(pn >> 8), (u8)pn);
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 252859e90..06d529350 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -29,8 +29,6 @@ static ssize_t sta_ ##name## _read(struct file *file, \
format_string, sta->field); \
}
#define STA_READ_D(name, field) STA_READ(name, field, "%d\n")
-#define STA_READ_U(name, field) STA_READ(name, field, "%u\n")
-#define STA_READ_S(name, field) STA_READ(name, field, "%s\n")
#define STA_OPS(name) \
static const struct file_operations sta_ ##name## _ops = { \
@@ -52,10 +50,7 @@ static const struct file_operations sta_ ##name## _ops = { \
STA_OPS(name)
STA_FILE(aid, sta.aid, D);
-STA_FILE(dev, sdata->name, S);
-STA_FILE(last_signal, last_signal, D);
STA_FILE(last_ack_signal, last_ack_signal, D);
-STA_FILE(beacon_loss_count, beacon_loss_count, D);
static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
@@ -101,40 +96,6 @@ static ssize_t sta_num_ps_buf_frames_read(struct file *file,
}
STA_OPS(num_ps_buf_frames);
-static ssize_t sta_inactive_ms_read(struct file *file, char __user *userbuf,
- size_t count, loff_t *ppos)
-{
- struct sta_info *sta = file->private_data;
- return mac80211_format_buffer(userbuf, count, ppos, "%d\n",
- jiffies_to_msecs(jiffies - sta->last_rx));
-}
-STA_OPS(inactive_ms);
-
-
-static ssize_t sta_connected_time_read(struct file *file, char __user *userbuf,
- size_t count, loff_t *ppos)
-{
- struct sta_info *sta = file->private_data;
- struct timespec uptime;
- struct tm result;
- long connected_time_secs;
- char buf[100];
- int res;
- ktime_get_ts(&uptime);
- connected_time_secs = uptime.tv_sec - sta->last_connected;
- time_to_tm(connected_time_secs, 0, &result);
- result.tm_year -= 70;
- result.tm_mday -= 1;
- res = scnprintf(buf, sizeof(buf),
- "years - %ld\nmonths - %d\ndays - %d\nclock - %d:%d:%d\n\n",
- result.tm_year, result.tm_mon, result.tm_mday,
- result.tm_hour, result.tm_min, result.tm_sec);
- return simple_read_from_buffer(userbuf, count, ppos, buf, res);
-}
-STA_OPS(connected_time);
-
-
-
static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
{
@@ -359,37 +320,6 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
}
STA_OPS(vht_capa);
-static ssize_t sta_current_tx_rate_read(struct file *file, char __user *userbuf,
- size_t count, loff_t *ppos)
-{
- struct sta_info *sta = file->private_data;
- struct rate_info rinfo;
- u16 rate;
- sta_set_rate_info_tx(sta, &sta->last_tx_rate, &rinfo);
- rate = cfg80211_calculate_bitrate(&rinfo);
-
- return mac80211_format_buffer(userbuf, count, ppos,
- "%d.%d MBit/s\n",
- rate/10, rate%10);
-}
-STA_OPS(current_tx_rate);
-
-static ssize_t sta_last_rx_rate_read(struct file *file, char __user *userbuf,
- size_t count, loff_t *ppos)
-{
- struct sta_info *sta = file->private_data;
- struct rate_info rinfo;
- u16 rate;
-
- sta_set_rate_info_rx(sta, &rinfo);
-
- rate = cfg80211_calculate_bitrate(&rinfo);
-
- return mac80211_format_buffer(userbuf, count, ppos,
- "%d.%d MBit/s\n",
- rate/10, rate%10);
-}
-STA_OPS(last_rx_rate);
#define DEBUGFS_ADD(name) \
debugfs_create_file(#name, 0400, \
@@ -432,30 +362,15 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
DEBUGFS_ADD(flags);
DEBUGFS_ADD(num_ps_buf_frames);
- DEBUGFS_ADD(inactive_ms);
- DEBUGFS_ADD(connected_time);
DEBUGFS_ADD(last_seq_ctrl);
DEBUGFS_ADD(agg_status);
- DEBUGFS_ADD(dev);
- DEBUGFS_ADD(last_signal);
- DEBUGFS_ADD(beacon_loss_count);
DEBUGFS_ADD(ht_capa);
DEBUGFS_ADD(vht_capa);
DEBUGFS_ADD(last_ack_signal);
- DEBUGFS_ADD(current_tx_rate);
- DEBUGFS_ADD(last_rx_rate);
- DEBUGFS_ADD_COUNTER(rx_packets, rx_packets);
- DEBUGFS_ADD_COUNTER(tx_packets, tx_packets);
- DEBUGFS_ADD_COUNTER(rx_bytes, rx_bytes);
- DEBUGFS_ADD_COUNTER(tx_bytes, tx_bytes);
DEBUGFS_ADD_COUNTER(rx_duplicates, num_duplicates);
DEBUGFS_ADD_COUNTER(rx_fragments, rx_fragments);
- DEBUGFS_ADD_COUNTER(rx_dropped, rx_dropped);
- DEBUGFS_ADD_COUNTER(tx_fragments, tx_fragments);
DEBUGFS_ADD_COUNTER(tx_filtered, tx_filtered_count);
- DEBUGFS_ADD_COUNTER(tx_retry_failed, tx_retry_failed);
- DEBUGFS_ADD_COUNTER(tx_retry_count, tx_retry_count);
if (sizeof(sta->driver_buffered_tids) == sizeof(u32))
debugfs_create_x32("driver_buffered_tids", 0400,
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 26e1ca8a4..32a2e707e 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -146,7 +146,7 @@ static inline int drv_add_interface(struct ieee80211_local *local,
if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
(sdata->vif.type == NL80211_IFTYPE_MONITOR &&
- !(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF) &&
+ !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) &&
!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))))
return -EINVAL;
@@ -417,12 +417,13 @@ static inline int drv_get_stats(struct ieee80211_local *local,
return ret;
}
-static inline void drv_get_tkip_seq(struct ieee80211_local *local,
- u8 hw_key_idx, u32 *iv32, u16 *iv16)
+static inline void drv_get_key_seq(struct ieee80211_local *local,
+ struct ieee80211_key *key,
+ struct ieee80211_key_seq *seq)
{
- if (local->ops->get_tkip_seq)
- local->ops->get_tkip_seq(&local->hw, hw_key_idx, iv32, iv16);
- trace_drv_get_tkip_seq(local, hw_key_idx, iv32, iv16);
+ if (local->ops->get_key_seq)
+ local->ops->get_key_seq(&local->hw, &key->conf, seq);
+ trace_drv_get_key_seq(local, &key->conf);
}
static inline int drv_set_frag_threshold(struct ieee80211_local *local,
diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c
index 52bcea6ad..188faab11 100644
--- a/net/mac80211/ethtool.c
+++ b/net/mac80211/ethtool.c
@@ -38,7 +38,7 @@ static void ieee80211_get_ringparam(struct net_device *dev,
static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = {
"rx_packets", "rx_bytes",
"rx_duplicates", "rx_fragments", "rx_dropped",
- "tx_packets", "tx_bytes", "tx_fragments",
+ "tx_packets", "tx_bytes",
"tx_filtered", "tx_retry_failed", "tx_retries",
"beacon_loss", "sta_state", "txrate", "rxrate", "signal",
"channel", "noise", "ch_time", "ch_time_busy",
@@ -87,7 +87,6 @@ static void ieee80211_get_stats(struct net_device *dev,
\
data[i++] += sinfo.tx_packets; \
data[i++] += sinfo.tx_bytes; \
- data[i++] += sta->tx_fragments; \
data[i++] += sta->tx_filtered_count; \
data[i++] += sta->tx_retry_failed; \
data[i++] += sta->tx_retry_count; \
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index a9c9d961f..7f72bc9ba 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1032,8 +1032,11 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
}
}
- if (sta && elems->wmm_info && local->hw.queues >= IEEE80211_NUM_ACS)
+ if (sta && !sta->sta.wme &&
+ elems->wmm_info && local->hw.queues >= IEEE80211_NUM_ACS) {
sta->sta.wme = true;
+ ieee80211_check_fast_xmit(sta);
+ }
if (sta && elems->ht_operation && elems->ht_cap_elem &&
sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT &&
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index c0a9187bc..b12f61507 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -181,8 +181,6 @@ typedef unsigned __bitwise__ ieee80211_rx_result;
/**
* enum ieee80211_packet_rx_flags - packet RX flags
- * @IEEE80211_RX_RA_MATCH: frame is destined to interface currently processed
- * (incl. multicast frames)
* @IEEE80211_RX_FRAGMENTED: fragmented frame
* @IEEE80211_RX_AMSDU: a-MSDU packet
* @IEEE80211_RX_MALFORMED_ACTION_FRM: action frame is malformed
@@ -192,7 +190,6 @@ typedef unsigned __bitwise__ ieee80211_rx_result;
* @rx_flags field of &struct ieee80211_rx_status.
*/
enum ieee80211_packet_rx_flags {
- IEEE80211_RX_RA_MATCH = BIT(1),
IEEE80211_RX_FRAGMENTED = BIT(2),
IEEE80211_RX_AMSDU = BIT(3),
IEEE80211_RX_MALFORMED_ACTION_FRM = BIT(4),
@@ -722,7 +719,6 @@ struct ieee80211_if_mesh {
* enum ieee80211_sub_if_data_flags - virtual interface flags
*
* @IEEE80211_SDATA_ALLMULTI: interface wants all multicast packets
- * @IEEE80211_SDATA_PROMISC: interface is promisc
* @IEEE80211_SDATA_OPERATING_GMODE: operating in G-only mode
* @IEEE80211_SDATA_DONT_BRIDGE_PACKETS: bridge packets between
* associated stations and deliver multicast frames both
@@ -732,7 +728,6 @@ struct ieee80211_if_mesh {
*/
enum ieee80211_sub_if_data_flags {
IEEE80211_SDATA_ALLMULTI = BIT(0),
- IEEE80211_SDATA_PROMISC = BIT(1),
IEEE80211_SDATA_OPERATING_GMODE = BIT(2),
IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3),
IEEE80211_SDATA_DISCONNECT_RESUME = BIT(4),
@@ -1040,7 +1035,6 @@ enum queue_stop_reason {
#ifdef CONFIG_MAC80211_LEDS
struct tpt_led_trigger {
- struct led_trigger trig;
char name[32];
const struct ieee80211_tpt_blink *blink_table;
unsigned int blink_table_len;
@@ -1208,8 +1202,8 @@ struct ieee80211_local {
atomic_t agg_queue_stop[IEEE80211_MAX_QUEUES];
- /* number of interfaces with corresponding IFF_ flags */
- atomic_t iff_allmultis, iff_promiscs;
+ /* number of interfaces with allmulti RX */
+ atomic_t iff_allmultis;
struct rate_control_ref *rate_ctrl;
@@ -1261,6 +1255,15 @@ struct ieee80211_local {
struct list_head chanctx_list;
struct mutex chanctx_mtx;
+#ifdef CONFIG_MAC80211_LEDS
+ struct led_trigger tx_led, rx_led, assoc_led, radio_led;
+ struct led_trigger tpt_led;
+ atomic_t tx_led_active, rx_led_active, assoc_led_active;
+ atomic_t radio_led_active, tpt_led_active;
+ struct tpt_led_trigger *tpt_led_trigger;
+#endif
+
+#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
/* SNMP counters */
/* dot11CountersTable */
u32 dot11TransmittedFragmentCount;
@@ -1273,18 +1276,9 @@ struct ieee80211_local {
u32 dot11MulticastReceivedFrameCount;
u32 dot11TransmittedFrameCount;
-#ifdef CONFIG_MAC80211_LEDS
- struct led_trigger *tx_led, *rx_led, *assoc_led, *radio_led;
- struct tpt_led_trigger *tpt_led_trigger;
- char tx_led_name[32], rx_led_name[32],
- assoc_led_name[32], radio_led_name[32];
-#endif
-
-#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
/* TX/RX handler statistics */
unsigned int tx_handlers_drop;
unsigned int tx_handlers_queued;
- unsigned int tx_handlers_drop_fragment;
unsigned int tx_handlers_drop_wep;
unsigned int tx_handlers_drop_not_assoc;
unsigned int tx_handlers_drop_unauth_port;
@@ -1295,8 +1289,7 @@ struct ieee80211_local {
unsigned int rx_handlers_drop_short;
unsigned int tx_expand_skb_head;
unsigned int tx_expand_skb_head_cloned;
- unsigned int rx_expand_skb_head;
- unsigned int rx_expand_skb_head2;
+ unsigned int rx_expand_skb_head_defrag;
unsigned int rx_handlers_fragments;
unsigned int tx_status_drop;
#define I802_DEBUG_INC(c) (c)++
@@ -1648,6 +1641,11 @@ struct sk_buff *
ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u32 info_flags);
+void ieee80211_check_fast_xmit(struct sta_info *sta);
+void ieee80211_check_fast_xmit_all(struct ieee80211_local *local);
+void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata);
+void ieee80211_clear_fast_xmit(struct sta_info *sta);
+
/* HT */
void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sta_ht_cap *ht_cap);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 84cef600c..553ac6dd4 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -338,7 +338,7 @@ static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata,
if ((iftype != NL80211_IFTYPE_AP &&
iftype != NL80211_IFTYPE_P2P_GO &&
iftype != NL80211_IFTYPE_MESH_POINT) ||
- !(sdata->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)) {
+ !ieee80211_hw_check(&sdata->local->hw, QUEUE_CONTROL)) {
sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE;
return 0;
}
@@ -378,7 +378,7 @@ static void ieee80211_set_default_queues(struct ieee80211_sub_if_data *sdata)
int i;
for (i = 0; i < IEEE80211_NUM_ACS; i++) {
- if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)
+ if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL))
sdata->vif.hw_queue[i] = IEEE80211_INVAL_HW_QUEUE;
else if (local->hw.queues >= IEEE80211_NUM_ACS)
sdata->vif.hw_queue[i] = i;
@@ -393,7 +393,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
struct ieee80211_sub_if_data *sdata;
int ret;
- if (!(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF))
+ if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
return 0;
ASSERT_RTNL();
@@ -454,7 +454,7 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
- if (!(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF))
+ if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
return;
ASSERT_RTNL();
@@ -703,9 +703,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
if (sdata->flags & IEEE80211_SDATA_ALLMULTI)
atomic_inc(&local->iff_allmultis);
- if (sdata->flags & IEEE80211_SDATA_PROMISC)
- atomic_inc(&local->iff_promiscs);
-
if (coming_up)
local->open_count++;
@@ -835,13 +832,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) ||
(sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1)));
- /* don't count this interface for promisc/allmulti while it is down */
+ /* don't count this interface for allmulti while it is down */
if (sdata->flags & IEEE80211_SDATA_ALLMULTI)
atomic_dec(&local->iff_allmultis);
- if (sdata->flags & IEEE80211_SDATA_PROMISC)
- atomic_dec(&local->iff_promiscs);
-
if (sdata->vif.type == NL80211_IFTYPE_AP) {
local->fif_pspoll--;
local->fif_probe_req--;
@@ -1055,12 +1049,10 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
- int allmulti, promisc, sdata_allmulti, sdata_promisc;
+ int allmulti, sdata_allmulti;
allmulti = !!(dev->flags & IFF_ALLMULTI);
- promisc = !!(dev->flags & IFF_PROMISC);
sdata_allmulti = !!(sdata->flags & IEEE80211_SDATA_ALLMULTI);
- sdata_promisc = !!(sdata->flags & IEEE80211_SDATA_PROMISC);
if (allmulti != sdata_allmulti) {
if (dev->flags & IFF_ALLMULTI)
@@ -1070,13 +1062,6 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
sdata->flags ^= IEEE80211_SDATA_ALLMULTI;
}
- if (promisc != sdata_promisc) {
- if (dev->flags & IFF_PROMISC)
- atomic_inc(&local->iff_promiscs);
- else
- atomic_dec(&local->iff_promiscs);
- sdata->flags ^= IEEE80211_SDATA_PROMISC;
- }
spin_lock_bh(&local->filter_lock);
__hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len);
spin_unlock_bh(&local->filter_lock);
@@ -1117,6 +1102,35 @@ static u16 ieee80211_netdev_select_queue(struct net_device *dev,
return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb);
}
+static struct rtnl_link_stats64 *
+ieee80211_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ const struct pcpu_sw_netstats *tstats;
+ u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+ unsigned int start;
+
+ tstats = per_cpu_ptr(dev->tstats, i);
+
+ do {
+ start = u64_stats_fetch_begin_irq(&tstats->syncp);
+ rx_packets = tstats->rx_packets;
+ tx_packets = tstats->tx_packets;
+ rx_bytes = tstats->rx_bytes;
+ tx_bytes = tstats->tx_bytes;
+ } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
+
+ stats->rx_packets += rx_packets;
+ stats->tx_packets += tx_packets;
+ stats->rx_bytes += rx_bytes;
+ stats->tx_bytes += tx_bytes;
+ }
+
+ return stats;
+}
+
static const struct net_device_ops ieee80211_dataif_ops = {
.ndo_open = ieee80211_open,
.ndo_stop = ieee80211_stop,
@@ -1126,6 +1140,7 @@ static const struct net_device_ops ieee80211_dataif_ops = {
.ndo_change_mtu = ieee80211_change_mtu,
.ndo_set_mac_address = ieee80211_change_mac,
.ndo_select_queue = ieee80211_netdev_select_queue,
+ .ndo_get_stats64 = ieee80211_get_stats64,
};
static u16 ieee80211_monitor_select_queue(struct net_device *dev,
@@ -1159,14 +1174,21 @@ static const struct net_device_ops ieee80211_monitorif_ops = {
.ndo_change_mtu = ieee80211_change_mtu,
.ndo_set_mac_address = ieee80211_change_mac,
.ndo_select_queue = ieee80211_monitor_select_queue,
+ .ndo_get_stats64 = ieee80211_get_stats64,
};
+static void ieee80211_if_free(struct net_device *dev)
+{
+ free_percpu(dev->tstats);
+ free_netdev(dev);
+}
+
static void ieee80211_if_setup(struct net_device *dev)
{
ether_setup(dev);
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->netdev_ops = &ieee80211_dataif_ops;
- dev->destructor = free_netdev;
+ dev->destructor = ieee80211_if_free;
}
static void ieee80211_iface_work(struct work_struct *work)
@@ -1564,7 +1586,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
break;
case NL80211_IFTYPE_P2P_CLIENT:
case NL80211_IFTYPE_P2P_GO:
- if (local->hw.flags & IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF) {
+ if (ieee80211_hw_check(&local->hw, P2P_DEV_ADDR_FOR_INTF)) {
list_for_each_entry(sdata, &local->interfaces, list) {
if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE)
continue;
@@ -1707,6 +1729,12 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
return -ENOMEM;
dev_net_set(ndev, wiphy_net(local->hw.wiphy));
+ ndev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!ndev->tstats) {
+ free_netdev(ndev);
+ return -ENOMEM;
+ }
+
ndev->needed_headroom = local->tx_headroom +
4*6 /* four MAC addresses */
+ 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */
@@ -1835,10 +1863,6 @@ void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata)
ieee80211_teardown_sdata(sdata);
}
-/*
- * Remove all interfaces, may only be called at hardware unregistration
- * time because it doesn't do RCU-safe list removals.
- */
void ieee80211_remove_interfaces(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata, *tmp;
@@ -1847,14 +1871,21 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
ASSERT_RTNL();
- /*
- * Close all AP_VLAN interfaces first, as otherwise they
- * might be closed while the AP interface they belong to
- * is closed, causing unregister_netdevice_many() to crash.
+ /* Before destroying the interfaces, make sure they're all stopped so
+ * that the hardware is stopped. Otherwise, the driver might still be
+ * iterating the interfaces during the shutdown, e.g. from a worker
+ * or from RX processing or similar, and if it does so (using atomic
+ * iteration) while we're manipulating the list, the iteration will
+ * crash.
+ *
+ * After this, the hardware should be stopped and the driver should
+ * have stopped all of its activities, so that we can do RCU-unaware
+ * manipulations of the interface list below.
*/
- list_for_each_entry(sdata, &local->interfaces, list)
- if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
- dev_close(sdata->dev);
+ cfg80211_shutdown_all_interfaces(local->hw.wiphy);
+
+ WARN(local->open_count, "%s: open count remains %d\n",
+ wiphy_name(local->hw.wiphy), local->open_count);
mutex_lock(&local->iflist_mtx);
list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) {
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 81e9785f3..b22df3a79 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -154,7 +154,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
* is supported; if not, return.
*/
if (sta && !(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE) &&
- !(key->local->hw.flags & IEEE80211_HW_SUPPORTS_PER_STA_GTK))
+ !ieee80211_hw_check(&key->local->hw, SUPPORTS_PER_STA_GTK))
goto out_unsupported;
if (sta && !sta->uploaded)
@@ -208,7 +208,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
/* all of these we can do in software - if driver can */
if (ret == 1)
return 0;
- if (key->local->hw.flags & IEEE80211_HW_SW_CRYPTO_CONTROL)
+ if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL))
return -EINVAL;
return 0;
default:
@@ -263,6 +263,7 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata,
if (uni) {
rcu_assign_pointer(sdata->default_unicast_key, key);
+ ieee80211_check_fast_xmit_iface(sdata);
drv_set_default_unicast_key(sdata->local, sdata, idx);
}
@@ -332,6 +333,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
if (pairwise) {
rcu_assign_pointer(sta->ptk[idx], new);
sta->ptk_idx = idx;
+ ieee80211_check_fast_xmit(sta);
} else {
rcu_assign_pointer(sta->gtk[idx], new);
sta->gtk_idx = idx;
@@ -517,15 +519,17 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
break;
default:
if (cs) {
- size_t len = (seq_len > MAX_PN_LEN) ?
- MAX_PN_LEN : seq_len;
+ if (seq_len && seq_len != cs->pn_len) {
+ kfree(key);
+ return ERR_PTR(-EINVAL);
+ }
key->conf.iv_len = cs->hdr_len;
key->conf.icv_len = cs->mic_len;
for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++)
- for (j = 0; j < len; j++)
+ for (j = 0; j < seq_len; j++)
key->u.gen.rx_pn[i][j] =
- seq[len - j - 1];
+ seq[seq_len - j - 1];
key->flags |= KEY_FLAG_CIPHER_SCHEME;
}
}
@@ -899,27 +903,19 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf,
break;
case WLAN_CIPHER_SUITE_CCMP:
case WLAN_CIPHER_SUITE_CCMP_256:
- pn64 = atomic64_read(&key->u.ccmp.tx_pn);
- seq->ccmp.pn[5] = pn64;
- seq->ccmp.pn[4] = pn64 >> 8;
- seq->ccmp.pn[3] = pn64 >> 16;
- seq->ccmp.pn[2] = pn64 >> 24;
- seq->ccmp.pn[1] = pn64 >> 32;
- seq->ccmp.pn[0] = pn64 >> 40;
- break;
case WLAN_CIPHER_SUITE_AES_CMAC:
case WLAN_CIPHER_SUITE_BIP_CMAC_256:
- pn64 = atomic64_read(&key->u.aes_cmac.tx_pn);
- seq->ccmp.pn[5] = pn64;
- seq->ccmp.pn[4] = pn64 >> 8;
- seq->ccmp.pn[3] = pn64 >> 16;
- seq->ccmp.pn[2] = pn64 >> 24;
- seq->ccmp.pn[1] = pn64 >> 32;
- seq->ccmp.pn[0] = pn64 >> 40;
- break;
+ BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
+ offsetof(typeof(*seq), aes_cmac));
case WLAN_CIPHER_SUITE_BIP_GMAC_128:
case WLAN_CIPHER_SUITE_BIP_GMAC_256:
- pn64 = atomic64_read(&key->u.aes_gmac.tx_pn);
+ BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
+ offsetof(typeof(*seq), aes_gmac));
+ case WLAN_CIPHER_SUITE_GCMP:
+ case WLAN_CIPHER_SUITE_GCMP_256:
+ BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
+ offsetof(typeof(*seq), gcmp));
+ pn64 = atomic64_read(&key->conf.tx_pn);
seq->ccmp.pn[5] = pn64;
seq->ccmp.pn[4] = pn64 >> 8;
seq->ccmp.pn[3] = pn64 >> 16;
@@ -927,16 +923,6 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf,
seq->ccmp.pn[1] = pn64 >> 32;
seq->ccmp.pn[0] = pn64 >> 40;
break;
- case WLAN_CIPHER_SUITE_GCMP:
- case WLAN_CIPHER_SUITE_GCMP_256:
- pn64 = atomic64_read(&key->u.gcmp.tx_pn);
- seq->gcmp.pn[5] = pn64;
- seq->gcmp.pn[4] = pn64 >> 8;
- seq->gcmp.pn[3] = pn64 >> 16;
- seq->gcmp.pn[2] = pn64 >> 24;
- seq->gcmp.pn[1] = pn64 >> 32;
- seq->gcmp.pn[0] = pn64 >> 40;
- break;
default:
WARN_ON(1);
}
@@ -1011,43 +997,25 @@ void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf,
break;
case WLAN_CIPHER_SUITE_CCMP:
case WLAN_CIPHER_SUITE_CCMP_256:
- pn64 = (u64)seq->ccmp.pn[5] |
- ((u64)seq->ccmp.pn[4] << 8) |
- ((u64)seq->ccmp.pn[3] << 16) |
- ((u64)seq->ccmp.pn[2] << 24) |
- ((u64)seq->ccmp.pn[1] << 32) |
- ((u64)seq->ccmp.pn[0] << 40);
- atomic64_set(&key->u.ccmp.tx_pn, pn64);
- break;
case WLAN_CIPHER_SUITE_AES_CMAC:
case WLAN_CIPHER_SUITE_BIP_CMAC_256:
- pn64 = (u64)seq->aes_cmac.pn[5] |
- ((u64)seq->aes_cmac.pn[4] << 8) |
- ((u64)seq->aes_cmac.pn[3] << 16) |
- ((u64)seq->aes_cmac.pn[2] << 24) |
- ((u64)seq->aes_cmac.pn[1] << 32) |
- ((u64)seq->aes_cmac.pn[0] << 40);
- atomic64_set(&key->u.aes_cmac.tx_pn, pn64);
- break;
+ BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
+ offsetof(typeof(*seq), aes_cmac));
case WLAN_CIPHER_SUITE_BIP_GMAC_128:
case WLAN_CIPHER_SUITE_BIP_GMAC_256:
- pn64 = (u64)seq->aes_gmac.pn[5] |
- ((u64)seq->aes_gmac.pn[4] << 8) |
- ((u64)seq->aes_gmac.pn[3] << 16) |
- ((u64)seq->aes_gmac.pn[2] << 24) |
- ((u64)seq->aes_gmac.pn[1] << 32) |
- ((u64)seq->aes_gmac.pn[0] << 40);
- atomic64_set(&key->u.aes_gmac.tx_pn, pn64);
- break;
+ BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
+ offsetof(typeof(*seq), aes_gmac));
case WLAN_CIPHER_SUITE_GCMP:
case WLAN_CIPHER_SUITE_GCMP_256:
- pn64 = (u64)seq->gcmp.pn[5] |
- ((u64)seq->gcmp.pn[4] << 8) |
- ((u64)seq->gcmp.pn[3] << 16) |
- ((u64)seq->gcmp.pn[2] << 24) |
- ((u64)seq->gcmp.pn[1] << 32) |
- ((u64)seq->gcmp.pn[0] << 40);
- atomic64_set(&key->u.gcmp.tx_pn, pn64);
+ BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
+ offsetof(typeof(*seq), gcmp));
+ pn64 = (u64)seq->ccmp.pn[5] |
+ ((u64)seq->ccmp.pn[4] << 8) |
+ ((u64)seq->ccmp.pn[3] << 16) |
+ ((u64)seq->ccmp.pn[2] << 24) |
+ ((u64)seq->ccmp.pn[1] << 32) |
+ ((u64)seq->ccmp.pn[0] << 40);
+ atomic64_set(&key->conf.tx_pn, pn64);
break;
default:
WARN_ON(1);
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 96557dd1e..3f4f9eaac 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -18,7 +18,6 @@
#define NUM_DEFAULT_KEYS 4
#define NUM_DEFAULT_MGMT_KEYS 2
-#define MAX_PN_LEN 16
struct ieee80211_local;
struct ieee80211_sub_if_data;
@@ -78,7 +77,6 @@ struct ieee80211_key {
u32 mic_failures;
} tkip;
struct {
- atomic64_t tx_pn;
/*
* Last received packet number. The first
* IEEE80211_NUM_TIDS counters are used with Data
@@ -90,21 +88,18 @@ struct ieee80211_key {
u32 replays; /* dot11RSNAStatsCCMPReplays */
} ccmp;
struct {
- atomic64_t tx_pn;
u8 rx_pn[IEEE80211_CMAC_PN_LEN];
struct crypto_cipher *tfm;
u32 replays; /* dot11RSNAStatsCMACReplays */
u32 icverrors; /* dot11RSNAStatsCMACICVErrors */
} aes_cmac;
struct {
- atomic64_t tx_pn;
u8 rx_pn[IEEE80211_GMAC_PN_LEN];
struct crypto_aead *tfm;
u32 replays; /* dot11RSNAStatsCMACReplays */
u32 icverrors; /* dot11RSNAStatsCMACICVErrors */
} aes_gmac;
struct {
- atomic64_t tx_pn;
/* Last received packet number. The first
* IEEE80211_NUM_TIDS counters are used with Data
* frames and the last counter is used with Robust
@@ -116,7 +111,7 @@ struct ieee80211_key {
} gcmp;
struct {
/* generic cipher scheme */
- u8 rx_pn[IEEE80211_NUM_TIDS + 1][MAX_PN_LEN];
+ u8 rx_pn[IEEE80211_NUM_TIDS + 1][IEEE80211_MAX_PN_LEN];
} gen;
} u;
diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index e2b836446..0505845b7 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -12,96 +12,175 @@
#include <linux/export.h>
#include "led.h"
-#define MAC80211_BLINK_DELAY 50 /* ms */
-
-void ieee80211_led_rx(struct ieee80211_local *local)
-{
- unsigned long led_delay = MAC80211_BLINK_DELAY;
- if (unlikely(!local->rx_led))
- return;
- led_trigger_blink_oneshot(local->rx_led, &led_delay, &led_delay, 0);
-}
-
-void ieee80211_led_tx(struct ieee80211_local *local)
-{
- unsigned long led_delay = MAC80211_BLINK_DELAY;
- if (unlikely(!local->tx_led))
- return;
- led_trigger_blink_oneshot(local->tx_led, &led_delay, &led_delay, 0);
-}
-
void ieee80211_led_assoc(struct ieee80211_local *local, bool associated)
{
- if (unlikely(!local->assoc_led))
+ if (!atomic_read(&local->assoc_led_active))
return;
if (associated)
- led_trigger_event(local->assoc_led, LED_FULL);
+ led_trigger_event(&local->assoc_led, LED_FULL);
else
- led_trigger_event(local->assoc_led, LED_OFF);
+ led_trigger_event(&local->assoc_led, LED_OFF);
}
void ieee80211_led_radio(struct ieee80211_local *local, bool enabled)
{
- if (unlikely(!local->radio_led))
+ if (!atomic_read(&local->radio_led_active))
return;
if (enabled)
- led_trigger_event(local->radio_led, LED_FULL);
+ led_trigger_event(&local->radio_led, LED_FULL);
else
- led_trigger_event(local->radio_led, LED_OFF);
+ led_trigger_event(&local->radio_led, LED_OFF);
+}
+
+void ieee80211_alloc_led_names(struct ieee80211_local *local)
+{
+ local->rx_led.name = kasprintf(GFP_KERNEL, "%srx",
+ wiphy_name(local->hw.wiphy));
+ local->tx_led.name = kasprintf(GFP_KERNEL, "%stx",
+ wiphy_name(local->hw.wiphy));
+ local->assoc_led.name = kasprintf(GFP_KERNEL, "%sassoc",
+ wiphy_name(local->hw.wiphy));
+ local->radio_led.name = kasprintf(GFP_KERNEL, "%sradio",
+ wiphy_name(local->hw.wiphy));
+}
+
+void ieee80211_free_led_names(struct ieee80211_local *local)
+{
+ kfree(local->rx_led.name);
+ kfree(local->tx_led.name);
+ kfree(local->assoc_led.name);
+ kfree(local->radio_led.name);
+}
+
+static void ieee80211_tx_led_activate(struct led_classdev *led_cdev)
+{
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ tx_led);
+
+ atomic_inc(&local->tx_led_active);
+}
+
+static void ieee80211_tx_led_deactivate(struct led_classdev *led_cdev)
+{
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ tx_led);
+
+ atomic_dec(&local->tx_led_active);
+}
+
+static void ieee80211_rx_led_activate(struct led_classdev *led_cdev)
+{
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ rx_led);
+
+ atomic_inc(&local->rx_led_active);
+}
+
+static void ieee80211_rx_led_deactivate(struct led_classdev *led_cdev)
+{
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ rx_led);
+
+ atomic_dec(&local->rx_led_active);
+}
+
+static void ieee80211_assoc_led_activate(struct led_classdev *led_cdev)
+{
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ assoc_led);
+
+ atomic_inc(&local->assoc_led_active);
+}
+
+static void ieee80211_assoc_led_deactivate(struct led_classdev *led_cdev)
+{
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ assoc_led);
+
+ atomic_dec(&local->assoc_led_active);
+}
+
+static void ieee80211_radio_led_activate(struct led_classdev *led_cdev)
+{
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ radio_led);
+
+ atomic_inc(&local->radio_led_active);
+}
+
+static void ieee80211_radio_led_deactivate(struct led_classdev *led_cdev)
+{
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ radio_led);
+
+ atomic_dec(&local->radio_led_active);
+}
+
+static void ieee80211_tpt_led_activate(struct led_classdev *led_cdev)
+{
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ tpt_led);
+
+ atomic_inc(&local->tpt_led_active);
}
-void ieee80211_led_names(struct ieee80211_local *local)
+static void ieee80211_tpt_led_deactivate(struct led_classdev *led_cdev)
{
- snprintf(local->rx_led_name, sizeof(local->rx_led_name),
- "%srx", wiphy_name(local->hw.wiphy));
- snprintf(local->tx_led_name, sizeof(local->tx_led_name),
- "%stx", wiphy_name(local->hw.wiphy));
- snprintf(local->assoc_led_name, sizeof(local->assoc_led_name),
- "%sassoc", wiphy_name(local->hw.wiphy));
- snprintf(local->radio_led_name, sizeof(local->radio_led_name),
- "%sradio", wiphy_name(local->hw.wiphy));
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ tpt_led);
+
+ atomic_dec(&local->tpt_led_active);
}
void ieee80211_led_init(struct ieee80211_local *local)
{
- local->rx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
- if (local->rx_led) {
- local->rx_led->name = local->rx_led_name;
- if (led_trigger_register(local->rx_led)) {
- kfree(local->rx_led);
- local->rx_led = NULL;
- }
+ atomic_set(&local->rx_led_active, 0);
+ local->rx_led.activate = ieee80211_rx_led_activate;
+ local->rx_led.deactivate = ieee80211_rx_led_deactivate;
+ if (local->rx_led.name && led_trigger_register(&local->rx_led)) {
+ kfree(local->rx_led.name);
+ local->rx_led.name = NULL;
}
- local->tx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
- if (local->tx_led) {
- local->tx_led->name = local->tx_led_name;
- if (led_trigger_register(local->tx_led)) {
- kfree(local->tx_led);
- local->tx_led = NULL;
- }
+ atomic_set(&local->tx_led_active, 0);
+ local->tx_led.activate = ieee80211_tx_led_activate;
+ local->tx_led.deactivate = ieee80211_tx_led_deactivate;
+ if (local->tx_led.name && led_trigger_register(&local->tx_led)) {
+ kfree(local->tx_led.name);
+ local->tx_led.name = NULL;
}
- local->assoc_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
- if (local->assoc_led) {
- local->assoc_led->name = local->assoc_led_name;
- if (led_trigger_register(local->assoc_led)) {
- kfree(local->assoc_led);
- local->assoc_led = NULL;
- }
+ atomic_set(&local->assoc_led_active, 0);
+ local->assoc_led.activate = ieee80211_assoc_led_activate;
+ local->assoc_led.deactivate = ieee80211_assoc_led_deactivate;
+ if (local->assoc_led.name && led_trigger_register(&local->assoc_led)) {
+ kfree(local->assoc_led.name);
+ local->assoc_led.name = NULL;
}
- local->radio_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
- if (local->radio_led) {
- local->radio_led->name = local->radio_led_name;
- if (led_trigger_register(local->radio_led)) {
- kfree(local->radio_led);
- local->radio_led = NULL;
- }
+ atomic_set(&local->radio_led_active, 0);
+ local->radio_led.activate = ieee80211_radio_led_activate;
+ local->radio_led.deactivate = ieee80211_radio_led_deactivate;
+ if (local->radio_led.name && led_trigger_register(&local->radio_led)) {
+ kfree(local->radio_led.name);
+ local->radio_led.name = NULL;
}
+ atomic_set(&local->tpt_led_active, 0);
if (local->tpt_led_trigger) {
- if (led_trigger_register(&local->tpt_led_trigger->trig)) {
+ local->tpt_led.activate = ieee80211_tpt_led_activate;
+ local->tpt_led.deactivate = ieee80211_tpt_led_deactivate;
+ if (led_trigger_register(&local->tpt_led)) {
kfree(local->tpt_led_trigger);
local->tpt_led_trigger = NULL;
}
@@ -110,58 +189,50 @@ void ieee80211_led_init(struct ieee80211_local *local)
void ieee80211_led_exit(struct ieee80211_local *local)
{
- if (local->radio_led) {
- led_trigger_unregister(local->radio_led);
- kfree(local->radio_led);
- }
- if (local->assoc_led) {
- led_trigger_unregister(local->assoc_led);
- kfree(local->assoc_led);
- }
- if (local->tx_led) {
- led_trigger_unregister(local->tx_led);
- kfree(local->tx_led);
- }
- if (local->rx_led) {
- led_trigger_unregister(local->rx_led);
- kfree(local->rx_led);
- }
+ if (local->radio_led.name)
+ led_trigger_unregister(&local->radio_led);
+ if (local->assoc_led.name)
+ led_trigger_unregister(&local->assoc_led);
+ if (local->tx_led.name)
+ led_trigger_unregister(&local->tx_led);
+ if (local->rx_led.name)
+ led_trigger_unregister(&local->rx_led);
if (local->tpt_led_trigger) {
- led_trigger_unregister(&local->tpt_led_trigger->trig);
+ led_trigger_unregister(&local->tpt_led);
kfree(local->tpt_led_trigger);
}
}
-char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw)
+const char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw)
{
struct ieee80211_local *local = hw_to_local(hw);
- return local->radio_led_name;
+ return local->radio_led.name;
}
EXPORT_SYMBOL(__ieee80211_get_radio_led_name);
-char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw)
+const char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw)
{
struct ieee80211_local *local = hw_to_local(hw);
- return local->assoc_led_name;
+ return local->assoc_led.name;
}
EXPORT_SYMBOL(__ieee80211_get_assoc_led_name);
-char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw)
+const char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw)
{
struct ieee80211_local *local = hw_to_local(hw);
- return local->tx_led_name;
+ return local->tx_led.name;
}
EXPORT_SYMBOL(__ieee80211_get_tx_led_name);
-char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw)
+const char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw)
{
struct ieee80211_local *local = hw_to_local(hw);
- return local->rx_led_name;
+ return local->rx_led.name;
}
EXPORT_SYMBOL(__ieee80211_get_rx_led_name);
@@ -205,16 +276,17 @@ static void tpt_trig_timer(unsigned long data)
}
}
- read_lock(&tpt_trig->trig.leddev_list_lock);
- list_for_each_entry(led_cdev, &tpt_trig->trig.led_cdevs, trig_list)
+ read_lock(&local->tpt_led.leddev_list_lock);
+ list_for_each_entry(led_cdev, &local->tpt_led.led_cdevs, trig_list)
led_blink_set(led_cdev, &on, &off);
- read_unlock(&tpt_trig->trig.leddev_list_lock);
+ read_unlock(&local->tpt_led.leddev_list_lock);
}
-char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw,
- unsigned int flags,
- const struct ieee80211_tpt_blink *blink_table,
- unsigned int blink_table_len)
+const char *
+__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw,
+ unsigned int flags,
+ const struct ieee80211_tpt_blink *blink_table,
+ unsigned int blink_table_len)
{
struct ieee80211_local *local = hw_to_local(hw);
struct tpt_led_trigger *tpt_trig;
@@ -229,7 +301,7 @@ char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw,
snprintf(tpt_trig->name, sizeof(tpt_trig->name),
"%stpt", wiphy_name(local->hw.wiphy));
- tpt_trig->trig.name = tpt_trig->name;
+ local->tpt_led.name = tpt_trig->name;
tpt_trig->blink_table = blink_table;
tpt_trig->blink_table_len = blink_table_len;
@@ -269,10 +341,10 @@ static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local)
tpt_trig->running = false;
del_timer_sync(&tpt_trig->timer);
- read_lock(&tpt_trig->trig.leddev_list_lock);
- list_for_each_entry(led_cdev, &tpt_trig->trig.led_cdevs, trig_list)
+ read_lock(&local->tpt_led.leddev_list_lock);
+ list_for_each_entry(led_cdev, &local->tpt_led.led_cdevs, trig_list)
led_set_brightness(led_cdev, LED_OFF);
- read_unlock(&tpt_trig->trig.leddev_list_lock);
+ read_unlock(&local->tpt_led.leddev_list_lock);
}
void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local,
diff --git a/net/mac80211/led.h b/net/mac80211/led.h
index 89f4344f1..a7893a1ac 100644
--- a/net/mac80211/led.h
+++ b/net/mac80211/led.h
@@ -11,25 +11,42 @@
#include <linux/leds.h>
#include "ieee80211_i.h"
+#define MAC80211_BLINK_DELAY 50 /* ms */
+
+static inline void ieee80211_led_rx(struct ieee80211_local *local)
+{
+#ifdef CONFIG_MAC80211_LEDS
+ unsigned long led_delay = MAC80211_BLINK_DELAY;
+
+ if (!atomic_read(&local->rx_led_active))
+ return;
+ led_trigger_blink_oneshot(&local->rx_led, &led_delay, &led_delay, 0);
+#endif
+}
+
+static inline void ieee80211_led_tx(struct ieee80211_local *local)
+{
+#ifdef CONFIG_MAC80211_LEDS
+ unsigned long led_delay = MAC80211_BLINK_DELAY;
+
+ if (!atomic_read(&local->tx_led_active))
+ return;
+ led_trigger_blink_oneshot(&local->tx_led, &led_delay, &led_delay, 0);
+#endif
+}
+
#ifdef CONFIG_MAC80211_LEDS
-void ieee80211_led_rx(struct ieee80211_local *local);
-void ieee80211_led_tx(struct ieee80211_local *local);
void ieee80211_led_assoc(struct ieee80211_local *local,
bool associated);
void ieee80211_led_radio(struct ieee80211_local *local,
bool enabled);
-void ieee80211_led_names(struct ieee80211_local *local);
+void ieee80211_alloc_led_names(struct ieee80211_local *local);
+void ieee80211_free_led_names(struct ieee80211_local *local);
void ieee80211_led_init(struct ieee80211_local *local);
void ieee80211_led_exit(struct ieee80211_local *local);
void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local,
unsigned int types_on, unsigned int types_off);
#else
-static inline void ieee80211_led_rx(struct ieee80211_local *local)
-{
-}
-static inline void ieee80211_led_tx(struct ieee80211_local *local)
-{
-}
static inline void ieee80211_led_assoc(struct ieee80211_local *local,
bool associated)
{
@@ -38,7 +55,10 @@ static inline void ieee80211_led_radio(struct ieee80211_local *local,
bool enabled)
{
}
-static inline void ieee80211_led_names(struct ieee80211_local *local)
+static inline void ieee80211_alloc_led_names(struct ieee80211_local *local)
+{
+}
+static inline void ieee80211_free_led_names(struct ieee80211_local *local)
{
}
static inline void ieee80211_led_init(struct ieee80211_local *local)
@@ -58,7 +78,7 @@ static inline void
ieee80211_tpt_led_trig_tx(struct ieee80211_local *local, __le16 fc, int bytes)
{
#ifdef CONFIG_MAC80211_LEDS
- if (local->tpt_led_trigger && ieee80211_is_data(fc))
+ if (ieee80211_is_data(fc) && atomic_read(&local->tpt_led_active))
local->tpt_led_trigger->tx_bytes += bytes;
#endif
}
@@ -67,7 +87,7 @@ static inline void
ieee80211_tpt_led_trig_rx(struct ieee80211_local *local, __le16 fc, int bytes)
{
#ifdef CONFIG_MAC80211_LEDS
- if (local->tpt_led_trigger && ieee80211_is_data(fc))
+ if (ieee80211_is_data(fc) && atomic_read(&local->tpt_led_active))
local->tpt_led_trigger->rx_bytes += bytes;
#endif
}
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index e86daed83..3c63468b4 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -41,9 +41,6 @@ void ieee80211_configure_filter(struct ieee80211_local *local)
unsigned int changed_flags;
unsigned int new_flags = 0;
- if (atomic_read(&local->iff_promiscs))
- new_flags |= FIF_PROMISC_IN_BSS;
-
if (atomic_read(&local->iff_allmultis))
new_flags |= FIF_ALLMULTI;
@@ -649,7 +646,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
skb_queue_head_init(&local->skb_queue);
skb_queue_head_init(&local->skb_queue_unreliable);
- ieee80211_led_names(local);
+ ieee80211_alloc_led_names(local);
ieee80211_roc_setup(local);
@@ -664,7 +661,7 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
{
bool have_wep = !(IS_ERR(local->wep_tx_tfm) ||
IS_ERR(local->wep_rx_tfm));
- bool have_mfp = local->hw.flags & IEEE80211_HW_MFP_CAPABLE;
+ bool have_mfp = ieee80211_hw_check(&local->hw, MFP_CAPABLE);
int n_suites = 0, r = 0, w = 0;
u32 *suites;
static const u32 cipher_suites[] = {
@@ -684,7 +681,7 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
WLAN_CIPHER_SUITE_BIP_GMAC_256,
};
- if (local->hw.flags & IEEE80211_HW_SW_CRYPTO_CONTROL ||
+ if (ieee80211_hw_check(&local->hw, SW_CRYPTO_CONTROL) ||
local->hw.wiphy->cipher_suites) {
/* If the driver advertises, or doesn't support SW crypto,
* we only need to remove WEP if necessary.
@@ -774,8 +771,13 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
suites[w++] = WLAN_CIPHER_SUITE_BIP_GMAC_256;
}
- for (r = 0; r < local->hw.n_cipher_schemes; r++)
+ for (r = 0; r < local->hw.n_cipher_schemes; r++) {
suites[w++] = cs[r].cipher;
+ if (WARN_ON(cs[r].pn_len > IEEE80211_MAX_PN_LEN)) {
+ kfree(suites);
+ return -EINVAL;
+ }
+ }
}
local->hw.wiphy->cipher_suites = suites;
@@ -795,7 +797,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
netdev_features_t feature_whitelist;
struct cfg80211_chan_def dflt_chandef = {};
- if (hw->flags & IEEE80211_HW_QUEUE_CONTROL &&
+ if (ieee80211_hw_check(hw, QUEUE_CONTROL) &&
(local->hw.offchannel_tx_hw_queue == IEEE80211_INVAL_HW_QUEUE ||
local->hw.offchannel_tx_hw_queue >= local->hw.queues))
return -EINVAL;
@@ -843,7 +845,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
/* Only HW csum features are currently compatible with mac80211 */
feature_whitelist = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
- NETIF_F_HW_CSUM;
+ NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA |
+ NETIF_F_GSO_SOFTWARE;
if (WARN_ON(hw->netdev_features & ~feature_whitelist))
return -EINVAL;
@@ -942,9 +945,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
/* mac80211 supports control port protocol changing */
local->hw.wiphy->flags |= WIPHY_FLAG_CONTROL_PORT_PROTOCOL;
- if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) {
+ if (ieee80211_hw_check(&local->hw, SIGNAL_DBM)) {
local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM;
- } else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) {
+ } else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC)) {
local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC;
if (hw->max_signal <= 0) {
result = -EINVAL;
@@ -998,7 +1001,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
local->hw.wiphy->flags |= WIPHY_FLAG_TDLS_EXTERNAL_SETUP;
/* mac80211 supports eCSA, if the driver supports STA CSA at all */
- if (local->hw.flags & IEEE80211_HW_CHANCTX_STA_CSA)
+ if (ieee80211_hw_check(&local->hw, CHANCTX_STA_CSA))
local->ext_capa[0] |= WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING;
local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM;
@@ -1066,7 +1069,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
/* add one default STA interface if supported */
if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION) &&
- !(hw->flags & IEEE80211_HW_NO_AUTO_VIF)) {
+ !ieee80211_hw_check(hw, NO_AUTO_VIF)) {
result = ieee80211_if_add(local, "wlan%d", NET_NAME_ENUM, NULL,
NL80211_IFTYPE_STATION, NULL);
if (result)
@@ -1212,6 +1215,8 @@ void ieee80211_free_hw(struct ieee80211_hw *hw)
sta_info_stop(local);
+ ieee80211_free_led_names(local);
+
wiphy_free(local->hw.wiphy);
}
EXPORT_SYMBOL(ieee80211_free_hw);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 214e63b84..085edc1d0 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -510,14 +510,14 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt,
- const u8 *preq_elem, u32 metric)
+ const u8 *preq_elem, u32 orig_metric)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
struct mesh_path *mpath = NULL;
const u8 *target_addr, *orig_addr;
const u8 *da;
u8 target_flags, ttl, flags;
- u32 orig_sn, target_sn, lifetime, orig_metric;
+ u32 orig_sn, target_sn, lifetime, target_metric;
bool reply = false;
bool forward = true;
bool root_is_gate;
@@ -528,7 +528,6 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
target_sn = PREQ_IE_TARGET_SN(preq_elem);
orig_sn = PREQ_IE_ORIG_SN(preq_elem);
target_flags = PREQ_IE_TARGET_F(preq_elem);
- orig_metric = metric;
/* Proactive PREQ gate announcements */
flags = PREQ_IE_FLAGS(preq_elem);
root_is_gate = !!(flags & RANN_FLAG_IS_GATE);
@@ -539,7 +538,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
mhwmp_dbg(sdata, "PREQ is for us\n");
forward = false;
reply = true;
- metric = 0;
+ target_metric = 0;
if (time_after(jiffies, ifmsh->last_sn_update +
net_traversal_jiffies(sdata)) ||
time_before(jiffies, ifmsh->last_sn_update)) {
@@ -556,7 +555,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
reply = true;
target_addr = sdata->vif.addr;
target_sn = ++ifmsh->sn;
- metric = 0;
+ target_metric = 0;
ifmsh->last_sn_update = jiffies;
}
if (root_is_gate)
@@ -574,7 +573,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
} else if ((!(target_flags & MP_F_DO)) &&
(mpath->flags & MESH_PATH_ACTIVE)) {
reply = true;
- metric = mpath->metric;
+ target_metric = mpath->metric;
target_sn = mpath->sn;
if (target_flags & MP_F_RF)
target_flags |= MP_F_DO;
@@ -593,7 +592,8 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
mesh_path_sel_frame_tx(MPATH_PREP, 0, orig_addr,
orig_sn, 0, target_addr,
target_sn, mgmt->sa, 0, ttl,
- lifetime, metric, 0, sdata);
+ lifetime, target_metric, 0,
+ sdata);
} else {
ifmsh->mshstats.dropped_frames_ttl++;
}
@@ -619,13 +619,12 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
if (flags & IEEE80211_PREQ_PROACTIVE_PREP_FLAG) {
target_addr = PREQ_IE_TARGET_ADDR(preq_elem);
target_sn = PREQ_IE_TARGET_SN(preq_elem);
- metric = orig_metric;
}
mesh_path_sel_frame_tx(MPATH_PREQ, flags, orig_addr,
orig_sn, target_flags, target_addr,
target_sn, da, hopcount, ttl, lifetime,
- metric, preq_id, sdata);
+ orig_metric, preq_id, sdata);
if (!is_multicast_ether_addr(da))
ifmsh->mshstats.fwded_unicast++;
else
@@ -854,7 +853,7 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
{
struct ieee802_11_elems elems;
size_t baselen;
- u32 last_hop_metric;
+ u32 path_metric;
struct sta_info *sta;
/* need action_code */
@@ -877,21 +876,21 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
if (elems.preq_len != 37)
/* Right now we support just 1 destination and no AE */
return;
- last_hop_metric = hwmp_route_info_get(sdata, mgmt, elems.preq,
- MPATH_PREQ);
- if (last_hop_metric)
+ path_metric = hwmp_route_info_get(sdata, mgmt, elems.preq,
+ MPATH_PREQ);
+ if (path_metric)
hwmp_preq_frame_process(sdata, mgmt, elems.preq,
- last_hop_metric);
+ path_metric);
}
if (elems.prep) {
if (elems.prep_len != 31)
/* Right now we support no AE */
return;
- last_hop_metric = hwmp_route_info_get(sdata, mgmt, elems.prep,
- MPATH_PREP);
- if (last_hop_metric)
+ path_metric = hwmp_route_info_get(sdata, mgmt, elems.prep,
+ MPATH_PREP);
+ if (path_metric)
hwmp_prep_frame_process(sdata, mgmt, elems.prep,
- last_hop_metric);
+ path_metric);
}
if (elems.perr) {
if (elems.perr_len != 15)
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 60d737f14..3b5909941 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -72,10 +72,11 @@ static bool rssi_threshold_check(struct ieee80211_sub_if_data *sdata,
*
* @sta: mesh peer link to restart
*
- * Locking: this function must be called holding sta->lock
+ * Locking: this function must be called holding sta->plink_lock
*/
static inline void mesh_plink_fsm_restart(struct sta_info *sta)
{
+ lockdep_assert_held(&sta->plink_lock);
sta->plink_state = NL80211_PLINK_LISTEN;
sta->llid = sta->plid = sta->reason = 0;
sta->plink_retries = 0;
@@ -105,9 +106,7 @@ static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata)
/* (IEEE 802.11-2012 19.4.5) */
short_slot = true;
goto out;
- } else if (band != IEEE80211_BAND_2GHZ ||
- (band == IEEE80211_BAND_2GHZ &&
- local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE))
+ } else if (band != IEEE80211_BAND_2GHZ)
goto out;
for (i = 0; i < sband->n_bitrates; i++)
@@ -213,13 +212,15 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
* All mesh paths with this peer as next hop will be flushed
* Returns beacon changed flag if the beacon content changed.
*
- * Locking: the caller must hold sta->lock
+ * Locking: the caller must hold sta->plink_lock
*/
static u32 __mesh_plink_deactivate(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
u32 changed = 0;
+ lockdep_assert_held(&sta->plink_lock);
+
if (sta->plink_state == NL80211_PLINK_ESTAB)
changed = mesh_plink_dec_estab_count(sdata);
sta->plink_state = NL80211_PLINK_BLOCKED;
@@ -244,13 +245,13 @@ u32 mesh_plink_deactivate(struct sta_info *sta)
struct ieee80211_sub_if_data *sdata = sta->sdata;
u32 changed;
- spin_lock_bh(&sta->lock);
+ spin_lock_bh(&sta->plink_lock);
changed = __mesh_plink_deactivate(sta);
sta->reason = WLAN_REASON_MESH_PEER_CANCELED;
mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE,
sta->sta.addr, sta->llid, sta->plid,
sta->reason);
- spin_unlock_bh(&sta->lock);
+ spin_unlock_bh(&sta->plink_lock);
return changed;
}
@@ -305,7 +306,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
if (action == WLAN_SP_MESH_PEERING_CONFIRM) {
/* AID */
pos = skb_put(skb, 2);
- put_unaligned_le16(plid, pos + 2);
+ put_unaligned_le16(plid, pos);
}
if (ieee80211_add_srates_ie(sdata, skb, true, band) ||
ieee80211_add_ext_srates_ie(sdata, skb, true, band) ||
@@ -387,12 +388,13 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
sband = local->hw.wiphy->bands[band];
rates = ieee80211_sta_get_rates(sdata, elems, band, &basic_rates);
- spin_lock_bh(&sta->lock);
+ spin_lock_bh(&sta->plink_lock);
sta->last_rx = jiffies;
/* rates and capabilities don't change during peering */
- if (sta->plink_state == NL80211_PLINK_ESTAB)
+ if (sta->plink_state == NL80211_PLINK_ESTAB && sta->processed_beacon)
goto out;
+ sta->processed_beacon = true;
if (sta->sta.supp_rates[band] != rates)
changed |= IEEE80211_RC_SUPP_RATES_CHANGED;
@@ -419,7 +421,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
else
rate_control_rate_update(local, sband, sta, changed);
out:
- spin_unlock_bh(&sta->lock);
+ spin_unlock_bh(&sta->plink_lock);
}
static struct sta_info *
@@ -552,7 +554,7 @@ static void mesh_plink_timer(unsigned long data)
if (sta->sdata->local->quiescing)
return;
- spin_lock_bh(&sta->lock);
+ spin_lock_bh(&sta->plink_lock);
/* If a timer fires just before a state transition on another CPU,
* we may have already extended the timeout and changed state by the
@@ -563,7 +565,7 @@ static void mesh_plink_timer(unsigned long data)
mpl_dbg(sta->sdata,
"Ignoring timer for %pM in state %s (timer adjusted)",
sta->sta.addr, mplstates[sta->plink_state]);
- spin_unlock_bh(&sta->lock);
+ spin_unlock_bh(&sta->plink_lock);
return;
}
@@ -573,7 +575,7 @@ static void mesh_plink_timer(unsigned long data)
mpl_dbg(sta->sdata,
"Ignoring timer for %pM in state %s (timer deleted)",
sta->sta.addr, mplstates[sta->plink_state]);
- spin_unlock_bh(&sta->lock);
+ spin_unlock_bh(&sta->plink_lock);
return;
}
@@ -619,7 +621,7 @@ static void mesh_plink_timer(unsigned long data)
default:
break;
}
- spin_unlock_bh(&sta->lock);
+ spin_unlock_bh(&sta->plink_lock);
if (action)
mesh_plink_frame_tx(sdata, action, sta->sta.addr,
sta->llid, sta->plid, reason);
@@ -674,16 +676,16 @@ u32 mesh_plink_open(struct sta_info *sta)
if (!test_sta_flag(sta, WLAN_STA_AUTH))
return 0;
- spin_lock_bh(&sta->lock);
+ spin_lock_bh(&sta->plink_lock);
sta->llid = mesh_get_new_llid(sdata);
if (sta->plink_state != NL80211_PLINK_LISTEN &&
sta->plink_state != NL80211_PLINK_BLOCKED) {
- spin_unlock_bh(&sta->lock);
+ spin_unlock_bh(&sta->plink_lock);
return 0;
}
sta->plink_state = NL80211_PLINK_OPN_SNT;
mesh_plink_timer_set(sta, sdata->u.mesh.mshcfg.dot11MeshRetryTimeout);
- spin_unlock_bh(&sta->lock);
+ spin_unlock_bh(&sta->plink_lock);
mpl_dbg(sdata,
"Mesh plink: starting establishment with %pM\n",
sta->sta.addr);
@@ -700,10 +702,10 @@ u32 mesh_plink_block(struct sta_info *sta)
{
u32 changed;
- spin_lock_bh(&sta->lock);
+ spin_lock_bh(&sta->plink_lock);
changed = __mesh_plink_deactivate(sta);
sta->plink_state = NL80211_PLINK_BLOCKED;
- spin_unlock_bh(&sta->lock);
+ spin_unlock_bh(&sta->plink_lock);
return changed;
}
@@ -758,7 +760,7 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata,
mpl_dbg(sdata, "peer %pM in state %s got event %s\n", sta->sta.addr,
mplstates[sta->plink_state], mplevents[event]);
- spin_lock_bh(&sta->lock);
+ spin_lock_bh(&sta->plink_lock);
switch (sta->plink_state) {
case NL80211_PLINK_LISTEN:
switch (event) {
@@ -872,7 +874,7 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata,
*/
break;
}
- spin_unlock_bh(&sta->lock);
+ spin_unlock_bh(&sta->plink_lock);
if (action) {
mesh_plink_frame_tx(sdata, action, sta->sta.addr,
sta->llid, sta->plid, sta->reason);
@@ -1120,6 +1122,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
WLAN_SP_MESH_PEERING_CONFIRM) {
baseaddr += 4;
baselen += 4;
+
+ if (baselen > len)
+ return;
}
ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems);
mesh_process_plink_frame(sdata, mgmt, &elems);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 26053bf2f..9b2cc278a 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -118,7 +118,7 @@ void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata)
if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)
return;
- if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
+ if (ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
return;
mod_timer(&sdata->u.mgd.bcn_mon_timer,
@@ -134,7 +134,7 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata)
ifmgd->probe_send_count = 0;
- if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
+ if (ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
return;
mod_timer(&sdata->u.mgd.conn_mon_timer,
@@ -669,17 +669,15 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
capab = WLAN_CAPABILITY_ESS;
if (sband->band == IEEE80211_BAND_2GHZ) {
- if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE))
- capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME;
- if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE))
- capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
+ capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME;
+ capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
}
if (assoc_data->capability & WLAN_CAPABILITY_PRIVACY)
capab |= WLAN_CAPABILITY_PRIVACY;
if ((assoc_data->capability & WLAN_CAPABILITY_SPECTRUM_MGMT) &&
- (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT))
+ ieee80211_hw_check(&local->hw, SPECTRUM_MGMT))
capab |= WLAN_CAPABILITY_SPECTRUM_MGMT;
if (ifmgd->flags & IEEE80211_STA_ENABLE_RRM)
@@ -887,7 +885,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
drv_mgd_prepare_tx(local, sdata);
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
- if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
+ if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS |
IEEE80211_TX_INTFL_MLME_CONN_TX;
ieee80211_tx_skb(sdata, skb);
@@ -929,7 +927,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
IEEE80211_TX_INTFL_OFFCHAN_TX_OK;
- if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
+ if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL)
@@ -1098,6 +1096,24 @@ static void ieee80211_chswitch_timer(unsigned long data)
ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work);
}
+static void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata)
+{
+ struct sta_info *sta;
+ u16 reason = WLAN_REASON_TDLS_TEARDOWN_UNSPECIFIED;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) {
+ if (!sta->sta.tdls || sta->sdata != sdata || !sta->uploaded ||
+ !test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+ continue;
+
+ ieee80211_tdls_oper_request(&sdata->vif, sta->sta.addr,
+ NL80211_TDLS_TEARDOWN, reason,
+ GFP_ATOMIC);
+ }
+ rcu_read_unlock();
+}
+
static void
ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
u64 timestamp, u32 device_timestamp,
@@ -1161,6 +1177,14 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
return;
}
+ /*
+ * Drop all TDLS peers - either we disconnect or move to a different
+ * channel from this point on. There's no telling what our peer will do.
+ * The TDLS WIDER_BW scenario is also problematic, as peers might now
+ * have an incompatible wider chandef.
+ */
+ ieee80211_teardown_tdls_peers(sdata);
+
mutex_lock(&local->mtx);
mutex_lock(&local->chanctx_mtx);
conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
@@ -1174,7 +1198,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
chanctx = container_of(conf, struct ieee80211_chanctx, conf);
if (local->use_chanctx &&
- !(local->hw.flags & IEEE80211_HW_CHANCTX_STA_CSA)) {
+ !ieee80211_hw_check(&local->hw, CHANCTX_STA_CSA)) {
sdata_info(sdata,
"driver doesn't support chan-switch with channel contexts\n");
goto drop_connection;
@@ -1383,15 +1407,15 @@ static void ieee80211_enable_ps(struct ieee80211_local *local,
return;
if (conf->dynamic_ps_timeout > 0 &&
- !(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)) {
+ !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) {
mod_timer(&local->dynamic_ps_timer, jiffies +
msecs_to_jiffies(conf->dynamic_ps_timeout));
} else {
- if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)
+ if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
ieee80211_send_nullfunc(local, sdata, 1);
- if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) &&
- (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS))
+ if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) &&
+ ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
return;
conf->flags |= IEEE80211_CONF_PS;
@@ -1450,7 +1474,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
int count = 0;
int timeout;
- if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) {
+ if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS)) {
local->ps_sdata = NULL;
return;
}
@@ -1596,7 +1620,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
}
- if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) &&
+ if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) &&
!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) {
if (drv_tx_frames_pending(local)) {
mod_timer(&local->dynamic_ps_timer, jiffies +
@@ -1609,8 +1633,8 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
}
}
- if (!((local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) &&
- (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) ||
+ if (!(ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS) &&
+ ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK)) ||
(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) {
ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED;
local->hw.conf.flags |= IEEE80211_CONF_PS;
@@ -2135,7 +2159,7 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata)
ieee80211_recalc_ps(local, -1);
mutex_unlock(&local->iflist_mtx);
- if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
+ if (ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
goto out;
/*
@@ -2233,7 +2257,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
*/
ifmgd->probe_send_count++;
- if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) {
+ if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) {
ifmgd->nullfunc_failed = false;
ieee80211_send_nullfunc(sdata->local, sdata, 0);
} else {
@@ -2495,6 +2519,34 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata,
sdata->u.mgd.auth_data = NULL;
}
+static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
+ bool assoc)
+{
+ struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data;
+
+ sdata_assert_lock(sdata);
+
+ if (!assoc) {
+ /*
+ * we are not associated yet, the only timer that could be
+ * running is the timeout for the association response which
+ * which is not relevant anymore.
+ */
+ del_timer_sync(&sdata->u.mgd.timer);
+ sta_info_destroy_addr(sdata, assoc_data->bss->bssid);
+
+ eth_zero_addr(sdata->u.mgd.bssid);
+ ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
+ sdata->u.mgd.flags = 0;
+ mutex_lock(&sdata->local->mtx);
+ ieee80211_vif_release_channel(sdata);
+ mutex_unlock(&sdata->local->mtx);
+ }
+
+ kfree(assoc_data);
+ sdata->u.mgd.assoc_data = NULL;
+}
+
static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt, size_t len)
{
@@ -2510,7 +2562,7 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
return;
auth_data->expected_transaction = 4;
drv_mgd_prepare_tx(sdata->local, sdata);
- if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
+ if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
IEEE80211_TX_INTFL_MLME_CONN_TX;
ieee80211_send_auth(sdata, 3, auth_data->algorithm, 0,
@@ -2687,28 +2739,42 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt, size_t len)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- const u8 *bssid = NULL;
- u16 reason_code;
+ u16 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code);
sdata_assert_lock(sdata);
if (len < 24 + 2)
return;
- if (!ifmgd->associated ||
- !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid))
- return;
+ if (ifmgd->associated &&
+ ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) {
+ const u8 *bssid = ifmgd->associated->bssid;
- bssid = ifmgd->associated->bssid;
+ sdata_info(sdata, "deauthenticated from %pM (Reason: %u=%s)\n",
+ bssid, reason_code,
+ ieee80211_get_reason_code_string(reason_code));
- reason_code = le16_to_cpu(mgmt->u.deauth.reason_code);
+ ieee80211_set_disassoc(sdata, 0, 0, false, NULL);
- sdata_info(sdata, "deauthenticated from %pM (Reason: %u=%s)\n",
- bssid, reason_code, ieee80211_get_reason_code_string(reason_code));
+ ieee80211_report_disconnect(sdata, (u8 *)mgmt, len, false,
+ reason_code);
+ return;
+ }
- ieee80211_set_disassoc(sdata, 0, 0, false, NULL);
+ if (ifmgd->assoc_data &&
+ ether_addr_equal(mgmt->bssid, ifmgd->assoc_data->bss->bssid)) {
+ const u8 *bssid = ifmgd->assoc_data->bss->bssid;
- ieee80211_report_disconnect(sdata, (u8 *)mgmt, len, false, reason_code);
+ sdata_info(sdata,
+ "deauthenticated from %pM while associating (Reason: %u=%s)\n",
+ bssid, reason_code,
+ ieee80211_get_reason_code_string(reason_code));
+
+ ieee80211_destroy_assoc_data(sdata, false);
+
+ cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
+ return;
+ }
}
@@ -2788,34 +2854,6 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
}
}
-static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
- bool assoc)
-{
- struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data;
-
- sdata_assert_lock(sdata);
-
- if (!assoc) {
- /*
- * we are not associated yet, the only timer that could be
- * running is the timeout for the association response which
- * which is not relevant anymore.
- */
- del_timer_sync(&sdata->u.mgd.timer);
- sta_info_destroy_addr(sdata, assoc_data->bss->bssid);
-
- eth_zero_addr(sdata->u.mgd.bssid);
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
- sdata->u.mgd.flags = 0;
- mutex_lock(&sdata->local->mtx);
- ieee80211_vif_release_channel(sdata);
- mutex_unlock(&sdata->local->mtx);
- }
-
- kfree(assoc_data);
- sdata->u.mgd.assoc_data = NULL;
-}
-
static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
struct cfg80211_bss *cbss,
struct ieee80211_mgmt *mgmt, size_t len)
@@ -3299,7 +3337,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
}
ifmgd->have_beacon = true;
ifmgd->assoc_data->need_beacon = false;
- if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) {
+ if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
sdata->vif.bss_conf.sync_tsf =
le64_to_cpu(mgmt->u.beacon.timestamp);
sdata->vif.bss_conf.sync_device_ts =
@@ -3405,7 +3443,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
len - baselen, false, &elems,
care_about_ies, ncrc);
- if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) {
+ if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK)) {
bool directed_tim = ieee80211_check_tim(elems.tim,
elems.tim_len,
ifmgd->aid);
@@ -3473,7 +3511,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
* the driver will use them. The synchronized view is currently
* guaranteed only in certain callbacks.
*/
- if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) {
+ if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
sdata->vif.bss_conf.sync_tsf =
le64_to_cpu(mgmt->u.beacon.timestamp);
sdata->vif.bss_conf.sync_device_ts =
@@ -3711,7 +3749,7 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata)
auth_data->expected_transaction = trans;
}
- if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
+ if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
IEEE80211_TX_INTFL_MLME_CONN_TX;
@@ -3784,7 +3822,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata)
IEEE80211_ASSOC_MAX_TRIES);
ieee80211_send_assoc(sdata);
- if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) {
+ if (!ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT;
assoc_data->timeout_started = true;
run_again(sdata, assoc_data->timeout);
@@ -3898,7 +3936,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
- if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
+ if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
max_tries = max_nullfunc_tries;
else
max_tries = max_probe_tries;
@@ -3923,7 +3961,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
}
} else if (time_is_after_jiffies(ifmgd->probe_timeout))
run_again(sdata, ifmgd->probe_timeout);
- else if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) {
+ else if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
mlme_dbg(sdata,
"Failed to send nullfunc to AP %pM after %dms, disconnecting\n",
bssid, probe_wait_ms);
@@ -3992,14 +4030,11 @@ static void ieee80211_sta_monitor_work(struct work_struct *work)
static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
{
- u32 flags;
-
if (sdata->vif.type == NL80211_IFTYPE_STATION) {
__ieee80211_stop_poll(sdata);
/* let's probe the connection once */
- flags = sdata->local->hw.flags;
- if (!(flags & IEEE80211_HW_CONNECTION_MONITOR))
+ if (!ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
ieee80211_queue_work(&sdata->local->hw,
&sdata->u.mgd.monitor_work);
/* and do all the other regular work too */
@@ -4307,15 +4342,15 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
}
static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_bss *cbss, bool assoc)
+ struct cfg80211_bss *cbss, bool assoc,
+ bool override)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_bss *bss = (void *)cbss->priv;
struct sta_info *new_sta = NULL;
struct ieee80211_supported_band *sband;
- struct ieee80211_sta_ht_cap sta_ht_cap;
- bool have_sta = false, is_override = false;
+ bool have_sta = false;
int err;
sband = local->hw.wiphy->bands[cbss->channel->band];
@@ -4335,14 +4370,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
return -ENOMEM;
}
- memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
- ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
-
- is_override = (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) !=
- (sband->ht_cap.cap &
- IEEE80211_HT_CAP_SUP_WIDTH_20_40);
-
- if (new_sta || is_override) {
+ if (new_sta || override) {
err = ieee80211_prep_channel(sdata, cbss);
if (err) {
if (new_sta)
@@ -4419,8 +4447,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
sdata->vif.bss_conf.sync_dtim_count = tim_ie[2];
else
sdata->vif.bss_conf.sync_dtim_count = 0;
- } else if (!(local->hw.flags &
- IEEE80211_HW_TIMING_BEACON_ONLY)) {
+ } else if (!ieee80211_hw_check(&sdata->local->hw,
+ TIMING_BEACON_ONLY)) {
ies = rcu_dereference(cbss->proberesp_ies);
/* must be non-NULL since beacon IEs were NULL */
sdata->vif.bss_conf.sync_tsf = ies->tsf;
@@ -4552,7 +4580,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid);
- err = ieee80211_prep_connection(sdata, req->bss, false);
+ err = ieee80211_prep_connection(sdata, req->bss, false, false);
if (err)
goto err_clear;
@@ -4570,6 +4598,9 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
eth_zero_addr(ifmgd->bssid);
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
ifmgd->auth_data = NULL;
+ mutex_lock(&sdata->local->mtx);
+ ieee80211_vif_release_channel(sdata);
+ mutex_unlock(&sdata->local->mtx);
err_free:
kfree(auth_data);
return err;
@@ -4624,6 +4655,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband;
const u8 *ssidie, *ht_ie, *vht_ie;
int i, err;
+ bool override = false;
assoc_data = kzalloc(sizeof(*assoc_data) + req->ie_len, GFP_KERNEL);
if (!assoc_data)
@@ -4728,14 +4760,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
}
}
- if (req->flags & ASSOC_REQ_DISABLE_HT) {
- ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
- ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
- }
-
- if (req->flags & ASSOC_REQ_DISABLE_VHT)
- ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
-
/* Also disable HT if we don't support it or the AP doesn't use WMM */
sband = local->hw.wiphy->bands[req->bss->channel->band];
if (!sband->ht_cap.ht_supported ||
@@ -4802,7 +4826,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
rcu_read_unlock();
if (WARN((sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD) &&
- (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK),
+ ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK),
"U-APSD not supported with HW_PS_NULLFUNC_STACK\n"))
sdata->vif.driver_flags &= ~IEEE80211_VIF_SUPPORTS_UAPSD;
@@ -4847,14 +4871,43 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
ifmgd->dtim_period = 0;
ifmgd->have_beacon = false;
- err = ieee80211_prep_connection(sdata, req->bss, true);
+ /* override HT/VHT configuration only if the AP and we support it */
+ if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
+ struct ieee80211_sta_ht_cap sta_ht_cap;
+
+ if (req->flags & ASSOC_REQ_DISABLE_HT)
+ override = true;
+
+ memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
+ ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
+
+ /* check for 40 MHz disable override */
+ if (!(ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ) &&
+ sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 &&
+ !(sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40))
+ override = true;
+
+ if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
+ req->flags & ASSOC_REQ_DISABLE_VHT)
+ override = true;
+ }
+
+ if (req->flags & ASSOC_REQ_DISABLE_HT) {
+ ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
+ ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+ }
+
+ if (req->flags & ASSOC_REQ_DISABLE_VHT)
+ ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+
+ err = ieee80211_prep_connection(sdata, req->bss, true, override);
if (err)
goto err_clear;
rcu_read_lock();
beacon_ies = rcu_dereference(req->bss->beacon_ies);
- if (sdata->local->hw.flags & IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC &&
+ if (ieee80211_hw_check(&sdata->local->hw, NEED_DTIM_BEFORE_ASSOC) &&
!beacon_ies) {
/*
* Wait up to one beacon interval ...
@@ -4881,7 +4934,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
assoc_data->timeout = jiffies;
assoc_data->timeout_started = true;
- if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) {
+ if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
sdata->vif.bss_conf.sync_tsf = beacon_ies->tsf;
sdata->vif.bss_conf.sync_device_ts =
bss->device_ts_beacon;
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 683f0e3cb..f2c75cf49 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -46,7 +46,7 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata)
}
if (!local->offchannel_ps_enabled ||
- !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK))
+ !ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
/*
* If power save was enabled, no need to send a nullfunc
* frame because AP knows that we are sleeping. But if the
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index ac6ad6238..b676b9fa7 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -23,7 +23,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
ieee80211_del_virtual_monitor(local);
- if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
+ if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) {
mutex_lock(&local->sta_mtx);
list_for_each_entry(sta, &local->sta_list, list) {
set_sta_flag(sta, WLAN_STA_BLOCK_BA);
@@ -76,13 +76,29 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
if (sdata->vif.type != NL80211_IFTYPE_STATION)
continue;
ieee80211_mgd_quiesce(sdata);
+ /* If suspended during TX in progress, and wowlan
+ * is enabled (connection will be active) there
+ * can be a race where the driver is put out
+ * of power-save due to TX and during suspend
+ * dynamic_ps_timer is cancelled and TX packet
+ * is flushed, leaving the driver in ACTIVE even
+ * after resuming until dynamic_ps_timer puts
+ * driver back in DOZE.
+ */
+ if (sdata->u.mgd.associated &&
+ sdata->u.mgd.powersave &&
+ !(local->hw.conf.flags & IEEE80211_CONF_PS)) {
+ local->hw.conf.flags |= IEEE80211_CONF_PS;
+ ieee80211_hw_config(local,
+ IEEE80211_CONF_CHANGE_PS);
+ }
}
err = drv_suspend(local, wowlan);
if (err < 0) {
local->quiescing = false;
local->wowlan = false;
- if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
+ if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) {
mutex_lock(&local->sta_mtx);
list_for_each_entry(sta,
&local->sta_list, list) {
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index d53355b01..fda33f961 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -103,7 +103,7 @@ ieee80211_rate_control_ops_get(const char *name)
const struct rate_control_ops *ops;
const char *alg_name;
- kparam_block_sysfs_write(ieee80211_default_rc_algo);
+ kernel_param_lock(THIS_MODULE);
if (!name)
alg_name = ieee80211_default_rc_algo;
else
@@ -117,7 +117,7 @@ ieee80211_rate_control_ops_get(const char *name)
/* try built-in one if specific alg requested but not found */
if (!ops && strlen(CONFIG_MAC80211_RC_DEFAULT))
ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT);
- kparam_unblock_sysfs_write(ieee80211_default_rc_algo);
+ kernel_param_unlock(THIS_MODULE);
return ops;
}
@@ -680,12 +680,18 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
info->control.rates[i].count = 0;
}
- if (sdata->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)
+ if (ieee80211_hw_check(&sdata->local->hw, HAS_RATE_CONTROL))
return;
- ref->ops->get_rate(ref->priv, ista, priv_sta, txrc);
+ if (ista) {
+ spin_lock_bh(&sta->rate_ctrl_lock);
+ ref->ops->get_rate(ref->priv, ista, priv_sta, txrc);
+ spin_unlock_bh(&sta->rate_ctrl_lock);
+ } else {
+ ref->ops->get_rate(ref->priv, NULL, NULL, txrc);
+ }
- if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_RC_TABLE)
+ if (ieee80211_hw_check(&sdata->local->hw, SUPPORTS_RC_TABLE))
return;
ieee80211_get_tx_rates(&sdata->vif, ista, txrc->skb,
@@ -727,7 +733,7 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
if (local->open_count)
return -EBUSY;
- if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) {
+ if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
if (WARN_ON(!local->ops->set_rts_threshold))
return -EINVAL;
return 0;
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 38652f09f..25c9be5dd 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -42,10 +42,12 @@ static inline void rate_control_tx_status(struct ieee80211_local *local,
if (!ref || !test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
return;
+ spin_lock_bh(&sta->rate_ctrl_lock);
if (ref->ops->tx_status)
ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb);
else
ref->ops->tx_status_noskb(ref->priv, sband, ista, priv_sta, info);
+ spin_unlock_bh(&sta->rate_ctrl_lock);
}
static inline void
@@ -64,7 +66,9 @@ rate_control_tx_status_noskb(struct ieee80211_local *local,
if (WARN_ON_ONCE(!ref->ops->tx_status_noskb))
return;
+ spin_lock_bh(&sta->rate_ctrl_lock);
ref->ops->tx_status_noskb(ref->priv, sband, ista, priv_sta, info);
+ spin_unlock_bh(&sta->rate_ctrl_lock);
}
static inline void rate_control_rate_init(struct sta_info *sta)
@@ -91,8 +95,10 @@ static inline void rate_control_rate_init(struct sta_info *sta)
sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band];
+ spin_lock_bh(&sta->rate_ctrl_lock);
ref->ops->rate_init(ref->priv, sband, &chanctx_conf->def, ista,
priv_sta);
+ spin_unlock_bh(&sta->rate_ctrl_lock);
rcu_read_unlock();
set_sta_flag(sta, WLAN_STA_RATE_CONTROL);
}
@@ -115,18 +121,20 @@ static inline void rate_control_rate_update(struct ieee80211_local *local,
return;
}
+ spin_lock_bh(&sta->rate_ctrl_lock);
ref->ops->rate_update(ref->priv, sband, &chanctx_conf->def,
ista, priv_sta, changed);
+ spin_unlock_bh(&sta->rate_ctrl_lock);
rcu_read_unlock();
}
drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
}
static inline void *rate_control_alloc_sta(struct rate_control_ref *ref,
- struct ieee80211_sta *sta,
- gfp_t gfp)
+ struct sta_info *sta, gfp_t gfp)
{
- return ref->ops->alloc_sta(ref->priv, sta, gfp);
+ spin_lock_init(&sta->rate_ctrl_lock);
+ return ref->ops->alloc_sta(ref->priv, &sta->sta, gfp);
}
static inline void rate_control_free_sta(struct sta_info *sta)
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 247552a7f..3ece7d103 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -92,14 +92,15 @@ int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma)
static inline void
minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list)
{
- int j = MAX_THR_RATES;
- struct minstrel_rate_stats *tmp_mrs = &mi->r[j - 1].stats;
+ int j;
+ struct minstrel_rate_stats *tmp_mrs;
struct minstrel_rate_stats *cur_mrs = &mi->r[i].stats;
- while (j > 0 && (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) >
- minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma))) {
- j--;
+ for (j = MAX_THR_RATES; j > 0; --j) {
tmp_mrs = &mi->r[tp_list[j - 1]].stats;
+ if (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) <=
+ minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma))
+ break;
}
if (j < MAX_THR_RATES - 1)
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 7430a1df2..543b67233 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -1070,7 +1070,7 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
if (sband->band != IEEE80211_BAND_2GHZ)
return;
- if (!(mp->hw->flags & IEEE80211_HW_SUPPORTS_HT_CCK_RATES))
+ if (!ieee80211_hw_check(mp->hw, SUPPORTS_HT_CCK_RATES))
return;
mi->cck_supported = 0;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 5793f75c5..5dae166cb 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -32,6 +32,16 @@
#include "wme.h"
#include "rate.h"
+static inline void ieee80211_rx_stats(struct net_device *dev, u32 len)
+{
+ struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += len;
+ u64_stats_update_end(&tstats->syncp);
+}
+
/*
* monitor mode reception
*
@@ -42,7 +52,7 @@ static struct sk_buff *remove_monitor_info(struct ieee80211_local *local,
struct sk_buff *skb,
unsigned int rtap_vendor_space)
{
- if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) {
+ if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)) {
if (likely(skb->len > FCS_LEN))
__pskb_trim(skb, skb->len - FCS_LEN);
else {
@@ -100,7 +110,7 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
len = ALIGN(len, 8);
len += 8;
}
- if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
+ if (ieee80211_hw_check(&local->hw, SIGNAL_DBM))
len += 1;
/* antenna field, if we don't have per-chain info */
@@ -175,7 +185,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
}
mpdulen = skb->len;
- if (!(has_fcs && (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)))
+ if (!(has_fcs && ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)))
mpdulen += FCS_LEN;
rthdr = (struct ieee80211_radiotap_header *)skb_push(skb, rtap_len);
@@ -229,7 +239,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
}
/* IEEE80211_RADIOTAP_FLAGS */
- if (has_fcs && (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS))
+ if (has_fcs && ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS))
*pos |= IEEE80211_RADIOTAP_F_FCS;
if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC))
*pos |= IEEE80211_RADIOTAP_F_BADFCS;
@@ -279,7 +289,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
pos += 2;
/* IEEE80211_RADIOTAP_DBM_ANTSIGNAL */
- if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM &&
+ if (ieee80211_hw_check(&local->hw, SIGNAL_DBM) &&
!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
*pos = status->signal;
rthdr->it_present |=
@@ -448,7 +458,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
* the SKB because it has a bad FCS/PLCP checksum.
*/
- if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
+ if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS))
present_fcs_len = FCS_LEN;
/* ensure hdr->frame_control and vendor radiotap data are in skb head */
@@ -529,8 +539,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
}
prev_dev = sdata->dev;
- sdata->dev->stats.rx_packets++;
- sdata->dev->stats.rx_bytes += skb->len;
+ ieee80211_rx_stats(sdata->dev, skb->len);
}
if (prev_dev) {
@@ -981,7 +990,6 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
struct sk_buff *skb = rx->skb;
struct ieee80211_local *local = rx->local;
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
- struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct sta_info *sta = rx->sta;
struct tid_ampdu_rx *tid_agg_rx;
u16 sc;
@@ -1016,10 +1024,6 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL)
goto dont_reorder;
- /* not actually part of this BA session */
- if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
- goto dont_reorder;
-
/* new, potentially un-ordered, ampdu frame - process it */
/* reset session timer */
@@ -1073,10 +1077,8 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx)
if (unlikely(ieee80211_has_retry(hdr->frame_control) &&
rx->sta->last_seq_ctrl[rx->seqno_idx] ==
hdr->seq_ctrl)) {
- if (status->rx_flags & IEEE80211_RX_RA_MATCH) {
- rx->local->dot11FrameDuplicateCount++;
- rx->sta->num_duplicates++;
- }
+ I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount);
+ rx->sta->num_duplicates++;
return RX_DROP_UNUSABLE;
} else if (!(status->flag & RX_FLAG_AMSDU_MORE)) {
rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl;
@@ -1195,11 +1197,13 @@ static void sta_ps_start(struct sta_info *sta)
atomic_inc(&ps->num_sta_ps);
set_sta_flag(sta, WLAN_STA_PS_STA);
- if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS))
+ if (!ieee80211_hw_check(&local->hw, AP_LINK_PS))
drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta);
ps_dbg(sdata, "STA %pM aid %d enters power save mode\n",
sta->sta.addr, sta->sta.aid);
+ ieee80211_clear_fast_xmit(sta);
+
if (!sta->sta.txq[0])
return;
@@ -1241,7 +1245,7 @@ int ieee80211_sta_ps_transition(struct ieee80211_sta *sta, bool start)
struct sta_info *sta_inf = container_of(sta, struct sta_info, sta);
bool in_ps;
- WARN_ON(!(sta_inf->local->hw.flags & IEEE80211_HW_AP_LINK_PS));
+ WARN_ON(!ieee80211_hw_check(&sta_inf->local->hw, AP_LINK_PS));
/* Don't let the same PS state be set twice */
in_ps = test_sta_flag(sta_inf, WLAN_STA_PS_STA);
@@ -1265,7 +1269,7 @@ ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx)
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
int tid, ac;
- if (!rx->sta || !(status->rx_flags & IEEE80211_RX_RA_MATCH))
+ if (!rx->sta)
return RX_CONTINUE;
if (sdata->vif.type != NL80211_IFTYPE_AP &&
@@ -1277,7 +1281,7 @@ ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx)
* uAPSD and PS-Poll frames (the latter shouldn't even come up from
* it to mac80211 since they're handled.)
*/
- if (sdata->local->hw.flags & IEEE80211_HW_AP_LINK_PS)
+ if (ieee80211_hw_check(&sdata->local->hw, AP_LINK_PS))
return RX_CONTINUE;
/*
@@ -1367,11 +1371,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
}
}
} else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) {
- u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len,
- NL80211_IFTYPE_OCB);
- /* OCB uses wild-card BSSID */
- if (is_broadcast_ether_addr(bssid))
- sta->last_rx = jiffies;
+ sta->last_rx = jiffies;
} else if (!is_multicast_ether_addr(hdr->addr1)) {
/*
* Mesh beacons will update last_rx when if they are found to
@@ -1386,9 +1386,6 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
}
}
- if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
- return RX_CONTINUE;
-
if (rx->sdata->vif.type == NL80211_IFTYPE_STATION)
ieee80211_sta_rx_notify(rx->sdata, hdr);
@@ -1416,7 +1413,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
* Change STA power saving mode only at the end of a frame
* exchange sequence.
*/
- if (!(sta->local->hw.flags & IEEE80211_HW_AP_LINK_PS) &&
+ if (!ieee80211_hw_check(&sta->local->hw, AP_LINK_PS) &&
!ieee80211_has_morefrags(hdr->frame_control) &&
!(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) &&
(rx->sdata->vif.type == NL80211_IFTYPE_AP ||
@@ -1517,13 +1514,6 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
* possible.
*/
- /*
- * No point in finding a key and decrypting if the frame is neither
- * addressed to us nor a multicast frame.
- */
- if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
- return RX_CONTINUE;
-
/* start without a key */
rx->key = NULL;
fc = hdr->frame_control;
@@ -1795,7 +1785,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
frag = sc & IEEE80211_SCTL_FRAG;
if (is_multicast_ether_addr(hdr->addr1)) {
- rx->local->dot11MulticastReceivedFrameCount++;
+ I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount);
goto out_no_led;
}
@@ -1878,7 +1868,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
rx->skb = __skb_dequeue(&entry->skb_list);
if (skb_tailroom(rx->skb) < entry->extra_len) {
- I802_DEBUG_INC(rx->local->rx_expand_skb_head2);
+ I802_DEBUG_INC(rx->local->rx_expand_skb_head_defrag);
if (unlikely(pskb_expand_head(rx->skb, 0, entry->extra_len,
GFP_ATOMIC))) {
I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag);
@@ -2054,18 +2044,15 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
struct sk_buff *skb, *xmit_skb;
struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
struct sta_info *dsta;
- struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
-
- dev->stats.rx_packets++;
- dev->stats.rx_bytes += rx->skb->len;
skb = rx->skb;
xmit_skb = NULL;
+ ieee80211_rx_stats(dev, skb->len);
+
if ((sdata->vif.type == NL80211_IFTYPE_AP ||
sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
!(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
- (status->rx_flags & IEEE80211_RX_RA_MATCH) &&
(sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) {
if (is_multicast_ether_addr(ehdr->h_dest)) {
/*
@@ -2207,7 +2194,6 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
struct sk_buff *skb = rx->skb, *fwd_skb;
struct ieee80211_local *local = rx->local;
struct ieee80211_sub_if_data *sdata = rx->sdata;
- struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
u16 q, hdrlen;
@@ -2238,8 +2224,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
mesh_rmc_check(rx->sdata, hdr->addr3, mesh_hdr))
return RX_DROP_MONITOR;
- if (!ieee80211_is_data(hdr->frame_control) ||
- !(status->rx_flags & IEEE80211_RX_RA_MATCH))
+ if (!ieee80211_is_data(hdr->frame_control))
return RX_CONTINUE;
if (!mesh_hdr->ttl)
@@ -2330,11 +2315,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_frames);
ieee80211_add_pending_skb(local, fwd_skb);
out:
- if (is_multicast_ether_addr(hdr->addr1) ||
- sdata->dev->flags & IFF_PROMISC)
+ if (is_multicast_ether_addr(hdr->addr1))
return RX_CONTINUE;
- else
- return RX_DROP_MONITOR;
+ return RX_DROP_MONITOR;
}
#endif
@@ -2445,6 +2428,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
struct {
__le16 control, start_seq_num;
} __packed bar_data;
+ struct ieee80211_event event = {
+ .type = BAR_RX_EVENT,
+ };
if (!rx->sta)
return RX_DROP_MONITOR;
@@ -2460,6 +2446,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
return RX_DROP_MONITOR;
start_seq_num = le16_to_cpu(bar_data.start_seq_num) >> 4;
+ event.u.ba.tid = tid;
+ event.u.ba.ssn = start_seq_num;
+ event.u.ba.sta = &rx->sta->sta;
/* reset session timer */
if (tid_agg_rx->timeout)
@@ -2472,6 +2461,8 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
start_seq_num, frames);
spin_unlock(&tid_agg_rx->reorder_lock);
+ drv_event_callback(rx->local, rx->sdata, &event);
+
kfree_skb(skb);
return RX_QUEUED;
}
@@ -2552,7 +2543,7 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
!(rx->flags & IEEE80211_RX_BEACON_REPORTED)) {
int sig = 0;
- if (rx->local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
+ if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM))
sig = status->signal;
cfg80211_report_obss_beacon(rx->local->hw.wiphy,
@@ -2561,9 +2552,6 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
rx->flags |= IEEE80211_RX_BEACON_REPORTED;
}
- if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
- return RX_DROP_MONITOR;
-
if (ieee80211_drop_unencrypted_mgmt(rx))
return RX_DROP_UNUSABLE;
@@ -2591,9 +2579,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT)
return RX_DROP_UNUSABLE;
- if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
- return RX_DROP_UNUSABLE;
-
switch (mgmt->u.action.category) {
case WLAN_CATEGORY_HT:
/* reject HT action frames from stations not supporting HT */
@@ -2889,7 +2874,7 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
* it transmitted were processed or returned.
*/
- if (rx->local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
+ if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM))
sig = status->signal;
if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig,
@@ -2954,7 +2939,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
info->flags = IEEE80211_TX_CTL_TX_OFFCHAN |
IEEE80211_TX_INTFL_OFFCHAN_TX_OK |
IEEE80211_TX_CTL_NO_CCK_RATE;
- if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)
+ if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL))
info->hw_queue =
local->hw.offchannel_tx_hw_queue;
}
@@ -3077,8 +3062,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
}
prev_dev = sdata->dev;
- sdata->dev->stats.rx_packets++;
- sdata->dev->stats.rx_bytes += skb->len;
+ ieee80211_rx_stats(sdata->dev, skb->len);
}
if (prev_dev) {
@@ -3246,16 +3230,25 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
ieee80211_sta_reorder_release(sta->sdata, tid_agg_rx, &frames);
spin_unlock(&tid_agg_rx->reorder_lock);
+ if (!skb_queue_empty(&frames)) {
+ struct ieee80211_event event = {
+ .type = BA_FRAME_TIMEOUT,
+ .u.ba.tid = tid,
+ .u.ba.sta = &sta->sta,
+ };
+ drv_event_callback(rx.local, rx.sdata, &event);
+ }
+
ieee80211_rx_handlers(&rx, &frames);
}
/* main receive path */
-static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
- struct ieee80211_hdr *hdr)
+static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
{
struct ieee80211_sub_if_data *sdata = rx->sdata;
struct sk_buff *skb = rx->skb;
+ struct ieee80211_hdr *hdr = (void *)skb->data;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
u8 *bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type);
int multicast = is_multicast_ether_addr(hdr->addr1);
@@ -3264,30 +3257,23 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
case NL80211_IFTYPE_STATION:
if (!bssid && !sdata->u.mgd.use_4addr)
return false;
- if (!multicast &&
- !ether_addr_equal(sdata->vif.addr, hdr->addr1)) {
- if (!(sdata->dev->flags & IFF_PROMISC) ||
- sdata->u.mgd.use_4addr)
- return false;
- status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
- }
- break;
+ if (multicast)
+ return true;
+ return ether_addr_equal(sdata->vif.addr, hdr->addr1);
case NL80211_IFTYPE_ADHOC:
if (!bssid)
return false;
if (ether_addr_equal(sdata->vif.addr, hdr->addr2) ||
ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2))
return false;
- if (ieee80211_is_beacon(hdr->frame_control)) {
+ if (ieee80211_is_beacon(hdr->frame_control))
return true;
- } else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) {
+ if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid))
return false;
- } else if (!multicast &&
- !ether_addr_equal(sdata->vif.addr, hdr->addr1)) {
- if (!(sdata->dev->flags & IFF_PROMISC))
- return false;
- status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
- } else if (!rx->sta) {
+ if (!multicast &&
+ !ether_addr_equal(sdata->vif.addr, hdr->addr1))
+ return false;
+ if (!rx->sta) {
int rate_idx;
if (status->flag & (RX_FLAG_HT | RX_FLAG_VHT))
rate_idx = 0; /* TODO: HT/VHT rates */
@@ -3296,25 +3282,18 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
ieee80211_ibss_rx_no_sta(sdata, bssid, hdr->addr2,
BIT(rate_idx));
}
- break;
+ return true;
case NL80211_IFTYPE_OCB:
if (!bssid)
return false;
- if (ieee80211_is_beacon(hdr->frame_control)) {
+ if (ieee80211_is_beacon(hdr->frame_control))
return false;
- } else if (!is_broadcast_ether_addr(bssid)) {
- ocb_dbg(sdata, "BSSID mismatch in OCB mode!\n");
+ if (!is_broadcast_ether_addr(bssid))
return false;
- } else if (!multicast &&
- !ether_addr_equal(sdata->dev->dev_addr,
- hdr->addr1)) {
- /* if we are in promisc mode we also accept
- * packets not destined for us
- */
- if (!(sdata->dev->flags & IFF_PROMISC))
- return false;
- rx->flags &= ~IEEE80211_RX_RA_MATCH;
- } else if (!rx->sta) {
+ if (!multicast &&
+ !ether_addr_equal(sdata->dev->dev_addr, hdr->addr1))
+ return false;
+ if (!rx->sta) {
int rate_idx;
if (status->flag & RX_FLAG_HT)
rate_idx = 0; /* TODO: HT rates */
@@ -3323,22 +3302,17 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
ieee80211_ocb_rx_no_sta(sdata, bssid, hdr->addr2,
BIT(rate_idx));
}
- break;
+ return true;
case NL80211_IFTYPE_MESH_POINT:
- if (!multicast &&
- !ether_addr_equal(sdata->vif.addr, hdr->addr1)) {
- if (!(sdata->dev->flags & IFF_PROMISC))
- return false;
-
- status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
- }
- break;
+ if (multicast)
+ return true;
+ return ether_addr_equal(sdata->vif.addr, hdr->addr1);
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_AP:
- if (!bssid) {
- if (!ether_addr_equal(sdata->vif.addr, hdr->addr1))
- return false;
- } else if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) {
+ if (!bssid)
+ return ether_addr_equal(sdata->vif.addr, hdr->addr1);
+
+ if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) {
/*
* Accept public action frames even when the
* BSSID doesn't match, this is used for P2P
@@ -3350,10 +3324,10 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
return false;
if (ieee80211_is_public_action(hdr, skb->len))
return true;
- if (!ieee80211_is_beacon(hdr->frame_control))
- return false;
- status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
- } else if (!ieee80211_has_tods(hdr->frame_control)) {
+ return ieee80211_is_beacon(hdr->frame_control);
+ }
+
+ if (!ieee80211_has_tods(hdr->frame_control)) {
/* ignore data frames to TDLS-peers */
if (ieee80211_is_data(hdr->frame_control))
return false;
@@ -3362,30 +3336,22 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
!ether_addr_equal(bssid, hdr->addr1))
return false;
}
- break;
+ return true;
case NL80211_IFTYPE_WDS:
if (bssid || !ieee80211_is_data(hdr->frame_control))
return false;
- if (!ether_addr_equal(sdata->u.wds.remote_addr, hdr->addr2))
- return false;
- break;
+ return ether_addr_equal(sdata->u.wds.remote_addr, hdr->addr2);
case NL80211_IFTYPE_P2P_DEVICE:
- if (!ieee80211_is_public_action(hdr, skb->len) &&
- !ieee80211_is_probe_req(hdr->frame_control) &&
- !ieee80211_is_probe_resp(hdr->frame_control) &&
- !ieee80211_is_beacon(hdr->frame_control))
- return false;
- if (!ether_addr_equal(sdata->vif.addr, hdr->addr1) &&
- !multicast)
- status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
- break;
+ return ieee80211_is_public_action(hdr, skb->len) ||
+ ieee80211_is_probe_req(hdr->frame_control) ||
+ ieee80211_is_probe_resp(hdr->frame_control) ||
+ ieee80211_is_beacon(hdr->frame_control);
default:
- /* should never get here */
- WARN_ON_ONCE(1);
break;
}
- return true;
+ WARN_ON_ONCE(1);
+ return false;
}
/*
@@ -3399,13 +3365,10 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
{
struct ieee80211_local *local = rx->local;
struct ieee80211_sub_if_data *sdata = rx->sdata;
- struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
- struct ieee80211_hdr *hdr = (void *)skb->data;
rx->skb = skb;
- status->rx_flags |= IEEE80211_RX_RA_MATCH;
- if (!prepare_for_handlers(rx, hdr))
+ if (!ieee80211_accept_frame(rx))
return false;
if (!consume) {
@@ -3448,7 +3411,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
rx.local = local;
if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc))
- local->dot11ReceivedFragmentCount++;
+ I802_DEBUG_INC(local->dot11ReceivedFragmentCount);
if (ieee80211_is_mgmt(fc)) {
/* drop frame if too short for header */
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 7bb6a9383..11d0901eb 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -6,7 +6,7 @@
* Copyright 2005, Devicescape Software, Inc.
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
- * Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright 2013-2015 Intel Mobile Communications GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -69,10 +69,11 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
int clen, srlen;
enum nl80211_bss_scan_width scan_width;
s32 signal = 0;
+ bool signal_valid;
- if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
+ if (ieee80211_hw_check(&local->hw, SIGNAL_DBM))
signal = rx_status->signal * 100;
- else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
+ else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC))
signal = (rx_status->signal * 100) / local->hw.max_signal;
scan_width = NL80211_BSS_CHAN_WIDTH_20;
@@ -86,6 +87,11 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
GFP_ATOMIC);
if (!cbss)
return NULL;
+ /* In case the signal is invalid update the status */
+ signal_valid = abs(channel->center_freq - cbss->channel->center_freq)
+ <= local->hw.wiphy->max_adj_channel_rssi_comp;
+ if (!signal_valid)
+ rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL;
bss = (void *)cbss->priv;
@@ -257,7 +263,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
if (test_bit(SCAN_HW_CANCELLED, &local->scanning))
return false;
- if (local->hw.flags & IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS) {
+ if (ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS)) {
for (i = 0; i < req->n_channels; i++) {
local->hw_scan_req->req.channels[i] = req->channels[i];
bands_used |= BIT(req->channels[i]->band);
@@ -326,7 +332,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
return;
if (hw_scan && !aborted &&
- !(local->hw.flags & IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS) &&
+ !ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS) &&
ieee80211_prep_hw_scan(local)) {
int rc;
@@ -520,7 +526,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
local->hw_scan_ies_bufsize = local->scan_ies_len + req->ie_len;
- if (local->hw.flags & IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS) {
+ if (ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS)) {
int i, n_bands = 0;
u8 bands_counted = 0;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 2880f2ae9..666ddac3c 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -71,6 +71,7 @@ static const struct rhashtable_params sta_rht_params = {
.key_offset = offsetof(struct sta_info, sta.addr),
.key_len = ETH_ALEN,
.hashfn = sta_addr_hash,
+ .max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE,
};
/* Caller must hold local->sta_mtx */
@@ -281,12 +282,12 @@ static void sta_deliver_ps_frames(struct work_struct *wk)
static int sta_prepare_rate_control(struct ieee80211_local *local,
struct sta_info *sta, gfp_t gfp)
{
- if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)
+ if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL))
return 0;
sta->rate_ctrl = local->rate_ctrl;
sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl,
- &sta->sta, gfp);
+ sta, gfp);
if (!sta->rate_ctrl_priv)
return -ENOMEM;
@@ -312,6 +313,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work);
mutex_init(&sta->ampdu_mlme.mtx);
#ifdef CONFIG_MAC80211_MESH
+ spin_lock_init(&sta->plink_lock);
if (ieee80211_vif_is_mesh(&sdata->vif) &&
!sdata->u.mesh.user_mpm)
init_timer(&sta->plink_timer);
@@ -641,7 +643,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending)
}
/* No need to do anything if the driver does all */
- if (local->hw.flags & IEEE80211_HW_AP_LINK_PS)
+ if (ieee80211_hw_check(&local->hw, AP_LINK_PS))
return;
if (sta->dead)
@@ -1146,7 +1148,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
sta->driver_buffered_tids = 0;
sta->txq_buffered_tids = 0;
- if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS))
+ if (!ieee80211_hw_check(&local->hw, AP_LINK_PS))
drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta);
if (sta->sta.txq[0]) {
@@ -1217,6 +1219,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
ps_dbg(sdata,
"STA %pM aid %d sending %d filtered/%d PS frames since STA not sleeping anymore\n",
sta->sta.addr, sta->sta.aid, filtered, buffered);
+
+ ieee80211_check_fast_xmit(sta);
}
static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata,
@@ -1615,6 +1619,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
if (block) {
set_sta_flag(sta, WLAN_STA_PS_DRIVER);
+ ieee80211_clear_fast_xmit(sta);
return;
}
@@ -1632,6 +1637,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
ieee80211_queue_work(hw, &sta->drv_deliver_wk);
} else {
clear_sta_flag(sta, WLAN_STA_PS_DRIVER);
+ ieee80211_check_fast_xmit(sta);
}
}
EXPORT_SYMBOL(ieee80211_sta_block_awake);
@@ -1736,6 +1742,7 @@ int sta_info_move_state(struct sta_info *sta,
!sta->sdata->u.vlan.sta))
atomic_dec(&sta->sdata->bss->num_mcast_sta);
clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
+ ieee80211_clear_fast_xmit(sta);
}
break;
case IEEE80211_STA_AUTHORIZED:
@@ -1745,6 +1752,7 @@ int sta_info_move_state(struct sta_info *sta,
!sta->sdata->u.vlan.sta))
atomic_inc(&sta->sdata->bss->num_mcast_sta);
set_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
+ ieee80211_check_fast_xmit(sta);
}
break;
default:
@@ -1871,8 +1879,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
sinfo->rx_beacon_signal_avg = ieee80211_ave_rssi(&sdata->vif);
}
- if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) ||
- (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) {
+ if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) ||
+ ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) {
if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) {
sinfo->signal = (s8)sta->last_signal;
sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
@@ -1924,7 +1932,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
if (!(tidstats->filled &
BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) &&
- local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) {
+ ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
tidstats->filled |=
BIT(NL80211_TID_STATS_TX_MSDU_RETRIES);
tidstats->tx_msdu_retries = sta->tx_msdu_retries[i];
@@ -1932,7 +1940,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
if (!(tidstats->filled &
BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) &&
- local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) {
+ ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
tidstats->filled |=
BIT(NL80211_TID_STATS_TX_MSDU_FAILED);
tidstats->tx_msdu_failed = sta->tx_msdu_failed[i];
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5c164fb3f..226f8ca47 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -241,6 +241,34 @@ struct sta_ampdu_mlme {
/* Value to indicate no TID reservation */
#define IEEE80211_TID_UNRESERVED 0xff
+#define IEEE80211_FAST_XMIT_MAX_IV 18
+
+/**
+ * struct ieee80211_fast_tx - TX fastpath information
+ * @key: key to use for hw crypto
+ * @hdr: the 802.11 header to put with the frame
+ * @hdr_len: actual 802.11 header length
+ * @sa_offs: offset of the SA
+ * @da_offs: offset of the DA
+ * @pn_offs: offset where to put PN for crypto (or 0 if not needed)
+ * @band: band this will be transmitted on, for tx_info
+ * @rcu_head: RCU head to free this struct
+ *
+ * This struct is small enough so that the common case (maximum crypto
+ * header length of 8 like for CCMP/GCMP) fits into a single 64-byte
+ * cache line.
+ */
+struct ieee80211_fast_tx {
+ struct ieee80211_key *key;
+ u8 hdr_len;
+ u8 sa_offs, da_offs, pn_offs;
+ u8 band;
+ u8 hdr[30 + 2 + IEEE80211_FAST_XMIT_MAX_IV +
+ sizeof(rfc1042_header)];
+
+ struct rcu_head rcu_head;
+};
+
/**
* struct sta_info - STA information
*
@@ -257,6 +285,8 @@ struct sta_ampdu_mlme {
* @gtk: group keys negotiated with this station, if any
* @gtk_idx: last installed group key index
* @rate_ctrl: rate control algorithm reference
+ * @rate_ctrl_lock: spinlock used to protect rate control data
+ * (data inside the algorithm, so serializes calls there)
* @rate_ctrl_priv: rate control private per-STA pointer
* @last_tx_rate: rate used for last transmit, to report to userspace as
* "the" transmit rate
@@ -295,10 +325,10 @@ struct sta_ampdu_mlme {
* @fail_avg: moving percentage of failed MSDUs
* @tx_packets: number of RX/TX MSDUs
* @tx_bytes: number of bytes transmitted to this STA
- * @tx_fragments: number of transmitted MPDUs
* @tid_seq: per-TID sequence numbers for sending to this STA
* @ampdu_mlme: A-MPDU state machine state
* @timer_to_tid: identity mapping to ID timers
+ * @plink_lock: serialize access to plink fields
* @llid: Local link ID
* @plid: Peer link ID
* @reason: Cancel reason on PLINK_HOLDING state
@@ -338,6 +368,9 @@ struct sta_ampdu_mlme {
* using IEEE80211_NUM_TID entry for non-QoS frames
* @rx_msdu: MSDUs received from this station, using IEEE80211_NUM_TID
* entry for non-QoS frames
+ * @fast_tx: TX fastpath information
+ * @processed_beacon: set to true after peer rates and capabilities are
+ * processed
*/
struct sta_info {
/* General information, mostly static */
@@ -352,8 +385,11 @@ struct sta_info {
u8 ptk_idx;
struct rate_control_ref *rate_ctrl;
void *rate_ctrl_priv;
+ spinlock_t rate_ctrl_lock;
spinlock_t lock;
+ struct ieee80211_fast_tx __rcu *fast_tx;
+
struct work_struct drv_deliver_wk;
u16 listen_interval;
@@ -400,7 +436,6 @@ struct sta_info {
unsigned int fail_avg;
/* Updated from TX path only, no locking requirements */
- u32 tx_fragments;
u64 tx_packets[IEEE80211_NUM_ACS];
u64 tx_bytes[IEEE80211_NUM_ACS];
struct ieee80211_tx_rate last_tx_rate;
@@ -422,9 +457,10 @@ struct sta_info {
#ifdef CONFIG_MAC80211_MESH
/*
- * Mesh peer link attributes
+ * Mesh peer link attributes, protected by plink_lock.
* TODO: move to a sub-structure that is referenced with pointer?
*/
+ spinlock_t plink_lock;
u16 llid;
u16 plid;
u16 reason;
@@ -432,12 +468,14 @@ struct sta_info {
enum nl80211_plink_state plink_state;
u32 plink_timeout;
struct timer_list plink_timer;
+
s64 t_offset;
s64 t_offset_setpoint;
/* mesh power save */
enum nl80211_mesh_power_mode local_pm;
enum nl80211_mesh_power_mode peer_pm;
enum nl80211_mesh_power_mode nonpeer_pm;
+ bool processed_beacon;
#endif
#ifdef CONFIG_MAC80211_DEBUGFS
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 005fdbe39..45628f37c 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -181,7 +181,7 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
struct ieee80211_local *local = sta->local;
struct ieee80211_sub_if_data *sdata = sta->sdata;
- if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
+ if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
sta->last_rx = jiffies;
if (ieee80211_is_data_qos(mgmt->frame_control)) {
@@ -414,8 +414,7 @@ static void ieee80211_tdls_td_tx_handle(struct ieee80211_local *local,
if (is_teardown) {
/* This mechanism relies on being able to get ACKs */
- WARN_ON(!(local->hw.flags &
- IEEE80211_HW_REPORTS_TX_ACK_STATUS));
+ WARN_ON(!ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS));
/* Check if peer has ACKed */
if (flags & IEEE80211_TX_STAT_ACK) {
@@ -429,6 +428,74 @@ static void ieee80211_tdls_td_tx_handle(struct ieee80211_local *local,
}
}
+static struct ieee80211_sub_if_data *
+ieee80211_sdata_from_skb(struct ieee80211_local *local, struct sk_buff *skb)
+{
+ struct ieee80211_sub_if_data *sdata;
+
+ if (skb->dev) {
+ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ if (!sdata->dev)
+ continue;
+
+ if (skb->dev == sdata->dev)
+ return sdata;
+ }
+
+ return NULL;
+ }
+
+ return rcu_dereference(local->p2p_sdata);
+}
+
+static void ieee80211_report_ack_skb(struct ieee80211_local *local,
+ struct ieee80211_tx_info *info,
+ bool acked, bool dropped)
+{
+ struct sk_buff *skb;
+ unsigned long flags;
+
+ spin_lock_irqsave(&local->ack_status_lock, flags);
+ skb = idr_find(&local->ack_status_frames, info->ack_frame_id);
+ if (skb)
+ idr_remove(&local->ack_status_frames, info->ack_frame_id);
+ spin_unlock_irqrestore(&local->ack_status_lock, flags);
+
+ if (!skb)
+ return;
+
+ if (dropped) {
+ dev_kfree_skb_any(skb);
+ return;
+ }
+
+ if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) {
+ u64 cookie = IEEE80211_SKB_CB(skb)->ack.cookie;
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_hdr *hdr = (void *)skb->data;
+
+ rcu_read_lock();
+ sdata = ieee80211_sdata_from_skb(local, skb);
+ if (sdata) {
+ if (ieee80211_is_nullfunc(hdr->frame_control) ||
+ ieee80211_is_qos_nullfunc(hdr->frame_control))
+ cfg80211_probe_status(sdata->dev, hdr->addr1,
+ cookie, acked,
+ GFP_ATOMIC);
+ else
+ cfg80211_mgmt_tx_status(&sdata->wdev, cookie,
+ skb->data, skb->len,
+ acked, GFP_ATOMIC);
+ }
+ rcu_read_unlock();
+
+ dev_kfree_skb_any(skb);
+ } else {
+ /* consumes skb */
+ skb_complete_wifi_ack(skb, acked);
+ }
+}
+
static void ieee80211_report_used_skb(struct ieee80211_local *local,
struct sk_buff *skb, bool dropped)
{
@@ -439,28 +506,12 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
if (dropped)
acked = false;
- if (info->flags & (IEEE80211_TX_INTFL_NL80211_FRAME_TX |
- IEEE80211_TX_INTFL_MLME_CONN_TX)) {
- struct ieee80211_sub_if_data *sdata = NULL;
- struct ieee80211_sub_if_data *iter_sdata;
- u64 cookie = (unsigned long)skb;
+ if (info->flags & IEEE80211_TX_INTFL_MLME_CONN_TX) {
+ struct ieee80211_sub_if_data *sdata;
rcu_read_lock();
- if (skb->dev) {
- list_for_each_entry_rcu(iter_sdata, &local->interfaces,
- list) {
- if (!iter_sdata->dev)
- continue;
-
- if (skb->dev == iter_sdata->dev) {
- sdata = iter_sdata;
- break;
- }
- }
- } else {
- sdata = rcu_dereference(local->p2p_sdata);
- }
+ sdata = ieee80211_sdata_from_skb(local, skb);
if (!sdata) {
skb->dev = NULL;
@@ -478,38 +529,14 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
ieee80211_mgd_conn_tx_status(sdata,
hdr->frame_control,
acked);
- } else if (ieee80211_is_nullfunc(hdr->frame_control) ||
- ieee80211_is_qos_nullfunc(hdr->frame_control)) {
- cfg80211_probe_status(sdata->dev, hdr->addr1,
- cookie, acked, GFP_ATOMIC);
} else {
- cfg80211_mgmt_tx_status(&sdata->wdev, cookie, skb->data,
- skb->len, acked, GFP_ATOMIC);
+ /* we assign ack frame ID for the others */
+ WARN_ON(1);
}
rcu_read_unlock();
- }
-
- if (unlikely(info->ack_frame_id)) {
- struct sk_buff *ack_skb;
- unsigned long flags;
-
- spin_lock_irqsave(&local->ack_status_lock, flags);
- ack_skb = idr_find(&local->ack_status_frames,
- info->ack_frame_id);
- if (ack_skb)
- idr_remove(&local->ack_status_frames,
- info->ack_frame_id);
- spin_unlock_irqrestore(&local->ack_status_lock, flags);
-
- if (ack_skb) {
- if (!dropped) {
- /* consumes ack_skb */
- skb_complete_wifi_ack(ack_skb, acked);
- } else {
- dev_kfree_skb_any(ack_skb);
- }
- }
+ } else if (info->ack_frame_id) {
+ ieee80211_report_ack_skb(local, info, acked, dropped);
}
}
@@ -631,15 +658,15 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw,
}
if (acked || noack_success) {
- local->dot11TransmittedFrameCount++;
- if (!pubsta)
- local->dot11MulticastTransmittedFrameCount++;
- if (retry_count > 0)
- local->dot11RetryCount++;
- if (retry_count > 1)
- local->dot11MultipleRetryCount++;
+ I802_DEBUG_INC(local->dot11TransmittedFrameCount);
+ if (!pubsta)
+ I802_DEBUG_INC(local->dot11MulticastTransmittedFrameCount);
+ if (retry_count > 0)
+ I802_DEBUG_INC(local->dot11RetryCount);
+ if (retry_count > 1)
+ I802_DEBUG_INC(local->dot11MultipleRetryCount);
} else {
- local->dot11FailedCount++;
+ I802_DEBUG_INC(local->dot11FailedCount);
}
}
EXPORT_SYMBOL(ieee80211_tx_status_noskb);
@@ -703,7 +730,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
ieee80211_get_qos_ctl(hdr),
sta, true, acked);
- if ((local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) &&
+ if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) &&
(ieee80211_is_data(hdr->frame_control)) &&
(rates_idx != -1))
sta->last_tx_rate = info->status.rates[rates_idx];
@@ -770,11 +797,11 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
ieee80211_frame_acked(sta, skb);
if ((sta->sdata->vif.type == NL80211_IFTYPE_STATION) &&
- (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS))
+ ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
ieee80211_sta_tx_notify(sta->sdata, (void *) skb->data,
acked, info->status.tx_time);
- if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) {
+ if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
if (info->flags & IEEE80211_TX_STAT_ACK) {
if (sta->lost_packets)
sta->lost_packets = 0;
@@ -802,13 +829,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
if ((info->flags & IEEE80211_TX_STAT_ACK) ||
(info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED)) {
if (ieee80211_is_first_frag(hdr->seq_ctrl)) {
- local->dot11TransmittedFrameCount++;
+ I802_DEBUG_INC(local->dot11TransmittedFrameCount);
if (is_multicast_ether_addr(ieee80211_get_DA(hdr)))
- local->dot11MulticastTransmittedFrameCount++;
+ I802_DEBUG_INC(local->dot11MulticastTransmittedFrameCount);
if (retry_count > 0)
- local->dot11RetryCount++;
+ I802_DEBUG_INC(local->dot11RetryCount);
if (retry_count > 1)
- local->dot11MultipleRetryCount++;
+ I802_DEBUG_INC(local->dot11MultipleRetryCount);
}
/* This counter shall be incremented for an acknowledged MPDU
@@ -818,14 +845,14 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
if (!is_multicast_ether_addr(hdr->addr1) ||
ieee80211_is_data(fc) ||
ieee80211_is_mgmt(fc))
- local->dot11TransmittedFragmentCount++;
+ I802_DEBUG_INC(local->dot11TransmittedFragmentCount);
} else {
if (ieee80211_is_first_frag(hdr->seq_ctrl))
- local->dot11FailedCount++;
+ I802_DEBUG_INC(local->dot11FailedCount);
}
if (ieee80211_is_nullfunc(fc) && ieee80211_has_pm(fc) &&
- (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) &&
+ ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS) &&
!(info->flags & IEEE80211_TX_CTL_INJECTED) &&
local->ps_sdata && !(local->scanning)) {
if (info->flags & IEEE80211_TX_STAT_ACK) {
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index fff0d864a..8db6e2994 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -60,6 +60,7 @@ ieee80211_tdls_add_subband(struct ieee80211_sub_if_data *sdata,
struct ieee80211_channel *ch;
struct cfg80211_chan_def chandef;
int i, subband_start;
+ struct wiphy *wiphy = sdata->local->hw.wiphy;
for (i = start; i <= end; i += spacing) {
if (!ch_cnt)
@@ -70,9 +71,8 @@ ieee80211_tdls_add_subband(struct ieee80211_sub_if_data *sdata,
/* we will be active on the channel */
cfg80211_chandef_create(&chandef, ch,
NL80211_CHAN_NO_HT);
- if (cfg80211_reg_can_beacon(sdata->local->hw.wiphy,
- &chandef,
- sdata->wdev.iftype)) {
+ if (cfg80211_reg_can_beacon_relax(wiphy, &chandef,
+ sdata->wdev.iftype)) {
ch_cnt++;
/*
* check if the next channel is also part of
@@ -167,23 +167,16 @@ static void ieee80211_tdls_add_bss_coex_ie(struct sk_buff *skb)
static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata,
u16 status_code)
{
- struct ieee80211_local *local = sdata->local;
- u16 capab;
-
/* The capability will be 0 when sending a failure code */
if (status_code != 0)
return 0;
- capab = 0;
- if (ieee80211_get_sdata_band(sdata) != IEEE80211_BAND_2GHZ)
- return capab;
-
- if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE))
- capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME;
- if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE))
- capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
+ if (ieee80211_get_sdata_band(sdata) == IEEE80211_BAND_2GHZ) {
+ return WLAN_CAPABILITY_SHORT_SLOT_TIME |
+ WLAN_CAPABILITY_SHORT_PREAMBLE;
+ }
- return capab;
+ return 0;
}
static void ieee80211_tdls_add_link_ie(struct ieee80211_sub_if_data *sdata,
@@ -527,30 +520,19 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
/* if HT support is only added in TDLS, we need an HT-operation IE */
if (!ap_sta->sta.ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) {
- struct ieee80211_chanctx_conf *chanctx_conf =
- rcu_dereference(sdata->vif.chanctx_conf);
- if (!WARN_ON(!chanctx_conf)) {
- pos = skb_put(skb, 2 +
- sizeof(struct ieee80211_ht_operation));
- /* send an empty HT operation IE */
- ieee80211_ie_build_ht_oper(pos, &sta->sta.ht_cap,
- &chanctx_conf->def, 0);
- }
+ pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation));
+ /* send an empty HT operation IE */
+ ieee80211_ie_build_ht_oper(pos, &sta->sta.ht_cap,
+ &sdata->vif.bss_conf.chandef, 0);
}
ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator);
/* only include VHT-operation if not on the 2.4GHz band */
- if (band != IEEE80211_BAND_2GHZ && !ap_sta->sta.vht_cap.vht_supported &&
- sta->sta.vht_cap.vht_supported) {
- struct ieee80211_chanctx_conf *chanctx_conf =
- rcu_dereference(sdata->vif.chanctx_conf);
- if (!WARN_ON(!chanctx_conf)) {
- pos = skb_put(skb, 2 +
- sizeof(struct ieee80211_vht_operation));
- ieee80211_ie_build_vht_oper(pos, &sta->sta.vht_cap,
- &chanctx_conf->def);
- }
+ if (band != IEEE80211_BAND_2GHZ && sta->sta.vht_cap.vht_supported) {
+ pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_operation));
+ ieee80211_ie_build_vht_oper(pos, &sta->sta.vht_cap,
+ &sdata->vif.bss_conf.chandef);
}
rcu_read_unlock();
@@ -953,7 +935,7 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev,
* packet through the AP.
*/
if ((action_code == WLAN_TDLS_TEARDOWN) &&
- (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) {
+ ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) {
bool try_resend; /* Should we keep skb for possible resend */
/* If not sending directly to peer - no point in keeping skb */
@@ -1194,6 +1176,12 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
switch (oper) {
case NL80211_TDLS_ENABLE_LINK:
+ if (sdata->vif.csa_active) {
+ tdls_dbg(sdata, "TDLS: disallow link during CSA\n");
+ ret = -EBUSY;
+ break;
+ }
+
rcu_read_lock();
sta = sta_info_get(sdata, peer);
if (!sta) {
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 4c2e76902..6f14591d8 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -69,6 +69,17 @@
#define CHANCTX_PR_ARG CHANDEF_PR_ARG, MIN_CHANDEF_PR_ARG, \
__entry->rx_chains_static, __entry->rx_chains_dynamic
+#define KEY_ENTRY __field(u32, cipher) \
+ __field(u8, hw_key_idx) \
+ __field(u8, flags) \
+ __field(s8, keyidx)
+#define KEY_ASSIGN(k) __entry->cipher = (k)->cipher; \
+ __entry->flags = (k)->flags; \
+ __entry->keyidx = (k)->keyidx; \
+ __entry->hw_key_idx = (k)->hw_key_idx;
+#define KEY_PR_FMT " cipher:0x%x, flags=%#x, keyidx=%d, hw_key_idx=%d"
+#define KEY_PR_ARG __entry->cipher, __entry->flags, __entry->keyidx, __entry->hw_key_idx
+
/*
@@ -522,25 +533,19 @@ TRACE_EVENT(drv_set_key,
LOCAL_ENTRY
VIF_ENTRY
STA_ENTRY
- __field(u32, cipher)
- __field(u8, hw_key_idx)
- __field(u8, flags)
- __field(s8, keyidx)
+ KEY_ENTRY
),
TP_fast_assign(
LOCAL_ASSIGN;
VIF_ASSIGN;
STA_ASSIGN;
- __entry->cipher = key->cipher;
- __entry->flags = key->flags;
- __entry->keyidx = key->keyidx;
- __entry->hw_key_idx = key->hw_key_idx;
+ KEY_ASSIGN(key);
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT,
- LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG
+ LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT KEY_PR_FMT,
+ LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, KEY_PR_ARG
)
);
@@ -656,28 +661,25 @@ TRACE_EVENT(drv_get_stats,
)
);
-TRACE_EVENT(drv_get_tkip_seq,
+TRACE_EVENT(drv_get_key_seq,
TP_PROTO(struct ieee80211_local *local,
- u8 hw_key_idx, u32 *iv32, u16 *iv16),
+ struct ieee80211_key_conf *key),
- TP_ARGS(local, hw_key_idx, iv32, iv16),
+ TP_ARGS(local, key),
TP_STRUCT__entry(
LOCAL_ENTRY
- __field(u8, hw_key_idx)
- __field(u32, iv32)
- __field(u16, iv16)
+ KEY_ENTRY
),
TP_fast_assign(
LOCAL_ASSIGN;
- __entry->hw_key_idx = hw_key_idx;
- __entry->iv32 = *iv32;
- __entry->iv16 = *iv16;
+ KEY_ASSIGN(key);
),
TP_printk(
- LOCAL_PR_FMT, LOCAL_PR_ARG
+ LOCAL_PR_FMT KEY_PR_FMT,
+ LOCAL_PR_ARG, KEY_PR_ARG
)
);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 667111ee6..b8233505b 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -37,6 +37,16 @@
/* misc utils */
+static inline void ieee80211_tx_stats(struct net_device *dev, u32 len)
+{
+ struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->tx_packets++;
+ tstats->tx_bytes += len;
+ u64_stats_update_end(&tstats->syncp);
+}
+
static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
struct sk_buff *skb, int group_addr,
int next_frag_len)
@@ -201,11 +211,11 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx)
struct ieee80211_if_managed *ifmgd;
/* driver doesn't support power save */
- if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS))
+ if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS))
return TX_CONTINUE;
/* hardware does dynamic power save */
- if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)
+ if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
return TX_CONTINUE;
/* dynamic power save disabled */
@@ -421,7 +431,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
if (ieee80211_is_probe_req(hdr->frame_control))
return TX_CONTINUE;
- if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)
+ if (ieee80211_hw_check(&tx->local->hw, QUEUE_CONTROL))
info->hw_queue = tx->sdata->vif.cab_queue;
/* no stations in PS mode */
@@ -431,7 +441,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM;
/* device releases frame after DTIM beacon */
- if (!(tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING))
+ if (!ieee80211_hw_check(&tx->local->hw, HOST_BROADCAST_PS_BUFFERING))
return TX_CONTINUE;
/* buffered in mac80211 */
@@ -987,7 +997,6 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
skb_queue_walk(&tx->skbs, skb) {
ac = skb_get_queue_mapping(skb);
- tx->sta->tx_fragments++;
tx->sta->tx_bytes[ac] += skb->len;
}
if (ac >= 0)
@@ -1108,7 +1117,9 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
queued = true;
info->control.vif = &tx->sdata->vif;
info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
- info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS;
+ info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS |
+ IEEE80211_TX_CTL_NO_PS_BUFFER |
+ IEEE80211_TX_STATUS_EOSP;
__skb_queue_tail(&tid_tx->pending, skb);
if (skb_queue_len(&tid_tx->pending) > STA_MAX_TX_BUFFER)
purge_skb = __skb_dequeue(&tid_tx->pending);
@@ -1176,8 +1187,8 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) &&
!ieee80211_is_qos_nullfunc(hdr->frame_control) &&
- (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) &&
- !(local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW)) {
+ ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION) &&
+ !ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW)) {
struct tid_ampdu_tx *tid_tx;
qc = ieee80211_get_qos_ctl(hdr);
@@ -1420,7 +1431,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
vif = &sdata->vif;
info->hw_queue =
vif->hw_queue[skb_get_queue_mapping(skb)];
- } else if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) {
+ } else if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) {
dev_kfree_skb(skb);
return true;
} else
@@ -1466,7 +1477,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
CALL_TXH(ieee80211_tx_h_ps_buf);
CALL_TXH(ieee80211_tx_h_check_control_port_protocol);
CALL_TXH(ieee80211_tx_h_select_key);
- if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL))
+ if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL))
CALL_TXH(ieee80211_tx_h_rate_ctrl);
if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) {
@@ -1481,7 +1492,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
/* handlers after fragment must be aware of tx info fragmentation! */
CALL_TXH(ieee80211_tx_h_stats);
CALL_TXH(ieee80211_tx_h_encrypt);
- if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL))
+ if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL))
CALL_TXH(ieee80211_tx_h_calculate_duration);
#undef CALL_TXH
@@ -1571,7 +1582,7 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
/* set up hw_queue value early */
if (!(info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) ||
- !(local->hw.flags & IEEE80211_HW_QUEUE_CONTROL))
+ !ieee80211_hw_check(&local->hw, QUEUE_CONTROL))
info->hw_queue =
sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
@@ -1598,9 +1609,9 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,
}
if (skb_cloned(skb) &&
- (!(local->hw.flags & IEEE80211_HW_SUPPORTS_CLONED_SKBS) ||
+ (!ieee80211_hw_check(&local->hw, SUPPORTS_CLONED_SKBS) ||
!skb_clone_writable(skb, ETH_HLEN) ||
- sdata->crypto_tx_tailroom_needed_cnt))
+ (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt)))
I802_DEBUG_INC(local->tx_expand_skb_head_cloned);
else if (head_need || tail_need)
I802_DEBUG_INC(local->tx_expand_skb_head);
@@ -2387,12 +2398,455 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
return ERR_PTR(ret);
}
+/*
+ * fast-xmit overview
+ *
+ * The core idea of this fast-xmit is to remove per-packet checks by checking
+ * them out of band. ieee80211_check_fast_xmit() implements the out-of-band
+ * checks that are needed to get the sta->fast_tx pointer assigned, after which
+ * much less work can be done per packet. For example, fragmentation must be
+ * disabled or the fast_tx pointer will not be set. All the conditions are seen
+ * in the code here.
+ *
+ * Once assigned, the fast_tx data structure also caches the per-packet 802.11
+ * header and other data to aid packet processing in ieee80211_xmit_fast().
+ *
+ * The most difficult part of this is that when any of these assumptions
+ * change, an external trigger (i.e. a call to ieee80211_clear_fast_xmit(),
+ * ieee80211_check_fast_xmit() or friends) is required to reset the data,
+ * since the per-packet code no longer checks the conditions. This is reflected
+ * by the calls to these functions throughout the rest of the code, and must be
+ * maintained if any of the TX path checks change.
+ */
+
+void ieee80211_check_fast_xmit(struct sta_info *sta)
+{
+ struct ieee80211_fast_tx build = {}, *fast_tx = NULL, *old;
+ struct ieee80211_local *local = sta->local;
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct ieee80211_hdr *hdr = (void *)build.hdr;
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ __le16 fc;
+
+ if (!ieee80211_hw_check(&local->hw, SUPPORT_FAST_XMIT))
+ return;
+
+ /* Locking here protects both the pointer itself, and against concurrent
+ * invocations winning data access races to, e.g., the key pointer that
+ * is used.
+ * Without it, the invocation of this function right after the key
+ * pointer changes wouldn't be sufficient, as another CPU could access
+ * the pointer, then stall, and then do the cache update after the CPU
+ * that invalidated the key.
+ * With the locking, such scenarios cannot happen as the check for the
+ * key and the fast-tx assignment are done atomically, so the CPU that
+ * modifies the key will either wait or other one will see the key
+ * cleared/changed already.
+ */
+ spin_lock_bh(&sta->lock);
+ if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) &&
+ !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS) &&
+ sdata->vif.type == NL80211_IFTYPE_STATION)
+ goto out;
+
+ if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+ goto out;
+
+ if (test_sta_flag(sta, WLAN_STA_PS_STA) ||
+ test_sta_flag(sta, WLAN_STA_PS_DRIVER) ||
+ test_sta_flag(sta, WLAN_STA_PS_DELIVER))
+ goto out;
+
+ if (sdata->noack_map)
+ goto out;
+
+ /* fast-xmit doesn't handle fragmentation at all */
+ if (local->hw.wiphy->frag_threshold != (u32)-1 &&
+ !local->ops->set_frag_threshold)
+ goto out;
+
+ rcu_read_lock();
+ chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ if (!chanctx_conf) {
+ rcu_read_unlock();
+ goto out;
+ }
+ build.band = chanctx_conf->def.chan->band;
+ rcu_read_unlock();
+
+ fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);
+
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_ADHOC:
+ /* DA SA BSSID */
+ build.da_offs = offsetof(struct ieee80211_hdr, addr1);
+ build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
+ memcpy(hdr->addr3, sdata->u.ibss.bssid, ETH_ALEN);
+ build.hdr_len = 24;
+ break;
+ case NL80211_IFTYPE_STATION:
+ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+ /* DA SA BSSID */
+ build.da_offs = offsetof(struct ieee80211_hdr, addr1);
+ build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
+ memcpy(hdr->addr3, sdata->u.mgd.bssid, ETH_ALEN);
+ build.hdr_len = 24;
+ break;
+ }
+
+ if (sdata->u.mgd.use_4addr) {
+ /* non-regular ethertype cannot use the fastpath */
+ fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS |
+ IEEE80211_FCTL_TODS);
+ /* RA TA DA SA */
+ memcpy(hdr->addr1, sdata->u.mgd.bssid, ETH_ALEN);
+ memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+ build.da_offs = offsetof(struct ieee80211_hdr, addr3);
+ build.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+ build.hdr_len = 30;
+ break;
+ }
+ fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
+ /* BSSID SA DA */
+ memcpy(hdr->addr1, sdata->u.mgd.bssid, ETH_ALEN);
+ build.da_offs = offsetof(struct ieee80211_hdr, addr3);
+ build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
+ build.hdr_len = 24;
+ break;
+ case NL80211_IFTYPE_AP_VLAN:
+ if (sdata->wdev.use_4addr) {
+ fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS |
+ IEEE80211_FCTL_TODS);
+ /* RA TA DA SA */
+ memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN);
+ memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+ build.da_offs = offsetof(struct ieee80211_hdr, addr3);
+ build.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+ build.hdr_len = 30;
+ break;
+ }
+ /* fall through */
+ case NL80211_IFTYPE_AP:
+ fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
+ /* DA BSSID SA */
+ build.da_offs = offsetof(struct ieee80211_hdr, addr1);
+ memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+ build.sa_offs = offsetof(struct ieee80211_hdr, addr3);
+ build.hdr_len = 24;
+ break;
+ default:
+ /* not handled on fast-xmit */
+ goto out;
+ }
+
+ if (sta->sta.wme) {
+ build.hdr_len += 2;
+ fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
+ }
+
+ /* We store the key here so there's no point in using rcu_dereference()
+ * but that's fine because the code that changes the pointers will call
+ * this function after doing so. For a single CPU that would be enough,
+ * for multiple see the comment above.
+ */
+ build.key = rcu_access_pointer(sta->ptk[sta->ptk_idx]);
+ if (!build.key)
+ build.key = rcu_access_pointer(sdata->default_unicast_key);
+ if (build.key) {
+ bool gen_iv, iv_spc, mmic;
+
+ gen_iv = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV;
+ iv_spc = build.key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE;
+ mmic = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC;
+
+ /* don't handle software crypto */
+ if (!(build.key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
+ goto out;
+
+ switch (build.key->conf.cipher) {
+ case WLAN_CIPHER_SUITE_CCMP:
+ case WLAN_CIPHER_SUITE_CCMP_256:
+ /* add fixed key ID */
+ if (gen_iv) {
+ (build.hdr + build.hdr_len)[3] =
+ 0x20 | (build.key->conf.keyidx << 6);
+ build.pn_offs = build.hdr_len;
+ }
+ if (gen_iv || iv_spc)
+ build.hdr_len += IEEE80211_CCMP_HDR_LEN;
+ break;
+ case WLAN_CIPHER_SUITE_GCMP:
+ case WLAN_CIPHER_SUITE_GCMP_256:
+ /* add fixed key ID */
+ if (gen_iv) {
+ (build.hdr + build.hdr_len)[3] =
+ 0x20 | (build.key->conf.keyidx << 6);
+ build.pn_offs = build.hdr_len;
+ }
+ if (gen_iv || iv_spc)
+ build.hdr_len += IEEE80211_GCMP_HDR_LEN;
+ break;
+ case WLAN_CIPHER_SUITE_TKIP:
+ /* cannot handle MMIC or IV generation in xmit-fast */
+ if (mmic || gen_iv)
+ goto out;
+ if (iv_spc)
+ build.hdr_len += IEEE80211_TKIP_IV_LEN;
+ break;
+ case WLAN_CIPHER_SUITE_WEP40:
+ case WLAN_CIPHER_SUITE_WEP104:
+ /* cannot handle IV generation in fast-xmit */
+ if (gen_iv)
+ goto out;
+ if (iv_spc)
+ build.hdr_len += IEEE80211_WEP_IV_LEN;
+ break;
+ case WLAN_CIPHER_SUITE_AES_CMAC:
+ case WLAN_CIPHER_SUITE_BIP_CMAC_256:
+ case WLAN_CIPHER_SUITE_BIP_GMAC_128:
+ case WLAN_CIPHER_SUITE_BIP_GMAC_256:
+ WARN(1,
+ "management cipher suite 0x%x enabled for data\n",
+ build.key->conf.cipher);
+ goto out;
+ default:
+ /* we don't know how to generate IVs for this at all */
+ if (WARN_ON(gen_iv))
+ goto out;
+ /* pure hardware keys are OK, of course */
+ if (!(build.key->flags & KEY_FLAG_CIPHER_SCHEME))
+ break;
+ /* cipher scheme might require space allocation */
+ if (iv_spc &&
+ build.key->conf.iv_len > IEEE80211_FAST_XMIT_MAX_IV)
+ goto out;
+ if (iv_spc)
+ build.hdr_len += build.key->conf.iv_len;
+ }
+
+ fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
+ }
+
+ hdr->frame_control = fc;
+
+ memcpy(build.hdr + build.hdr_len,
+ rfc1042_header, sizeof(rfc1042_header));
+ build.hdr_len += sizeof(rfc1042_header);
+
+ fast_tx = kmemdup(&build, sizeof(build), GFP_ATOMIC);
+ /* if the kmemdup fails, continue w/o fast_tx */
+ if (!fast_tx)
+ goto out;
+
+ out:
+ /* we might have raced against another call to this function */
+ old = rcu_dereference_protected(sta->fast_tx,
+ lockdep_is_held(&sta->lock));
+ rcu_assign_pointer(sta->fast_tx, fast_tx);
+ if (old)
+ kfree_rcu(old, rcu_head);
+ spin_unlock_bh(&sta->lock);
+}
+
+void ieee80211_check_fast_xmit_all(struct ieee80211_local *local)
+{
+ struct sta_info *sta;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(sta, &local->sta_list, list)
+ ieee80211_check_fast_xmit(sta);
+ rcu_read_unlock();
+}
+
+void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct sta_info *sta;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ if (sdata != sta->sdata &&
+ (!sta->sdata->bss || sta->sdata->bss != sdata->bss))
+ continue;
+ ieee80211_check_fast_xmit(sta);
+ }
+
+ rcu_read_unlock();
+}
+
+void ieee80211_clear_fast_xmit(struct sta_info *sta)
+{
+ struct ieee80211_fast_tx *fast_tx;
+
+ spin_lock_bh(&sta->lock);
+ fast_tx = rcu_dereference_protected(sta->fast_tx,
+ lockdep_is_held(&sta->lock));
+ RCU_INIT_POINTER(sta->fast_tx, NULL);
+ spin_unlock_bh(&sta->lock);
+
+ if (fast_tx)
+ kfree_rcu(fast_tx, rcu_head);
+}
+
+static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
+ struct net_device *dev, struct sta_info *sta,
+ struct ieee80211_fast_tx *fast_tx,
+ struct sk_buff *skb)
+{
+ struct ieee80211_local *local = sdata->local;
+ u16 ethertype = (skb->data[12] << 8) | skb->data[13];
+ int extra_head = fast_tx->hdr_len - (ETH_HLEN - 2);
+ int hw_headroom = sdata->local->hw.extra_tx_headroom;
+ struct ethhdr eth;
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_hdr *hdr = (void *)fast_tx->hdr;
+ struct ieee80211_tx_data tx;
+ ieee80211_tx_result r;
+ struct tid_ampdu_tx *tid_tx = NULL;
+ u8 tid = IEEE80211_NUM_TIDS;
+
+ /* control port protocol needs a lot of special handling */
+ if (cpu_to_be16(ethertype) == sdata->control_port_protocol)
+ return false;
+
+ /* only RFC 1042 SNAP */
+ if (ethertype < ETH_P_802_3_MIN)
+ return false;
+
+ /* don't handle TX status request here either */
+ if (skb->sk && skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)
+ return false;
+
+ if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
+ tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
+ tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
+ if (tid_tx) {
+ if (!test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state))
+ return false;
+ if (tid_tx->timeout)
+ tid_tx->last_tx = jiffies;
+ }
+ }
+
+ /* after this point (skb is modified) we cannot return false */
+
+ if (skb_shared(skb)) {
+ struct sk_buff *tmp_skb = skb;
+
+ skb = skb_clone(skb, GFP_ATOMIC);
+ kfree_skb(tmp_skb);
+
+ if (!skb)
+ return true;
+ }
+
+ ieee80211_tx_stats(dev, skb->len + extra_head);
+
+ /* will not be crypto-handled beyond what we do here, so use false
+ * as the may-encrypt argument for the resize to not account for
+ * more room than we already have in 'extra_head'
+ */
+ if (unlikely(ieee80211_skb_resize(sdata, skb,
+ max_t(int, extra_head + hw_headroom -
+ skb_headroom(skb), 0),
+ false))) {
+ kfree_skb(skb);
+ return true;
+ }
+
+ memcpy(&eth, skb->data, ETH_HLEN - 2);
+ hdr = (void *)skb_push(skb, extra_head);
+ memcpy(skb->data, fast_tx->hdr, fast_tx->hdr_len);
+ memcpy(skb->data + fast_tx->da_offs, eth.h_dest, ETH_ALEN);
+ memcpy(skb->data + fast_tx->sa_offs, eth.h_source, ETH_ALEN);
+
+ memset(info, 0, sizeof(*info));
+ info->band = fast_tx->band;
+ info->control.vif = &sdata->vif;
+ info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT |
+ IEEE80211_TX_CTL_DONTFRAG |
+ (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0);
+
+ if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
+ *ieee80211_get_qos_ctl(hdr) = tid;
+ hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
+ } else {
+ info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
+ hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number);
+ sdata->sequence_number += 0x10;
+ }
+
+ sta->tx_msdu[tid]++;
+
+ info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
+
+ __skb_queue_head_init(&tx.skbs);
+
+ tx.flags = IEEE80211_TX_UNICAST;
+ tx.local = local;
+ tx.sdata = sdata;
+ tx.sta = sta;
+ tx.key = fast_tx->key;
+
+ if (fast_tx->key)
+ info->control.hw_key = &fast_tx->key->conf;
+
+ if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
+ tx.skb = skb;
+ r = ieee80211_tx_h_rate_ctrl(&tx);
+ skb = tx.skb;
+ tx.skb = NULL;
+
+ if (r != TX_CONTINUE) {
+ if (r != TX_QUEUED)
+ kfree_skb(skb);
+ return true;
+ }
+ }
+
+ /* statistics normally done by ieee80211_tx_h_stats (but that
+ * has to consider fragmentation, so is more complex)
+ */
+ sta->tx_bytes[skb_get_queue_mapping(skb)] += skb->len;
+ sta->tx_packets[skb_get_queue_mapping(skb)]++;
+
+ if (fast_tx->pn_offs) {
+ u64 pn;
+ u8 *crypto_hdr = skb->data + fast_tx->pn_offs;
+
+ switch (fast_tx->key->conf.cipher) {
+ case WLAN_CIPHER_SUITE_CCMP:
+ case WLAN_CIPHER_SUITE_CCMP_256:
+ case WLAN_CIPHER_SUITE_GCMP:
+ case WLAN_CIPHER_SUITE_GCMP_256:
+ pn = atomic64_inc_return(&fast_tx->key->conf.tx_pn);
+ crypto_hdr[0] = pn;
+ crypto_hdr[1] = pn >> 8;
+ crypto_hdr[4] = pn >> 16;
+ crypto_hdr[5] = pn >> 24;
+ crypto_hdr[6] = pn >> 32;
+ crypto_hdr[7] = pn >> 40;
+ break;
+ }
+ }
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ sdata = container_of(sdata->bss,
+ struct ieee80211_sub_if_data, u.ap);
+
+ __skb_queue_tail(&tx.skbs, skb);
+ ieee80211_tx_frags(local, &sdata->vif, &sta->sta, &tx.skbs, false);
+ return true;
+}
+
void __ieee80211_subif_start_xmit(struct sk_buff *skb,
struct net_device *dev,
u32 info_flags)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct sta_info *sta;
+ struct sk_buff *next;
if (unlikely(skb->len < ETH_HLEN)) {
kfree_skb(skb);
@@ -2401,20 +2855,67 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
rcu_read_lock();
- if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) {
- kfree_skb(skb);
- goto out;
+ if (ieee80211_lookup_ra_sta(sdata, skb, &sta))
+ goto out_free;
+
+ if (!IS_ERR_OR_NULL(sta)) {
+ struct ieee80211_fast_tx *fast_tx;
+
+ fast_tx = rcu_dereference(sta->fast_tx);
+
+ if (fast_tx &&
+ ieee80211_xmit_fast(sdata, dev, sta, fast_tx, skb))
+ goto out;
}
- skb = ieee80211_build_hdr(sdata, skb, info_flags, sta);
- if (IS_ERR(skb))
- goto out;
+ if (skb_is_gso(skb)) {
+ struct sk_buff *segs;
+
+ segs = skb_gso_segment(skb, 0);
+ if (IS_ERR(segs)) {
+ goto out_free;
+ } else if (segs) {
+ consume_skb(skb);
+ skb = segs;
+ }
+ } else {
+ /* we cannot process non-linear frames on this path */
+ if (skb_linearize(skb)) {
+ kfree_skb(skb);
+ goto out;
+ }
+
+ /* the frame could be fragmented, software-encrypted, and other
+ * things so we cannot really handle checksum offload with it -
+ * fix it up in software before we handle anything else.
+ */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ skb_set_transport_header(skb,
+ skb_checksum_start_offset(skb));
+ if (skb_checksum_help(skb))
+ goto out_free;
+ }
+ }
+
+ next = skb;
+ while (next) {
+ skb = next;
+ next = skb->next;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
- dev->trans_start = jiffies;
+ skb->prev = NULL;
+ skb->next = NULL;
+
+ skb = ieee80211_build_hdr(sdata, skb, info_flags, sta);
+ if (IS_ERR(skb))
+ goto out;
- ieee80211_xmit(sdata, sta, skb);
+ ieee80211_tx_stats(dev, skb->len);
+
+ ieee80211_xmit(sdata, sta, skb);
+ }
+ goto out;
+ out_free:
+ kfree_skb(skb);
out:
rcu_read_unlock();
}
@@ -3308,7 +3809,7 @@ int ieee80211_reserve_tid(struct ieee80211_sta *pubsta, u8 tid)
synchronize_net();
/* Tear down BA sessions so we stop aggregating on this TID */
- if (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) {
+ if (ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION)) {
set_sta_flag(sta, WLAN_STA_BLOCK_BA);
__ieee80211_stop_tx_ba_session(sta, tid,
AGG_STOP_LOCAL_REQUEST);
@@ -3322,7 +3823,7 @@ int ieee80211_reserve_tid(struct ieee80211_sta *pubsta, u8 tid)
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_RESERVE_TID);
- if (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)
+ if (ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION))
clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
ret = 0;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index b864ebc6a..43e5aadd7 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -564,7 +564,7 @@ ieee80211_get_vif_queues(struct ieee80211_local *local,
{
unsigned int queues;
- if (sdata && local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) {
+ if (sdata && ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) {
int ac;
queues = 0;
@@ -592,7 +592,7 @@ void __ieee80211_flush_queues(struct ieee80211_local *local,
* If no queue was set, or if the HW doesn't support
* IEEE80211_HW_QUEUE_CONTROL - flush all queues
*/
- if (!queues || !(local->hw.flags & IEEE80211_HW_QUEUE_CONTROL))
+ if (!queues || !ieee80211_hw_check(&local->hw, QUEUE_CONTROL))
queues = ieee80211_get_vif_queues(local, sdata);
ieee80211_stop_queues_by_reason(&local->hw, queues,
@@ -2046,7 +2046,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
* about the sessions, but we and the AP still think they
* are active. This is really a workaround though.
*/
- if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
+ if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) {
mutex_lock(&local->sta_mtx);
list_for_each_entry(sta, &local->sta_list, list) {
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 9d63d93c8..943f76065 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -444,7 +444,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb,
hdr = (struct ieee80211_hdr *) pos;
pos += hdrlen;
- pn64 = atomic64_inc_return(&key->u.ccmp.tx_pn);
+ pn64 = atomic64_inc_return(&key->conf.tx_pn);
pn[5] = pn64;
pn[4] = pn64 >> 8;
@@ -670,7 +670,7 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
hdr = (struct ieee80211_hdr *)pos;
pos += hdrlen;
- pn64 = atomic64_inc_return(&key->u.gcmp.tx_pn);
+ pn64 = atomic64_inc_return(&key->conf.tx_pn);
pn[5] = pn64;
pn[4] = pn64 >> 8;
@@ -940,7 +940,7 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx)
mmie->key_id = cpu_to_le16(key->conf.keyidx);
/* PN = PN + 1 */
- pn64 = atomic64_inc_return(&key->u.aes_cmac.tx_pn);
+ pn64 = atomic64_inc_return(&key->conf.tx_pn);
bip_ipn_set64(mmie->sequence_number, pn64);
@@ -984,7 +984,7 @@ ieee80211_crypto_aes_cmac_256_encrypt(struct ieee80211_tx_data *tx)
mmie->key_id = cpu_to_le16(key->conf.keyidx);
/* PN = PN + 1 */
- pn64 = atomic64_inc_return(&key->u.aes_cmac.tx_pn);
+ pn64 = atomic64_inc_return(&key->conf.tx_pn);
bip_ipn_set64(mmie->sequence_number, pn64);
@@ -1129,7 +1129,7 @@ ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx)
mmie->key_id = cpu_to_le16(key->conf.keyidx);
/* PN = PN + 1 */
- pn64 = atomic64_inc_return(&key->u.aes_gmac.tx_pn);
+ pn64 = atomic64_inc_return(&key->conf.tx_pn);
bip_ipn_set64(mmie->sequence_number, pn64);
diff --git a/net/mac802154/Kconfig b/net/mac802154/Kconfig
index aa462b480..fb45287eb 100644
--- a/net/mac802154/Kconfig
+++ b/net/mac802154/Kconfig
@@ -2,6 +2,7 @@ config MAC802154
tristate "Generic IEEE 802.15.4 Soft Networking Stack (mac802154)"
depends on IEEE802154
select CRC_CCITT
+ select CRYPTO
select CRYPTO_AUTHENC
select CRYPTO_CCM
select CRYPTO_CTR
diff --git a/net/mac802154/Makefile b/net/mac802154/Makefile
index 702d8b466..17a51e838 100644
--- a/net/mac802154/Makefile
+++ b/net/mac802154/Makefile
@@ -1,5 +1,7 @@
obj-$(CONFIG_MAC802154) += mac802154.o
mac802154-objs := main.o rx.o tx.o mac_cmd.o mib.o \
- iface.o llsec.o util.o cfg.o
+ iface.o llsec.o util.o cfg.o trace.o
+
+CFLAGS_trace.o := -I$(src)
ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c
index 70be9c799..317c4662e 100644
--- a/net/mac802154/cfg.c
+++ b/net/mac802154/cfg.c
@@ -73,9 +73,9 @@ ieee802154_set_channel(struct wpan_phy *wpan_phy, u8 page, u8 channel)
ASSERT_RTNL();
- /* check if phy support this setting */
- if (!(wpan_phy->channels_supported[page] & BIT(channel)))
- return -EINVAL;
+ if (wpan_phy->current_page == page &&
+ wpan_phy->current_channel == channel)
+ return 0;
ret = drv_set_channel(local, page, channel);
if (!ret) {
@@ -95,9 +95,8 @@ ieee802154_set_cca_mode(struct wpan_phy *wpan_phy,
ASSERT_RTNL();
- /* check if phy support this setting */
- if (!(local->hw.flags & IEEE802154_HW_CCA_MODE))
- return -EOPNOTSUPP;
+ if (wpan_phy_cca_cmp(&wpan_phy->cca, cca))
+ return 0;
ret = drv_set_cca_mode(local, cca);
if (!ret)
@@ -107,20 +106,49 @@ ieee802154_set_cca_mode(struct wpan_phy *wpan_phy,
}
static int
+ieee802154_set_cca_ed_level(struct wpan_phy *wpan_phy, s32 ed_level)
+{
+ struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
+ int ret;
+
+ ASSERT_RTNL();
+
+ if (wpan_phy->cca_ed_level == ed_level)
+ return 0;
+
+ ret = drv_set_cca_ed_level(local, ed_level);
+ if (!ret)
+ wpan_phy->cca_ed_level = ed_level;
+
+ return ret;
+}
+
+static int
+ieee802154_set_tx_power(struct wpan_phy *wpan_phy, s32 power)
+{
+ struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
+ int ret;
+
+ ASSERT_RTNL();
+
+ if (wpan_phy->transmit_power == power)
+ return 0;
+
+ ret = drv_set_tx_power(local, power);
+ if (!ret)
+ wpan_phy->transmit_power = power;
+
+ return ret;
+}
+
+static int
ieee802154_set_pan_id(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
__le16 pan_id)
{
ASSERT_RTNL();
- /* TODO
- * I am not sure about to check here on broadcast pan_id.
- * Broadcast is a valid setting, comment from 802.15.4:
- * If this value is 0xffff, the device is not associated.
- *
- * This could useful to simple deassociate an device.
- */
- if (pan_id == cpu_to_le16(IEEE802154_PAN_ID_BROADCAST))
- return -EINVAL;
+ if (wpan_dev->pan_id == pan_id)
+ return 0;
wpan_dev->pan_id = pan_id;
return 0;
@@ -131,12 +159,11 @@ ieee802154_set_backoff_exponent(struct wpan_phy *wpan_phy,
struct wpan_dev *wpan_dev,
u8 min_be, u8 max_be)
{
- struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
-
ASSERT_RTNL();
- if (!(local->hw.flags & IEEE802154_HW_CSMA_PARAMS))
- return -EOPNOTSUPP;
+ if (wpan_dev->min_be == min_be &&
+ wpan_dev->max_be == max_be)
+ return 0;
wpan_dev->min_be = min_be;
wpan_dev->max_be = max_be;
@@ -149,20 +176,8 @@ ieee802154_set_short_addr(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
{
ASSERT_RTNL();
- /* TODO
- * I am not sure about to check here on broadcast short_addr.
- * Broadcast is a valid setting, comment from 802.15.4:
- * A value of 0xfffe indicates that the device has
- * associated but has not been allocated an address. A
- * value of 0xffff indicates that the device does not
- * have a short address.
- *
- * I think we should allow to set these settings but
- * don't allow to allow socket communication with it.
- */
- if (short_addr == cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC) ||
- short_addr == cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST))
- return -EINVAL;
+ if (wpan_dev->short_addr == short_addr)
+ return 0;
wpan_dev->short_addr = short_addr;
return 0;
@@ -173,12 +188,10 @@ ieee802154_set_max_csma_backoffs(struct wpan_phy *wpan_phy,
struct wpan_dev *wpan_dev,
u8 max_csma_backoffs)
{
- struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
-
ASSERT_RTNL();
- if (!(local->hw.flags & IEEE802154_HW_CSMA_PARAMS))
- return -EOPNOTSUPP;
+ if (wpan_dev->csma_retries == max_csma_backoffs)
+ return 0;
wpan_dev->csma_retries = max_csma_backoffs;
return 0;
@@ -189,12 +202,10 @@ ieee802154_set_max_frame_retries(struct wpan_phy *wpan_phy,
struct wpan_dev *wpan_dev,
s8 max_frame_retries)
{
- struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
-
ASSERT_RTNL();
- if (!(local->hw.flags & IEEE802154_HW_FRAME_RETRIES))
- return -EOPNOTSUPP;
+ if (wpan_dev->frame_retries == max_frame_retries)
+ return 0;
wpan_dev->frame_retries = max_frame_retries;
return 0;
@@ -204,12 +215,10 @@ static int
ieee802154_set_lbt_mode(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
bool mode)
{
- struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
-
ASSERT_RTNL();
- if (!(local->hw.flags & IEEE802154_HW_LBT))
- return -EOPNOTSUPP;
+ if (wpan_dev->lbt == mode)
+ return 0;
wpan_dev->lbt = mode;
return 0;
@@ -222,6 +231,8 @@ const struct cfg802154_ops mac802154_config_ops = {
.del_virtual_intf = ieee802154_del_iface,
.set_channel = ieee802154_set_channel,
.set_cca_mode = ieee802154_set_cca_mode,
+ .set_cca_ed_level = ieee802154_set_cca_ed_level,
+ .set_tx_power = ieee802154_set_tx_power,
.set_pan_id = ieee802154_set_pan_id,
.set_short_addr = ieee802154_set_short_addr,
.set_backoff_exponent = ieee802154_set_backoff_exponent,
diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h
index a0533357b..0550f3365 100644
--- a/net/mac802154/driver-ops.h
+++ b/net/mac802154/driver-ops.h
@@ -7,6 +7,7 @@
#include <net/mac802154.h>
#include "ieee802154_i.h"
+#include "trace.h"
static inline int
drv_xmit_async(struct ieee802154_local *local, struct sk_buff *skb)
@@ -27,19 +28,25 @@ drv_xmit_sync(struct ieee802154_local *local, struct sk_buff *skb)
static inline int drv_start(struct ieee802154_local *local)
{
+ int ret;
+
might_sleep();
+ trace_802154_drv_start(local);
local->started = true;
smp_mb();
-
- return local->ops->start(&local->hw);
+ ret = local->ops->start(&local->hw);
+ trace_802154_drv_return_int(local, ret);
+ return ret;
}
static inline void drv_stop(struct ieee802154_local *local)
{
might_sleep();
+ trace_802154_drv_stop(local);
local->ops->stop(&local->hw);
+ trace_802154_drv_return_void(local);
/* sync away all work on the tasklet before clearing started */
tasklet_disable(&local->tasklet);
@@ -53,13 +60,20 @@ static inline void drv_stop(struct ieee802154_local *local)
static inline int
drv_set_channel(struct ieee802154_local *local, u8 page, u8 channel)
{
+ int ret;
+
might_sleep();
- return local->ops->set_channel(&local->hw, page, channel);
+ trace_802154_drv_set_channel(local, page, channel);
+ ret = local->ops->set_channel(&local->hw, page, channel);
+ trace_802154_drv_return_int(local, ret);
+ return ret;
}
-static inline int drv_set_tx_power(struct ieee802154_local *local, s8 dbm)
+static inline int drv_set_tx_power(struct ieee802154_local *local, s32 mbm)
{
+ int ret;
+
might_sleep();
if (!local->ops->set_txpower) {
@@ -67,12 +81,17 @@ static inline int drv_set_tx_power(struct ieee802154_local *local, s8 dbm)
return -EOPNOTSUPP;
}
- return local->ops->set_txpower(&local->hw, dbm);
+ trace_802154_drv_set_tx_power(local, mbm);
+ ret = local->ops->set_txpower(&local->hw, mbm);
+ trace_802154_drv_return_int(local, ret);
+ return ret;
}
static inline int drv_set_cca_mode(struct ieee802154_local *local,
const struct wpan_phy_cca *cca)
{
+ int ret;
+
might_sleep();
if (!local->ops->set_cca_mode) {
@@ -80,11 +99,16 @@ static inline int drv_set_cca_mode(struct ieee802154_local *local,
return -EOPNOTSUPP;
}
- return local->ops->set_cca_mode(&local->hw, cca);
+ trace_802154_drv_set_cca_mode(local, cca);
+ ret = local->ops->set_cca_mode(&local->hw, cca);
+ trace_802154_drv_return_int(local, ret);
+ return ret;
}
static inline int drv_set_lbt_mode(struct ieee802154_local *local, bool mode)
{
+ int ret;
+
might_sleep();
if (!local->ops->set_lbt) {
@@ -92,12 +116,17 @@ static inline int drv_set_lbt_mode(struct ieee802154_local *local, bool mode)
return -EOPNOTSUPP;
}
- return local->ops->set_lbt(&local->hw, mode);
+ trace_802154_drv_set_lbt_mode(local, mode);
+ ret = local->ops->set_lbt(&local->hw, mode);
+ trace_802154_drv_return_int(local, ret);
+ return ret;
}
static inline int
-drv_set_cca_ed_level(struct ieee802154_local *local, s32 ed_level)
+drv_set_cca_ed_level(struct ieee802154_local *local, s32 mbm)
{
+ int ret;
+
might_sleep();
if (!local->ops->set_cca_ed_level) {
@@ -105,12 +134,16 @@ drv_set_cca_ed_level(struct ieee802154_local *local, s32 ed_level)
return -EOPNOTSUPP;
}
- return local->ops->set_cca_ed_level(&local->hw, ed_level);
+ trace_802154_drv_set_cca_ed_level(local, mbm);
+ ret = local->ops->set_cca_ed_level(&local->hw, mbm);
+ trace_802154_drv_return_int(local, ret);
+ return ret;
}
static inline int drv_set_pan_id(struct ieee802154_local *local, __le16 pan_id)
{
struct ieee802154_hw_addr_filt filt;
+ int ret;
might_sleep();
@@ -121,14 +154,18 @@ static inline int drv_set_pan_id(struct ieee802154_local *local, __le16 pan_id)
filt.pan_id = pan_id;
- return local->ops->set_hw_addr_filt(&local->hw, &filt,
+ trace_802154_drv_set_pan_id(local, pan_id);
+ ret = local->ops->set_hw_addr_filt(&local->hw, &filt,
IEEE802154_AFILT_PANID_CHANGED);
+ trace_802154_drv_return_int(local, ret);
+ return ret;
}
static inline int
drv_set_extended_addr(struct ieee802154_local *local, __le64 extended_addr)
{
struct ieee802154_hw_addr_filt filt;
+ int ret;
might_sleep();
@@ -139,14 +176,18 @@ drv_set_extended_addr(struct ieee802154_local *local, __le64 extended_addr)
filt.ieee_addr = extended_addr;
- return local->ops->set_hw_addr_filt(&local->hw, &filt,
+ trace_802154_drv_set_extended_addr(local, extended_addr);
+ ret = local->ops->set_hw_addr_filt(&local->hw, &filt,
IEEE802154_AFILT_IEEEADDR_CHANGED);
+ trace_802154_drv_return_int(local, ret);
+ return ret;
}
static inline int
drv_set_short_addr(struct ieee802154_local *local, __le16 short_addr)
{
struct ieee802154_hw_addr_filt filt;
+ int ret;
might_sleep();
@@ -157,14 +198,18 @@ drv_set_short_addr(struct ieee802154_local *local, __le16 short_addr)
filt.short_addr = short_addr;
- return local->ops->set_hw_addr_filt(&local->hw, &filt,
+ trace_802154_drv_set_short_addr(local, short_addr);
+ ret = local->ops->set_hw_addr_filt(&local->hw, &filt,
IEEE802154_AFILT_SADDR_CHANGED);
+ trace_802154_drv_return_int(local, ret);
+ return ret;
}
static inline int
drv_set_pan_coord(struct ieee802154_local *local, bool is_coord)
{
struct ieee802154_hw_addr_filt filt;
+ int ret;
might_sleep();
@@ -175,14 +220,19 @@ drv_set_pan_coord(struct ieee802154_local *local, bool is_coord)
filt.pan_coord = is_coord;
- return local->ops->set_hw_addr_filt(&local->hw, &filt,
+ trace_802154_drv_set_pan_coord(local, is_coord);
+ ret = local->ops->set_hw_addr_filt(&local->hw, &filt,
IEEE802154_AFILT_PANC_CHANGED);
+ trace_802154_drv_return_int(local, ret);
+ return ret;
}
static inline int
drv_set_csma_params(struct ieee802154_local *local, u8 min_be, u8 max_be,
u8 max_csma_backoffs)
{
+ int ret;
+
might_sleep();
if (!local->ops->set_csma_params) {
@@ -190,13 +240,19 @@ drv_set_csma_params(struct ieee802154_local *local, u8 min_be, u8 max_be,
return -EOPNOTSUPP;
}
- return local->ops->set_csma_params(&local->hw, min_be, max_be,
+ trace_802154_drv_set_csma_params(local, min_be, max_be,
+ max_csma_backoffs);
+ ret = local->ops->set_csma_params(&local->hw, min_be, max_be,
max_csma_backoffs);
+ trace_802154_drv_return_int(local, ret);
+ return ret;
}
static inline int
drv_set_max_frame_retries(struct ieee802154_local *local, s8 max_frame_retries)
{
+ int ret;
+
might_sleep();
if (!local->ops->set_frame_retries) {
@@ -204,12 +260,17 @@ drv_set_max_frame_retries(struct ieee802154_local *local, s8 max_frame_retries)
return -EOPNOTSUPP;
}
- return local->ops->set_frame_retries(&local->hw, max_frame_retries);
+ trace_802154_drv_set_max_frame_retries(local, max_frame_retries);
+ ret = local->ops->set_frame_retries(&local->hw, max_frame_retries);
+ trace_802154_drv_return_int(local, ret);
+ return ret;
}
static inline int
drv_set_promiscuous_mode(struct ieee802154_local *local, bool on)
{
+ int ret;
+
might_sleep();
if (!local->ops->set_promiscuous_mode) {
@@ -217,7 +278,10 @@ drv_set_promiscuous_mode(struct ieee802154_local *local, bool on)
return -EOPNOTSUPP;
}
- return local->ops->set_promiscuous_mode(&local->hw, on);
+ trace_802154_drv_set_promiscuous_mode(local, on);
+ ret = local->ops->set_promiscuous_mode(&local->hw, on);
+ trace_802154_drv_return_int(local, ret);
+ return ret;
}
#endif /* __MAC802154_DRIVER_OPS */
diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h
index 127ba1838..34755d575 100644
--- a/net/mac802154/ieee802154_i.h
+++ b/net/mac802154/ieee802154_i.h
@@ -86,16 +86,12 @@ struct ieee802154_sub_if_data {
unsigned long state;
char name[IFNAMSIZ];
- spinlock_t mib_lock;
-
/* protects sec from concurrent access by netlink. access by
* encrypt/decrypt/header_create safe without additional protection.
*/
struct mutex sec_mtx;
struct mac802154_llsec sec;
- /* must be last, dynamically sized area in this! */
- struct ieee802154_vif vif;
};
#define MAC802154_CHAN_NONE 0xff /* No channel is assigned */
@@ -136,12 +132,7 @@ ieee802154_subif_start_xmit(struct sk_buff *skb, struct net_device *dev);
enum hrtimer_restart ieee802154_xmit_ifs_timer(struct hrtimer *timer);
/* MIB callbacks */
-void mac802154_dev_set_short_addr(struct net_device *dev, __le16 val);
-__le16 mac802154_dev_get_short_addr(const struct net_device *dev);
-__le16 mac802154_dev_get_pan_id(const struct net_device *dev);
-void mac802154_dev_set_pan_id(struct net_device *dev, __le16 val);
void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan);
-u8 mac802154_dev_get_dsn(const struct net_device *dev);
int mac802154_get_params(struct net_device *dev,
struct ieee802154_llsec_params *params);
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index 91b75abbd..8b698246a 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -62,9 +62,10 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
(struct sockaddr_ieee802154 *)&ifr->ifr_addr;
int err = -ENOIOCTLCMD;
- ASSERT_RTNL();
+ if (cmd != SIOCGIFADDR && cmd != SIOCSIFADDR)
+ return err;
- spin_lock_bh(&sdata->mib_lock);
+ rtnl_lock();
switch (cmd) {
case SIOCGIFADDR:
@@ -89,7 +90,7 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
}
case SIOCSIFADDR:
if (netif_running(dev)) {
- spin_unlock_bh(&sdata->mib_lock);
+ rtnl_unlock();
return -EBUSY;
}
@@ -111,7 +112,7 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
break;
}
- spin_unlock_bh(&sdata->mib_lock);
+ rtnl_unlock();
return err;
}
@@ -125,7 +126,7 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p)
return -EBUSY;
ieee802154_be64_to_le64(&extended_addr, addr->sa_data);
- if (!ieee802154_is_valid_extended_addr(extended_addr))
+ if (!ieee802154_is_valid_extended_unicast_addr(extended_addr))
return -EINVAL;
memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
@@ -134,19 +135,72 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p)
return mac802154_wpan_update_llsec(dev);
}
+static int ieee802154_setup_hw(struct ieee802154_sub_if_data *sdata)
+{
+ struct ieee802154_local *local = sdata->local;
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ int ret;
+
+ if (local->hw.flags & IEEE802154_HW_PROMISCUOUS) {
+ ret = drv_set_promiscuous_mode(local,
+ wpan_dev->promiscuous_mode);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (local->hw.flags & IEEE802154_HW_AFILT) {
+ ret = drv_set_pan_id(local, wpan_dev->pan_id);
+ if (ret < 0)
+ return ret;
+
+ ret = drv_set_extended_addr(local, wpan_dev->extended_addr);
+ if (ret < 0)
+ return ret;
+
+ ret = drv_set_short_addr(local, wpan_dev->short_addr);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (local->hw.flags & IEEE802154_HW_LBT) {
+ ret = drv_set_lbt_mode(local, wpan_dev->lbt);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (local->hw.flags & IEEE802154_HW_CSMA_PARAMS) {
+ ret = drv_set_csma_params(local, wpan_dev->min_be,
+ wpan_dev->max_be,
+ wpan_dev->csma_retries);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (local->hw.flags & IEEE802154_HW_FRAME_RETRIES) {
+ ret = drv_set_max_frame_retries(local, wpan_dev->frame_retries);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
static int mac802154_slave_open(struct net_device *dev)
{
struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
struct ieee802154_local *local = sdata->local;
- int res = 0;
+ int res;
ASSERT_RTNL();
set_bit(SDATA_STATE_RUNNING, &sdata->state);
if (!local->open_count) {
+ res = ieee802154_setup_hw(sdata);
+ if (res)
+ goto err;
+
res = drv_start(local);
- WARN_ON(res);
if (res)
goto err;
}
@@ -218,8 +272,8 @@ ieee802154_check_concurrent_iface(struct ieee802154_sub_if_data *sdata,
* exist really an use case if we need to support
* multiple node types at the same time.
*/
- if (sdata->vif.type == NL802154_IFTYPE_NODE &&
- nsdata->vif.type == NL802154_IFTYPE_NODE)
+ if (wpan_dev->iftype == NL802154_IFTYPE_NODE &&
+ nsdata->wpan_dev.iftype == NL802154_IFTYPE_NODE)
return -EBUSY;
/* check all phy mac sublayer settings are the same.
@@ -239,67 +293,13 @@ static int mac802154_wpan_open(struct net_device *dev)
{
int rc;
struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
- struct ieee802154_local *local = sdata->local;
struct wpan_dev *wpan_dev = &sdata->wpan_dev;
- struct wpan_phy *phy = sdata->local->phy;
- rc = ieee802154_check_concurrent_iface(sdata, sdata->vif.type);
+ rc = ieee802154_check_concurrent_iface(sdata, wpan_dev->iftype);
if (rc < 0)
return rc;
- rc = mac802154_slave_open(dev);
- if (rc < 0)
- return rc;
-
- mutex_lock(&phy->pib_lock);
-
- if (local->hw.flags & IEEE802154_HW_PROMISCUOUS) {
- rc = drv_set_promiscuous_mode(local,
- wpan_dev->promiscuous_mode);
- if (rc < 0)
- goto out;
- }
-
- if (local->hw.flags & IEEE802154_HW_AFILT) {
- rc = drv_set_pan_id(local, wpan_dev->pan_id);
- if (rc < 0)
- goto out;
-
- rc = drv_set_extended_addr(local, wpan_dev->extended_addr);
- if (rc < 0)
- goto out;
-
- rc = drv_set_short_addr(local, wpan_dev->short_addr);
- if (rc < 0)
- goto out;
- }
-
- if (local->hw.flags & IEEE802154_HW_LBT) {
- rc = drv_set_lbt_mode(local, wpan_dev->lbt);
- if (rc < 0)
- goto out;
- }
-
- if (local->hw.flags & IEEE802154_HW_CSMA_PARAMS) {
- rc = drv_set_csma_params(local, wpan_dev->min_be,
- wpan_dev->max_be,
- wpan_dev->csma_retries);
- if (rc < 0)
- goto out;
- }
-
- if (local->hw.flags & IEEE802154_HW_FRAME_RETRIES) {
- rc = drv_set_max_frame_retries(local, wpan_dev->frame_retries);
- if (rc < 0)
- goto out;
- }
-
- mutex_unlock(&phy->pib_lock);
- return 0;
-
-out:
- mutex_unlock(&phy->pib_lock);
- return rc;
+ return mac802154_slave_open(dev);
}
static int mac802154_slave_close(struct net_device *dev)
@@ -309,15 +309,16 @@ static int mac802154_slave_close(struct net_device *dev)
ASSERT_RTNL();
- hrtimer_cancel(&local->ifs_timer);
-
netif_stop_queue(dev);
local->open_count--;
clear_bit(SDATA_STATE_RUNNING, &sdata->state);
- if (!local->open_count)
+ if (!local->open_count) {
+ flush_workqueue(local->workqueue);
+ hrtimer_cancel(&local->ifs_timer);
drv_stop(local);
+ }
return 0;
}
@@ -374,14 +375,12 @@ static int mac802154_header_create(struct sk_buff *skb,
hdr.fc.type = cb->type;
hdr.fc.security_enabled = cb->secen;
hdr.fc.ack_request = cb->ackreq;
- hdr.seq = ieee802154_mlme_ops(dev)->get_dsn(dev);
+ hdr.seq = atomic_inc_return(&dev->ieee802154_ptr->dsn) & 0xFF;
if (mac802154_set_header_security(sdata, &hdr, cb) < 0)
return -EINVAL;
if (!saddr) {
- spin_lock_bh(&sdata->mib_lock);
-
if (wpan_dev->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST) ||
wpan_dev->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) ||
wpan_dev->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST)) {
@@ -393,8 +392,6 @@ static int mac802154_header_create(struct sk_buff *skb,
}
hdr.source.pan_id = wpan_dev->pan_id;
-
- spin_unlock_bh(&sdata->mib_lock);
} else {
hdr.source = *(const struct ieee802154_addr *)saddr;
}
@@ -474,13 +471,15 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
enum nl802154_iftype type)
{
struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ u8 tmp;
/* set some type-dependent values */
- sdata->vif.type = type;
sdata->wpan_dev.iftype = type;
- get_random_bytes(&wpan_dev->bsn, 1);
- get_random_bytes(&wpan_dev->dsn, 1);
+ get_random_bytes(&tmp, sizeof(tmp));
+ atomic_set(&wpan_dev->bsn, tmp);
+ get_random_bytes(&tmp, sizeof(tmp));
+ atomic_set(&wpan_dev->dsn, tmp);
/* defaults per 802.15.4-2011 */
wpan_dev->min_be = 3;
@@ -503,7 +502,6 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
sdata->dev->ml_priv = &mac802154_mlme_wpan;
wpan_dev->promiscuous_mode = false;
- spin_lock_init(&sdata->mib_lock);
mutex_init(&sdata->sec_mtx);
mac802154_llsec_init(&sdata->sec);
@@ -531,7 +529,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
ASSERT_RTNL();
- ndev = alloc_netdev(sizeof(*sdata) + local->hw.vif_data_size, name,
+ ndev = alloc_netdev(sizeof(*sdata), name,
name_assign_type, ieee802154_if_setup);
if (!ndev)
return ERR_PTR(-ENOMEM);
@@ -547,7 +545,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
switch (type) {
case NL802154_IFTYPE_NODE:
ndev->type = ARPHRD_IEEE802154;
- if (ieee802154_is_valid_extended_addr(extended_addr))
+ if (ieee802154_is_valid_extended_unicast_addr(extended_addr))
ieee802154_le64_to_be64(ndev->dev_addr, &extended_addr);
else
memcpy(ndev->dev_addr, ndev->perm_addr,
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 5b2be1283..985e9394e 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -17,8 +17,9 @@
#include <linux/err.h>
#include <linux/bug.h>
#include <linux/completion.h>
+#include <linux/crypto.h>
#include <linux/ieee802154.h>
-#include <crypto/algapi.h>
+#include <crypto/aead.h>
#include "ieee802154_i.h"
#include "llsec.h"
@@ -649,7 +650,7 @@ llsec_do_encrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec,
u8 iv[16];
unsigned char *data;
int authlen, assoclen, datalen, rc;
- struct scatterlist src, assoc[2], dst[2];
+ struct scatterlist sg;
struct aead_request *req;
authlen = ieee802154_sechdr_authtag_len(&hdr->sec);
@@ -659,30 +660,23 @@ llsec_do_encrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec,
if (!req)
return -ENOMEM;
- sg_init_table(assoc, 2);
- sg_set_buf(&assoc[0], skb_mac_header(skb), skb->mac_len);
assoclen = skb->mac_len;
data = skb_mac_header(skb) + skb->mac_len;
datalen = skb_tail_pointer(skb) - data;
- if (hdr->sec.level & IEEE802154_SCF_SECLEVEL_ENC) {
- sg_set_buf(&assoc[1], data, 0);
- } else {
- sg_set_buf(&assoc[1], data, datalen);
+ skb_put(skb, authlen);
+
+ sg_init_one(&sg, skb_mac_header(skb), assoclen + datalen + authlen);
+
+ if (!(hdr->sec.level & IEEE802154_SCF_SECLEVEL_ENC)) {
assoclen += datalen;
datalen = 0;
}
- sg_init_one(&src, data, datalen);
-
- sg_init_table(dst, 2);
- sg_set_buf(&dst[0], data, datalen);
- sg_set_buf(&dst[1], skb_put(skb, authlen), authlen);
-
aead_request_set_callback(req, 0, NULL, NULL);
- aead_request_set_assoc(req, assoc, assoclen);
- aead_request_set_crypt(req, &src, dst, datalen, iv);
+ aead_request_set_crypt(req, &sg, &sg, datalen, iv);
+ aead_request_set_ad(req, assoclen);
rc = crypto_aead_encrypt(req);
@@ -858,7 +852,7 @@ llsec_do_decrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec,
u8 iv[16];
unsigned char *data;
int authlen, datalen, assoclen, rc;
- struct scatterlist src, assoc[2];
+ struct scatterlist sg;
struct aead_request *req;
authlen = ieee802154_sechdr_authtag_len(&hdr->sec);
@@ -868,27 +862,21 @@ llsec_do_decrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec,
if (!req)
return -ENOMEM;
- sg_init_table(assoc, 2);
- sg_set_buf(&assoc[0], skb_mac_header(skb), skb->mac_len);
assoclen = skb->mac_len;
data = skb_mac_header(skb) + skb->mac_len;
datalen = skb_tail_pointer(skb) - data;
- if (hdr->sec.level & IEEE802154_SCF_SECLEVEL_ENC) {
- sg_set_buf(&assoc[1], data, 0);
- } else {
- sg_set_buf(&assoc[1], data, datalen - authlen);
+ sg_init_one(&sg, skb_mac_header(skb), assoclen + datalen);
+
+ if (!(hdr->sec.level & IEEE802154_SCF_SECLEVEL_ENC)) {
assoclen += datalen - authlen;
- data += datalen - authlen;
datalen = authlen;
}
- sg_init_one(&src, data, datalen);
-
aead_request_set_callback(req, 0, NULL, NULL);
- aead_request_set_assoc(req, assoc, assoclen);
- aead_request_set_crypt(req, &src, &src, datalen, iv);
+ aead_request_set_crypt(req, &sg, &sg, datalen, iv);
+ aead_request_set_ad(req, assoclen);
rc = crypto_aead_decrypt(req);
diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c
index bdccb4ecd..8606da459 100644
--- a/net/mac802154/mac_cmd.c
+++ b/net/mac802154/mac_cmd.c
@@ -36,37 +36,30 @@ static int mac802154_mlme_start_req(struct net_device *dev,
u8 pan_coord, u8 blx,
u8 coord_realign)
{
- struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
- int rc = 0;
+ struct ieee802154_llsec_params params;
+ int changed = 0;
ASSERT_RTNL();
BUG_ON(addr->mode != IEEE802154_ADDR_SHORT);
- mac802154_dev_set_pan_id(dev, addr->pan_id);
- mac802154_dev_set_short_addr(dev, addr->short_addr);
+ dev->ieee802154_ptr->pan_id = addr->pan_id;
+ dev->ieee802154_ptr->short_addr = addr->short_addr;
mac802154_dev_set_page_channel(dev, page, channel);
- if (ops->llsec) {
- struct ieee802154_llsec_params params;
- int changed = 0;
+ params.pan_id = addr->pan_id;
+ changed |= IEEE802154_LLSEC_PARAM_PAN_ID;
- params.coord_shortaddr = addr->short_addr;
- changed |= IEEE802154_LLSEC_PARAM_COORD_SHORTADDR;
+ params.hwaddr = ieee802154_devaddr_from_raw(dev->dev_addr);
+ changed |= IEEE802154_LLSEC_PARAM_HWADDR;
- params.pan_id = addr->pan_id;
- changed |= IEEE802154_LLSEC_PARAM_PAN_ID;
+ params.coord_hwaddr = params.hwaddr;
+ changed |= IEEE802154_LLSEC_PARAM_COORD_HWADDR;
- params.hwaddr = ieee802154_devaddr_from_raw(dev->dev_addr);
- changed |= IEEE802154_LLSEC_PARAM_HWADDR;
+ params.coord_shortaddr = addr->short_addr;
+ changed |= IEEE802154_LLSEC_PARAM_COORD_SHORTADDR;
- params.coord_hwaddr = params.hwaddr;
- changed |= IEEE802154_LLSEC_PARAM_COORD_HWADDR;
-
- rc = ops->llsec->set_params(dev, &params, changed);
- }
-
- return rc;
+ return mac802154_set_params(dev, &params, changed);
}
static int mac802154_set_mac_params(struct net_device *dev,
@@ -91,19 +84,19 @@ static int mac802154_set_mac_params(struct net_device *dev,
wpan_dev->frame_retries = params->frame_retries;
wpan_dev->lbt = params->lbt;
- if (local->hw.flags & IEEE802154_HW_TXPOWER) {
+ if (local->hw.phy->flags & WPAN_PHY_FLAG_TXPOWER) {
ret = drv_set_tx_power(local, params->transmit_power);
if (ret < 0)
return ret;
}
- if (local->hw.flags & IEEE802154_HW_CCA_MODE) {
+ if (local->hw.phy->flags & WPAN_PHY_FLAG_CCA_MODE) {
ret = drv_set_cca_mode(local, &params->cca);
if (ret < 0)
return ret;
}
- if (local->hw.flags & IEEE802154_HW_CCA_ED_LEVEL) {
+ if (local->hw.phy->flags & WPAN_PHY_FLAG_CCA_ED_LEVEL) {
ret = drv_set_cca_ed_level(local, params->cca_ed_level);
if (ret < 0)
return ret;
@@ -151,9 +144,6 @@ static struct ieee802154_llsec_ops mac802154_llsec_ops = {
struct ieee802154_mlme_ops mac802154_mlme_wpan = {
.start_req = mac802154_mlme_start_req,
- .get_pan_id = mac802154_dev_get_pan_id,
- .get_short_addr = mac802154_dev_get_short_addr,
- .get_dsn = mac802154_dev_get_dsn,
.llsec = &mac802154_llsec_ops,
diff --git a/net/mac802154/main.c b/net/mac802154/main.c
index 08cb32dc8..356b346e1 100644
--- a/net/mac802154/main.c
+++ b/net/mac802154/main.c
@@ -107,6 +107,18 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops)
skb_queue_head_init(&local->skb_queue);
+ /* init supported flags with 802.15.4 default ranges */
+ phy->supported.max_minbe = 8;
+ phy->supported.min_maxbe = 3;
+ phy->supported.max_maxbe = 8;
+ phy->supported.min_frame_retries = -1;
+ phy->supported.max_frame_retries = 7;
+ phy->supported.max_csma_backoffs = 5;
+ phy->supported.lbt = NL802154_SUPPORTED_BOOL_FALSE;
+
+ /* always supported */
+ phy->supported.iftypes = BIT(NL802154_IFTYPE_NODE);
+
return &local->hw;
}
EXPORT_SYMBOL(ieee802154_alloc_hw);
@@ -155,6 +167,26 @@ int ieee802154_register_hw(struct ieee802154_hw *hw)
ieee802154_setup_wpan_phy_pib(local->phy);
+ if (!(hw->flags & IEEE802154_HW_CSMA_PARAMS)) {
+ local->phy->supported.min_csma_backoffs = 4;
+ local->phy->supported.max_csma_backoffs = 4;
+ local->phy->supported.min_maxbe = 5;
+ local->phy->supported.max_maxbe = 5;
+ local->phy->supported.min_minbe = 3;
+ local->phy->supported.max_minbe = 3;
+ }
+
+ if (!(hw->flags & IEEE802154_HW_FRAME_RETRIES)) {
+ /* TODO should be 3, but our default value is -1 which means
+ * no ARET handling.
+ */
+ local->phy->supported.min_frame_retries = -1;
+ local->phy->supported.max_frame_retries = -1;
+ }
+
+ if (hw->flags & IEEE802154_HW_PROMISCUOUS)
+ local->phy->supported.iftypes |= BIT(NL802154_IFTYPE_MONITOR);
+
rc = wpan_phy_register(local->phy);
if (rc < 0)
goto out_wq;
diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c
index 5cf019a57..73f94fbf8 100644
--- a/net/mac802154/mib.c
+++ b/net/mac802154/mib.c
@@ -26,81 +26,22 @@
#include "ieee802154_i.h"
#include "driver-ops.h"
-void mac802154_dev_set_short_addr(struct net_device *dev, __le16 val)
-{
- struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
-
- BUG_ON(dev->type != ARPHRD_IEEE802154);
-
- spin_lock_bh(&sdata->mib_lock);
- sdata->wpan_dev.short_addr = val;
- spin_unlock_bh(&sdata->mib_lock);
-}
-
-__le16 mac802154_dev_get_short_addr(const struct net_device *dev)
-{
- struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
- __le16 ret;
-
- BUG_ON(dev->type != ARPHRD_IEEE802154);
-
- spin_lock_bh(&sdata->mib_lock);
- ret = sdata->wpan_dev.short_addr;
- spin_unlock_bh(&sdata->mib_lock);
-
- return ret;
-}
-
-__le16 mac802154_dev_get_pan_id(const struct net_device *dev)
-{
- struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
- __le16 ret;
-
- BUG_ON(dev->type != ARPHRD_IEEE802154);
-
- spin_lock_bh(&sdata->mib_lock);
- ret = sdata->wpan_dev.pan_id;
- spin_unlock_bh(&sdata->mib_lock);
-
- return ret;
-}
-
-void mac802154_dev_set_pan_id(struct net_device *dev, __le16 val)
-{
- struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
-
- BUG_ON(dev->type != ARPHRD_IEEE802154);
-
- spin_lock_bh(&sdata->mib_lock);
- sdata->wpan_dev.pan_id = val;
- spin_unlock_bh(&sdata->mib_lock);
-}
-
-u8 mac802154_dev_get_dsn(const struct net_device *dev)
-{
- struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
-
- BUG_ON(dev->type != ARPHRD_IEEE802154);
-
- return sdata->wpan_dev.dsn++;
-}
-
void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan)
{
struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
struct ieee802154_local *local = sdata->local;
int res;
+ ASSERT_RTNL();
+
BUG_ON(dev->type != ARPHRD_IEEE802154);
res = drv_set_channel(local, page, chan);
if (res) {
pr_debug("set_channel failed\n");
} else {
- mutex_lock(&local->phy->pib_lock);
local->phy->current_channel = chan;
local->phy->current_page = page;
- mutex_unlock(&local->phy->pib_lock);
}
}
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index c0d67b2b4..d93ad2d4a 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -47,8 +47,6 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata,
pr_debug("getting packet via slave interface %s\n", sdata->dev->name);
- spin_lock_bh(&sdata->mib_lock);
-
span = wpan_dev->pan_id;
sshort = wpan_dev->short_addr;
@@ -83,13 +81,10 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata,
skb->pkt_type = PACKET_OTHERHOST;
break;
default:
- spin_unlock_bh(&sdata->mib_lock);
pr_debug("invalid dest mode\n");
goto fail;
}
- spin_unlock_bh(&sdata->mib_lock);
-
skb->dev = sdata->dev;
rc = mac802154_llsec_decrypt(&sdata->sec, skb);
@@ -207,8 +202,10 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local,
}
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- if (sdata->vif.type != NL802154_IFTYPE_NODE ||
- !netif_running(sdata->dev))
+ if (sdata->wpan_dev.iftype != NL802154_IFTYPE_NODE)
+ continue;
+
+ if (!ieee802154_sdata_running(sdata))
continue;
ieee802154_subif_frame(sdata, skb, &hdr);
@@ -232,7 +229,7 @@ ieee802154_monitors_rx(struct ieee802154_local *local, struct sk_buff *skb)
skb->protocol = htons(ETH_P_IEEE802154);
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- if (sdata->vif.type != NL802154_IFTYPE_MONITOR)
+ if (sdata->wpan_dev.iftype != NL802154_IFTYPE_MONITOR)
continue;
if (!ieee802154_sdata_running(sdata))
diff --git a/net/mac802154/trace.c b/net/mac802154/trace.c
new file mode 100644
index 000000000..863e5e6b9
--- /dev/null
+++ b/net/mac802154/trace.c
@@ -0,0 +1,9 @@
+#include <linux/module.h>
+
+#ifndef __CHECKER__
+#include <net/cfg802154.h>
+#include "driver-ops.h"
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+#endif
diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h
new file mode 100644
index 000000000..6f30e0c93
--- /dev/null
+++ b/net/mac802154/trace.h
@@ -0,0 +1,272 @@
+/* Based on net/mac80211/trace.h */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mac802154
+
+#if !defined(__MAC802154_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
+#define __MAC802154_DRIVER_TRACE
+
+#include <linux/tracepoint.h>
+
+#include <net/mac802154.h>
+#include "ieee802154_i.h"
+
+#define MAXNAME 32
+#define LOCAL_ENTRY __array(char, wpan_phy_name, MAXNAME)
+#define LOCAL_ASSIGN strlcpy(__entry->wpan_phy_name, \
+ wpan_phy_name(local->hw.phy), MAXNAME)
+#define LOCAL_PR_FMT "%s"
+#define LOCAL_PR_ARG __entry->wpan_phy_name
+
+#define CCA_ENTRY __field(enum nl802154_cca_modes, cca_mode) \
+ __field(enum nl802154_cca_opts, cca_opt)
+#define CCA_ASSIGN \
+ do { \
+ (__entry->cca_mode) = cca->mode; \
+ (__entry->cca_opt) = cca->opt; \
+ } while (0)
+#define CCA_PR_FMT "cca_mode: %d, cca_opt: %d"
+#define CCA_PR_ARG __entry->cca_mode, __entry->cca_opt
+
+#define BOOL_TO_STR(bo) (bo) ? "true" : "false"
+
+/* Tracing for driver callbacks */
+
+DECLARE_EVENT_CLASS(local_only_evt,
+ TP_PROTO(struct ieee802154_local *local),
+ TP_ARGS(local),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ ),
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ ),
+ TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG)
+);
+
+DEFINE_EVENT(local_only_evt, 802154_drv_return_void,
+ TP_PROTO(struct ieee802154_local *local),
+ TP_ARGS(local)
+);
+
+TRACE_EVENT(802154_drv_return_int,
+ TP_PROTO(struct ieee802154_local *local, int ret),
+ TP_ARGS(local, ret),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(int, ret)
+ ),
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->ret = ret;
+ ),
+ TP_printk(LOCAL_PR_FMT ", returned: %d", LOCAL_PR_ARG,
+ __entry->ret)
+);
+
+DEFINE_EVENT(local_only_evt, 802154_drv_start,
+ TP_PROTO(struct ieee802154_local *local),
+ TP_ARGS(local)
+);
+
+DEFINE_EVENT(local_only_evt, 802154_drv_stop,
+ TP_PROTO(struct ieee802154_local *local),
+ TP_ARGS(local)
+);
+
+TRACE_EVENT(802154_drv_set_channel,
+ TP_PROTO(struct ieee802154_local *local, u8 page, u8 channel),
+ TP_ARGS(local, page, channel),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(u8, page)
+ __field(u8, channel)
+ ),
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->page = page;
+ __entry->channel = channel;
+ ),
+ TP_printk(LOCAL_PR_FMT ", page: %d, channel: %d", LOCAL_PR_ARG,
+ __entry->page, __entry->channel)
+);
+
+TRACE_EVENT(802154_drv_set_cca_mode,
+ TP_PROTO(struct ieee802154_local *local,
+ const struct wpan_phy_cca *cca),
+ TP_ARGS(local, cca),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ CCA_ENTRY
+ ),
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ CCA_ASSIGN;
+ ),
+ TP_printk(LOCAL_PR_FMT ", " CCA_PR_FMT, LOCAL_PR_ARG,
+ CCA_PR_ARG)
+);
+
+TRACE_EVENT(802154_drv_set_cca_ed_level,
+ TP_PROTO(struct ieee802154_local *local, s32 mbm),
+ TP_ARGS(local, mbm),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(s32, mbm)
+ ),
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->mbm = mbm;
+ ),
+ TP_printk(LOCAL_PR_FMT ", ed level: %d", LOCAL_PR_ARG,
+ __entry->mbm)
+);
+
+TRACE_EVENT(802154_drv_set_tx_power,
+ TP_PROTO(struct ieee802154_local *local, s32 power),
+ TP_ARGS(local, power),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(s32, power)
+ ),
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->power = power;
+ ),
+ TP_printk(LOCAL_PR_FMT ", mbm: %d", LOCAL_PR_ARG,
+ __entry->power)
+);
+
+TRACE_EVENT(802154_drv_set_lbt_mode,
+ TP_PROTO(struct ieee802154_local *local, bool mode),
+ TP_ARGS(local, mode),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(bool, mode)
+ ),
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->mode = mode;
+ ),
+ TP_printk(LOCAL_PR_FMT ", lbt mode: %s", LOCAL_PR_ARG,
+ BOOL_TO_STR(__entry->mode))
+);
+
+TRACE_EVENT(802154_drv_set_short_addr,
+ TP_PROTO(struct ieee802154_local *local, __le16 short_addr),
+ TP_ARGS(local, short_addr),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(__le16, short_addr)
+ ),
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->short_addr = short_addr;
+ ),
+ TP_printk(LOCAL_PR_FMT ", short addr: 0x%04x", LOCAL_PR_ARG,
+ le16_to_cpu(__entry->short_addr))
+);
+
+TRACE_EVENT(802154_drv_set_pan_id,
+ TP_PROTO(struct ieee802154_local *local, __le16 pan_id),
+ TP_ARGS(local, pan_id),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(__le16, pan_id)
+ ),
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->pan_id = pan_id;
+ ),
+ TP_printk(LOCAL_PR_FMT ", pan id: 0x%04x", LOCAL_PR_ARG,
+ le16_to_cpu(__entry->pan_id))
+);
+
+TRACE_EVENT(802154_drv_set_extended_addr,
+ TP_PROTO(struct ieee802154_local *local, __le64 extended_addr),
+ TP_ARGS(local, extended_addr),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(__le64, extended_addr)
+ ),
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->extended_addr = extended_addr;
+ ),
+ TP_printk(LOCAL_PR_FMT ", extended addr: 0x%llx", LOCAL_PR_ARG,
+ le64_to_cpu(__entry->extended_addr))
+);
+
+TRACE_EVENT(802154_drv_set_pan_coord,
+ TP_PROTO(struct ieee802154_local *local, bool is_coord),
+ TP_ARGS(local, is_coord),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(bool, is_coord)
+ ),
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->is_coord = is_coord;
+ ),
+ TP_printk(LOCAL_PR_FMT ", is_coord: %s", LOCAL_PR_ARG,
+ BOOL_TO_STR(__entry->is_coord))
+);
+
+TRACE_EVENT(802154_drv_set_csma_params,
+ TP_PROTO(struct ieee802154_local *local, u8 min_be, u8 max_be,
+ u8 max_csma_backoffs),
+ TP_ARGS(local, min_be, max_be, max_csma_backoffs),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(u8, min_be)
+ __field(u8, max_be)
+ __field(u8, max_csma_backoffs)
+ ),
+ TP_fast_assign(
+ LOCAL_ASSIGN,
+ __entry->min_be = min_be;
+ __entry->max_be = max_be;
+ __entry->max_csma_backoffs = max_csma_backoffs;
+ ),
+ TP_printk(LOCAL_PR_FMT ", min be: %d, max be: %d, max csma backoffs: %d",
+ LOCAL_PR_ARG, __entry->min_be, __entry->max_be,
+ __entry->max_csma_backoffs)
+);
+
+TRACE_EVENT(802154_drv_set_max_frame_retries,
+ TP_PROTO(struct ieee802154_local *local, s8 max_frame_retries),
+ TP_ARGS(local, max_frame_retries),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(s8, max_frame_retries)
+ ),
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->max_frame_retries = max_frame_retries;
+ ),
+ TP_printk(LOCAL_PR_FMT ", max frame retries: %d", LOCAL_PR_ARG,
+ __entry->max_frame_retries)
+);
+
+TRACE_EVENT(802154_drv_set_promiscuous_mode,
+ TP_PROTO(struct ieee802154_local *local, bool on),
+ TP_ARGS(local, on),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(bool, on)
+ ),
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->on = on;
+ ),
+ TP_printk(LOCAL_PR_FMT ", promiscuous mode: %s", LOCAL_PR_ARG,
+ BOOL_TO_STR(__entry->on))
+);
+
+#endif /* !__MAC802154_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
diff --git a/net/mac802154/util.c b/net/mac802154/util.c
index 150bf807e..583435f38 100644
--- a/net/mac802154/util.c
+++ b/net/mac802154/util.c
@@ -85,11 +85,10 @@ void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb,
hrtimer_start(&local->ifs_timer,
ktime_set(0, hw->phy->sifs_period * NSEC_PER_USEC),
HRTIMER_MODE_REL);
-
- consume_skb(skb);
} else {
ieee802154_wake_queue(hw);
- consume_skb(skb);
}
+
+ dev_consume_skb_any(skb);
}
EXPORT_SYMBOL(ieee802154_xmit_complete);
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index 809df534a..0183b32da 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -62,6 +62,7 @@ out:
static struct packet_offload mpls_mc_offload __read_mostly = {
.type = cpu_to_be16(ETH_P_MPLS_MC),
+ .priority = 15,
.callbacks = {
.gso_segment = mpls_gso_segment,
},
@@ -69,6 +70,7 @@ static struct packet_offload mpls_mc_offload __read_mostly = {
static struct packet_offload mpls_uc_offload __read_mostly = {
.type = cpu_to_be16(ETH_P_MPLS_UC),
+ .priority = 15,
.callbacks = {
.gso_segment = mpls_gso_segment,
},
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index a0f3e6a3c..6eae69a69 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1,6 +1,14 @@
menu "Core Netfilter Configuration"
depends on NET && INET && NETFILTER
+config NETFILTER_INGRESS
+ bool "Netfilter ingress support"
+ default y
+ select NET_INGRESS
+ help
+ This allows you to classify packets from ingress using the Netfilter
+ infrastructure.
+
config NETFILTER_NETLINK
tristate
@@ -198,7 +206,7 @@ config NF_CONNTRACK_FTP
config NF_CONNTRACK_H323
tristate "H.323 protocol support"
- depends on (IPV6 || IPV6=n)
+ depends on IPV6 || IPV6=n
depends on NETFILTER_ADVANCED
help
H.323 is a VoIP signalling protocol from ITU-T. As one of the most
@@ -448,6 +456,11 @@ config NF_TABLES_INET
help
This option enables support for a mixed IPv4/IPv6 "inet" table.
+config NF_TABLES_NETDEV
+ tristate "Netfilter nf_tables netdev tables support"
+ help
+ This option enables support for the "netdev" table.
+
config NFT_EXTHDR
tristate "Netfilter nf_tables IPv6 exthdr module"
help
@@ -710,7 +723,7 @@ config NETFILTER_XT_TARGET_HL
config NETFILTER_XT_TARGET_HMARK
tristate '"HMARK" target support'
- depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
+ depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
depends on NETFILTER_ADVANCED
---help---
This option adds the "HMARK" target.
@@ -852,7 +865,7 @@ config NETFILTER_XT_TARGET_REDIRECT
config NETFILTER_XT_TARGET_TEE
tristate '"TEE" - packet cloning to alternate destination'
depends on NETFILTER_ADVANCED
- depends on (IPV6 || IPV6=n)
+ depends on IPV6 || IPV6=n
depends on !NF_CONNTRACK || NF_CONNTRACK
---help---
This option adds a "TEE" target with which a packet can be cloned and
@@ -862,8 +875,8 @@ config NETFILTER_XT_TARGET_TPROXY
tristate '"TPROXY" target transparent proxying support'
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
- depends on (IPV6 || IPV6=n)
- depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
+ depends on IPV6 || IPV6=n
+ depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
@@ -902,7 +915,7 @@ config NETFILTER_XT_TARGET_SECMARK
config NETFILTER_XT_TARGET_TCPMSS
tristate '"TCPMSS" target support'
- depends on (IPV6 || IPV6=n)
+ depends on IPV6 || IPV6=n
default m if NETFILTER_ADVANCED=n
---help---
This option adds a `TCPMSS' target, which allows you to alter the
@@ -1114,7 +1127,7 @@ config NETFILTER_XT_MATCH_ESP
config NETFILTER_XT_MATCH_HASHLIMIT
tristate '"hashlimit" match support'
- depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
+ depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
depends on NETFILTER_ADVANCED
help
This option adds a `hashlimit' match.
@@ -1356,8 +1369,8 @@ config NETFILTER_XT_MATCH_SOCKET
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
depends on !NF_CONNTRACK || NF_CONNTRACK
- depends on (IPV6 || IPV6=n)
- depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
+ depends on IPV6 || IPV6=n
+ depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index a87d8b8ec..70d026d46 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -75,6 +75,7 @@ nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
obj-$(CONFIG_NF_TABLES) += nf_tables.o
obj-$(CONFIG_NF_TABLES_INET) += nf_tables_inet.o
+obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o
obj-$(CONFIG_NFT_META) += nft_meta.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index e6163017c..a0e54974e 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -64,10 +64,27 @@ static DEFINE_MUTEX(nf_hook_mutex);
int nf_register_hook(struct nf_hook_ops *reg)
{
+ struct list_head *nf_hook_list;
struct nf_hook_ops *elem;
mutex_lock(&nf_hook_mutex);
- list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
+ switch (reg->pf) {
+ case NFPROTO_NETDEV:
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (reg->hooknum == NF_NETDEV_INGRESS) {
+ BUG_ON(reg->dev == NULL);
+ nf_hook_list = &reg->dev->nf_hooks_ingress;
+ net_inc_ingress_queue();
+ break;
+ }
+#endif
+ /* Fall through. */
+ default:
+ nf_hook_list = &nf_hooks[reg->pf][reg->hooknum];
+ break;
+ }
+
+ list_for_each_entry(elem, nf_hook_list, list) {
if (reg->priority < elem->priority)
break;
}
@@ -85,10 +102,23 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
mutex_lock(&nf_hook_mutex);
list_del_rcu(&reg->list);
mutex_unlock(&nf_hook_mutex);
+ switch (reg->pf) {
+ case NFPROTO_NETDEV:
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (reg->hooknum == NF_NETDEV_INGRESS) {
+ net_dec_ingress_queue();
+ break;
+ }
+ break;
+#endif
+ default:
+ break;
+ }
#ifdef HAVE_JUMP_LABEL
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
synchronize_net();
+ nf_queue_nf_hook_drop(reg);
}
EXPORT_SYMBOL(nf_unregister_hook);
@@ -166,11 +196,9 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
/* We may already have this, but read-locks nest anyway */
rcu_read_lock();
- elem = list_entry_rcu(&nf_hooks[state->pf][state->hook],
- struct nf_hook_ops, list);
+ elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list);
next_hook:
- verdict = nf_iterate(&nf_hooks[state->pf][state->hook], skb, state,
- &elem);
+ verdict = nf_iterate(state->hook_list, skb, state, &elem);
if (verdict == NF_ACCEPT || verdict == NF_STOP) {
ret = 1;
} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 6f024a8a1..d05e759ed 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -41,7 +41,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
struct mtype *map = set->data;
init_timer(&map->gc);
- map->gc.data = (unsigned long) set;
+ map->gc.data = (unsigned long)set;
map->gc.function = gc;
map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&map->gc);
@@ -144,10 +144,12 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (ret == IPSET_ADD_FAILED) {
if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(x, set)))
+ ip_set_timeout_expired(ext_timeout(x, set))) {
ret = 0;
- else if (!(flags & IPSET_FLAG_EXIST))
+ } else if (!(flags & IPSET_FLAG_EXIST)) {
+ set_bit(e->id, map->members);
return -IPSET_ERR_EXIST;
+ }
/* Element is re-added, cleanup extensions */
ip_set_ext_destroy(set, x);
}
@@ -165,6 +167,10 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ip_set_init_comment(ext_comment(x, set), ext);
if (SET_WITH_SKBINFO(set))
ip_set_init_skbinfo(ext_skbinfo(x, set), ext);
+
+ /* Activate element */
+ set_bit(e->id, map->members);
+
return 0;
}
@@ -203,10 +209,13 @@ mtype_list(const struct ip_set *set,
struct nlattr *adt, *nested;
void *x;
u32 id, first = cb->args[IPSET_CB_ARG0];
+ int ret = 0;
adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
if (!adt)
return -EMSGSIZE;
+ /* Extensions may be replaced */
+ rcu_read_lock();
for (; cb->args[IPSET_CB_ARG0] < map->elements;
cb->args[IPSET_CB_ARG0]++) {
id = cb->args[IPSET_CB_ARG0];
@@ -214,7 +223,7 @@ mtype_list(const struct ip_set *set,
if (!test_bit(id, map->members) ||
(SET_WITH_TIMEOUT(set) &&
#ifdef IP_SET_BITMAP_STORED_TIMEOUT
- mtype_is_filled((const struct mtype_elem *) x) &&
+ mtype_is_filled((const struct mtype_elem *)x) &&
#endif
ip_set_timeout_expired(ext_timeout(x, set))))
continue;
@@ -222,14 +231,16 @@ mtype_list(const struct ip_set *set,
if (!nested) {
if (id == first) {
nla_nest_cancel(skb, adt);
- return -EMSGSIZE;
- } else
- goto nla_put_failure;
+ ret = -EMSGSIZE;
+ goto out;
+ }
+
+ goto nla_put_failure;
}
if (mtype_do_list(skb, map, id, set->dsize))
goto nla_put_failure;
if (ip_set_put_extensions(skb, set, x,
- mtype_is_filled((const struct mtype_elem *) x)))
+ mtype_is_filled((const struct mtype_elem *)x)))
goto nla_put_failure;
ipset_nest_end(skb, nested);
}
@@ -238,29 +249,32 @@ mtype_list(const struct ip_set *set,
/* Set listing finished */
cb->args[IPSET_CB_ARG0] = 0;
- return 0;
+ goto out;
nla_put_failure:
nla_nest_cancel(skb, nested);
if (unlikely(id == first)) {
cb->args[IPSET_CB_ARG0] = 0;
- return -EMSGSIZE;
+ ret = -EMSGSIZE;
}
ipset_nest_end(skb, adt);
- return 0;
+out:
+ rcu_read_unlock();
+ return ret;
}
static void
mtype_gc(unsigned long ul_set)
{
- struct ip_set *set = (struct ip_set *) ul_set;
+ struct ip_set *set = (struct ip_set *)ul_set;
struct mtype *map = set->data;
void *x;
u32 id;
/* We run parallel with other readers (test element)
- * but adding/deleting new entries is locked out */
- read_lock_bh(&set->lock);
+ * but adding/deleting new entries is locked out
+ */
+ spin_lock_bh(&set->lock);
for (id = 0; id < map->elements; id++)
if (mtype_gc_test(id, map, set->dsize)) {
x = get_ext(set, map, id);
@@ -269,7 +283,7 @@ mtype_gc(unsigned long ul_set)
ip_set_ext_destroy(set, x);
}
}
- read_unlock_bh(&set->lock);
+ spin_unlock_bh(&set->lock);
map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&map->gc);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 55b083ec5..64a564334 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -36,6 +36,7 @@ IP_SET_MODULE_DESC("bitmap:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_bitmap:ip");
#define MTYPE bitmap_ip
+#define HOST_MASK 32
/* Type structure */
struct bitmap_ip {
@@ -58,7 +59,7 @@ struct bitmap_ip_adt_elem {
static inline u32
ip_to_id(const struct bitmap_ip *m, u32 ip)
{
- return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip)/m->hosts;
+ return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip) / m->hosts;
}
/* Common functions */
@@ -80,7 +81,7 @@ static inline int
bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map,
u32 flags, size_t dsize)
{
- return !!test_and_set_bit(e->id, map->members);
+ return !!test_bit(e->id, map->members);
}
static inline int
@@ -137,20 +138,17 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret = 0;
- if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
- return -IPSET_ERR_PROTOCOL;
-
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ if (unlikely(!tb[IPSET_ATTR_IP]))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -174,11 +172,12 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!cidr || cidr > 32)
+ if (!cidr || cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
- } else
+ } else {
ip_to = ip;
+ }
if (ip_to > map->last_ip)
return -IPSET_ERR_BITMAP_RANGE;
@@ -189,8 +188,8 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
return ret;
}
@@ -277,16 +276,17 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (cidr >= 32)
+ if (cidr >= HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(first_ip, last_ip, cidr);
- } else
+ } else {
return -IPSET_ERR_PROTOCOL;
+ }
if (tb[IPSET_ATTR_NETMASK]) {
netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
- if (netmask > 32)
+ if (netmask > HOST_MASK)
return -IPSET_ERR_INVALID_NETMASK;
first_ip &= ip_set_hostmask(netmask);
@@ -360,7 +360,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -377,6 +378,7 @@ bitmap_ip_init(void)
static void __exit
bitmap_ip_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&bitmap_ip_type);
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 86104744b..143053511 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -36,6 +36,7 @@ IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_bitmap:ip,mac");
#define MTYPE bitmap_ipmac
+#define HOST_MASK 32
#define IP_SET_BITMAP_STORED_TIMEOUT
enum {
@@ -89,7 +90,7 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
return 0;
elem = get_elem(map->extensions, e->id, dsize);
if (elem->filled == MAC_FILLED)
- return e->ether == NULL ||
+ return !e->ether ||
ether_addr_equal(e->ether, elem->ether);
/* Trigger kernel to fill out the ethernet address */
return -EAGAIN;
@@ -130,7 +131,8 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
/* If MAC is unset yet, we store plain timeout value
* because the timer is not activated yet
* and we can reuse it later when MAC is filled out,
- * possibly by the kernel */
+ * possibly by the kernel
+ */
if (e->ether)
ip_set_timeout_set(timeout, t);
else
@@ -146,28 +148,35 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
struct bitmap_ipmac_elem *elem;
elem = get_elem(map->extensions, e->id, dsize);
- if (test_and_set_bit(e->id, map->members)) {
+ if (test_bit(e->id, map->members)) {
if (elem->filled == MAC_FILLED) {
- if (e->ether && (flags & IPSET_FLAG_EXIST))
- memcpy(elem->ether, e->ether, ETH_ALEN);
+ if (e->ether &&
+ (flags & IPSET_FLAG_EXIST) &&
+ !ether_addr_equal(e->ether, elem->ether)) {
+ /* memcpy isn't atomic */
+ clear_bit(e->id, map->members);
+ smp_mb__after_atomic();
+ ether_addr_copy(elem->ether, e->ether);
+ }
return IPSET_ADD_FAILED;
} else if (!e->ether)
/* Already added without ethernet address */
return IPSET_ADD_FAILED;
/* Fill the MAC address and trigger the timer activation */
- memcpy(elem->ether, e->ether, ETH_ALEN);
+ clear_bit(e->id, map->members);
+ smp_mb__after_atomic();
+ ether_addr_copy(elem->ether, e->ether);
elem->filled = MAC_FILLED;
return IPSET_ADD_START_STORED_TIMEOUT;
} else if (e->ether) {
/* We can store MAC too */
- memcpy(elem->ether, e->ether, ETH_ALEN);
+ ether_addr_copy(elem->ether, e->ether);
elem->filled = MAC_FILLED;
return 0;
- } else {
- elem->filled = MAC_UNSET;
- /* MAC is not stored yet, don't start timer */
- return IPSET_ADD_STORE_PLAIN_TIMEOUT;
}
+ elem->filled = MAC_UNSET;
+ /* MAC is not stored yet, don't start timer */
+ return IPSET_ADD_STORE_PLAIN_TIMEOUT;
}
static inline int
@@ -238,20 +247,17 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
u32 ip = 0;
int ret = 0;
- if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
- return -IPSET_ERR_PROTOCOL;
-
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ if (unlikely(!tb[IPSET_ATTR_IP]))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -343,11 +349,12 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (cidr >= 32)
+ if (cidr >= HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(first_ip, last_ip, cidr);
- } else
+ } else {
return -IPSET_ERR_PROTOCOL;
+ }
elements = (u64)last_ip - first_ip + 1;
@@ -397,7 +404,8 @@ static struct ip_set_type bitmap_ipmac_type = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -414,6 +422,7 @@ bitmap_ipmac_init(void)
static void __exit
bitmap_ipmac_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&bitmap_ipmac_type);
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 005dd3644..5338ccd5d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -73,7 +73,7 @@ static inline int
bitmap_port_do_add(const struct bitmap_port_adt_elem *e,
struct bitmap_port *map, u32 flags, size_t dsize)
{
- return !!test_and_set_bit(e->id, map->members);
+ return !!test_bit(e->id, map->members);
}
static inline int
@@ -136,19 +136,13 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
u16 port_to;
int ret = 0;
- if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
- return -IPSET_ERR_PROTOCOL;
-
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
+ return -IPSET_ERR_PROTOCOL;
+
port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
if (port < map->first_port || port > map->last_port)
return -IPSET_ERR_BITMAP_RANGE;
@@ -168,8 +162,9 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
if (port < map->first_port)
return -IPSET_ERR_BITMAP_RANGE;
}
- } else
+ } else {
port_to = port;
+ }
if (port_to > map->last_port)
return -IPSET_ERR_BITMAP_RANGE;
@@ -180,8 +175,8 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
return ret;
}
@@ -294,7 +289,8 @@ static struct ip_set_type bitmap_port_type = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -311,6 +307,7 @@ bitmap_port_init(void)
static void __exit
bitmap_port_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&bitmap_port_type);
}
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index d259da3ce..338b40477 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -32,8 +32,10 @@ static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */
struct ip_set_net {
struct ip_set * __rcu *ip_set_list; /* all individual sets */
ip_set_id_t ip_set_max; /* max number of sets */
- int is_deleted; /* deleted by ip_set_net_exit */
+ bool is_deleted; /* deleted by ip_set_net_exit */
+ bool is_destroyed; /* all sets are destroyed */
};
+
static int ip_set_net_id __read_mostly;
static inline struct ip_set_net *ip_set_pernet(struct net *net)
@@ -42,7 +44,7 @@ static inline struct ip_set_net *ip_set_pernet(struct net *net)
}
#define IP_SET_INC 64
-#define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0)
+#define STRNCMP(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0)
static unsigned int max_sets;
@@ -59,8 +61,7 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
#define ip_set(inst, id) \
ip_set_dereference((inst)->ip_set_list)[id]
-/*
- * The set types are implemented in modules and registered set types
+/* The set types are implemented in modules and registered set types
* can be found in ip_set_type_list. Adding/deleting types is
* serialized by ip_set_type_mutex.
*/
@@ -85,7 +86,7 @@ find_set_type(const char *name, u8 family, u8 revision)
struct ip_set_type *type;
list_for_each_entry_rcu(type, &ip_set_type_list, list)
- if (STREQ(type->name, name) &&
+ if (STRNCMP(type->name, name) &&
(type->family == family ||
type->family == NFPROTO_UNSPEC) &&
revision >= type->revision_min &&
@@ -130,9 +131,10 @@ __find_set_type_get(const char *name, u8 family, u8 revision,
goto unlock;
}
/* Make sure the type is already loaded
- * but we don't support the revision */
+ * but we don't support the revision
+ */
list_for_each_entry_rcu(type, &ip_set_type_list, list)
- if (STREQ(type->name, name)) {
+ if (STRNCMP(type->name, name)) {
err = -IPSET_ERR_FIND_TYPE;
goto unlock;
}
@@ -166,7 +168,7 @@ __find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max,
*min = 255; *max = 0;
rcu_read_lock();
list_for_each_entry_rcu(type, &ip_set_type_list, list)
- if (STREQ(type->name, name) &&
+ if (STRNCMP(type->name, name) &&
(type->family == family ||
type->family == NFPROTO_UNSPEC)) {
found = true;
@@ -208,15 +210,15 @@ ip_set_type_register(struct ip_set_type *type)
pr_warn("ip_set type %s, family %s with revision min %u already registered!\n",
type->name, family_name(type->family),
type->revision_min);
- ret = -EINVAL;
- goto unlock;
+ ip_set_type_unlock();
+ return -EINVAL;
}
list_add_rcu(&type->list, &ip_set_type_list);
pr_debug("type %s, family %s, revision %u:%u registered.\n",
type->name, family_name(type->family),
type->revision_min, type->revision_max);
-unlock:
ip_set_type_unlock();
+
return ret;
}
EXPORT_SYMBOL_GPL(ip_set_type_register);
@@ -230,12 +232,12 @@ ip_set_type_unregister(struct ip_set_type *type)
pr_warn("ip_set type %s, family %s with revision min %u not registered\n",
type->name, family_name(type->family),
type->revision_min);
- goto unlock;
+ ip_set_type_unlock();
+ return;
}
list_del_rcu(&type->list);
pr_debug("type %s, family %s with revision min %u unregistered.\n",
type->name, family_name(type->family), type->revision_min);
-unlock:
ip_set_type_unlock();
synchronize_rcu();
@@ -289,7 +291,7 @@ static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
int
ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr)
{
- struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
+ struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1];
if (unlikely(!flag_nested(nla)))
return -IPSET_ERR_PROTOCOL;
@@ -306,7 +308,7 @@ EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
int
ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
{
- struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
+ struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1];
if (unlikely(!flag_nested(nla)))
return -IPSET_ERR_PROTOCOL;
@@ -317,7 +319,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
return -IPSET_ERR_PROTOCOL;
memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
- sizeof(struct in6_addr));
+ sizeof(struct in6_addr));
return 0;
}
EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
@@ -365,7 +367,7 @@ size_t
ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
{
enum ip_set_ext_id id;
- size_t offset = 0;
+ size_t offset = len;
u32 cadt_flags = 0;
if (tb[IPSET_ATTR_CADT_FLAGS])
@@ -375,12 +377,12 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
if (!add_extension(id, cadt_flags, tb))
continue;
- offset += ALIGN(len + offset, ip_set_extensions[id].align);
+ offset = ALIGN(offset, ip_set_extensions[id].align);
set->offset[id] = offset;
set->extensions |= ip_set_extensions[id].type;
offset += ip_set_extensions[id].len;
}
- return len + offset;
+ return offset;
}
EXPORT_SYMBOL_GPL(ip_set_elem_len);
@@ -389,13 +391,22 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext *ext)
{
u64 fullmark;
+
+ if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ return -IPSET_ERR_PROTOCOL;
+
if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!(set->extensions & IPSET_EXT_TIMEOUT))
+ if (!SET_WITH_TIMEOUT(set))
return -IPSET_ERR_TIMEOUT;
ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
}
if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) {
- if (!(set->extensions & IPSET_EXT_COUNTER))
+ if (!SET_WITH_COUNTER(set))
return -IPSET_ERR_COUNTER;
if (tb[IPSET_ATTR_BYTES])
ext->bytes = be64_to_cpu(nla_get_be64(
@@ -405,25 +416,25 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
tb[IPSET_ATTR_PACKETS]));
}
if (tb[IPSET_ATTR_COMMENT]) {
- if (!(set->extensions & IPSET_EXT_COMMENT))
+ if (!SET_WITH_COMMENT(set))
return -IPSET_ERR_COMMENT;
ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
}
if (tb[IPSET_ATTR_SKBMARK]) {
- if (!(set->extensions & IPSET_EXT_SKBINFO))
+ if (!SET_WITH_SKBINFO(set))
return -IPSET_ERR_SKBINFO;
fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
ext->skbmark = fullmark >> 32;
ext->skbmarkmask = fullmark & 0xffffffff;
}
if (tb[IPSET_ATTR_SKBPRIO]) {
- if (!(set->extensions & IPSET_EXT_SKBINFO))
+ if (!SET_WITH_SKBINFO(set))
return -IPSET_ERR_SKBINFO;
ext->skbprio = be32_to_cpu(nla_get_be32(
tb[IPSET_ATTR_SKBPRIO]));
}
if (tb[IPSET_ATTR_SKBQUEUE]) {
- if (!(set->extensions & IPSET_EXT_SKBINFO))
+ if (!SET_WITH_SKBINFO(set))
return -IPSET_ERR_SKBINFO;
ext->skbqueue = be16_to_cpu(nla_get_be16(
tb[IPSET_ATTR_SKBQUEUE]));
@@ -432,8 +443,32 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
}
EXPORT_SYMBOL_GPL(ip_set_get_extensions);
-/*
- * Creating/destroying/renaming/swapping affect the existence and
+int
+ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
+ const void *e, bool active)
+{
+ if (SET_WITH_TIMEOUT(set)) {
+ unsigned long *timeout = ext_timeout(e, set);
+
+ if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
+ htonl(active ? ip_set_timeout_get(timeout)
+ : *timeout)))
+ return -EMSGSIZE;
+ }
+ if (SET_WITH_COUNTER(set) &&
+ ip_set_put_counter(skb, ext_counter(e, set)))
+ return -EMSGSIZE;
+ if (SET_WITH_COMMENT(set) &&
+ ip_set_put_comment(skb, ext_comment(e, set)))
+ return -EMSGSIZE;
+ if (SET_WITH_SKBINFO(set) &&
+ ip_set_put_skbinfo(skb, ext_skbinfo(e, set)))
+ return -EMSGSIZE;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip_set_put_extensions);
+
+/* Creating/destroying/renaming/swapping affect the existence and
* the properties of a set. All of these can be executed from userspace
* only and serialized by the nfnl mutex indirectly from nfnetlink.
*
@@ -460,8 +495,7 @@ __ip_set_put(struct ip_set *set)
write_unlock_bh(&ip_set_ref_lock);
}
-/*
- * Add, del and test set entries from kernel.
+/* Add, del and test set entries from kernel.
*
* The set behind the index must exist and must be referenced
* so it can't be destroyed (or changed) under our foot.
@@ -489,23 +523,23 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
dev_net(par->in ? par->in : par->out), index);
int ret = 0;
- BUG_ON(set == NULL);
+ BUG_ON(!set);
pr_debug("set %s, index %u\n", set->name, index);
if (opt->dim < set->type->dimension ||
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return 0;
- read_lock_bh(&set->lock);
+ rcu_read_lock_bh();
ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
- read_unlock_bh(&set->lock);
+ rcu_read_unlock_bh();
if (ret == -EAGAIN) {
/* Type requests element to be completed */
pr_debug("element must be completed, ADD is triggered\n");
- write_lock_bh(&set->lock);
+ spin_lock_bh(&set->lock);
set->variant->kadt(set, skb, par, IPSET_ADD, opt);
- write_unlock_bh(&set->lock);
+ spin_unlock_bh(&set->lock);
ret = 1;
} else {
/* --return-nomatch: invert matched element */
@@ -528,16 +562,16 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
dev_net(par->in ? par->in : par->out), index);
int ret;
- BUG_ON(set == NULL);
+ BUG_ON(!set);
pr_debug("set %s, index %u\n", set->name, index);
if (opt->dim < set->type->dimension ||
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return -IPSET_ERR_TYPE_MISMATCH;
- write_lock_bh(&set->lock);
+ spin_lock_bh(&set->lock);
ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
- write_unlock_bh(&set->lock);
+ spin_unlock_bh(&set->lock);
return ret;
}
@@ -551,23 +585,22 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
dev_net(par->in ? par->in : par->out), index);
int ret = 0;
- BUG_ON(set == NULL);
+ BUG_ON(!set);
pr_debug("set %s, index %u\n", set->name, index);
if (opt->dim < set->type->dimension ||
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return -IPSET_ERR_TYPE_MISMATCH;
- write_lock_bh(&set->lock);
+ spin_lock_bh(&set->lock);
ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
- write_unlock_bh(&set->lock);
+ spin_unlock_bh(&set->lock);
return ret;
}
EXPORT_SYMBOL_GPL(ip_set_del);
-/*
- * Find set by name, reference it once. The reference makes sure the
+/* Find set by name, reference it once. The reference makes sure the
* thing pointed to, does not go away under our feet.
*
*/
@@ -581,7 +614,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
rcu_read_lock();
for (i = 0; i < inst->ip_set_max; i++) {
s = rcu_dereference(inst->ip_set_list)[i];
- if (s != NULL && STREQ(s->name, name)) {
+ if (s && STRNCMP(s->name, name)) {
__ip_set_get(s);
index = i;
*set = s;
@@ -594,8 +627,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
}
EXPORT_SYMBOL_GPL(ip_set_get_byname);
-/*
- * If the given set pointer points to a valid set, decrement
+/* If the given set pointer points to a valid set, decrement
* reference count by 1. The caller shall not assume the index
* to be valid, after calling this function.
*
@@ -608,7 +640,7 @@ __ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index)
rcu_read_lock();
set = rcu_dereference(inst->ip_set_list)[index];
- if (set != NULL)
+ if (set)
__ip_set_put(set);
rcu_read_unlock();
}
@@ -622,8 +654,7 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index)
}
EXPORT_SYMBOL_GPL(ip_set_put_byindex);
-/*
- * Get the name of a set behind a set index.
+/* Get the name of a set behind a set index.
* We assume the set is referenced, so it does exist and
* can't be destroyed. The set cannot be renamed due to
* the referencing either.
@@ -634,7 +665,7 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index)
{
const struct ip_set *set = ip_set_rcu_get(net, index);
- BUG_ON(set == NULL);
+ BUG_ON(!set);
BUG_ON(set->ref == 0);
/* Referenced, so it's safe */
@@ -642,13 +673,11 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index)
}
EXPORT_SYMBOL_GPL(ip_set_name_byindex);
-/*
- * Routines to call by external subsystems, which do not
+/* Routines to call by external subsystems, which do not
* call nfnl_lock for us.
*/
-/*
- * Find set by index, reference it once. The reference makes sure the
+/* Find set by index, reference it once. The reference makes sure the
* thing pointed to, does not go away under our feet.
*
* The nfnl mutex is used in the function.
@@ -674,8 +703,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index)
}
EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
-/*
- * If the given set pointer points to a valid set, decrement
+/* If the given set pointer points to a valid set, decrement
* reference count by 1. The caller shall not assume the index
* to be valid, after calling this function.
*
@@ -690,15 +718,14 @@ ip_set_nfnl_put(struct net *net, ip_set_id_t index)
nfnl_lock(NFNL_SUBSYS_IPSET);
if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
set = ip_set(inst, index);
- if (set != NULL)
+ if (set)
__ip_set_put(set);
}
nfnl_unlock(NFNL_SUBSYS_IPSET);
}
EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
-/*
- * Communication protocol with userspace over netlink.
+/* Communication protocol with userspace over netlink.
*
* The commands are serialized by the nfnl mutex.
*/
@@ -725,7 +752,7 @@ start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ if (!nlh)
return NULL;
nfmsg = nlmsg_data(nlh);
@@ -758,7 +785,7 @@ find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id)
*id = IPSET_INVALID_ID;
for (i = 0; i < inst->ip_set_max; i++) {
set = ip_set(inst, i);
- if (set != NULL && STREQ(set->name, name)) {
+ if (set && STRNCMP(set->name, name)) {
*id = i;
break;
}
@@ -784,10 +811,10 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
*index = IPSET_INVALID_ID;
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
- if (s == NULL) {
+ if (!s) {
if (*index == IPSET_INVALID_ID)
*index = i;
- } else if (STREQ(name, s->name)) {
+ } else if (STRNCMP(name, s->name)) {
/* Name clash */
*set = s;
return -EEXIST;
@@ -816,18 +843,18 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set, *clash = NULL;
ip_set_id_t index = IPSET_INVALID_ID;
- struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
+ struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1] = {};
const char *name, *typename;
u8 family, revision;
u32 flags = flag_exist(nlh);
int ret = 0;
if (unlikely(protocol_failed(attr) ||
- attr[IPSET_ATTR_SETNAME] == NULL ||
- attr[IPSET_ATTR_TYPENAME] == NULL ||
- attr[IPSET_ATTR_REVISION] == NULL ||
- attr[IPSET_ATTR_FAMILY] == NULL ||
- (attr[IPSET_ATTR_DATA] != NULL &&
+ !attr[IPSET_ATTR_SETNAME] ||
+ !attr[IPSET_ATTR_TYPENAME] ||
+ !attr[IPSET_ATTR_REVISION] ||
+ !attr[IPSET_ATTR_FAMILY] ||
+ (attr[IPSET_ATTR_DATA] &&
!flag_nested(attr[IPSET_ATTR_DATA]))))
return -IPSET_ERR_PROTOCOL;
@@ -838,33 +865,29 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
name, typename, family_name(family), revision);
- /*
- * First, and without any locks, allocate and initialize
+ /* First, and without any locks, allocate and initialize
* a normal base set structure.
*/
- set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
+ set = kzalloc(sizeof(*set), GFP_KERNEL);
if (!set)
return -ENOMEM;
- rwlock_init(&set->lock);
+ spin_lock_init(&set->lock);
strlcpy(set->name, name, IPSET_MAXNAMELEN);
set->family = family;
set->revision = revision;
- /*
- * Next, check that we know the type, and take
+ /* Next, check that we know the type, and take
* a reference on the type, to make sure it stays available
* while constructing our new set.
*
* After referencing the type, we try to create the type
* specific part of the set without holding any locks.
*/
- ret = find_set_type_get(typename, family, revision, &(set->type));
+ ret = find_set_type_get(typename, family, revision, &set->type);
if (ret)
goto out;
- /*
- * Without holding any locks, create private part.
- */
+ /* Without holding any locks, create private part. */
if (attr[IPSET_ATTR_DATA] &&
nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
set->type->create_policy)) {
@@ -878,8 +901,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
/* BTW, ret==0 here. */
- /*
- * Here, we have a valid, constructed set and we are protected
+ /* Here, we have a valid, constructed set and we are protected
* by the nfnl mutex. Find the first free index in ip_set_list
* and check clashing.
*/
@@ -887,7 +909,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
if (ret == -EEXIST) {
/* If this is the same set and requested, ignore error */
if ((flags & IPSET_FLAG_EXIST) &&
- STREQ(set->type->name, clash->type->name) &&
+ STRNCMP(set->type->name, clash->type->name) &&
set->type->family == clash->type->family &&
set->type->revision_min == clash->type->revision_min &&
set->type->revision_max == clash->type->revision_max &&
@@ -902,7 +924,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
/* Wraparound */
goto cleanup;
- list = kzalloc(sizeof(struct ip_set *) * i, GFP_KERNEL);
+ list = kcalloc(i, sizeof(struct ip_set *), GFP_KERNEL);
if (!list)
goto cleanup;
/* nfnl mutex is held, both lists are valid */
@@ -916,12 +938,11 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
inst->ip_set_max = i;
kfree(tmp);
ret = 0;
- } else if (ret)
+ } else if (ret) {
goto cleanup;
+ }
- /*
- * Finally! Add our shiny new set to the list, and be done.
- */
+ /* Finally! Add our shiny new set to the list, and be done. */
pr_debug("create: '%s' created with index %u!\n", set->name, index);
ip_set(inst, index) = set;
@@ -946,12 +967,9 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
};
static void
-ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index)
+ip_set_destroy_set(struct ip_set *set)
{
- struct ip_set *set = ip_set(inst, index);
-
pr_debug("set: %s\n", set->name);
- ip_set(inst, index) = NULL;
/* Must call it without holding any lock */
set->variant->destroy(set);
@@ -986,30 +1004,36 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
if (!attr[IPSET_ATTR_SETNAME]) {
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
- if (s != NULL && s->ref) {
+ if (s && s->ref) {
ret = -IPSET_ERR_BUSY;
goto out;
}
}
+ inst->is_destroyed = true;
read_unlock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
- if (s != NULL)
- ip_set_destroy_set(inst, i);
+ if (s) {
+ ip_set(inst, i) = NULL;
+ ip_set_destroy_set(s);
+ }
}
+ /* Modified by ip_set_destroy() only, which is serialized */
+ inst->is_destroyed = false;
} else {
s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
&i);
- if (s == NULL) {
+ if (!s) {
ret = -ENOENT;
goto out;
} else if (s->ref) {
ret = -IPSET_ERR_BUSY;
goto out;
}
+ ip_set(inst, i) = NULL;
read_unlock_bh(&ip_set_ref_lock);
- ip_set_destroy_set(inst, i);
+ ip_set_destroy_set(s);
}
return 0;
out:
@@ -1024,9 +1048,9 @@ ip_set_flush_set(struct ip_set *set)
{
pr_debug("set: %s\n", set->name);
- write_lock_bh(&set->lock);
+ spin_lock_bh(&set->lock);
set->variant->flush(set);
- write_unlock_bh(&set->lock);
+ spin_unlock_bh(&set->lock);
}
static int
@@ -1044,12 +1068,12 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
if (!attr[IPSET_ATTR_SETNAME]) {
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
- if (s != NULL)
+ if (s)
ip_set_flush_set(s);
}
} else {
s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
- if (s == NULL)
+ if (!s)
return -ENOENT;
ip_set_flush_set(s);
@@ -1081,12 +1105,12 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
int ret = 0;
if (unlikely(protocol_failed(attr) ||
- attr[IPSET_ATTR_SETNAME] == NULL ||
- attr[IPSET_ATTR_SETNAME2] == NULL))
+ !attr[IPSET_ATTR_SETNAME] ||
+ !attr[IPSET_ATTR_SETNAME2]))
return -IPSET_ERR_PROTOCOL;
set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
- if (set == NULL)
+ if (!set)
return -ENOENT;
read_lock_bh(&ip_set_ref_lock);
@@ -1098,7 +1122,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
- if (s != NULL && STREQ(s->name, name2)) {
+ if (s && STRNCMP(s->name, name2)) {
ret = -IPSET_ERR_EXIST_SETNAME2;
goto out;
}
@@ -1130,23 +1154,24 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
char from_name[IPSET_MAXNAMELEN];
if (unlikely(protocol_failed(attr) ||
- attr[IPSET_ATTR_SETNAME] == NULL ||
- attr[IPSET_ATTR_SETNAME2] == NULL))
+ !attr[IPSET_ATTR_SETNAME] ||
+ !attr[IPSET_ATTR_SETNAME2]))
return -IPSET_ERR_PROTOCOL;
from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
&from_id);
- if (from == NULL)
+ if (!from)
return -ENOENT;
to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]),
&to_id);
- if (to == NULL)
+ if (!to)
return -IPSET_ERR_EXIST_SETNAME2;
/* Features must not change.
- * Not an artificial restriction anymore, as we must prevent
- * possible loops created by swapping in setlist type of sets. */
+ * Not an artifical restriction anymore, as we must prevent
+ * possible loops created by swapping in setlist type of sets.
+ */
if (!(from->type->features == to->type->features &&
from->family == to->family))
return -IPSET_ERR_TYPE_MISMATCH;
@@ -1177,12 +1202,16 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
static int
ip_set_dump_done(struct netlink_callback *cb)
{
- struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET];
if (cb->args[IPSET_CB_ARG0]) {
- pr_debug("release set %s\n",
- ip_set(inst, cb->args[IPSET_CB_INDEX])->name);
- __ip_set_put_byindex(inst,
- (ip_set_id_t) cb->args[IPSET_CB_INDEX]);
+ struct ip_set_net *inst =
+ (struct ip_set_net *)cb->args[IPSET_CB_NET];
+ ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
+ struct ip_set *set = ip_set(inst, index);
+
+ if (set->variant->uref)
+ set->variant->uref(set, cb, false);
+ pr_debug("release set %s\n", set->name);
+ __ip_set_put_byindex(inst, index);
}
return 0;
}
@@ -1204,7 +1233,7 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
{
struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
- struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
+ struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1];
struct nlattr *attr = (void *)nlh + min_len;
u32 dump_type;
ip_set_id_t index;
@@ -1213,27 +1242,23 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
nla_parse(cda, IPSET_ATTR_CMD_MAX,
attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
- /* cb->args[IPSET_CB_NET]: net namespace
- * [IPSET_CB_DUMP]: dump single set/all sets
- * [IPSET_CB_INDEX]: set index
- * [IPSET_CB_ARG0]: type specific
- */
-
if (cda[IPSET_ATTR_SETNAME]) {
struct ip_set *set;
set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]),
&index);
- if (set == NULL)
+ if (!set)
return -ENOENT;
dump_type = DUMP_ONE;
cb->args[IPSET_CB_INDEX] = index;
- } else
+ } else {
dump_type = DUMP_ALL;
+ }
if (cda[IPSET_ATTR_FLAGS]) {
u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
+
dump_type |= (f << 16);
}
cb->args[IPSET_CB_NET] = (unsigned long)inst;
@@ -1251,6 +1276,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk));
u32 dump_type, dump_flags;
+ bool is_destroyed;
int ret = 0;
if (!cb->args[IPSET_CB_DUMP]) {
@@ -1258,7 +1284,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
if (ret < 0) {
nlh = nlmsg_hdr(cb->skb);
/* We have to create and send the error message
- * manually :-( */
+ * manually :-(
+ */
if (nlh->nlmsg_flags & NLM_F_ACK)
netlink_ack(cb->skb, nlh, ret);
return ret;
@@ -1276,13 +1303,21 @@ dump_last:
pr_debug("dump type, flag: %u %u index: %ld\n",
dump_type, dump_flags, cb->args[IPSET_CB_INDEX]);
for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) {
- index = (ip_set_id_t) cb->args[IPSET_CB_INDEX];
+ index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
+ write_lock_bh(&ip_set_ref_lock);
set = ip_set(inst, index);
- if (set == NULL) {
+ is_destroyed = inst->is_destroyed;
+ if (!set || is_destroyed) {
+ write_unlock_bh(&ip_set_ref_lock);
if (dump_type == DUMP_ONE) {
ret = -ENOENT;
goto out;
}
+ if (is_destroyed) {
+ /* All sets are just being destroyed */
+ ret = 0;
+ goto out;
+ }
continue;
}
/* When dumping all sets, we must dump "sorted"
@@ -1290,14 +1325,17 @@ dump_last:
*/
if (dump_type != DUMP_ONE &&
((dump_type == DUMP_ALL) ==
- !!(set->type->features & IPSET_DUMP_LAST)))
+ !!(set->type->features & IPSET_DUMP_LAST))) {
+ write_unlock_bh(&ip_set_ref_lock);
continue;
+ }
pr_debug("List set: %s\n", set->name);
if (!cb->args[IPSET_CB_ARG0]) {
/* Start listing: make sure set won't be destroyed */
pr_debug("reference set\n");
- __ip_set_get(set);
+ set->ref++;
}
+ write_unlock_bh(&ip_set_ref_lock);
nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, flags,
IPSET_CMD_LIST);
@@ -1325,11 +1363,13 @@ dump_last:
goto release_refcount;
if (dump_flags & IPSET_FLAG_LIST_HEADER)
goto next_set;
+ if (set->variant->uref)
+ set->variant->uref(set, cb, true);
/* Fall through and add elements */
default:
- read_lock_bh(&set->lock);
+ rcu_read_lock_bh();
ret = set->variant->list(set, skb, cb);
- read_unlock_bh(&set->lock);
+ rcu_read_unlock_bh();
if (!cb->args[IPSET_CB_ARG0])
/* Set is done, proceed with next one */
goto next_set;
@@ -1341,6 +1381,8 @@ dump_last:
dump_type = DUMP_LAST;
cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16);
cb->args[IPSET_CB_INDEX] = 0;
+ if (set && set->variant->uref)
+ set->variant->uref(set, cb, false);
goto dump_last;
}
goto out;
@@ -1355,7 +1397,10 @@ next_set:
release_refcount:
/* If there was an error or set is done, release set */
if (ret || !cb->args[IPSET_CB_ARG0]) {
- pr_debug("release set %s\n", ip_set(inst, index)->name);
+ set = ip_set(inst, index);
+ if (set->variant->uref)
+ set->variant->uref(set, cb, false);
+ pr_debug("release set %s\n", set->name);
__ip_set_put_byindex(inst, index);
cb->args[IPSET_CB_ARG0] = 0;
}
@@ -1407,9 +1452,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
do {
- write_lock_bh(&set->lock);
+ spin_lock_bh(&set->lock);
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
- write_unlock_bh(&set->lock);
+ spin_unlock_bh(&set->lock);
retried = true;
} while (ret == -EAGAIN &&
set->variant->resize &&
@@ -1425,12 +1470,12 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
size_t payload = min(SIZE_MAX,
sizeof(*errmsg) + nlmsg_len(nlh));
int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
- struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
+ struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1];
struct nlattr *cmdattr;
u32 *errline;
skb2 = nlmsg_new(payload, GFP_KERNEL);
- if (skb2 == NULL)
+ if (!skb2)
return -ENOMEM;
rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
@@ -1447,7 +1492,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
*errline = lineno;
- netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+ netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
/* Signal netlink not to send its ACK/errmsg. */
return -EINTR;
}
@@ -1462,25 +1508,25 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
{
struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
struct ip_set *set;
- struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
+ struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
const struct nlattr *nla;
u32 flags = flag_exist(nlh);
bool use_lineno;
int ret = 0;
if (unlikely(protocol_failed(attr) ||
- attr[IPSET_ATTR_SETNAME] == NULL ||
+ !attr[IPSET_ATTR_SETNAME] ||
!((attr[IPSET_ATTR_DATA] != NULL) ^
(attr[IPSET_ATTR_ADT] != NULL)) ||
- (attr[IPSET_ATTR_DATA] != NULL &&
+ (attr[IPSET_ATTR_DATA] &&
!flag_nested(attr[IPSET_ATTR_DATA])) ||
- (attr[IPSET_ATTR_ADT] != NULL &&
+ (attr[IPSET_ATTR_ADT] &&
(!flag_nested(attr[IPSET_ATTR_ADT]) ||
- attr[IPSET_ATTR_LINENO] == NULL))))
+ !attr[IPSET_ATTR_LINENO]))))
return -IPSET_ERR_PROTOCOL;
set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
- if (set == NULL)
+ if (!set)
return -ENOENT;
use_lineno = !!attr[IPSET_ATTR_LINENO];
@@ -1517,25 +1563,25 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
{
struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
struct ip_set *set;
- struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
+ struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
const struct nlattr *nla;
u32 flags = flag_exist(nlh);
bool use_lineno;
int ret = 0;
if (unlikely(protocol_failed(attr) ||
- attr[IPSET_ATTR_SETNAME] == NULL ||
+ !attr[IPSET_ATTR_SETNAME] ||
!((attr[IPSET_ATTR_DATA] != NULL) ^
(attr[IPSET_ATTR_ADT] != NULL)) ||
- (attr[IPSET_ATTR_DATA] != NULL &&
+ (attr[IPSET_ATTR_DATA] &&
!flag_nested(attr[IPSET_ATTR_DATA])) ||
- (attr[IPSET_ATTR_ADT] != NULL &&
+ (attr[IPSET_ATTR_ADT] &&
(!flag_nested(attr[IPSET_ATTR_ADT]) ||
- attr[IPSET_ATTR_LINENO] == NULL))))
+ !attr[IPSET_ATTR_LINENO]))))
return -IPSET_ERR_PROTOCOL;
set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
- if (set == NULL)
+ if (!set)
return -ENOENT;
use_lineno = !!attr[IPSET_ATTR_LINENO];
@@ -1572,26 +1618,26 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
{
struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
struct ip_set *set;
- struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
+ struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
int ret = 0;
if (unlikely(protocol_failed(attr) ||
- attr[IPSET_ATTR_SETNAME] == NULL ||
- attr[IPSET_ATTR_DATA] == NULL ||
+ !attr[IPSET_ATTR_SETNAME] ||
+ !attr[IPSET_ATTR_DATA] ||
!flag_nested(attr[IPSET_ATTR_DATA])))
return -IPSET_ERR_PROTOCOL;
set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
- if (set == NULL)
+ if (!set)
return -ENOENT;
if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
set->type->adt_policy))
return -IPSET_ERR_PROTOCOL;
- read_lock_bh(&set->lock);
+ rcu_read_lock_bh();
ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0);
- read_unlock_bh(&set->lock);
+ rcu_read_unlock_bh();
/* Userspace can't trigger element to be re-added */
if (ret == -EAGAIN)
ret = 1;
@@ -1613,15 +1659,15 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
int ret = 0;
if (unlikely(protocol_failed(attr) ||
- attr[IPSET_ATTR_SETNAME] == NULL))
+ !attr[IPSET_ATTR_SETNAME]))
return -IPSET_ERR_PROTOCOL;
set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
- if (set == NULL)
+ if (!set)
return -ENOENT;
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (skb2 == NULL)
+ if (!skb2)
return -ENOMEM;
nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
@@ -1670,8 +1716,8 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb,
int ret = 0;
if (unlikely(protocol_failed(attr) ||
- attr[IPSET_ATTR_TYPENAME] == NULL ||
- attr[IPSET_ATTR_FAMILY] == NULL))
+ !attr[IPSET_ATTR_TYPENAME] ||
+ !attr[IPSET_ATTR_FAMILY]))
return -IPSET_ERR_PROTOCOL;
family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
@@ -1681,7 +1727,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb,
return ret;
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (skb2 == NULL)
+ if (!skb2)
return -ENOMEM;
nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
@@ -1726,11 +1772,11 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
struct nlmsghdr *nlh2;
int ret = 0;
- if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL))
+ if (unlikely(!attr[IPSET_ATTR_PROTOCOL]))
return -IPSET_ERR_PROTOCOL;
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (skb2 == NULL)
+ if (!skb2)
return -ENOMEM;
nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
@@ -1858,7 +1904,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
ret = -EFAULT;
goto done;
}
- op = (unsigned int *) data;
+ op = (unsigned int *)data;
if (*op < IP_SET_OP_VERSION) {
/* Check the version at the beginning of operations */
@@ -1970,10 +2016,11 @@ ip_set_net_init(struct net *net)
if (inst->ip_set_max >= IPSET_INVALID_ID)
inst->ip_set_max = IPSET_INVALID_ID - 1;
- list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL);
+ list = kcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL);
if (!list)
return -ENOMEM;
- inst->is_deleted = 0;
+ inst->is_deleted = false;
+ inst->is_destroyed = false;
rcu_assign_pointer(inst->ip_set_list, list);
return 0;
}
@@ -1986,12 +2033,14 @@ ip_set_net_exit(struct net *net)
struct ip_set *set = NULL;
ip_set_id_t i;
- inst->is_deleted = 1; /* flag for ip_set_nfnl_put */
+ inst->is_deleted = true; /* flag for ip_set_nfnl_put */
for (i = 0; i < inst->ip_set_max; i++) {
set = ip_set(inst, i);
- if (set != NULL)
- ip_set_destroy_set(inst, i);
+ if (set) {
+ ip_set(inst, i) = NULL;
+ ip_set_destroy_set(set);
+ }
}
kfree(rcu_dereference_protected(inst->ip_set_list, 1));
}
@@ -2003,11 +2052,11 @@ static struct pernet_operations ip_set_net_ops = {
.size = sizeof(struct ip_set_net)
};
-
static int __init
ip_set_init(void)
{
int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
+
if (ret != 0) {
pr_err("ip_set: cannot register with nfnetlink.\n");
return ret;
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 29fb01ddf..42c3e3ba1 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -30,7 +30,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
const struct tcphdr *th;
th = skb_header_pointer(skb, protooff, sizeof(_tcph), &_tcph);
- if (th == NULL)
+ if (!th)
/* No choice either */
return false;
@@ -42,7 +42,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
const sctp_sctphdr_t *sh;
sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh);
- if (sh == NULL)
+ if (!sh)
/* No choice either */
return false;
@@ -55,7 +55,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
const struct udphdr *uh;
uh = skb_header_pointer(skb, protooff, sizeof(_udph), &_udph);
- if (uh == NULL)
+ if (!uh)
/* No choice either */
return false;
@@ -67,7 +67,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
const struct icmphdr *ic;
ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich);
- if (ic == NULL)
+ if (!ic)
return false;
*port = (__force __be16)htons((ic->type << 8) | ic->code);
@@ -78,7 +78,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
const struct icmp6hdr *ic;
ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich);
- if (ic == NULL)
+ if (!ic)
return false;
*port = (__force __be16)
@@ -98,7 +98,7 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
__be16 *port, u8 *proto)
{
const struct iphdr *iph = ip_hdr(skb);
- unsigned int protooff = ip_hdrlen(skb);
+ unsigned int protooff = skb_network_offset(skb) + ip_hdrlen(skb);
int protocol = iph->protocol;
/* See comments at tcp_match in ip_tables.c */
@@ -116,7 +116,8 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
return false;
default:
/* Other protocols doesn't have ports,
- so we can match fragments */
+ * so we can match fragments.
+ */
*proto = protocol;
return true;
}
@@ -135,7 +136,9 @@ ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
__be16 frag_off = 0;
nexthdr = ipv6_hdr(skb)->nexthdr;
- protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+ protoff = ipv6_skip_exthdr(skb,
+ skb_network_offset(skb) +
+ sizeof(struct ipv6hdr), &nexthdr,
&frag_off);
if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
return false;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 974ff386d..afe905c20 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -10,19 +10,19 @@
#include <linux/rcupdate.h>
#include <linux/jhash.h>
+#include <linux/types.h>
#include <linux/netfilter/ipset/ip_set_timeout.h>
-#ifndef rcu_dereference_bh
-#define rcu_dereference_bh(p) rcu_dereference(p)
-#endif
+
+#define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c)
+#define ipset_dereference_protected(p, set) \
+ __ipset_dereference_protected(p, spin_is_locked(&(set)->lock))
#define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1)
/* Hashing which uses arrays to resolve clashing. The hash table is resized
* (doubled) when searching becomes too long.
* Internally jhash is used with the assumption that the size of the
- * stored data is a multiple of sizeof(u32). If storage supports timeout,
- * the timeout field must be the last one in the data structure - that field
- * is ignored when computing the hash key.
+ * stored data is a multiple of sizeof(u32).
*
* Readers and resizing
*
@@ -35,7 +35,9 @@
/* Number of elements to store in an initial array block */
#define AHASH_INIT_SIZE 4
/* Max number of elements to store in an array block */
-#define AHASH_MAX_SIZE (3*AHASH_INIT_SIZE)
+#define AHASH_MAX_SIZE (3 * AHASH_INIT_SIZE)
+/* Max muber of elements in the array block when tuned */
+#define AHASH_MAX_TUNED 64
/* Max number of elements can be tuned */
#ifdef IP_SET_HASH_WITH_MULTI
@@ -53,8 +55,9 @@ tune_ahash_max(u8 curr, u32 multi)
/* Currently, at listing one hash bucket must fit into a message.
* Therefore we have a hard limit here.
*/
- return n > curr && n <= 64 ? n : curr;
+ return n > curr && n <= AHASH_MAX_TUNED ? n : curr;
}
+
#define TUNE_AHASH_MAX(h, multi) \
((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
#else
@@ -64,18 +67,23 @@ tune_ahash_max(u8 curr, u32 multi)
/* A hash bucket */
struct hbucket {
- void *value; /* the array of the values */
+ struct rcu_head rcu; /* for call_rcu_bh */
+ /* Which positions are used in the array */
+ DECLARE_BITMAP(used, AHASH_MAX_TUNED);
u8 size; /* size of the array */
u8 pos; /* position of the first free entry */
-};
+ unsigned char value[0]; /* the array of the values */
+} __attribute__ ((aligned));
/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
+ atomic_t ref; /* References for resizing */
+ atomic_t uref; /* References for dumping */
u8 htable_bits; /* size of hash table == 2^htable_bits */
- struct hbucket bucket[0]; /* hashtable buckets */
+ struct hbucket __rcu *bucket[0]; /* hashtable buckets */
};
-#define hbucket(h, i) (&((h)->bucket[i]))
+#define hbucket(h, i) ((h)->bucket[i])
#ifndef IPSET_NET_COUNT
#define IPSET_NET_COUNT 1
@@ -83,8 +91,8 @@ struct htable {
/* Book-keeping of the prefixes added to the set */
struct net_prefixes {
- u32 nets[IPSET_NET_COUNT]; /* number of elements per cidr */
- u8 cidr[IPSET_NET_COUNT]; /* the different cidr values in the set */
+ u32 nets[IPSET_NET_COUNT]; /* number of elements for this cidr */
+ u8 cidr[IPSET_NET_COUNT]; /* the cidr value */
};
/* Compute the hash table size */
@@ -97,11 +105,11 @@ htable_size(u8 hbits)
if (hbits > 31)
return 0;
hsize = jhash_size(hbits);
- if ((((size_t)-1) - sizeof(struct htable))/sizeof(struct hbucket)
+ if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
< hsize)
return 0;
- return hsize * sizeof(struct hbucket) + sizeof(struct htable);
+ return hsize * sizeof(struct hbucket *) + sizeof(struct htable);
}
/* Compute htable_bits from the user input parameter hashsize */
@@ -110,6 +118,7 @@ htable_bits(u32 hashsize)
{
/* Assume that hashsize == 2^htable_bits */
u8 bits = fls(hashsize - 1);
+
if (jhash_size(bits) != hashsize)
/* Round up to the first 2^n value */
bits = fls(hashsize);
@@ -117,30 +126,6 @@ htable_bits(u32 hashsize)
return bits;
}
-static int
-hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
-{
- if (n->pos >= n->size) {
- void *tmp;
-
- if (n->size >= ahash_max)
- /* Trigger rehashing */
- return -EAGAIN;
-
- tmp = kzalloc((n->size + AHASH_INIT_SIZE) * dsize,
- GFP_ATOMIC);
- if (!tmp)
- return -ENOMEM;
- if (n->size) {
- memcpy(tmp, n->value, n->size * dsize);
- kfree(n->value);
- }
- n->value = tmp;
- n->size += AHASH_INIT_SIZE;
- }
- return 0;
-}
-
#ifdef IP_SET_HASH_WITH_NETS
#if IPSET_NET_COUNT > 1
#define __CIDR(cidr, i) (cidr[i])
@@ -149,17 +134,21 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#endif
/* cidr + 1 is stored in net_prefixes to support /0 */
-#define SCIDR(cidr, i) (__CIDR(cidr, i) + 1)
+#define NCIDR_PUT(cidr) ((cidr) + 1)
+#define NCIDR_GET(cidr) ((cidr) - 1)
#ifdef IP_SET_HASH_WITH_NETS_PACKED
/* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */
-#define GCIDR(cidr, i) (__CIDR(cidr, i) + 1)
-#define NCIDR(cidr) (cidr)
+#define DCIDR_PUT(cidr) ((cidr) - 1)
+#define DCIDR_GET(cidr, i) (__CIDR(cidr, i) + 1)
#else
-#define GCIDR(cidr, i) (__CIDR(cidr, i))
-#define NCIDR(cidr) (cidr - 1)
+#define DCIDR_PUT(cidr) (cidr)
+#define DCIDR_GET(cidr, i) __CIDR(cidr, i)
#endif
+#define INIT_CIDR(cidr, host_mask) \
+ DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask))
+
#define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128)
#ifdef IP_SET_HASH_WITH_NET0
@@ -180,6 +169,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#undef mtype_data_equal
#undef mtype_do_data_match
#undef mtype_data_set_flags
+#undef mtype_data_reset_elem
#undef mtype_data_reset_flags
#undef mtype_data_netmask
#undef mtype_data_list
@@ -193,7 +183,6 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#undef mtype_ahash_memsize
#undef mtype_flush
#undef mtype_destroy
-#undef mtype_gc_init
#undef mtype_same_set
#undef mtype_kadt
#undef mtype_uadt
@@ -203,6 +192,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#undef mtype_del
#undef mtype_test_cidrs
#undef mtype_test
+#undef mtype_uref
#undef mtype_expire
#undef mtype_resize
#undef mtype_head
@@ -227,6 +217,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#define mtype_data_list IPSET_TOKEN(MTYPE, _data_list)
#define mtype_data_next IPSET_TOKEN(MTYPE, _data_next)
#define mtype_elem IPSET_TOKEN(MTYPE, _elem)
+
#define mtype_ahash_destroy IPSET_TOKEN(MTYPE, _ahash_destroy)
#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup)
#define mtype_add_cidr IPSET_TOKEN(MTYPE, _add_cidr)
@@ -234,7 +225,6 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#define mtype_ahash_memsize IPSET_TOKEN(MTYPE, _ahash_memsize)
#define mtype_flush IPSET_TOKEN(MTYPE, _flush)
#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy)
-#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set)
#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt)
#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt)
@@ -244,23 +234,36 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#define mtype_del IPSET_TOKEN(MTYPE, _del)
#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs)
#define mtype_test IPSET_TOKEN(MTYPE, _test)
+#define mtype_uref IPSET_TOKEN(MTYPE, _uref)
#define mtype_expire IPSET_TOKEN(MTYPE, _expire)
#define mtype_resize IPSET_TOKEN(MTYPE, _resize)
#define mtype_head IPSET_TOKEN(MTYPE, _head)
#define mtype_list IPSET_TOKEN(MTYPE, _list)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
+#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)
+#ifndef MTYPE
+#error "MTYPE is not defined!"
+#endif
+
+#ifndef HOST_MASK
+#error "HOST_MASK is not defined!"
+#endif
+
#ifndef HKEY_DATALEN
#define HKEY_DATALEN sizeof(struct mtype_elem)
#endif
#define HKEY(data, initval, htable_bits) \
-(jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval) \
+(jhash2((u32 *)(data), HKEY_DATALEN / sizeof(u32), initval) \
& jhash_mask(htable_bits))
#ifndef htype
+#ifndef HTYPE
+#error "HTYPE is not defined!"
+#endif /* HTYPE */
#define htype HTYPE
/* The generic hash structure */
@@ -280,18 +283,16 @@ struct htype {
#ifdef IP_SET_HASH_WITH_NETMASK
u8 netmask; /* netmask value for subnets to store */
#endif
-#ifdef IP_SET_HASH_WITH_RBTREE
- struct rb_root rbtree;
-#endif
#ifdef IP_SET_HASH_WITH_NETS
struct net_prefixes nets[0]; /* book-keeping of prefixes */
#endif
};
-#endif
+#endif /* htype */
#ifdef IP_SET_HASH_WITH_NETS
/* Network cidr size book keeping when the hash stores different
- * sized networks */
+ * sized networks. cidr == real cidr + 1 to support /0.
+ */
static void
mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
{
@@ -299,11 +300,11 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
/* Add in increasing prefix order, so larger cidr first */
for (i = 0, j = -1; i < nets_length && h->nets[i].cidr[n]; i++) {
- if (j != -1)
+ if (j != -1) {
continue;
- else if (h->nets[i].cidr[n] < cidr)
+ } else if (h->nets[i].cidr[n] < cidr) {
j = i;
- else if (h->nets[i].cidr[n] == cidr) {
+ } else if (h->nets[i].cidr[n] == cidr) {
h->nets[cidr - 1].nets[n]++;
return;
}
@@ -322,15 +323,15 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
u8 i, j, net_end = nets_length - 1;
for (i = 0; i < nets_length; i++) {
- if (h->nets[i].cidr[n] != cidr)
- continue;
- h->nets[cidr -1].nets[n]--;
- if (h->nets[cidr -1].nets[n] > 0)
- return;
+ if (h->nets[i].cidr[n] != cidr)
+ continue;
+ h->nets[cidr - 1].nets[n]--;
+ if (h->nets[cidr - 1].nets[n] > 0)
+ return;
for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
- h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
+ h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
h->nets[j].cidr[n] = 0;
- return;
+ return;
}
}
#endif
@@ -341,15 +342,18 @@ mtype_ahash_memsize(const struct htype *h, const struct htable *t,
u8 nets_length, size_t dsize)
{
u32 i;
- size_t memsize = sizeof(*h)
- + sizeof(*t)
+ struct hbucket *n;
+ size_t memsize = sizeof(*h) + sizeof(*t);
+
#ifdef IP_SET_HASH_WITH_NETS
- + sizeof(struct net_prefixes) * nets_length
+ memsize += sizeof(struct net_prefixes) * nets_length;
#endif
- + jhash_size(t->htable_bits) * sizeof(struct hbucket);
-
- for (i = 0; i < jhash_size(t->htable_bits); i++)
- memsize += t->bucket[i].size * dsize;
+ for (i = 0; i < jhash_size(t->htable_bits); i++) {
+ n = rcu_dereference_bh(hbucket(t, i));
+ if (!n)
+ continue;
+ memsize += sizeof(struct hbucket) + n->size * dsize;
+ }
return memsize;
}
@@ -364,7 +368,8 @@ mtype_ext_cleanup(struct ip_set *set, struct hbucket *n)
int i;
for (i = 0; i < n->pos; i++)
- ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
+ if (test_bit(i, n->used))
+ ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
}
/* Flush a hash type of set: destroy all elements */
@@ -376,16 +381,16 @@ mtype_flush(struct ip_set *set)
struct hbucket *n;
u32 i;
- t = rcu_dereference_bh_nfnl(h->table);
+ t = ipset_dereference_protected(h->table, set);
for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = hbucket(t, i);
- if (n->size) {
- if (set->extensions & IPSET_EXT_DESTROY)
- mtype_ext_cleanup(set, n);
- n->size = n->pos = 0;
- /* FIXME: use slab cache */
- kfree(n->value);
- }
+ n = __ipset_dereference_protected(hbucket(t, i), 1);
+ if (!n)
+ continue;
+ if (set->extensions & IPSET_EXT_DESTROY)
+ mtype_ext_cleanup(set, n);
+ /* FIXME: use slab cache */
+ rcu_assign_pointer(hbucket(t, i), NULL);
+ kfree_rcu(n, rcu);
}
#ifdef IP_SET_HASH_WITH_NETS
memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family));
@@ -401,13 +406,13 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
u32 i;
for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = hbucket(t, i);
- if (n->size) {
- if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
- mtype_ext_cleanup(set, n);
- /* FIXME: use slab cache */
- kfree(n->value);
- }
+ n = __ipset_dereference_protected(hbucket(t, i), 1);
+ if (!n)
+ continue;
+ if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
+ mtype_ext_cleanup(set, n);
+ /* FIXME: use slab cache */
+ kfree(n);
}
ip_set_free(t);
@@ -419,13 +424,11 @@ mtype_destroy(struct ip_set *set)
{
struct htype *h = set->data;
- if (set->extensions & IPSET_EXT_TIMEOUT)
+ if (SET_WITH_TIMEOUT(set))
del_timer_sync(&h->gc);
- mtype_ahash_destroy(set, rcu_dereference_bh_nfnl(h->table), true);
-#ifdef IP_SET_HASH_WITH_RBTREE
- rbtree_destroy(&h->rbtree);
-#endif
+ mtype_ahash_destroy(set,
+ __ipset_dereference_protected(h->table, 1), true);
kfree(h);
set->data = NULL;
@@ -437,7 +440,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
struct htype *h = set->data;
init_timer(&h->gc);
- h->gc.data = (unsigned long) set;
+ h->gc.data = (unsigned long)set;
h->gc.function = gc;
h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&h->gc);
@@ -470,61 +473,71 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
struct htable *t;
struct hbucket *n;
struct mtype_elem *data;
- u32 i;
- int j;
+ u32 i, j, d;
#ifdef IP_SET_HASH_WITH_NETS
u8 k;
#endif
- rcu_read_lock_bh();
- t = rcu_dereference_bh(h->table);
+ t = ipset_dereference_protected(h->table, set);
for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = hbucket(t, i);
- for (j = 0; j < n->pos; j++) {
+ n = __ipset_dereference_protected(hbucket(t, i), 1);
+ if (!n)
+ continue;
+ for (j = 0, d = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used)) {
+ d++;
+ continue;
+ }
data = ahash_data(n, j, dsize);
if (ip_set_timeout_expired(ext_timeout(data, set))) {
pr_debug("expired %u/%u\n", i, j);
+ clear_bit(j, n->used);
+ smp_mb__after_atomic();
#ifdef IP_SET_HASH_WITH_NETS
for (k = 0; k < IPSET_NET_COUNT; k++)
- mtype_del_cidr(h, SCIDR(data->cidr, k),
- nets_length, k);
+ mtype_del_cidr(h,
+ NCIDR_PUT(DCIDR_GET(data->cidr,
+ k)),
+ nets_length, k);
#endif
ip_set_ext_destroy(set, data);
- if (j != n->pos - 1)
- /* Not last one */
- memcpy(data,
- ahash_data(n, n->pos - 1, dsize),
- dsize);
- n->pos--;
h->elements--;
+ d++;
}
}
- if (n->pos + AHASH_INIT_SIZE < n->size) {
- void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
- * dsize,
- GFP_ATOMIC);
+ if (d >= AHASH_INIT_SIZE) {
+ struct hbucket *tmp = kzalloc(sizeof(*tmp) +
+ (n->size - AHASH_INIT_SIZE) * dsize,
+ GFP_ATOMIC);
if (!tmp)
/* Still try to delete expired elements */
continue;
- n->size -= AHASH_INIT_SIZE;
- memcpy(tmp, n->value, n->size * dsize);
- kfree(n->value);
- n->value = tmp;
+ tmp->size = n->size - AHASH_INIT_SIZE;
+ for (j = 0, d = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used))
+ continue;
+ data = ahash_data(n, j, dsize);
+ memcpy(tmp->value + d * dsize, data, dsize);
+ set_bit(j, tmp->used);
+ d++;
+ }
+ tmp->pos = d;
+ rcu_assign_pointer(hbucket(t, i), tmp);
+ kfree_rcu(n, rcu);
}
}
- rcu_read_unlock_bh();
}
static void
mtype_gc(unsigned long ul_set)
{
- struct ip_set *set = (struct ip_set *) ul_set;
+ struct ip_set *set = (struct ip_set *)ul_set;
struct htype *h = set->data;
pr_debug("called\n");
- write_lock_bh(&set->lock);
+ spin_lock_bh(&set->lock);
mtype_expire(set, h, NLEN(set->family), set->dsize);
- write_unlock_bh(&set->lock);
+ spin_unlock_bh(&set->lock);
h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&h->gc);
@@ -532,93 +545,152 @@ mtype_gc(unsigned long ul_set)
/* Resize a hash: create a new hash table with doubling the hashsize
* and inserting the elements to it. Repeat until we succeed or
- * fail due to memory pressures. */
+ * fail due to memory pressures.
+ */
static int
mtype_resize(struct ip_set *set, bool retried)
{
struct htype *h = set->data;
- struct htable *t, *orig = rcu_dereference_bh_nfnl(h->table);
- u8 htable_bits = orig->htable_bits;
+ struct htable *t, *orig;
+ u8 htable_bits;
+ size_t dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
u8 flags;
+ struct mtype_elem *tmp;
#endif
struct mtype_elem *data;
struct mtype_elem *d;
struct hbucket *n, *m;
- u32 i, j;
+ u32 i, j, key;
int ret;
- /* Try to cleanup once */
- if (SET_WITH_TIMEOUT(set) && !retried) {
- i = h->elements;
- write_lock_bh(&set->lock);
- mtype_expire(set, set->data, NLEN(set->family), set->dsize);
- write_unlock_bh(&set->lock);
- if (h->elements < i)
- return 0;
- }
+#ifdef IP_SET_HASH_WITH_NETS
+ tmp = kmalloc(dsize, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+#endif
+ rcu_read_lock_bh();
+ orig = rcu_dereference_bh_nfnl(h->table);
+ htable_bits = orig->htable_bits;
+ rcu_read_unlock_bh();
retry:
ret = 0;
htable_bits++;
- pr_debug("attempt to resize set %s from %u to %u, t %p\n",
- set->name, orig->htable_bits, htable_bits, orig);
if (!htable_bits) {
/* In case we have plenty of memory :-) */
pr_warn("Cannot increase the hashsize of set %s further\n",
set->name);
- return -IPSET_ERR_HASH_FULL;
+ ret = -IPSET_ERR_HASH_FULL;
+ goto out;
+ }
+ t = ip_set_alloc(htable_size(htable_bits));
+ if (!t) {
+ ret = -ENOMEM;
+ goto out;
}
- t = ip_set_alloc(sizeof(*t)
- + jhash_size(htable_bits) * sizeof(struct hbucket));
- if (!t)
- return -ENOMEM;
t->htable_bits = htable_bits;
- read_lock_bh(&set->lock);
+ spin_lock_bh(&set->lock);
+ orig = __ipset_dereference_protected(h->table, 1);
+ /* There can't be another parallel resizing, but dumping is possible */
+ atomic_set(&orig->ref, 1);
+ atomic_inc(&orig->uref);
+ pr_debug("attempt to resize set %s from %u to %u, t %p\n",
+ set->name, orig->htable_bits, htable_bits, orig);
for (i = 0; i < jhash_size(orig->htable_bits); i++) {
- n = hbucket(orig, i);
+ n = __ipset_dereference_protected(hbucket(orig, i), 1);
+ if (!n)
+ continue;
for (j = 0; j < n->pos; j++) {
- data = ahash_data(n, j, set->dsize);
+ if (!test_bit(j, n->used))
+ continue;
+ data = ahash_data(n, j, dsize);
#ifdef IP_SET_HASH_WITH_NETS
+ /* We have readers running parallel with us,
+ * so the live data cannot be modified.
+ */
flags = 0;
+ memcpy(tmp, data, dsize);
+ data = tmp;
mtype_data_reset_flags(data, &flags);
#endif
- m = hbucket(t, HKEY(data, h->initval, htable_bits));
- ret = hbucket_elem_add(m, AHASH_MAX(h), set->dsize);
- if (ret < 0) {
-#ifdef IP_SET_HASH_WITH_NETS
- mtype_data_reset_flags(data, &flags);
-#endif
- read_unlock_bh(&set->lock);
- mtype_ahash_destroy(set, t, false);
- if (ret == -EAGAIN)
- goto retry;
- return ret;
+ key = HKEY(data, h->initval, htable_bits);
+ m = __ipset_dereference_protected(hbucket(t, key), 1);
+ if (!m) {
+ m = kzalloc(sizeof(*m) +
+ AHASH_INIT_SIZE * dsize,
+ GFP_ATOMIC);
+ if (!m) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+ m->size = AHASH_INIT_SIZE;
+ RCU_INIT_POINTER(hbucket(t, key), m);
+ } else if (m->pos >= m->size) {
+ struct hbucket *ht;
+
+ if (m->size >= AHASH_MAX(h)) {
+ ret = -EAGAIN;
+ } else {
+ ht = kzalloc(sizeof(*ht) +
+ (m->size + AHASH_INIT_SIZE)
+ * dsize,
+ GFP_ATOMIC);
+ if (!ht)
+ ret = -ENOMEM;
+ }
+ if (ret < 0)
+ goto cleanup;
+ memcpy(ht, m, sizeof(struct hbucket) +
+ m->size * dsize);
+ ht->size = m->size + AHASH_INIT_SIZE;
+ kfree(m);
+ m = ht;
+ RCU_INIT_POINTER(hbucket(t, key), ht);
}
- d = ahash_data(m, m->pos++, set->dsize);
- memcpy(d, data, set->dsize);
+ d = ahash_data(m, m->pos, dsize);
+ memcpy(d, data, dsize);
+ set_bit(m->pos++, m->used);
#ifdef IP_SET_HASH_WITH_NETS
mtype_data_reset_flags(d, &flags);
#endif
}
}
-
rcu_assign_pointer(h->table, t);
- read_unlock_bh(&set->lock);
+
+ spin_unlock_bh(&set->lock);
/* Give time to other readers of the set */
synchronize_rcu_bh();
pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
orig->htable_bits, orig, t->htable_bits, t);
- mtype_ahash_destroy(set, orig, false);
+ /* If there's nobody else dumping the table, destroy it */
+ if (atomic_dec_and_test(&orig->uref)) {
+ pr_debug("Table destroy by resize %p\n", orig);
+ mtype_ahash_destroy(set, orig, false);
+ }
- return 0;
+out:
+#ifdef IP_SET_HASH_WITH_NETS
+ kfree(tmp);
+#endif
+ return ret;
+
+cleanup:
+ atomic_set(&orig->ref, 0);
+ atomic_dec(&orig->uref);
+ spin_unlock_bh(&set->lock);
+ mtype_ahash_destroy(set, t, false);
+ if (ret == -EAGAIN)
+ goto retry;
+ goto out;
}
/* Add an element to a hash and update the internal counters when succeeded,
- * otherwise report the proper error code. */
+ * otherwise report the proper error code.
+ */
static int
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags)
@@ -627,17 +699,49 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct htable *t;
const struct mtype_elem *d = value;
struct mtype_elem *data;
- struct hbucket *n;
- int i, ret = 0;
- int j = AHASH_MAX(h) + 1;
+ struct hbucket *n, *old = ERR_PTR(-ENOENT);
+ int i, j = -1;
bool flag_exist = flags & IPSET_FLAG_EXIST;
+ bool deleted = false, forceadd = false, reuse = false;
u32 key, multi = 0;
- rcu_read_lock_bh();
- t = rcu_dereference_bh(h->table);
+ if (h->elements >= h->maxelem) {
+ if (SET_WITH_TIMEOUT(set))
+ /* FIXME: when set is full, we slow down here */
+ mtype_expire(set, h, NLEN(set->family), set->dsize);
+ if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set))
+ forceadd = true;
+ }
+
+ t = ipset_dereference_protected(h->table, set);
key = HKEY(value, h->initval, t->htable_bits);
- n = hbucket(t, key);
+ n = __ipset_dereference_protected(hbucket(t, key), 1);
+ if (!n) {
+ if (forceadd) {
+ if (net_ratelimit())
+ pr_warn("Set %s is full, maxelem %u reached\n",
+ set->name, h->maxelem);
+ return -IPSET_ERR_HASH_FULL;
+ } else if (h->elements >= h->maxelem) {
+ goto set_full;
+ }
+ old = NULL;
+ n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
+ GFP_ATOMIC);
+ if (!n)
+ return -ENOMEM;
+ n->size = AHASH_INIT_SIZE;
+ goto copy_elem;
+ }
for (i = 0; i < n->pos; i++) {
+ if (!test_bit(i, n->used)) {
+ /* Reuse first deleted entry */
+ if (j == -1) {
+ deleted = reuse = true;
+ j = i;
+ }
+ continue;
+ }
data = ahash_data(n, i, set->dsize);
if (mtype_data_equal(data, d, &multi)) {
if (flag_exist ||
@@ -645,85 +749,94 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ip_set_timeout_expired(ext_timeout(data, set)))) {
/* Just the extensions could be overwritten */
j = i;
- goto reuse_slot;
- } else {
- ret = -IPSET_ERR_EXIST;
- goto out;
+ goto overwrite_extensions;
}
+ return -IPSET_ERR_EXIST;
}
/* Reuse first timed out entry */
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(data, set)) &&
- j != AHASH_MAX(h) + 1)
+ j == -1) {
j = i;
+ reuse = true;
+ }
}
- if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set) && n->pos) {
- /* Choosing the first entry in the array to replace */
- j = 0;
- goto reuse_slot;
- }
- if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem)
- /* FIXME: when set is full, we slow down here */
- mtype_expire(set, h, NLEN(set->family), set->dsize);
-
- if (h->elements >= h->maxelem) {
- if (net_ratelimit())
- pr_warn("Set %s is full, maxelem %u reached\n",
- set->name, h->maxelem);
- ret = -IPSET_ERR_HASH_FULL;
- goto out;
- }
-
-reuse_slot:
- if (j != AHASH_MAX(h) + 1) {
- /* Fill out reused slot */
+ if (reuse || forceadd) {
data = ahash_data(n, j, set->dsize);
+ if (!deleted) {
#ifdef IP_SET_HASH_WITH_NETS
- for (i = 0; i < IPSET_NET_COUNT; i++) {
- mtype_del_cidr(h, SCIDR(data->cidr, i),
- NLEN(set->family), i);
- mtype_add_cidr(h, SCIDR(d->cidr, i),
- NLEN(set->family), i);
- }
+ for (i = 0; i < IPSET_NET_COUNT; i++)
+ mtype_del_cidr(h,
+ NCIDR_PUT(DCIDR_GET(data->cidr, i)),
+ NLEN(set->family), i);
#endif
- ip_set_ext_destroy(set, data);
- } else {
- /* Use/create a new slot */
+ ip_set_ext_destroy(set, data);
+ h->elements--;
+ }
+ goto copy_data;
+ }
+ if (h->elements >= h->maxelem)
+ goto set_full;
+ /* Create a new slot */
+ if (n->pos >= n->size) {
TUNE_AHASH_MAX(h, multi);
- ret = hbucket_elem_add(n, AHASH_MAX(h), set->dsize);
- if (ret != 0) {
- if (ret == -EAGAIN)
- mtype_data_next(&h->next, d);
- goto out;
+ if (n->size >= AHASH_MAX(h)) {
+ /* Trigger rehashing */
+ mtype_data_next(&h->next, d);
+ return -EAGAIN;
}
- data = ahash_data(n, n->pos++, set->dsize);
+ old = n;
+ n = kzalloc(sizeof(*n) +
+ (old->size + AHASH_INIT_SIZE) * set->dsize,
+ GFP_ATOMIC);
+ if (!n)
+ return -ENOMEM;
+ memcpy(n, old, sizeof(struct hbucket) +
+ old->size * set->dsize);
+ n->size = old->size + AHASH_INIT_SIZE;
+ }
+
+copy_elem:
+ j = n->pos++;
+ data = ahash_data(n, j, set->dsize);
+copy_data:
+ h->elements++;
#ifdef IP_SET_HASH_WITH_NETS
- for (i = 0; i < IPSET_NET_COUNT; i++)
- mtype_add_cidr(h, SCIDR(d->cidr, i), NLEN(set->family),
- i);
+ for (i = 0; i < IPSET_NET_COUNT; i++)
+ mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)),
+ NLEN(set->family), i);
#endif
- h->elements++;
- }
memcpy(data, d, sizeof(struct mtype_elem));
+overwrite_extensions:
#ifdef IP_SET_HASH_WITH_NETS
mtype_data_set_flags(data, flags);
#endif
- if (SET_WITH_TIMEOUT(set))
- ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
if (SET_WITH_COUNTER(set))
ip_set_init_counter(ext_counter(data, set), ext);
if (SET_WITH_COMMENT(set))
ip_set_init_comment(ext_comment(data, set), ext);
if (SET_WITH_SKBINFO(set))
ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
+ /* Must come last for the case when timed out entry is reused */
+ if (SET_WITH_TIMEOUT(set))
+ ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
+ smp_mb__before_atomic();
+ set_bit(j, n->used);
+ if (old != ERR_PTR(-ENOENT)) {
+ rcu_assign_pointer(hbucket(t, key), n);
+ if (old)
+ kfree_rcu(old, rcu);
+ }
-out:
- rcu_read_unlock_bh();
- return ret;
+ return 0;
+set_full:
+ if (net_ratelimit())
+ pr_warn("Set %s is full, maxelem %u reached\n",
+ set->name, h->maxelem);
+ return -IPSET_ERR_HASH_FULL;
}
-/* Delete an element from the hash: swap it with the last element
- * and free up space if possible.
+/* Delete an element from the hash and free up space if possible.
*/
static int
mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
@@ -734,55 +847,70 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
const struct mtype_elem *d = value;
struct mtype_elem *data;
struct hbucket *n;
- int i, ret = -IPSET_ERR_EXIST;
-#ifdef IP_SET_HASH_WITH_NETS
- u8 j;
-#endif
+ int i, j, k, ret = -IPSET_ERR_EXIST;
u32 key, multi = 0;
+ size_t dsize = set->dsize;
- rcu_read_lock_bh();
- t = rcu_dereference_bh(h->table);
+ t = ipset_dereference_protected(h->table, set);
key = HKEY(value, h->initval, t->htable_bits);
- n = hbucket(t, key);
- for (i = 0; i < n->pos; i++) {
- data = ahash_data(n, i, set->dsize);
+ n = __ipset_dereference_protected(hbucket(t, key), 1);
+ if (!n)
+ goto out;
+ for (i = 0, k = 0; i < n->pos; i++) {
+ if (!test_bit(i, n->used)) {
+ k++;
+ continue;
+ }
+ data = ahash_data(n, i, dsize);
if (!mtype_data_equal(data, d, &multi))
continue;
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(data, set)))
goto out;
- if (i != n->pos - 1)
- /* Not last one */
- memcpy(data, ahash_data(n, n->pos - 1, set->dsize),
- set->dsize);
- n->pos--;
+ ret = 0;
+ clear_bit(i, n->used);
+ smp_mb__after_atomic();
+ if (i + 1 == n->pos)
+ n->pos--;
h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
for (j = 0; j < IPSET_NET_COUNT; j++)
- mtype_del_cidr(h, SCIDR(d->cidr, j), NLEN(set->family),
- j);
+ mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)),
+ NLEN(set->family), j);
#endif
ip_set_ext_destroy(set, data);
- if (n->pos + AHASH_INIT_SIZE < n->size) {
- void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
- * set->dsize,
- GFP_ATOMIC);
- if (!tmp) {
- ret = 0;
+
+ for (; i < n->pos; i++) {
+ if (!test_bit(i, n->used))
+ k++;
+ }
+ if (n->pos == 0 && k == 0) {
+ rcu_assign_pointer(hbucket(t, key), NULL);
+ kfree_rcu(n, rcu);
+ } else if (k >= AHASH_INIT_SIZE) {
+ struct hbucket *tmp = kzalloc(sizeof(*tmp) +
+ (n->size - AHASH_INIT_SIZE) * dsize,
+ GFP_ATOMIC);
+ if (!tmp)
goto out;
+ tmp->size = n->size - AHASH_INIT_SIZE;
+ for (j = 0, k = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used))
+ continue;
+ data = ahash_data(n, j, dsize);
+ memcpy(tmp->value + k * dsize, data, dsize);
+ set_bit(j, tmp->used);
+ k++;
}
- n->size -= AHASH_INIT_SIZE;
- memcpy(tmp, n->value, n->size * set->dsize);
- kfree(n->value);
- n->value = tmp;
+ tmp->pos = k;
+ rcu_assign_pointer(hbucket(t, key), tmp);
+ kfree_rcu(n, rcu);
}
- ret = 0;
goto out;
}
out:
- rcu_read_unlock_bh();
return ret;
}
@@ -801,7 +929,8 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
#ifdef IP_SET_HASH_WITH_NETS
/* Special test function which takes into account the different network
- * sizes added to the set */
+ * sizes added to the set
+ */
static int
mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
const struct ip_set_ext *ext,
@@ -824,16 +953,21 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
for (; j < nets_length && h->nets[j].cidr[0] && !multi; j++) {
#if IPSET_NET_COUNT == 2
mtype_data_reset_elem(d, &orig);
- mtype_data_netmask(d, NCIDR(h->nets[j].cidr[0]), false);
+ mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false);
for (k = 0; k < nets_length && h->nets[k].cidr[1] && !multi;
k++) {
- mtype_data_netmask(d, NCIDR(h->nets[k].cidr[1]), true);
+ mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]),
+ true);
#else
- mtype_data_netmask(d, NCIDR(h->nets[j].cidr[0]));
+ mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]));
#endif
key = HKEY(d, h->initval, t->htable_bits);
- n = hbucket(t, key);
+ n = rcu_dereference_bh(hbucket(t, key));
+ if (!n)
+ continue;
for (i = 0; i < n->pos; i++) {
+ if (!test_bit(i, n->used))
+ continue;
data = ahash_data(n, i, set->dsize);
if (!mtype_data_equal(data, d, &multi))
continue;
@@ -871,13 +1005,13 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
int i, ret = 0;
u32 key, multi = 0;
- rcu_read_lock_bh();
t = rcu_dereference_bh(h->table);
#ifdef IP_SET_HASH_WITH_NETS
/* If we test an IP address and not a network address,
- * try all possible network sizes */
+ * try all possible network sizes
+ */
for (i = 0; i < IPSET_NET_COUNT; i++)
- if (GCIDR(d->cidr, i) != SET_HOST_MASK(set->family))
+ if (DCIDR_GET(d->cidr, i) != SET_HOST_MASK(set->family))
break;
if (i == IPSET_NET_COUNT) {
ret = mtype_test_cidrs(set, d, ext, mext, flags);
@@ -886,8 +1020,14 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
#endif
key = HKEY(d, h->initval, t->htable_bits);
- n = hbucket(t, key);
+ n = rcu_dereference_bh(hbucket(t, key));
+ if (!n) {
+ ret = 0;
+ goto out;
+ }
for (i = 0; i < n->pos; i++) {
+ if (!test_bit(i, n->used))
+ continue;
data = ahash_data(n, i, set->dsize);
if (mtype_data_equal(data, d, &multi) &&
!(SET_WITH_TIMEOUT(set) &&
@@ -897,7 +1037,6 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
}
out:
- rcu_read_unlock_bh();
return ret;
}
@@ -909,15 +1048,19 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
const struct htable *t;
struct nlattr *nested;
size_t memsize;
+ u8 htable_bits;
+ rcu_read_lock_bh();
t = rcu_dereference_bh_nfnl(h->table);
memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize);
+ htable_bits = t->htable_bits;
+ rcu_read_unlock_bh();
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested)
goto nla_put_failure;
if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
- htonl(jhash_size(t->htable_bits))) ||
+ htonl(jhash_size(htable_bits))) ||
nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
goto nla_put_failure;
#ifdef IP_SET_HASH_WITH_NETMASK
@@ -941,32 +1084,63 @@ nla_put_failure:
return -EMSGSIZE;
}
+/* Make possible to run dumping parallel with resizing */
+static void
+mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
+{
+ struct htype *h = set->data;
+ struct htable *t;
+
+ if (start) {
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh_nfnl(h->table);
+ atomic_inc(&t->uref);
+ cb->args[IPSET_CB_PRIVATE] = (unsigned long)t;
+ rcu_read_unlock_bh();
+ } else if (cb->args[IPSET_CB_PRIVATE]) {
+ t = (struct htable *)cb->args[IPSET_CB_PRIVATE];
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ /* Resizing didn't destroy the hash table */
+ pr_debug("Table destroy by dump: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
+ cb->args[IPSET_CB_PRIVATE] = 0;
+ }
+}
+
/* Reply a LIST/SAVE request: dump the elements of the specified set */
static int
mtype_list(const struct ip_set *set,
struct sk_buff *skb, struct netlink_callback *cb)
{
- const struct htype *h = set->data;
- const struct htable *t = rcu_dereference_bh_nfnl(h->table);
+ const struct htable *t;
struct nlattr *atd, *nested;
const struct hbucket *n;
const struct mtype_elem *e;
u32 first = cb->args[IPSET_CB_ARG0];
/* We assume that one hash bucket fills into one page */
void *incomplete;
- int i;
+ int i, ret = 0;
atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
if (!atd)
return -EMSGSIZE;
+
pr_debug("list hash set %s\n", set->name);
+ t = (const struct htable *)cb->args[IPSET_CB_PRIVATE];
+ /* Expire may replace a hbucket with another one */
+ rcu_read_lock();
for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
cb->args[IPSET_CB_ARG0]++) {
incomplete = skb_tail_pointer(skb);
- n = hbucket(t, cb->args[IPSET_CB_ARG0]);
+ n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0]));
pr_debug("cb->arg bucket: %lu, t %p n %p\n",
cb->args[IPSET_CB_ARG0], t, n);
+ if (!n)
+ continue;
for (i = 0; i < n->pos; i++) {
+ if (!test_bit(i, n->used))
+ continue;
e = ahash_data(n, i, set->dsize);
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
@@ -977,9 +1151,10 @@ mtype_list(const struct ip_set *set,
if (!nested) {
if (cb->args[IPSET_CB_ARG0] == first) {
nla_nest_cancel(skb, atd);
- return -EMSGSIZE;
- } else
- goto nla_put_failure;
+ ret = -EMSGSIZE;
+ goto out;
+ }
+ goto nla_put_failure;
}
if (mtype_data_list(skb, e))
goto nla_put_failure;
@@ -992,7 +1167,7 @@ mtype_list(const struct ip_set *set,
/* Set listing finished */
cb->args[IPSET_CB_ARG0] = 0;
- return 0;
+ goto out;
nla_put_failure:
nlmsg_trim(skb, incomplete);
@@ -1000,20 +1175,24 @@ nla_put_failure:
pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n",
set->name);
cb->args[IPSET_CB_ARG0] = 0;
- return -EMSGSIZE;
+ ret = -EMSGSIZE;
+ } else {
+ ipset_nest_end(skb, atd);
}
- ipset_nest_end(skb, atd);
- return 0;
+out:
+ rcu_read_unlock();
+ return ret;
}
static int
IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
- const struct xt_action_param *par,
- enum ipset_adt adt, struct ip_set_adt_opt *opt);
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt);
static int
IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
- enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);
+ enum ipset_adt adt, u32 *lineno, u32 flags,
+ bool retried);
static const struct ip_set_type_variant mtype_variant = {
.kadt = mtype_kadt,
@@ -1027,6 +1206,7 @@ static const struct ip_set_type_variant mtype_variant = {
.flush = mtype_flush,
.head = mtype_head,
.list = mtype_list,
+ .uref = mtype_uref,
.resize = mtype_resize,
.same_set = mtype_same_set,
};
@@ -1045,7 +1225,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
u8 netmask;
#endif
size_t hsize;
- struct HTYPE *h;
+ struct htype *h;
struct htable *t;
#ifndef IP_SET_PROTO_UNDEF
@@ -1064,12 +1244,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
-#ifdef IP_SET_HASH_WITH_MARKMASK
- !ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK) ||
-#endif
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ /* Separated condition in order to avoid directive in argument list */
+ if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK)))
+ return -IPSET_ERR_PROTOCOL;
+#endif
if (tb[IPSET_ATTR_HASHSIZE]) {
hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
@@ -1092,7 +1274,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
if (tb[IPSET_ATTR_MARKMASK]) {
- markmask = ntohl(nla_get_u32(tb[IPSET_ATTR_MARKMASK]));
+ markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));
if (markmask == 0)
return -IPSET_ERR_INVALID_MARKMASK;
@@ -1165,3 +1347,5 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
return 0;
}
#endif /* IP_SET_EMIT_CREATE */
+
+#undef HKEY_DATALEN
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 76959d79e..9d6bf19f7 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -56,15 +56,15 @@ hash_ip4_data_equal(const struct hash_ip4_elem *e1,
return e1->ip == e2->ip;
}
-static inline bool
+static bool
hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *e)
{
if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -74,7 +74,6 @@ hash_ip4_data_next(struct hash_ip4_elem *next, const struct hash_ip4_elem *e)
}
#define MTYPE hash_ip4
-#define PF 4
#define HOST_MASK 32
#include "ip_set_hash_gen.h"
@@ -109,20 +108,17 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
u32 ip = 0, ip_to = 0, hosts;
int ret = 0;
- if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
- return -IPSET_ERR_PROTOCOL;
-
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ if (unlikely(!tb[IPSET_ATTR_IP]))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -145,7 +141,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!cidr || cidr > 32)
+ if (!cidr || cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
}
@@ -162,8 +158,8 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
return ret;
}
@@ -196,10 +192,10 @@ hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *e)
{
if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -208,12 +204,9 @@ hash_ip6_data_next(struct hash_ip4_elem *next, const struct hash_ip6_elem *e)
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
-#undef HKEY_DATALEN
#define MTYPE hash_ip6
-#define PF 6
#define HOST_MASK 128
#define IP_SET_EMIT_CREATE
@@ -247,22 +240,25 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret;
- if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
- tb[IPSET_ATTR_IP_TO] ||
- tb[IPSET_ATTR_CIDR]))
- return -IPSET_ERR_PROTOCOL;
-
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ if (unlikely(!tb[IPSET_ATTR_IP]))
+ return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+ if (unlikely(tb[IPSET_ATTR_CIDR])) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (cidr != HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -301,7 +297,8 @@ static struct ip_set_type hash_ip_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -318,6 +315,7 @@ hash_ip_init(void)
static void __exit
hash_ip_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_ip_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 7abf9788c..a0695a2ab 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -63,10 +63,10 @@ hash_ipmark4_data_list(struct sk_buff *skb,
if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark)))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -76,10 +76,8 @@ hash_ipmark4_data_next(struct hash_ipmark4_elem *next,
next->ip = d->ip;
}
-#define MTYPE hash_ipmark4
-#define PF 4
-#define HOST_MASK 32
-#define HKEY_DATALEN sizeof(struct hash_ipmark4_elem)
+#define MTYPE hash_ipmark4
+#define HOST_MASK 32
#include "ip_set_hash_gen.h"
static int
@@ -110,25 +108,22 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
u32 ip, ip_to = 0;
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_attr_netorder(tb, IPSET_ATTR_MARK)))
return -IPSET_ERR_PROTOCOL;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
- ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
- e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK]));
+ e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK]));
e.mark &= h->markmask;
if (adt == IPSET_TEST ||
@@ -147,7 +142,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!cidr || cidr > 32)
+ if (!cidr || cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
}
@@ -160,8 +155,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
return ret;
}
@@ -191,10 +186,10 @@ hash_ipmark6_data_list(struct sk_buff *skb,
if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) ||
nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark)))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -204,18 +199,13 @@ hash_ipmark6_data_next(struct hash_ipmark4_elem *next,
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
-#undef HKEY_DATALEN
#define MTYPE hash_ipmark6
-#define PF 6
#define HOST_MASK 128
-#define HKEY_DATALEN sizeof(struct hash_ipmark6_elem)
-#define IP_SET_EMIT_CREATE
+#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
-
static int
hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
@@ -243,27 +233,30 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
- tb[IPSET_ATTR_IP_TO] ||
- tb[IPSET_ATTR_CIDR]))
+ !ip_set_attr_netorder(tb, IPSET_ATTR_MARK)))
return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+ if (unlikely(tb[IPSET_ATTR_CIDR])) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (cidr != HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
if (ret)
return ret;
- e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK]));
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK]));
e.mark &= h->markmask;
if (adt == IPSET_TEST) {
@@ -274,10 +267,8 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
- return ret;
+ return 0;
}
static struct ip_set_type hash_ipmark_type __read_mostly = {
@@ -307,7 +298,8 @@ static struct ip_set_type hash_ipmark_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -324,6 +316,7 @@ hash_ipmark_init(void)
static void __exit
hash_ipmark_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_ipmark_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index dcbcceb9a..9d84b3dff 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -69,10 +69,10 @@ hash_ipport4_data_list(struct sk_buff *skb,
nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -83,10 +83,8 @@ hash_ipport4_data_next(struct hash_ipport4_elem *next,
next->port = d->port;
}
-#define MTYPE hash_ipport4
-#define PF 4
-#define HOST_MASK 32
-#define HKEY_DATALEN sizeof(struct hash_ipport4_elem)
+#define MTYPE hash_ipport4
+#define HOST_MASK 32
#include "ip_set_hash_gen.h"
static int
@@ -118,29 +116,23 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
bool with_ports = false;
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
return -IPSET_ERR_PROTOCOL;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
- ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -148,8 +140,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMP))
e.port = 0;
@@ -171,7 +164,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!cidr || cidr > 32)
+ if (!cidr || cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
}
@@ -195,8 +188,8 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
}
return ret;
@@ -231,10 +224,10 @@ hash_ipport6_data_list(struct sk_buff *skb,
nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -245,15 +238,11 @@ hash_ipport6_data_next(struct hash_ipport4_elem *next,
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
-#undef HKEY_DATALEN
#define MTYPE hash_ipport6
-#define PF 6
#define HOST_MASK 128
-#define HKEY_DATALEN sizeof(struct hash_ipport6_elem)
-#define IP_SET_EMIT_CREATE
+#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
static int
@@ -285,31 +274,31 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
bool with_ports = false;
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
- tb[IPSET_ATTR_IP_TO] ||
- tb[IPSET_ATTR_CIDR]))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+ if (unlikely(tb[IPSET_ATTR_CIDR])) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (cidr != HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
if (ret)
return ret;
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -317,8 +306,9 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMPV6))
e.port = 0;
@@ -341,8 +331,8 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
return ret;
}
@@ -376,7 +366,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -393,6 +384,7 @@ hash_ipport_init(void)
static void __exit
hash_ipport_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_ipport_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 7ef93fc88..215b7b942 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -63,17 +63,17 @@ hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
static bool
hash_ipportip4_data_list(struct sk_buff *skb,
- const struct hash_ipportip4_elem *data)
+ const struct hash_ipportip4_elem *data)
{
if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip2) ||
nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -86,7 +86,6 @@ hash_ipportip4_data_next(struct hash_ipportip4_elem *next,
/* Common functions */
#define MTYPE hash_ipportip4
-#define PF 4
#define HOST_MASK 32
#include "ip_set_hash_gen.h"
@@ -120,22 +119,19 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
bool with_ports = false;
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
return -IPSET_ERR_PROTOCOL;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
- ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -143,10 +139,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret)
return ret;
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -154,8 +147,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMP))
e.port = 0;
@@ -177,7 +171,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!cidr || cidr > 32)
+ if (!cidr || cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
}
@@ -201,8 +195,8 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
}
return ret;
@@ -240,10 +234,10 @@ hash_ipportip6_data_list(struct sk_buff *skb,
nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -254,11 +248,9 @@ hash_ipportip6_data_next(struct hash_ipportip4_elem *next,
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
#define MTYPE hash_ipportip6
-#define PF 6
#define HOST_MASK 128
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
@@ -293,24 +285,27 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
bool with_ports = false;
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
- tb[IPSET_ATTR_IP_TO] ||
- tb[IPSET_ATTR_CIDR]))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+ if (unlikely(tb[IPSET_ATTR_CIDR])) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (cidr != HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -318,10 +313,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret)
return ret;
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -329,8 +321,9 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMPV6))
e.port = 0;
@@ -353,8 +346,8 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
return ret;
}
@@ -388,7 +381,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -405,6 +399,7 @@ hash_ipportip_init(void)
static void __exit
hash_ipportip_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_ipportip_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index b6012ad92..9ca719625 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -114,10 +114,10 @@ hash_ipportnet4_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -130,7 +130,6 @@ hash_ipportnet4_data_next(struct hash_ipportnet4_elem *next,
}
#define MTYPE hash_ipportnet4
-#define PF 4
#define HOST_MASK 32
#include "ip_set_hash_gen.h"
@@ -142,7 +141,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_ipportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet4_elem e = {
- .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -174,23 +173,20 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
u8 cidr;
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -205,10 +201,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
e.cidr = cidr - 1;
}
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -216,14 +209,16 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMP))
e.port = 0;
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -249,7 +244,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!cidr || cidr > 32)
+ if (!cidr || cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
}
@@ -270,8 +265,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip2_from, ip2_to);
if (ip2_from + UINT_MAX == ip2_to)
return -IPSET_ERR_HASH_RANGE;
- } else
+ } else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1);
+ }
if (retried)
ip = ntohl(h->next.ip);
@@ -294,8 +290,8 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
ip2 = ip2_last + 1;
}
}
@@ -367,10 +363,10 @@ hash_ipportnet6_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -381,11 +377,9 @@ hash_ipportnet6_data_next(struct hash_ipportnet4_elem *next,
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
#define MTYPE hash_ipportnet6
-#define PF 6
#define HOST_MASK 128
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
@@ -398,7 +392,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_ipportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet6_elem e = {
- .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -429,27 +423,28 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
u8 cidr;
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
- tb[IPSET_ATTR_IP_TO] ||
- tb[IPSET_ATTR_CIDR]))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+ if (unlikely(tb[IPSET_ATTR_CIDR])) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (cidr != HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -466,10 +461,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
ip6_netmask(&e.ip2, e.cidr + 1);
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -477,14 +469,16 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMPV6))
e.port = 0;
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -508,8 +502,8 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
return ret;
}
@@ -547,7 +541,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -564,6 +559,7 @@ hash_ipportnet_init(void)
static void __exit
hash_ipportnet_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_ipportnet_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index 65690b52a..f1e7d2c0f 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -52,7 +52,12 @@ hash_mac4_data_equal(const struct hash_mac4_elem *e1,
static inline bool
hash_mac4_data_list(struct sk_buff *skb, const struct hash_mac4_elem *e)
{
- return nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether);
+ if (nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
}
static inline void
@@ -62,7 +67,6 @@ hash_mac4_data_next(struct hash_mac4_elem *next,
}
#define MTYPE hash_mac4
-#define PF 4
#define HOST_MASK 32
#define IP_SET_EMIT_CREATE
#define IP_SET_PROTO_UNDEF
@@ -85,10 +89,10 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb,
return 0;
if (skb_mac_header(skb) < skb->head ||
- (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+ (skb_mac_header(skb) + ETH_HLEN) > skb->data)
return -EINVAL;
- memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
return -EINVAL;
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
@@ -103,22 +107,16 @@ hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret;
- if (unlikely(!tb[IPSET_ATTR_ETHER] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
- return -IPSET_ERR_PROTOCOL;
-
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (unlikely(!tb[IPSET_ATTR_ETHER]))
+ return -IPSET_ERR_PROTOCOL;
+
ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
- memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+ ether_addr_copy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]));
if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
return -IPSET_ERR_HASH_ELEM;
@@ -149,7 +147,8 @@ static struct ip_set_type hash_mac_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -166,6 +165,7 @@ hash_mac_init(void)
static void __exit
hash_mac_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_mac_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 6b3ac10ac..3e4bffdc1 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -95,10 +95,10 @@ hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data)
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -109,7 +109,6 @@ hash_net4_data_next(struct hash_net4_elem *next,
}
#define MTYPE hash_net4
-#define PF 4
#define HOST_MASK 32
#include "ip_set_hash_gen.h"
@@ -121,7 +120,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_net *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = {
- .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -147,21 +146,18 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
u32 ip = 0, ip_to = 0, last;
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -173,6 +169,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -180,7 +177,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
e.ip = htonl(ip & ip_set_hostmask(e.cidr));
ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_enomatch(ret, flags, adt, set) ? -ret:
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
ip_set_eexist(ret, flags) ? 0 : ret;
}
@@ -202,8 +199,8 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
ip = last + 1;
}
return ret;
@@ -264,10 +261,10 @@ hash_net6_data_list(struct sk_buff *skb, const struct hash_net6_elem *data)
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -277,11 +274,9 @@ hash_net6_data_next(struct hash_net4_elem *next,
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
#define MTYPE hash_net6
-#define PF 6
#define HOST_MASK 128
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
@@ -294,7 +289,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_net *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net6_elem e = {
- .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -318,36 +313,34 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
- if (tb[IPSET_ATTR_CIDR])
+ if (tb[IPSET_ATTR_CIDR]) {
e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
-
- if (!e.cidr || e.cidr > HOST_MASK)
- return -IPSET_ERR_INVALID_CIDR;
+ if (!e.cidr || e.cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
ip6_netmask(&e.ip, e.cidr);
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -383,7 +376,8 @@ static struct ip_set_type hash_net_type __read_mostly = {
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -400,6 +394,7 @@ hash_net_init(void)
static void __exit
hash_net_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_net_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 380ef5148..43d8c9896 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -13,7 +13,6 @@
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/random.h>
-#include <linux/rbtree.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/netlink.h>
@@ -37,88 +36,13 @@ MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:net,iface");
-/* Interface name rbtree */
-
-struct iface_node {
- struct rb_node node;
- char iface[IFNAMSIZ];
-};
-
-#define iface_data(n) (rb_entry(n, struct iface_node, node)->iface)
-
-static void
-rbtree_destroy(struct rb_root *root)
-{
- struct iface_node *node, *next;
-
- rbtree_postorder_for_each_entry_safe(node, next, root, node)
- kfree(node);
-
- *root = RB_ROOT;
-}
-
-static int
-iface_test(struct rb_root *root, const char **iface)
-{
- struct rb_node *n = root->rb_node;
-
- while (n) {
- const char *d = iface_data(n);
- int res = strcmp(*iface, d);
-
- if (res < 0)
- n = n->rb_left;
- else if (res > 0)
- n = n->rb_right;
- else {
- *iface = d;
- return 1;
- }
- }
- return 0;
-}
-
-static int
-iface_add(struct rb_root *root, const char **iface)
-{
- struct rb_node **n = &(root->rb_node), *p = NULL;
- struct iface_node *d;
-
- while (*n) {
- char *ifname = iface_data(*n);
- int res = strcmp(*iface, ifname);
-
- p = *n;
- if (res < 0)
- n = &((*n)->rb_left);
- else if (res > 0)
- n = &((*n)->rb_right);
- else {
- *iface = ifname;
- return 0;
- }
- }
-
- d = kzalloc(sizeof(*d), GFP_ATOMIC);
- if (!d)
- return -ENOMEM;
- strcpy(d->iface, *iface);
-
- rb_link_node(&d->node, p, n);
- rb_insert_color(&d->node, root);
-
- *iface = d->iface;
- return 0;
-}
-
/* Type specific function prefix */
#define HTYPE hash_netiface
#define IP_SET_HASH_WITH_NETS
-#define IP_SET_HASH_WITH_RBTREE
#define IP_SET_HASH_WITH_MULTI
#define IP_SET_HASH_WITH_NET0
-#define STREQ(a, b) (strcmp(a, b) == 0)
+#define STRLCPY(a, b) strlcpy(a, b, IFNAMSIZ)
/* IPv4 variant */
@@ -137,7 +61,7 @@ struct hash_netiface4_elem {
u8 cidr;
u8 nomatch;
u8 elem;
- const char *iface;
+ char iface[IFNAMSIZ];
};
/* Common functions */
@@ -151,7 +75,7 @@ hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
ip1->cidr == ip2->cidr &&
(++*multi) &&
ip1->physdev == ip2->physdev &&
- ip1->iface == ip2->iface;
+ strcmp(ip1->iface, ip2->iface) == 0;
}
static inline int
@@ -193,10 +117,10 @@ hash_netiface4_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -207,7 +131,6 @@ hash_netiface4_data_next(struct hash_netiface4_elem *next,
}
#define MTYPE hash_netiface4
-#define PF 4
#define HOST_MASK 32
#define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed)
#include "ip_set_hash_gen.h"
@@ -220,7 +143,7 @@ static const char *get_physindev_name(const struct sk_buff *skb)
return dev ? dev->name : NULL;
}
-static const char *get_phyoutdev_name(const struct sk_buff *skb)
+static const char *get_physoutdev_name(const struct sk_buff *skb)
{
struct net_device *dev = nf_bridge_get_physoutdev(skb);
@@ -236,11 +159,10 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
struct hash_netiface *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = {
- .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
.elem = 1,
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- int ret;
if (e.cidr == 0)
return -EINVAL;
@@ -250,35 +172,25 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
e.ip &= ip_set_netmask(e.cidr);
-#define IFACE(dir) (par->dir ? par->dir->name : NULL)
+#define IFACE(dir) (par->dir ? par->dir->name : "")
#define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC)
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- e.iface = SRCDIR ? get_physindev_name(skb) :
- get_phyoutdev_name(skb);
+ const char *eiface = SRCDIR ? get_physindev_name(skb) :
+ get_physoutdev_name(skb);
- if (!e.iface)
+ if (!eiface)
return -EINVAL;
+ STRLCPY(e.iface, eiface);
e.physdev = 1;
-#else
- e.iface = NULL;
#endif
- } else
- e.iface = SRCDIR ? IFACE(in) : IFACE(out);
+ } else {
+ STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out));
+ }
- if (!e.iface)
+ if (strlen(e.iface) == 0)
return -EINVAL;
- ret = iface_test(&h->rbtree, &e.iface);
- if (adt == IPSET_ADD) {
- if (!ret) {
- ret = iface_add(&h->rbtree, &e.iface);
- if (ret)
- return ret;
- }
- } else if (!ret)
- return ret;
-
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
@@ -291,25 +203,21 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, last;
- char iface[IFNAMSIZ];
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] ||
!tb[IPSET_ATTR_IFACE] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -318,21 +226,11 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
}
-
- strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE]));
- e.iface = iface;
- ret = iface_test(&h->rbtree, &e.iface);
- if (adt == IPSET_ADD) {
- if (!ret) {
- ret = iface_add(&h->rbtree, &e.iface);
- if (ret)
- return ret;
- }
- } else if (!ret)
- return ret;
+ nla_strlcpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ);
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_PHYSDEV)
e.physdev = 1;
if (cadt_flags & IPSET_FLAG_NOMATCH)
@@ -353,8 +251,9 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip, ip_to);
if (ip + UINT_MAX == ip_to)
return -IPSET_ERR_HASH_RANGE;
- } else
+ } else {
ip_set_mask_from_to(ip, ip_to, e.cidr);
+ }
if (retried)
ip = ntohl(h->next.ip);
@@ -365,8 +264,8 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
ip = last + 1;
}
return ret;
@@ -388,7 +287,7 @@ struct hash_netiface6_elem {
u8 cidr;
u8 nomatch;
u8 elem;
- const char *iface;
+ char iface[IFNAMSIZ];
};
/* Common functions */
@@ -402,7 +301,7 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
ip1->cidr == ip2->cidr &&
(++*multi) &&
ip1->physdev == ip2->physdev &&
- ip1->iface == ip2->iface;
+ strcmp(ip1->iface, ip2->iface) == 0;
}
static inline int
@@ -444,10 +343,10 @@ hash_netiface6_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -457,12 +356,9 @@ hash_netiface6_data_next(struct hash_netiface4_elem *next,
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
-#undef HKEY_DATALEN
#define MTYPE hash_netiface6
-#define PF 6
#define HOST_MASK 128
#define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed)
#define IP_SET_EMIT_CREATE
@@ -476,11 +372,10 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
struct hash_netiface *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface6_elem e = {
- .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
.elem = 1,
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- int ret;
if (e.cidr == 0)
return -EINVAL;
@@ -492,85 +387,64 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- e.iface = SRCDIR ? get_physindev_name(skb) :
- get_phyoutdev_name(skb);
- if (!e.iface)
- return -EINVAL;
+ const char *eiface = SRCDIR ? get_physindev_name(skb) :
+ get_physoutdev_name(skb);
+ if (!eiface)
+ return -EINVAL;
+ STRLCPY(e.iface, eiface);
e.physdev = 1;
-#else
- e.iface = NULL;
#endif
- } else
- e.iface = SRCDIR ? IFACE(in) : IFACE(out);
+ } else {
+ STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out));
+ }
- if (!e.iface)
+ if (strlen(e.iface) == 0)
return -EINVAL;
- ret = iface_test(&h->rbtree, &e.iface);
- if (adt == IPSET_ADD) {
- if (!ret) {
- ret = iface_add(&h->rbtree, &e.iface);
- if (ret)
- return ret;
- }
- } else if (!ret)
- return ret;
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
static int
hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
- enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- struct hash_netiface *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- char iface[IFNAMSIZ];
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] ||
!tb[IPSET_ATTR_IFACE] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
- if (tb[IPSET_ATTR_CIDR])
+ if (tb[IPSET_ATTR_CIDR]) {
e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (e.cidr > HOST_MASK)
- return -IPSET_ERR_INVALID_CIDR;
+ if (e.cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
ip6_netmask(&e.ip, e.cidr);
- strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE]));
- e.iface = iface;
- ret = iface_test(&h->rbtree, &e.iface);
- if (adt == IPSET_ADD) {
- if (!ret) {
- ret = iface_add(&h->rbtree, &e.iface);
- if (ret)
- return ret;
- }
- } else if (!ret)
- return ret;
+ nla_strlcpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ);
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_PHYSDEV)
e.physdev = 1;
if (cadt_flags & IPSET_FLAG_NOMATCH)
@@ -613,7 +487,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -630,6 +505,7 @@ hash_netiface_init(void)
static void __exit
hash_netiface_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_netiface_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index ea8772afb..3c862c0a7 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -57,8 +57,8 @@ struct hash_netnet4_elem {
static inline bool
hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1,
- const struct hash_netnet4_elem *ip2,
- u32 *multi)
+ const struct hash_netnet4_elem *ip2,
+ u32 *multi)
{
return ip1->ipcmp == ip2->ipcmp &&
ip1->ccmp == ip2->ccmp;
@@ -84,7 +84,7 @@ hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags)
static inline void
hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem,
- struct hash_netnet4_elem *orig)
+ struct hash_netnet4_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
@@ -103,7 +103,7 @@ hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner)
static bool
hash_netnet4_data_list(struct sk_buff *skb,
- const struct hash_netnet4_elem *data)
+ const struct hash_netnet4_elem *data)
{
u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
@@ -122,28 +122,27 @@ nla_put_failure:
static inline void
hash_netnet4_data_next(struct hash_netnet4_elem *next,
- const struct hash_netnet4_elem *d)
+ const struct hash_netnet4_elem *d)
{
next->ipcmp = d->ipcmp;
}
#define MTYPE hash_netnet4
-#define PF 4
#define HOST_MASK 32
#include "ip_set_hash_gen.h"
static int
hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
- const struct xt_action_param *par,
- enum ipset_adt adt, struct ip_set_adt_opt *opt)
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
const struct hash_netnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
- e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+ e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+ e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
if (adt == IPSET_TEST)
e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK;
@@ -157,53 +156,50 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
static int
hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
- enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
const struct hash_netnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netnet4_elem e = { };
+ struct hash_netnet4_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, last;
u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2;
- u8 cidr, cidr2;
int ret;
- e.cidr[0] = e.cidr[1] = HOST_MASK;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
- ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
if (tb[IPSET_ATTR_CIDR]) {
- cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!cidr || cidr > HOST_MASK)
+ e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
- e.cidr[0] = cidr;
}
if (tb[IPSET_ATTR_CIDR2]) {
- cidr2 = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
- if (!cidr2 || cidr2 > HOST_MASK)
+ e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+ if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
- e.cidr[1] = cidr2;
}
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -226,8 +222,9 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip, ip_to);
if (unlikely(ip + UINT_MAX == ip_to))
return -IPSET_ERR_HASH_RANGE;
- } else
+ } else {
ip_set_mask_from_to(ip, ip_to, e.cidr[0]);
+ }
ip2_to = ip2_from;
if (tb[IPSET_ATTR_IP2_TO]) {
@@ -238,28 +235,27 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip2_from, ip2_to);
if (unlikely(ip2_from + UINT_MAX == ip2_to))
return -IPSET_ERR_HASH_RANGE;
- } else
+ } else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
+ }
if (retried)
ip = ntohl(h->next.ip[0]);
while (!after(ip, ip_to)) {
e.ip[0] = htonl(ip);
- last = ip_set_range_to_cidr(ip, ip_to, &cidr);
- e.cidr[0] = cidr;
+ last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
ip2 = (retried &&
ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1])
: ip2_from;
while (!after(ip2, ip2_to)) {
e.ip[1] = htonl(ip2);
- last2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr2);
- e.cidr[1] = cidr2;
+ last2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
ip2 = last2 + 1;
}
ip = last + 1;
@@ -283,8 +279,8 @@ struct hash_netnet6_elem {
static inline bool
hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1,
- const struct hash_netnet6_elem *ip2,
- u32 *multi)
+ const struct hash_netnet6_elem *ip2,
+ u32 *multi)
{
return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) &&
ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) &&
@@ -311,7 +307,7 @@ hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags)
static inline void
hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem,
- struct hash_netnet6_elem *orig)
+ struct hash_netnet6_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
@@ -330,7 +326,7 @@ hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner)
static bool
hash_netnet6_data_list(struct sk_buff *skb,
- const struct hash_netnet6_elem *data)
+ const struct hash_netnet6_elem *data)
{
u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
@@ -349,34 +345,32 @@ nla_put_failure:
static inline void
hash_netnet6_data_next(struct hash_netnet4_elem *next,
- const struct hash_netnet6_elem *d)
+ const struct hash_netnet6_elem *d)
{
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
#define MTYPE hash_netnet6
-#define PF 6
#define HOST_MASK 128
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
static int
hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
- const struct xt_action_param *par,
- enum ipset_adt adt, struct ip_set_adt_opt *opt)
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
const struct hash_netnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netnet6_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
- e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+ e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+ e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
if (adt == IPSET_TEST)
- e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK;
+ e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK;
ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6);
ip6addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1].in6);
@@ -388,50 +382,52 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
static int
hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[],
- enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netnet6_elem e = { };
+ struct hash_netnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret;
- e.cidr[0] = e.cidr[1] = HOST_MASK;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]);
+ if (ret)
+ return ret;
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) ||
- ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]);
if (ret)
return ret;
- if (tb[IPSET_ATTR_CIDR])
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_CIDR]) {
e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
- if (tb[IPSET_ATTR_CIDR2])
+ if (tb[IPSET_ATTR_CIDR2]) {
e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
-
- if (!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] ||
- e.cidr[1] > HOST_MASK)
- return -IPSET_ERR_INVALID_CIDR;
+ if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
ip6_netmask(&e.ip[0], e.cidr[0]);
ip6_netmask(&e.ip[1], e.cidr[1]);
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -470,7 +466,8 @@ static struct ip_set_type hash_netnet_type __read_mostly = {
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -487,6 +484,7 @@ hash_netnet_init(void)
static void __exit
hash_netnet_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_netnet_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index c0ddb58d1..731813e0f 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -110,10 +110,10 @@ hash_netport4_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -125,7 +125,6 @@ hash_netport4_data_next(struct hash_netport4_elem *next,
}
#define MTYPE hash_netport4
-#define PF 4
#define HOST_MASK 32
#include "ip_set_hash_gen.h"
@@ -137,7 +136,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_netport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = {
- .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -167,23 +166,20 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
u8 cidr;
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -194,10 +190,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
e.cidr = cidr - 1;
}
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -205,8 +198,9 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMP))
e.port = 0;
@@ -215,6 +209,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -240,8 +235,9 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip, ip_to);
if (ip + UINT_MAX == ip_to)
return -IPSET_ERR_HASH_RANGE;
- } else
+ } else {
ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
+ }
if (retried)
ip = ntohl(h->next.ip);
@@ -257,8 +253,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
ip = last + 1;
}
@@ -326,10 +322,10 @@ hash_netport6_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -340,11 +336,9 @@ hash_netport6_data_next(struct hash_netport4_elem *next,
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
#define MTYPE hash_netport6
-#define PF 6
#define HOST_MASK 128
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
@@ -357,7 +351,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_netport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport6_elem e = {
- .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -387,25 +381,22 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
u8 cidr;
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -417,10 +408,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
}
ip6_netmask(&e.ip, e.cidr + 1);
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -428,14 +416,16 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMPV6))
e.port = 0;
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -459,8 +449,8 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
return ret;
}
@@ -495,7 +485,8 @@ static struct ip_set_type hash_netport_type __read_mostly = {
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -512,6 +503,7 @@ hash_netport_init(void)
static void __exit
hash_netport_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_netport_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index bfaa94c7b..0c68734f5 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -54,7 +54,7 @@ struct hash_netportnet4_elem {
u16 ccmp;
};
u16 padding;
- u8 nomatch:1;
+ u8 nomatch;
u8 proto;
};
@@ -62,8 +62,8 @@ struct hash_netportnet4_elem {
static inline bool
hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1,
- const struct hash_netportnet4_elem *ip2,
- u32 *multi)
+ const struct hash_netportnet4_elem *ip2,
+ u32 *multi)
{
return ip1->ipcmp == ip2->ipcmp &&
ip1->ccmp == ip2->ccmp &&
@@ -91,7 +91,7 @@ hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags)
static inline void
hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem,
- struct hash_netportnet4_elem *orig)
+ struct hash_netportnet4_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
@@ -111,7 +111,7 @@ hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem,
static bool
hash_netportnet4_data_list(struct sk_buff *skb,
- const struct hash_netportnet4_elem *data)
+ const struct hash_netportnet4_elem *data)
{
u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
@@ -124,37 +124,36 @@ hash_netportnet4_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
hash_netportnet4_data_next(struct hash_netportnet4_elem *next,
- const struct hash_netportnet4_elem *d)
+ const struct hash_netportnet4_elem *d)
{
next->ipcmp = d->ipcmp;
next->port = d->port;
}
#define MTYPE hash_netportnet4
-#define PF 4
#define HOST_MASK 32
#include "ip_set_hash_gen.h"
static int
hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
- const struct xt_action_param *par,
- enum ipset_adt adt, struct ip_set_adt_opt *opt)
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
const struct hash_netportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
- e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+ e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+ e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
if (adt == IPSET_TEST)
e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK;
@@ -172,58 +171,51 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
static int
hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
- enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
const struct hash_netportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netportnet4_elem e = { };
+ struct hash_netportnet4_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to;
u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2;
bool with_ports = false;
- u8 cidr, cidr2;
int ret;
- e.cidr[0] = e.cidr[1] = HOST_MASK;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from);
+ if (ret)
+ return ret;
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
- ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
if (tb[IPSET_ATTR_CIDR]) {
- cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!cidr || cidr > HOST_MASK)
+ e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
- e.cidr[0] = cidr;
}
if (tb[IPSET_ATTR_CIDR2]) {
- cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
- if (!cidr || cidr > HOST_MASK)
+ e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+ if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
- e.cidr[1] = cidr;
}
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -231,14 +223,16 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMP))
e.port = 0;
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -262,8 +256,9 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip, ip_to);
if (unlikely(ip + UINT_MAX == ip_to))
return -IPSET_ERR_HASH_RANGE;
- } else
+ } else {
ip_set_mask_from_to(ip, ip_to, e.cidr[0]);
+ }
port_to = port = ntohs(e.port);
if (tb[IPSET_ATTR_PORT_TO]) {
@@ -281,16 +276,16 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip2_from, ip2_to);
if (unlikely(ip2_from + UINT_MAX == ip2_to))
return -IPSET_ERR_HASH_RANGE;
- } else
+ } else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
+ }
if (retried)
ip = ntohl(h->next.ip[0]);
while (!after(ip, ip_to)) {
e.ip[0] = htonl(ip);
- ip_last = ip_set_range_to_cidr(ip, ip_to, &cidr);
- e.cidr[0] = cidr;
+ ip_last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port)
: port;
for (; p <= port_to; p++) {
@@ -301,13 +296,12 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
while (!after(ip2, ip2_to)) {
e.ip[1] = htonl(ip2);
ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
- &cidr2);
- e.cidr[1] = cidr2;
+ &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
ip2 = ip2_last + 1;
}
}
@@ -326,7 +320,7 @@ struct hash_netportnet6_elem {
u16 ccmp;
};
u16 padding;
- u8 nomatch:1;
+ u8 nomatch;
u8 proto;
};
@@ -334,8 +328,8 @@ struct hash_netportnet6_elem {
static inline bool
hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1,
- const struct hash_netportnet6_elem *ip2,
- u32 *multi)
+ const struct hash_netportnet6_elem *ip2,
+ u32 *multi)
{
return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) &&
ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) &&
@@ -364,7 +358,7 @@ hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags)
static inline void
hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem,
- struct hash_netportnet6_elem *orig)
+ struct hash_netportnet6_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
@@ -384,7 +378,7 @@ hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem,
static bool
hash_netportnet6_data_list(struct sk_buff *skb,
- const struct hash_netportnet6_elem *data)
+ const struct hash_netportnet6_elem *data)
{
u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
@@ -397,41 +391,39 @@ hash_netportnet6_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
hash_netportnet6_data_next(struct hash_netportnet4_elem *next,
- const struct hash_netportnet6_elem *d)
+ const struct hash_netportnet6_elem *d)
{
next->port = d->port;
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
#define MTYPE hash_netportnet6
-#define PF 6
#define HOST_MASK 128
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
static int
hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
- const struct xt_action_param *par,
- enum ipset_adt adt, struct ip_set_adt_opt *opt)
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
const struct hash_netportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netportnet6_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
- e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+ e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+ e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
if (adt == IPSET_TEST)
e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK;
@@ -449,57 +441,55 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
static int
hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
- enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
const struct hash_netportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netportnet6_elem e = { };
+ struct hash_netportnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 port, port_to;
bool with_ports = false;
int ret;
- e.cidr[0] = e.cidr[1] = HOST_MASK;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]);
+ if (ret)
+ return ret;
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) ||
- ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]);
if (ret)
return ret;
- if (tb[IPSET_ATTR_CIDR])
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_CIDR]) {
e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
- if (tb[IPSET_ATTR_CIDR2])
+ if (tb[IPSET_ATTR_CIDR2]) {
e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
-
- if (unlikely(!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] ||
- e.cidr[1] > HOST_MASK))
- return -IPSET_ERR_INVALID_CIDR;
+ if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
ip6_netmask(&e.ip[0], e.cidr[0]);
ip6_netmask(&e.ip[1], e.cidr[1]);
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -507,14 +497,16 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMPV6))
e.port = 0;
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -538,8 +530,8 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
return ret;
}
@@ -577,7 +569,8 @@ static struct ip_set_type hash_netportnet_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -594,6 +587,7 @@ hash_netportnet_init(void)
static void __exit
hash_netportnet_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_netportnet_type);
}
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index f8f682806..a1fe5377a 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/ip.h>
+#include <linux/rculist.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
@@ -27,6 +28,8 @@ MODULE_ALIAS("ip_set_list:set");
/* Member elements */
struct set_elem {
+ struct rcu_head rcu;
+ struct list_head list;
ip_set_id_t id;
};
@@ -41,12 +44,9 @@ struct list_set {
u32 size; /* size of set list array */
struct timer_list gc; /* garbage collection */
struct net *net; /* namespace */
- struct set_elem members[0]; /* the set members */
+ struct list_head members; /* the set members */
};
-#define list_set_elem(set, map, id) \
- (struct set_elem *)((void *)(map)->members + (id) * (set)->dsize)
-
static int
list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
@@ -54,17 +54,14 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
{
struct list_set *map = set->data;
struct set_elem *e;
- u32 i, cmdflags = opt->cmdflags;
+ u32 cmdflags = opt->cmdflags;
int ret;
/* Don't lookup sub-counters at all */
opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS;
if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE)
opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE;
- for (i = 0; i < map->size; i++) {
- e = list_set_elem(set, map, i);
- if (e->id == IPSET_INVALID_ID)
- return 0;
+ list_for_each_entry_rcu(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
@@ -91,13 +88,9 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb,
{
struct list_set *map = set->data;
struct set_elem *e;
- u32 i;
int ret;
- for (i = 0; i < map->size; i++) {
- e = list_set_elem(set, map, i);
- if (e->id == IPSET_INVALID_ID)
- return 0;
+ list_for_each_entry(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
@@ -115,13 +108,9 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb,
{
struct list_set *map = set->data;
struct set_elem *e;
- u32 i;
int ret;
- for (i = 0; i < map->size; i++) {
- e = list_set_elem(set, map, i);
- if (e->id == IPSET_INVALID_ID)
- return 0;
+ list_for_each_entry(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
@@ -138,110 +127,65 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+ int ret = -EINVAL;
+ rcu_read_lock();
switch (adt) {
case IPSET_TEST:
- return list_set_ktest(set, skb, par, opt, &ext);
+ ret = list_set_ktest(set, skb, par, opt, &ext);
+ break;
case IPSET_ADD:
- return list_set_kadd(set, skb, par, opt, &ext);
+ ret = list_set_kadd(set, skb, par, opt, &ext);
+ break;
case IPSET_DEL:
- return list_set_kdel(set, skb, par, opt, &ext);
+ ret = list_set_kdel(set, skb, par, opt, &ext);
+ break;
default:
break;
}
- return -EINVAL;
-}
-
-static bool
-id_eq(const struct ip_set *set, u32 i, ip_set_id_t id)
-{
- const struct list_set *map = set->data;
- const struct set_elem *e;
-
- if (i >= map->size)
- return 0;
+ rcu_read_unlock();
- e = list_set_elem(set, map, i);
- return !!(e->id == id &&
- !(SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, set))));
+ return ret;
}
-static int
-list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
- const struct ip_set_ext *ext)
-{
- struct list_set *map = set->data;
- struct set_elem *e = list_set_elem(set, map, i);
+/* Userspace interfaces: we are protected by the nfnl mutex */
- if (e->id != IPSET_INVALID_ID) {
- if (i == map->size - 1) {
- /* Last element replaced: e.g. add new,before,last */
- ip_set_put_byindex(map->net, e->id);
- ip_set_ext_destroy(set, e);
- } else {
- struct set_elem *x = list_set_elem(set, map,
- map->size - 1);
-
- /* Last element pushed off */
- if (x->id != IPSET_INVALID_ID) {
- ip_set_put_byindex(map->net, x->id);
- ip_set_ext_destroy(set, x);
- }
- memmove(list_set_elem(set, map, i + 1), e,
- set->dsize * (map->size - (i + 1)));
- /* Extensions must be initialized to zero */
- memset(e, 0, set->dsize);
- }
- }
-
- e->id = d->id;
- if (SET_WITH_TIMEOUT(set))
- ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
- if (SET_WITH_COUNTER(set))
- ip_set_init_counter(ext_counter(e, set), ext);
- if (SET_WITH_COMMENT(set))
- ip_set_init_comment(ext_comment(e, set), ext);
- if (SET_WITH_SKBINFO(set))
- ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
- return 0;
-}
-
-static int
-list_set_del(struct ip_set *set, u32 i)
+static void
+__list_set_del(struct ip_set *set, struct set_elem *e)
{
struct list_set *map = set->data;
- struct set_elem *e = list_set_elem(set, map, i);
ip_set_put_byindex(map->net, e->id);
+ /* We may call it, because we don't have a to be destroyed
+ * extension which is used by the kernel.
+ */
ip_set_ext_destroy(set, e);
+ kfree_rcu(e, rcu);
+}
- if (i < map->size - 1)
- memmove(e, list_set_elem(set, map, i + 1),
- set->dsize * (map->size - (i + 1)));
+static inline void
+list_set_del(struct ip_set *set, struct set_elem *e)
+{
+ list_del_rcu(&e->list);
+ __list_set_del(set, e);
+}
- /* Last element */
- e = list_set_elem(set, map, map->size - 1);
- e->id = IPSET_INVALID_ID;
- return 0;
+static inline void
+list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
+{
+ list_replace_rcu(&old->list, &e->list);
+ __list_set_del(set, old);
}
static void
set_cleanup_entries(struct ip_set *set)
{
struct list_set *map = set->data;
- struct set_elem *e;
- u32 i = 0;
+ struct set_elem *e, *n;
- while (i < map->size) {
- e = list_set_elem(set, map, i);
- if (e->id != IPSET_INVALID_ID &&
- ip_set_timeout_expired(ext_timeout(e, set)))
- list_set_del(set, i);
- /* Check element moved to position i in next loop */
- else
- i++;
- }
+ list_for_each_entry_safe(e, n, &map->members, list)
+ if (ip_set_timeout_expired(ext_timeout(e, set)))
+ list_set_del(set, e);
}
static int
@@ -250,31 +194,46 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
{
struct list_set *map = set->data;
struct set_adt_elem *d = value;
- struct set_elem *e;
- u32 i;
+ struct set_elem *e, *next, *prev = NULL;
int ret;
- for (i = 0; i < map->size; i++) {
- e = list_set_elem(set, map, i);
- if (e->id == IPSET_INVALID_ID)
- return 0;
- else if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, set)))
+ list_for_each_entry(e, &map->members, list) {
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, set)))
continue;
- else if (e->id != d->id)
+ else if (e->id != d->id) {
+ prev = e;
continue;
+ }
- if (d->before == 0)
- return 1;
- else if (d->before > 0)
- ret = id_eq(set, i + 1, d->refid);
- else
- ret = i > 0 && id_eq(set, i - 1, d->refid);
+ if (d->before == 0) {
+ ret = 1;
+ } else if (d->before > 0) {
+ next = list_next_entry(e, list);
+ ret = !list_is_last(&e->list, &map->members) &&
+ next->id == d->refid;
+ } else {
+ ret = prev && prev->id == d->refid;
+ }
return ret;
}
return 0;
}
+static void
+list_set_init_extensions(struct ip_set *set, const struct ip_set_ext *ext,
+ struct set_elem *e)
+{
+ if (SET_WITH_COUNTER(set))
+ ip_set_init_counter(ext_counter(e, set), ext);
+ if (SET_WITH_COMMENT(set))
+ ip_set_init_comment(ext_comment(e, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
+ /* Update timeout last */
+ if (SET_WITH_TIMEOUT(set))
+ ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
+}
static int
list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
@@ -282,60 +241,78 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
{
struct list_set *map = set->data;
struct set_adt_elem *d = value;
- struct set_elem *e;
+ struct set_elem *e, *n, *prev, *next;
bool flag_exist = flags & IPSET_FLAG_EXIST;
- u32 i, ret = 0;
if (SET_WITH_TIMEOUT(set))
set_cleanup_entries(set);
- /* Check already added element */
- for (i = 0; i < map->size; i++) {
- e = list_set_elem(set, map, i);
- if (e->id == IPSET_INVALID_ID)
- goto insert;
- else if (e->id != d->id)
+ /* Find where to add the new entry */
+ n = prev = next = NULL;
+ list_for_each_entry(e, &map->members, list) {
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, set)))
continue;
-
- if ((d->before > 1 && !id_eq(set, i + 1, d->refid)) ||
- (d->before < 0 &&
- (i == 0 || !id_eq(set, i - 1, d->refid))))
- /* Before/after doesn't match */
+ else if (d->id == e->id)
+ n = e;
+ else if (d->before == 0 || e->id != d->refid)
+ continue;
+ else if (d->before > 0)
+ next = e;
+ else
+ prev = e;
+ }
+ /* Re-add already existing element */
+ if (n) {
+ if ((d->before > 0 && !next) ||
+ (d->before < 0 && !prev))
return -IPSET_ERR_REF_EXIST;
if (!flag_exist)
- /* Can't re-add */
return -IPSET_ERR_EXIST;
/* Update extensions */
- ip_set_ext_destroy(set, e);
+ ip_set_ext_destroy(set, n);
+ list_set_init_extensions(set, ext, n);
- if (SET_WITH_TIMEOUT(set))
- ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
- if (SET_WITH_COUNTER(set))
- ip_set_init_counter(ext_counter(e, set), ext);
- if (SET_WITH_COMMENT(set))
- ip_set_init_comment(ext_comment(e, set), ext);
- if (SET_WITH_SKBINFO(set))
- ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
/* Set is already added to the list */
ip_set_put_byindex(map->net, d->id);
return 0;
}
-insert:
- ret = -IPSET_ERR_LIST_FULL;
- for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) {
- e = list_set_elem(set, map, i);
- if (e->id == IPSET_INVALID_ID)
- ret = d->before != 0 ? -IPSET_ERR_REF_EXIST
- : list_set_add(set, i, d, ext);
- else if (e->id != d->refid)
- continue;
- else if (d->before > 0)
- ret = list_set_add(set, i, d, ext);
- else if (i + 1 < map->size)
- ret = list_set_add(set, i + 1, d, ext);
+ /* Add new entry */
+ if (d->before == 0) {
+ /* Append */
+ n = list_empty(&map->members) ? NULL :
+ list_last_entry(&map->members, struct set_elem, list);
+ } else if (d->before > 0) {
+ /* Insert after next element */
+ if (!list_is_last(&next->list, &map->members))
+ n = list_next_entry(next, list);
+ } else {
+ /* Insert before prev element */
+ if (prev->list.prev != &map->members)
+ n = list_prev_entry(prev, list);
}
+ /* Can we replace a timed out entry? */
+ if (n &&
+ !(SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(n, set))))
+ n = NULL;
+
+ e = kzalloc(set->dsize, GFP_KERNEL);
+ if (!e)
+ return -ENOMEM;
+ e->id = d->id;
+ INIT_LIST_HEAD(&e->list);
+ list_set_init_extensions(set, ext, e);
+ if (n)
+ list_set_replace(set, e, n);
+ else if (next)
+ list_add_tail_rcu(&e->list, &next->list);
+ else if (prev)
+ list_add_rcu(&e->list, &prev->list);
+ else
+ list_add_tail_rcu(&e->list, &map->members);
- return ret;
+ return 0;
}
static int
@@ -344,32 +321,30 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext,
{
struct list_set *map = set->data;
struct set_adt_elem *d = value;
- struct set_elem *e;
- u32 i;
-
- for (i = 0; i < map->size; i++) {
- e = list_set_elem(set, map, i);
- if (e->id == IPSET_INVALID_ID)
- return d->before != 0 ? -IPSET_ERR_REF_EXIST
- : -IPSET_ERR_EXIST;
- else if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, set)))
+ struct set_elem *e, *next, *prev = NULL;
+
+ list_for_each_entry(e, &map->members, list) {
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, set)))
continue;
- else if (e->id != d->id)
+ else if (e->id != d->id) {
+ prev = e;
continue;
+ }
- if (d->before == 0)
- return list_set_del(set, i);
- else if (d->before > 0) {
- if (!id_eq(set, i + 1, d->refid))
+ if (d->before > 0) {
+ next = list_next_entry(e, list);
+ if (list_is_last(&e->list, &map->members) ||
+ next->id != d->refid)
return -IPSET_ERR_REF_EXIST;
- return list_set_del(set, i);
- } else if (i == 0 || !id_eq(set, i - 1, d->refid))
- return -IPSET_ERR_REF_EXIST;
- else
- return list_set_del(set, i);
+ } else if (d->before < 0) {
+ if (!prev || prev->id != d->refid)
+ return -IPSET_ERR_REF_EXIST;
+ }
+ list_set_del(set, e);
+ return 0;
}
- return -IPSET_ERR_EXIST;
+ return d->before != 0 ? -IPSET_ERR_REF_EXIST : -IPSET_ERR_EXIST;
}
static int
@@ -383,19 +358,13 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
struct ip_set *s;
int ret = 0;
- if (unlikely(!tb[IPSET_ATTR_NAME] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
- return -IPSET_ERR_PROTOCOL;
-
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (unlikely(!tb[IPSET_ATTR_NAME] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+
ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -410,6 +379,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
e.before = f & IPSET_FLAG_BEFORE;
}
@@ -447,27 +417,26 @@ static void
list_set_flush(struct ip_set *set)
{
struct list_set *map = set->data;
- struct set_elem *e;
- u32 i;
-
- for (i = 0; i < map->size; i++) {
- e = list_set_elem(set, map, i);
- if (e->id != IPSET_INVALID_ID) {
- ip_set_put_byindex(map->net, e->id);
- ip_set_ext_destroy(set, e);
- e->id = IPSET_INVALID_ID;
- }
- }
+ struct set_elem *e, *n;
+
+ list_for_each_entry_safe(e, n, &map->members, list)
+ list_set_del(set, e);
}
static void
list_set_destroy(struct ip_set *set)
{
struct list_set *map = set->data;
+ struct set_elem *e, *n;
if (SET_WITH_TIMEOUT(set))
del_timer_sync(&map->gc);
- list_set_flush(set);
+ list_for_each_entry_safe(e, n, &map->members, list) {
+ list_del(&e->list);
+ ip_set_put_byindex(map->net, e->id);
+ ip_set_ext_destroy(set, e);
+ kfree(e);
+ }
kfree(map);
set->data = NULL;
@@ -478,6 +447,11 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
{
const struct list_set *map = set->data;
struct nlattr *nested;
+ struct set_elem *e;
+ u32 n = 0;
+
+ list_for_each_entry(e, &map->members, list)
+ n++;
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested)
@@ -485,7 +459,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
- htonl(sizeof(*map) + map->size * set->dsize)))
+ htonl(sizeof(*map) + n * set->dsize)))
goto nla_put_failure;
if (unlikely(ip_set_put_flags(skb, set)))
goto nla_put_failure;
@@ -502,18 +476,22 @@ list_set_list(const struct ip_set *set,
{
const struct list_set *map = set->data;
struct nlattr *atd, *nested;
- u32 i, first = cb->args[IPSET_CB_ARG0];
- const struct set_elem *e;
+ u32 i = 0, first = cb->args[IPSET_CB_ARG0];
+ struct set_elem *e;
+ int ret = 0;
atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
if (!atd)
return -EMSGSIZE;
- for (; cb->args[IPSET_CB_ARG0] < map->size;
- cb->args[IPSET_CB_ARG0]++) {
- i = cb->args[IPSET_CB_ARG0];
- e = list_set_elem(set, map, i);
- if (e->id == IPSET_INVALID_ID)
- goto finish;
+ list_for_each_entry(e, &map->members, list) {
+ if (i == first)
+ break;
+ i++;
+ }
+
+ rcu_read_lock();
+ list_for_each_entry_from(e, &map->members, list) {
+ i++;
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
@@ -521,9 +499,10 @@ list_set_list(const struct ip_set *set,
if (!nested) {
if (i == first) {
nla_nest_cancel(skb, atd);
- return -EMSGSIZE;
- } else
- goto nla_put_failure;
+ ret = -EMSGSIZE;
+ goto out;
+ }
+ goto nla_put_failure;
}
if (nla_put_string(skb, IPSET_ATTR_NAME,
ip_set_name_byindex(map->net, e->id)))
@@ -532,20 +511,23 @@ list_set_list(const struct ip_set *set,
goto nla_put_failure;
ipset_nest_end(skb, nested);
}
-finish:
+
ipset_nest_end(skb, atd);
/* Set listing finished */
cb->args[IPSET_CB_ARG0] = 0;
- return 0;
+ goto out;
nla_put_failure:
nla_nest_cancel(skb, nested);
if (unlikely(i == first)) {
cb->args[IPSET_CB_ARG0] = 0;
- return -EMSGSIZE;
+ ret = -EMSGSIZE;
}
+ cb->args[IPSET_CB_ARG0] = i - 1;
ipset_nest_end(skb, atd);
- return 0;
+out:
+ rcu_read_unlock();
+ return ret;
}
static bool
@@ -577,12 +559,12 @@ static const struct ip_set_type_variant set_variant = {
static void
list_set_gc(unsigned long ul_set)
{
- struct ip_set *set = (struct ip_set *) ul_set;
+ struct ip_set *set = (struct ip_set *)ul_set;
struct list_set *map = set->data;
- write_lock_bh(&set->lock);
+ spin_lock_bh(&set->lock);
set_cleanup_entries(set);
- write_unlock_bh(&set->lock);
+ spin_unlock_bh(&set->lock);
map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&map->gc);
@@ -594,7 +576,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
struct list_set *map = set->data;
init_timer(&map->gc);
- map->gc.data = (unsigned long) set;
+ map->gc.data = (unsigned long)set;
map->gc.function = gc;
map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&map->gc);
@@ -606,24 +588,16 @@ static bool
init_list_set(struct net *net, struct ip_set *set, u32 size)
{
struct list_set *map;
- struct set_elem *e;
- u32 i;
- map = kzalloc(sizeof(*map) +
- min_t(u32, size, IP_SET_LIST_MAX_SIZE) * set->dsize,
- GFP_KERNEL);
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
if (!map)
return false;
map->size = size;
map->net = net;
+ INIT_LIST_HEAD(&map->members);
set->data = map;
- for (i = 0; i < size; i++) {
- e = list_set_elem(set, map, i);
- e->id = IPSET_INVALID_ID;
- }
-
return true;
}
@@ -678,7 +652,8 @@ static struct ip_set_type list_set_type __read_mostly = {
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
@@ -695,6 +670,7 @@ list_set_init(void)
static void __exit
list_set_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&list_set_type);
}
diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c
index 04d15fdc9..1c8a42c10 100644
--- a/net/netfilter/ipset/pfxlen.c
+++ b/net/netfilter/ipset/pfxlen.c
@@ -1,9 +1,7 @@
#include <linux/export.h>
#include <linux/netfilter/ipset/pfxlen.h>
-/*
- * Prefixlen maps for fast conversions, by Jan Engelhardt.
- */
+/* Prefixlen maps for fast conversions, by Jan Engelhardt. */
#define E(a, b, c, d) \
{.ip6 = { \
@@ -11,8 +9,7 @@
htonl(c), htonl(d), \
} }
-/*
- * This table works for both IPv4 and IPv6;
+/* This table works for both IPv4 and IPv6;
* just use prefixlen_netmask_map[prefixlength].ip.
*/
const union nf_inet_addr ip_set_netmask_map[] = {
@@ -149,13 +146,12 @@ const union nf_inet_addr ip_set_netmask_map[] = {
EXPORT_SYMBOL_GPL(ip_set_netmask_map);
#undef E
-#define E(a, b, c, d) \
- {.ip6 = { (__force __be32) a, (__force __be32) b, \
- (__force __be32) c, (__force __be32) d, \
+#define E(a, b, c, d) \
+ {.ip6 = { (__force __be32)a, (__force __be32)b, \
+ (__force __be32)c, (__force __be32)d, \
} }
-/*
- * This table works for both IPv4 and IPv6;
+/* This table works for both IPv4 and IPv6;
* just use prefixlen_hostmask_map[prefixlength].ip.
*/
const union nf_inet_addr ip_set_hostmask_map[] = {
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 5d2b806a8..38fbc194b 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -319,7 +319,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* return *ignored=0 i.e. ICMP and NF_DROP
*/
sched = rcu_dereference(svc->scheduler);
- dest = sched->schedule(svc, skb, iph);
+ if (sched) {
+ /* read svc->sched_data after svc->scheduler */
+ smp_rmb();
+ dest = sched->schedule(svc, skb, iph);
+ } else {
+ dest = NULL;
+ }
if (!dest) {
IP_VS_DBG(1, "p-schedule: no dest found.\n");
kfree(param.pe_data);
@@ -467,7 +473,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
}
sched = rcu_dereference(svc->scheduler);
- dest = sched->schedule(svc, skb, iph);
+ if (sched) {
+ /* read svc->sched_data after svc->scheduler */
+ smp_rmb();
+ dest = sched->schedule(svc, skb, iph);
+ } else {
+ dest = NULL;
+ }
if (dest == NULL) {
IP_VS_DBG(1, "Schedule: no dest found.\n");
return NULL;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 285eae3a1..24c554201 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -842,15 +842,16 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
__ip_vs_dst_cache_reset(dest);
spin_unlock_bh(&dest->dst_lock);
- sched = rcu_dereference_protected(svc->scheduler, 1);
if (add) {
ip_vs_start_estimator(svc->net, &dest->stats);
list_add_rcu(&dest->n_list, &svc->destinations);
svc->num_dests++;
- if (sched->add_dest)
+ sched = rcu_dereference_protected(svc->scheduler, 1);
+ if (sched && sched->add_dest)
sched->add_dest(svc, dest);
} else {
- if (sched->upd_dest)
+ sched = rcu_dereference_protected(svc->scheduler, 1);
+ if (sched && sched->upd_dest)
sched->upd_dest(svc, dest);
}
}
@@ -1084,7 +1085,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
struct ip_vs_scheduler *sched;
sched = rcu_dereference_protected(svc->scheduler, 1);
- if (sched->del_dest)
+ if (sched && sched->del_dest)
sched->del_dest(svc, dest);
}
}
@@ -1175,11 +1176,14 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
ip_vs_use_count_inc();
/* Lookup the scheduler by 'u->sched_name' */
- sched = ip_vs_scheduler_get(u->sched_name);
- if (sched == NULL) {
- pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
- ret = -ENOENT;
- goto out_err;
+ if (strcmp(u->sched_name, "none")) {
+ sched = ip_vs_scheduler_get(u->sched_name);
+ if (!sched) {
+ pr_info("Scheduler module ip_vs_%s not found\n",
+ u->sched_name);
+ ret = -ENOENT;
+ goto out_err;
+ }
}
if (u->pe_name && *u->pe_name) {
@@ -1240,10 +1244,12 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
spin_lock_init(&svc->stats.lock);
/* Bind the scheduler */
- ret = ip_vs_bind_scheduler(svc, sched);
- if (ret)
- goto out_err;
- sched = NULL;
+ if (sched) {
+ ret = ip_vs_bind_scheduler(svc, sched);
+ if (ret)
+ goto out_err;
+ sched = NULL;
+ }
/* Bind the ct retriever */
RCU_INIT_POINTER(svc->pe, pe);
@@ -1291,17 +1297,20 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
- struct ip_vs_scheduler *sched, *old_sched;
+ struct ip_vs_scheduler *sched = NULL, *old_sched;
struct ip_vs_pe *pe = NULL, *old_pe = NULL;
int ret = 0;
/*
* Lookup the scheduler, by 'u->sched_name'
*/
- sched = ip_vs_scheduler_get(u->sched_name);
- if (sched == NULL) {
- pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
- return -ENOENT;
+ if (strcmp(u->sched_name, "none")) {
+ sched = ip_vs_scheduler_get(u->sched_name);
+ if (!sched) {
+ pr_info("Scheduler module ip_vs_%s not found\n",
+ u->sched_name);
+ return -ENOENT;
+ }
}
old_sched = sched;
@@ -1329,14 +1338,20 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
old_sched = rcu_dereference_protected(svc->scheduler, 1);
if (sched != old_sched) {
+ if (old_sched) {
+ ip_vs_unbind_scheduler(svc, old_sched);
+ RCU_INIT_POINTER(svc->scheduler, NULL);
+ /* Wait all svc->sched_data users */
+ synchronize_rcu();
+ }
/* Bind the new scheduler */
- ret = ip_vs_bind_scheduler(svc, sched);
- if (ret) {
- old_sched = sched;
- goto out;
+ if (sched) {
+ ret = ip_vs_bind_scheduler(svc, sched);
+ if (ret) {
+ ip_vs_scheduler_put(sched);
+ goto out;
+ }
}
- /* Unbind the old scheduler on success */
- ip_vs_unbind_scheduler(svc, old_sched);
}
/*
@@ -1982,6 +1997,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
const struct ip_vs_iter *iter = seq->private;
const struct ip_vs_dest *dest;
struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
+ char *sched_name = sched ? sched->name : "none";
if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
@@ -1990,18 +2006,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
ip_vs_proto_name(svc->protocol),
&svc->addr.in6,
ntohs(svc->port),
- sched->name);
+ sched_name);
else
#endif
seq_printf(seq, "%s %08X:%04X %s %s ",
ip_vs_proto_name(svc->protocol),
ntohl(svc->addr.ip),
ntohs(svc->port),
- sched->name,
+ sched_name,
(svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
} else {
seq_printf(seq, "FWM %08X %s %s",
- svc->fwmark, sched->name,
+ svc->fwmark, sched_name,
(svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
}
@@ -2427,13 +2443,15 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
struct ip_vs_scheduler *sched;
struct ip_vs_kstats kstats;
+ char *sched_name;
sched = rcu_dereference_protected(src->scheduler, 1);
+ sched_name = sched ? sched->name : "none";
dst->protocol = src->protocol;
dst->addr = src->addr.ip;
dst->port = src->port;
dst->fwmark = src->fwmark;
- strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name));
+ strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name));
dst->flags = src->flags;
dst->timeout = src->timeout / HZ;
dst->netmask = src->netmask;
@@ -2892,6 +2910,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
struct ip_vs_flags flags = { .flags = svc->flags,
.mask = ~0 };
struct ip_vs_kstats kstats;
+ char *sched_name;
nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
if (!nl_service)
@@ -2910,8 +2929,9 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
}
sched = rcu_dereference_protected(svc->scheduler, 1);
+ sched_name = sched ? sched->name : "none";
pe = rcu_dereference_protected(svc->pe, 1);
- if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) ||
+ if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) ||
(pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index 199760c71..7e8141647 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -74,7 +74,7 @@ void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
if (sched->done_service)
sched->done_service(svc);
- /* svc->scheduler can not be set to NULL */
+ /* svc->scheduler can be set to NULL only by caller */
}
@@ -147,21 +147,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
{
- struct ip_vs_scheduler *sched;
+ struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
+ char *sched_name = sched ? sched->name : "none";
- sched = rcu_dereference(svc->scheduler);
if (svc->fwmark) {
IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n",
- sched->name, svc->fwmark, svc->fwmark, msg);
+ sched_name, svc->fwmark, svc->fwmark, msg);
#ifdef CONFIG_IP_VS_IPV6
} else if (svc->af == AF_INET6) {
IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
- sched->name, ip_vs_proto_name(svc->protocol),
+ sched_name, ip_vs_proto_name(svc->protocol),
&svc->addr.in6, ntohs(svc->port), msg);
#endif
} else {
IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n",
- sched->name, ip_vs_proto_name(svc->protocol),
+ sched_name, ip_vs_proto_name(svc->protocol),
&svc->addr.ip, ntohs(svc->port), msg);
}
}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 19b9cce6c..d99ad93eb 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -612,7 +612,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
pkts = atomic_add_return(1, &cp->in_pkts);
else
pkts = sysctl_sync_threshold(ipvs);
- ip_vs_sync_conn(net, cp->control, pkts);
+ ip_vs_sync_conn(net, cp, pkts);
}
}
@@ -1457,18 +1457,12 @@ static struct socket *make_send_sock(struct net *net, int id)
struct socket *sock;
int result;
- /* First create a socket move it to right name space later */
- result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ /* First create a socket */
+ result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
if (result < 0) {
pr_err("Error during creation of socket; terminating\n");
return ERR_PTR(result);
}
- /*
- * Kernel sockets that are a part of a namespace, should not
- * hold a reference to a namespace in order to allow to stop it.
- * After sk_change_net should be released using sk_release_kernel.
- */
- sk_change_net(sock->sk, net);
result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
if (result < 0) {
pr_err("Error setting outbound mcast interface\n");
@@ -1497,7 +1491,7 @@ static struct socket *make_send_sock(struct net *net, int id)
return sock;
error:
- sk_release_kernel(sock->sk);
+ sock_release(sock);
return ERR_PTR(result);
}
@@ -1518,17 +1512,11 @@ static struct socket *make_receive_sock(struct net *net, int id)
int result;
/* First create a socket */
- result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
if (result < 0) {
pr_err("Error during creation of socket; terminating\n");
return ERR_PTR(result);
}
- /*
- * Kernel sockets that are a part of a namespace, should not
- * hold a reference to a namespace in order to allow to stop it.
- * After sk_change_net should be released using sk_release_kernel.
- */
- sk_change_net(sock->sk, net);
/* it is equivalent to the REUSEADDR option in user-space */
sock->sk->sk_reuse = SK_CAN_REUSE;
result = sysctl_sync_sock_size(ipvs);
@@ -1554,7 +1542,7 @@ static struct socket *make_receive_sock(struct net *net, int id)
return sock;
error:
- sk_release_kernel(sock->sk);
+ sock_release(sock);
return ERR_PTR(result);
}
@@ -1692,7 +1680,7 @@ done:
ip_vs_sync_buff_release(sb);
/* release the sending multicast socket */
- sk_release_kernel(tinfo->sock->sk);
+ sock_release(tinfo->sock);
kfree(tinfo);
return 0;
@@ -1729,7 +1717,7 @@ static int sync_thread_backup(void *data)
}
/* release the sending multicast socket */
- sk_release_kernel(tinfo->sock->sk);
+ sock_release(tinfo->sock);
kfree(tinfo->buf);
kfree(tinfo);
@@ -1854,11 +1842,11 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
return 0;
outsocket:
- sk_release_kernel(sock->sk);
+ sock_release(sock);
outtinfo:
if (tinfo) {
- sk_release_kernel(tinfo->sock->sk);
+ sock_release(tinfo->sock);
kfree(tinfo->buf);
kfree(tinfo);
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 19986ec5f..258a0b0e8 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -130,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr,
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = daddr;
- fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
FLOWI_FLAG_KNOWN_NH : 0;
@@ -364,13 +363,16 @@ err_unreach:
#ifdef CONFIG_IP_VS_IPV6
static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
- struct in6_addr *ret_saddr, int do_xfrm)
+ struct in6_addr *ret_saddr, int do_xfrm, int rt_mode)
{
struct dst_entry *dst;
struct flowi6 fl6 = {
.daddr = *daddr,
};
+ if (rt_mode & IP_VS_RT_MODE_KNOWN_NH)
+ fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
+
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error)
goto out_err;
@@ -427,7 +429,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
}
dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
&dest_dst->dst_saddr.in6,
- do_xfrm);
+ do_xfrm, rt_mode);
if (!dst) {
__ip_vs_dst_set(dest, NULL, NULL, 0);
spin_unlock_bh(&dest->dst_lock);
@@ -435,7 +437,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
goto err_unreach;
}
rt = (struct rt6_info *) dst;
- cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
+ cookie = rt6_get_cookie(rt);
__ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
spin_unlock_bh(&dest->dst_lock);
IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
@@ -446,7 +448,8 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
*ret_saddr = dest_dst->dst_saddr.in6;
} else {
noref = 0;
- dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
+ dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm,
+ rt_mode);
if (!dst)
goto err_unreach;
rt = (struct rt6_info *) dst;
@@ -501,6 +504,13 @@ err_put:
return -1;
err_unreach:
+ /* The ip6_link_failure function requires the dev field to be set
+ * in order to get the net (further for the sake of fwmark
+ * reflection).
+ */
+ if (!skb->dev)
+ skb->dev = skb_dst(skb)->dev;
+
dst_link_failure(skb);
return -1;
}
@@ -519,10 +529,27 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
if (ret == NF_ACCEPT) {
nf_reset(skb);
skb_forward_csum(skb);
+ if (!skb->sk)
+ skb_sender_cpu_clear(skb);
}
return ret;
}
+/* In the event of a remote destination, it's possible that we would have
+ * matches against an old socket (particularly a TIME-WAIT socket). This
+ * causes havoc down the line (ip_local_out et. al. expect regular sockets
+ * and invalid memory accesses will happen) so simply drop the association
+ * in this case.
+*/
+static inline void ip_vs_drop_early_demux_sk(struct sk_buff *skb)
+{
+ /* If dev is set, the packet came from the LOCAL_IN callback and
+ * not from a local TCP socket.
+ */
+ if (skb->dev)
+ skb_orphan(skb);
+}
+
/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
struct ip_vs_conn *cp, int local)
@@ -534,12 +561,23 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
ip_vs_notrack(skb);
else
ip_vs_update_conntrack(skb, cp, 1);
+
+ /* Remove the early_demux association unless it's bound for the
+ * exact same port and address on this host after translation.
+ */
+ if (!local || cp->vport != cp->dport ||
+ !ip_vs_addr_equal(cp->af, &cp->vaddr, &cp->daddr))
+ ip_vs_drop_early_demux_sk(skb);
+
if (!local) {
skb_forward_csum(skb);
+ if (!skb->sk)
+ skb_sender_cpu_clear(skb);
NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output_sk);
} else
ret = NF_ACCEPT;
+
return ret;
}
@@ -553,7 +591,10 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
ip_vs_notrack(skb);
if (!local) {
+ ip_vs_drop_early_demux_sk(skb);
skb_forward_csum(skb);
+ if (!skb->sk)
+ skb_sender_cpu_clear(skb);
NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output_sk);
} else
@@ -781,7 +822,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* From world but DNAT to loopback address? */
if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
- ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
+ ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) {
IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
"ip_vs_nat_xmit_v6(): "
"stopping DNAT to loopback address");
@@ -841,6 +882,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
struct ipv6hdr *old_ipv6h = NULL;
#endif
+ ip_vs_drop_early_demux_sk(skb);
+
if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
new_skb = skb_realloc_headroom(skb, max_headroom);
if (!new_skb)
@@ -1164,7 +1207,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
NULL, ipvsh, 0,
IP_VS_RT_MODE_LOCAL |
- IP_VS_RT_MODE_NON_LOCAL);
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_KNOWN_NH);
if (local < 0)
goto tx_error;
if (local) {
@@ -1346,7 +1390,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* From world but DNAT to loopback address? */
if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
- ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
+ ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) {
IP_VS_DBG(1, "%s(): "
"stopping DNAT to loopback %pI6\n",
__func__, &cp->daddr.in6);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 13fad8668..0625a42df 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -287,6 +287,47 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
spin_unlock(&pcpu->lock);
}
+/* Released via destroy_conntrack() */
+struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags)
+{
+ struct nf_conn *tmpl;
+
+ tmpl = kzalloc(sizeof(*tmpl), flags);
+ if (tmpl == NULL)
+ return NULL;
+
+ tmpl->status = IPS_TEMPLATE;
+ write_pnet(&tmpl->ct_net, net);
+
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ if (zone) {
+ struct nf_conntrack_zone *nf_ct_zone;
+
+ nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, flags);
+ if (!nf_ct_zone)
+ goto out_free;
+ nf_ct_zone->id = zone;
+ }
+#endif
+ atomic_set(&tmpl->ct_general.use, 0);
+
+ return tmpl;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+out_free:
+ kfree(tmpl);
+ return NULL;
+#endif
+}
+EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
+
+void nf_ct_tmpl_free(struct nf_conn *tmpl)
+{
+ nf_ct_ext_destroy(tmpl);
+ nf_ct_ext_free(tmpl);
+ kfree(tmpl);
+}
+EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);
+
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
@@ -298,6 +339,10 @@ destroy_conntrack(struct nf_conntrack *nfct)
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
NF_CT_ASSERT(!timer_pending(&ct->timeout));
+ if (unlikely(nf_ct_is_template(ct))) {
+ nf_ct_tmpl_free(ct);
+ return;
+ }
rcu_read_lock();
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto && l4proto->destroy)
@@ -540,28 +585,6 @@ out:
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
-/* deletion from this larval template list happens via nf_ct_put() */
-void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl)
-{
- struct ct_pcpu *pcpu;
-
- __set_bit(IPS_TEMPLATE_BIT, &tmpl->status);
- __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
- nf_conntrack_get(&tmpl->ct_general);
-
- /* add this conntrack to the (per cpu) tmpl list */
- local_bh_disable();
- tmpl->cpu = smp_processor_id();
- pcpu = per_cpu_ptr(nf_ct_net(tmpl)->ct.pcpu_lists, tmpl->cpu);
-
- spin_lock(&pcpu->lock);
- /* Overload tuple linked list to put us in template list. */
- hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
- &pcpu->tmpl);
- spin_unlock_bh(&pcpu->lock);
-}
-EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert);
-
/* Confirm a connection given skb; places it in hash table */
int
__nf_conntrack_confirm(struct sk_buff *skb)
@@ -1522,10 +1545,8 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
sz = nr_slots * sizeof(struct hlist_nulls_head);
hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
get_order(sz));
- if (!hash) {
- printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
+ if (!hash)
hash = vzalloc(sz);
- }
if (hash && nulls)
for (i = 0; i < nr_slots; i++)
@@ -1751,7 +1772,6 @@ int nf_conntrack_init_net(struct net *net)
spin_lock_init(&pcpu->lock);
INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
- INIT_HLIST_NULLS_HEAD(&pcpu->tmpl, TEMPLATE_NULLS_VAL);
}
net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 7a17070c5..b45a4223c 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -219,7 +219,8 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
}
- return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
+ return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
+ nf_ct_zone(a->master) == nf_ct_zone(b->master);
}
static inline int expect_matches(const struct nf_conntrack_expect *a,
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 1d69f5b97..9511af04d 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -779,8 +779,8 @@ static int callforward_do_filter(struct net *net,
flowi6_to_flowi(&fl1), false)) {
if (!afinfo->route(net, (struct dst_entry **)&rt2,
flowi6_to_flowi(&fl2), false)) {
- if (ipv6_addr_equal(rt6_nexthop(rt1),
- rt6_nexthop(rt2)) &&
+ if (ipv6_addr_equal(rt6_nexthop(rt1, &fl1.daddr),
+ rt6_nexthop(rt2, &fl2.daddr)) &&
rt1->dst.dev == rt2->dst.dev)
ret = 1;
dst_release(&rt2->dst);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d1c23940a..6b8b0abbf 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2995,11 +2995,6 @@ ctnetlink_create_expect(struct net *net, u16 zone,
}
err = nf_ct_expect_related_report(exp, portid, report);
- if (err < 0)
- goto err_exp;
-
- return 0;
-err_exp:
nf_ct_expect_put(exp);
err_ct:
nf_ct_put(ct);
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 60865f110..2281be419 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -90,7 +90,13 @@ static int generic_packet(struct nf_conn *ct,
static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
{
- return nf_generic_should_process(nf_ct_protonum(ct));
+ bool ret;
+
+ ret = nf_generic_should_process(nf_ct_protonum(ct));
+ if (!ret)
+ pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n",
+ nf_ct_protonum(ct));
+ return ret;
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index ea7f36784..399210693 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -19,6 +19,7 @@ unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb,
/* nf_queue.c */
int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem,
struct nf_hook_state *state, unsigned int queuenum);
+void nf_queue_nf_hook_drop(struct nf_hook_ops *ops);
int __init netfilter_queue_init(void);
/* nf_log.c */
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 2e88032cd..8a8b2abc3 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -105,6 +105,23 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
+void nf_queue_nf_hook_drop(struct nf_hook_ops *ops)
+{
+ const struct nf_queue_handler *qh;
+ struct net *net;
+
+ rtnl_lock();
+ rcu_read_lock();
+ qh = rcu_dereference(queue_handler);
+ if (qh) {
+ for_each_net(net) {
+ qh->nf_hook_drop(net, ops);
+ }
+ }
+ rcu_read_unlock();
+ rtnl_unlock();
+}
+
/*
* Any packet that leaves via this function must come back
* through nf_reinject().
@@ -196,7 +213,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
if (verdict == NF_ACCEPT) {
next_hook:
- verdict = nf_iterate(&nf_hooks[entry->state.pf][entry->state.hook],
+ verdict = nf_iterate(entry->state.hook_list,
skb, &entry->state, &elem);
}
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 52e20c9a4..d6ee8f8b1 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -11,6 +11,7 @@
#include <asm/unaligned.h>
#include <net/tcp.h>
#include <net/netns/generic.h>
+#include <linux/proc_fs.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter/x_tables.h>
@@ -348,23 +349,20 @@ static void __net_exit synproxy_proc_exit(struct net *net)
static int __net_init synproxy_net_init(struct net *net)
{
struct synproxy_net *snet = synproxy_pernet(net);
- struct nf_conntrack_tuple t;
struct nf_conn *ct;
int err = -ENOMEM;
- memset(&t, 0, sizeof(t));
- ct = nf_conntrack_alloc(net, 0, &t, &t, GFP_KERNEL);
- if (IS_ERR(ct)) {
- err = PTR_ERR(ct);
+ ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL);
+ if (!ct)
goto err1;
- }
if (!nfct_seqadj_ext_add(ct))
goto err2;
if (!nfct_synproxy_ext_add(ct))
goto err2;
- nf_conntrack_tmpl_insert(net, ct);
+ __set_bit(IPS_CONFIRMED_BIT, &ct->status);
+ nf_conntrack_get(&ct->ct_general);
snet->tmpl = ct;
snet->stats = alloc_percpu(struct synproxy_stats);
@@ -380,7 +378,7 @@ static int __net_init synproxy_net_init(struct net *net)
err3:
free_percpu(snet->stats);
err2:
- nf_conntrack_free(ct);
+ nf_ct_tmpl_free(ct);
err1:
return err;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 34ded0931..cfe636808 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -127,13 +127,46 @@ static void nft_trans_destroy(struct nft_trans *trans)
kfree(trans);
}
+int nft_register_basechain(struct nft_base_chain *basechain,
+ unsigned int hook_nops)
+{
+ if (basechain->flags & NFT_BASECHAIN_DISABLED)
+ return 0;
+
+ return nf_register_hooks(basechain->ops, hook_nops);
+}
+EXPORT_SYMBOL_GPL(nft_register_basechain);
+
+void nft_unregister_basechain(struct nft_base_chain *basechain,
+ unsigned int hook_nops)
+{
+ if (basechain->flags & NFT_BASECHAIN_DISABLED)
+ return;
+
+ nf_unregister_hooks(basechain->ops, hook_nops);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_basechain);
+
+static int nf_tables_register_hooks(const struct nft_table *table,
+ struct nft_chain *chain,
+ unsigned int hook_nops)
+{
+ if (table->flags & NFT_TABLE_F_DORMANT ||
+ !(chain->flags & NFT_BASE_CHAIN))
+ return 0;
+
+ return nft_register_basechain(nft_base_chain(chain), hook_nops);
+}
+
static void nf_tables_unregister_hooks(const struct nft_table *table,
- const struct nft_chain *chain,
+ struct nft_chain *chain,
unsigned int hook_nops)
{
- if (!(table->flags & NFT_TABLE_F_DORMANT) &&
- chain->flags & NFT_BASE_CHAIN)
- nf_unregister_hooks(nft_base_chain(chain)->ops, hook_nops);
+ if (table->flags & NFT_TABLE_F_DORMANT ||
+ !(chain->flags & NFT_BASE_CHAIN))
+ return;
+
+ nft_unregister_basechain(nft_base_chain(chain), hook_nops);
}
/* Internal table flags */
@@ -560,7 +593,7 @@ static int nf_tables_table_enable(const struct nft_af_info *afi,
if (!(chain->flags & NFT_BASE_CHAIN))
continue;
- err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops);
+ err = nft_register_basechain(nft_base_chain(chain), afi->nops);
if (err < 0)
goto err;
@@ -575,20 +608,20 @@ err:
if (i-- <= 0)
break;
- nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops);
+ nft_unregister_basechain(nft_base_chain(chain), afi->nops);
}
return err;
}
static void nf_tables_table_disable(const struct nft_af_info *afi,
- struct nft_table *table)
+ struct nft_table *table)
{
struct nft_chain *chain;
list_for_each_entry(chain, &table->chains, list) {
if (chain->flags & NFT_BASE_CHAIN)
- nf_unregister_hooks(nft_base_chain(chain)->ops,
- afi->nops);
+ nft_unregister_basechain(nft_base_chain(chain),
+ afi->nops);
}
}
@@ -679,13 +712,14 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
return -EINVAL;
}
+ err = -EAFNOSUPPORT;
if (!try_module_get(afi->owner))
- return -EAFNOSUPPORT;
+ goto err1;
err = -ENOMEM;
table = kzalloc(sizeof(*table), GFP_KERNEL);
if (table == NULL)
- goto err1;
+ goto err2;
nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
INIT_LIST_HEAD(&table->chains);
@@ -695,14 +729,15 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
if (err < 0)
- goto err2;
+ goto err3;
list_add_tail_rcu(&table->list, &afi->tables);
return 0;
-err2:
+err3:
kfree(table);
-err1:
+err2:
module_put(afi->owner);
+err1:
return err;
}
@@ -881,6 +916,8 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
[NFTA_HOOK_HOOKNUM] = { .type = NLA_U32 },
[NFTA_HOOK_PRIORITY] = { .type = NLA_U32 },
+ [NFTA_HOOK_DEV] = { .type = NLA_STRING,
+ .len = IFNAMSIZ - 1 },
};
static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats)
@@ -954,6 +991,9 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
goto nla_put_failure;
+ if (basechain->dev_name[0] &&
+ nla_put_string(skb, NFTA_HOOK_DEV, basechain->dev_name))
+ goto nla_put_failure;
nla_nest_end(skb, nest);
if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
@@ -1165,9 +1205,13 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
BUG_ON(chain->use > 0);
if (chain->flags & NFT_BASE_CHAIN) {
- module_put(nft_base_chain(chain)->type->owner);
- free_percpu(nft_base_chain(chain)->stats);
- kfree(nft_base_chain(chain));
+ struct nft_base_chain *basechain = nft_base_chain(chain);
+
+ module_put(basechain->type->owner);
+ free_percpu(basechain->stats);
+ if (basechain->ops[0].dev != NULL)
+ dev_put(basechain->ops[0].dev);
+ kfree(basechain);
} else {
kfree(chain);
}
@@ -1186,6 +1230,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
struct nlattr *ha[NFTA_HOOK_MAX + 1];
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
+ struct net_device *dev = NULL;
u8 policy = NF_ACCEPT;
u64 handle = 0;
unsigned int i;
@@ -1325,17 +1370,43 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
return -ENOENT;
hookfn = type->hooks[hooknum];
+ if (afi->flags & NFT_AF_NEEDS_DEV) {
+ char ifname[IFNAMSIZ];
+
+ if (!ha[NFTA_HOOK_DEV]) {
+ module_put(type->owner);
+ return -EOPNOTSUPP;
+ }
+
+ nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
+ dev = dev_get_by_name(net, ifname);
+ if (!dev) {
+ module_put(type->owner);
+ return -ENOENT;
+ }
+ } else if (ha[NFTA_HOOK_DEV]) {
+ module_put(type->owner);
+ return -EOPNOTSUPP;
+ }
+
basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
if (basechain == NULL) {
module_put(type->owner);
+ if (dev != NULL)
+ dev_put(dev);
return -ENOMEM;
}
+ if (dev != NULL)
+ strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
+
if (nla[NFTA_CHAIN_COUNTERS]) {
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
if (IS_ERR(stats)) {
module_put(type->owner);
kfree(basechain);
+ if (dev != NULL)
+ dev_put(dev);
return PTR_ERR(stats);
}
basechain->stats = stats;
@@ -1344,6 +1415,8 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
if (stats == NULL) {
module_put(type->owner);
kfree(basechain);
+ if (dev != NULL)
+ dev_put(dev);
return -ENOMEM;
}
rcu_assign_pointer(basechain->stats, stats);
@@ -1361,6 +1434,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
ops->priority = priority;
ops->priv = chain;
ops->hook = afi->hooks[ops->hooknum];
+ ops->dev = dev;
if (hookfn)
ops->hook = hookfn;
if (afi->hook_ops_init)
@@ -1380,12 +1454,9 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
chain->table = table;
nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
- if (!(table->flags & NFT_TABLE_F_DORMANT) &&
- chain->flags & NFT_BASE_CHAIN) {
- err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops);
- if (err < 0)
- goto err1;
- }
+ err = nf_tables_register_hooks(table, chain, afi->nops);
+ if (err < 0)
+ goto err1;
nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index f153b0707..f77bad46a 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -114,7 +114,8 @@ unsigned int
nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
{
const struct nft_chain *chain = ops->priv, *basechain = chain;
- const struct net *net = read_pnet(&nft_base_chain(basechain)->pnet);
+ const struct net *chain_net = read_pnet(&nft_base_chain(basechain)->pnet);
+ const struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
struct nft_regs regs;
@@ -124,6 +125,10 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
int rulenum;
unsigned int gencursor = nft_genmask_cur(net);
+ /* Ignore chains that are not for the current network namespace */
+ if (!net_eq(net, chain_net))
+ return NF_ACCEPT;
+
do_chain:
rulenum = 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
new file mode 100644
index 000000000..2cae4d4a0
--- /dev/null
+++ b/net/netfilter/nf_tables_netdev.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <net/netfilter/nf_tables.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+
+static inline void
+nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
+ const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct iphdr *iph, _iph;
+ u32 len, thoff;
+
+ nft_set_pktinfo(pkt, ops, skb, state);
+
+ iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph),
+ &_iph);
+ if (!iph)
+ return;
+
+ iph = ip_hdr(skb);
+ if (iph->ihl < 5 || iph->version != 4)
+ return;
+
+ len = ntohs(iph->tot_len);
+ thoff = iph->ihl * 4;
+ if (skb->len < len)
+ return;
+ else if (len < thoff)
+ return;
+
+ pkt->tprot = iph->protocol;
+ pkt->xt.thoff = thoff;
+ pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET;
+}
+
+static inline void
+__nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
+ const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct ipv6hdr *ip6h, _ip6h;
+ unsigned int thoff = 0;
+ unsigned short frag_off;
+ int protohdr;
+ u32 pkt_len;
+
+ ip6h = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*ip6h),
+ &_ip6h);
+ if (!ip6h)
+ return;
+
+ if (ip6h->version != 6)
+ return;
+
+ pkt_len = ntohs(ip6h->payload_len);
+ if (pkt_len + sizeof(*ip6h) > skb->len)
+ return;
+
+ protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
+ if (protohdr < 0)
+ return;
+
+ pkt->tprot = protohdr;
+ pkt->xt.thoff = thoff;
+ pkt->xt.fragoff = frag_off;
+#endif
+}
+
+static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
+ const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ nft_set_pktinfo(pkt, ops, skb, state);
+ __nft_netdev_set_pktinfo_ipv6(pkt, ops, skb, state);
+}
+
+static unsigned int
+nft_do_chain_netdev(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+ nft_netdev_set_pktinfo_ipv4(&pkt, ops, skb, state);
+ break;
+ case htons(ETH_P_IPV6):
+ nft_netdev_set_pktinfo_ipv6(&pkt, ops, skb, state);
+ break;
+ default:
+ nft_set_pktinfo(&pkt, ops, skb, state);
+ break;
+ }
+
+ return nft_do_chain(&pkt, ops);
+}
+
+static struct nft_af_info nft_af_netdev __read_mostly = {
+ .family = NFPROTO_NETDEV,
+ .nhooks = NF_NETDEV_NUMHOOKS,
+ .owner = THIS_MODULE,
+ .flags = NFT_AF_NEEDS_DEV,
+ .nops = 1,
+ .hooks = {
+ [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
+ },
+};
+
+static int nf_tables_netdev_init_net(struct net *net)
+{
+ net->nft.netdev = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+ if (net->nft.netdev == NULL)
+ return -ENOMEM;
+
+ memcpy(net->nft.netdev, &nft_af_netdev, sizeof(nft_af_netdev));
+
+ if (nft_register_afinfo(net, net->nft.netdev) < 0)
+ goto err;
+
+ return 0;
+err:
+ kfree(net->nft.netdev);
+ return -ENOMEM;
+}
+
+static void nf_tables_netdev_exit_net(struct net *net)
+{
+ nft_unregister_afinfo(net->nft.netdev);
+ kfree(net->nft.netdev);
+}
+
+static struct pernet_operations nf_tables_netdev_net_ops = {
+ .init = nf_tables_netdev_init_net,
+ .exit = nf_tables_netdev_exit_net,
+};
+
+static const struct nf_chain_type nft_filter_chain_netdev = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_NETDEV,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_NETDEV_INGRESS),
+};
+
+static void nft_netdev_event(unsigned long event, struct nft_af_info *afi,
+ struct net_device *dev, struct nft_table *table,
+ struct nft_base_chain *basechain)
+{
+ switch (event) {
+ case NETDEV_REGISTER:
+ if (strcmp(basechain->dev_name, dev->name) != 0)
+ return;
+
+ BUG_ON(!(basechain->flags & NFT_BASECHAIN_DISABLED));
+
+ dev_hold(dev);
+ basechain->ops[0].dev = dev;
+ basechain->flags &= ~NFT_BASECHAIN_DISABLED;
+ if (!(table->flags & NFT_TABLE_F_DORMANT))
+ nft_register_basechain(basechain, afi->nops);
+ break;
+ case NETDEV_UNREGISTER:
+ if (strcmp(basechain->dev_name, dev->name) != 0)
+ return;
+
+ BUG_ON(basechain->flags & NFT_BASECHAIN_DISABLED);
+
+ if (!(table->flags & NFT_TABLE_F_DORMANT))
+ nft_unregister_basechain(basechain, afi->nops);
+
+ dev_put(basechain->ops[0].dev);
+ basechain->ops[0].dev = NULL;
+ basechain->flags |= NFT_BASECHAIN_DISABLED;
+ break;
+ case NETDEV_CHANGENAME:
+ if (dev->ifindex != basechain->ops[0].dev->ifindex)
+ return;
+
+ strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
+ break;
+ }
+}
+
+static int nf_tables_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct nft_af_info *afi;
+ struct nft_table *table;
+ struct nft_chain *chain;
+
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) {
+ if (afi->family != NFPROTO_NETDEV)
+ continue;
+
+ list_for_each_entry(table, &afi->tables, list) {
+ list_for_each_entry(chain, &table->chains, list) {
+ if (!(chain->flags & NFT_BASE_CHAIN))
+ continue;
+
+ nft_netdev_event(event, afi, dev, table,
+ nft_base_chain(chain));
+ }
+ }
+ }
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nf_tables_netdev_notifier = {
+ .notifier_call = nf_tables_netdev_event,
+};
+
+static int __init nf_tables_netdev_init(void)
+{
+ int ret;
+
+ nft_register_chain_type(&nft_filter_chain_netdev);
+ ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
+ if (ret < 0)
+ nft_unregister_chain_type(&nft_filter_chain_netdev);
+
+ register_netdevice_notifier(&nf_tables_netdev_notifier);
+
+ return ret;
+}
+
+static void __exit nf_tables_netdev_exit(void)
+{
+ unregister_netdevice_notifier(&nf_tables_netdev_notifier);
+ unregister_pernet_subsys(&nf_tables_netdev_net_ops);
+ nft_unregister_chain_type(&nft_filter_chain_netdev);
+}
+
+module_init(nf_tables_netdev_init);
+module_exit(nf_tables_netdev_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_FAMILY(5); /* NFPROTO_NETDEV */
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 8b117c90e..0c0e8ecf0 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -269,6 +269,12 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb)
}
}
+enum {
+ NFNL_BATCH_FAILURE = (1 << 0),
+ NFNL_BATCH_DONE = (1 << 1),
+ NFNL_BATCH_REPLAY = (1 << 2),
+};
+
static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
u_int16_t subsys_id)
{
@@ -276,13 +282,15 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
const struct nfnetlink_subsystem *ss;
const struct nfnl_callback *nc;
- bool success = true, done = false;
static LIST_HEAD(err_list);
+ u32 status;
int err;
if (subsys_id >= NFNL_SUBSYS_COUNT)
return netlink_ack(skb, nlh, -EINVAL);
replay:
+ status = 0;
+
skb = netlink_skb_clone(oskb, GFP_KERNEL);
if (!skb)
return netlink_ack(oskb, nlh, -ENOMEM);
@@ -336,10 +344,10 @@ replay:
if (type == NFNL_MSG_BATCH_BEGIN) {
/* Malformed: Batch begin twice */
nfnl_err_reset(&err_list);
- success = false;
+ status |= NFNL_BATCH_FAILURE;
goto done;
} else if (type == NFNL_MSG_BATCH_END) {
- done = true;
+ status |= NFNL_BATCH_DONE;
goto done;
} else if (type < NLMSG_MIN_TYPE) {
err = -EINVAL;
@@ -382,11 +390,8 @@ replay:
* original skb.
*/
if (err == -EAGAIN) {
- nfnl_err_reset(&err_list);
- ss->abort(oskb);
- nfnl_unlock(subsys_id);
- kfree_skb(skb);
- goto replay;
+ status |= NFNL_BATCH_REPLAY;
+ goto next;
}
}
ack:
@@ -402,7 +407,7 @@ ack:
*/
nfnl_err_reset(&err_list);
netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM);
- success = false;
+ status |= NFNL_BATCH_FAILURE;
goto done;
}
/* We don't stop processing the batch on errors, thus,
@@ -410,19 +415,26 @@ ack:
* triggers.
*/
if (err)
- success = false;
+ status |= NFNL_BATCH_FAILURE;
}
-
+next:
msglen = NLMSG_ALIGN(nlh->nlmsg_len);
if (msglen > skb->len)
msglen = skb->len;
skb_pull(skb, msglen);
}
done:
- if (success && done)
+ if (status & NFNL_BATCH_REPLAY) {
+ ss->abort(oskb);
+ nfnl_err_reset(&err_list);
+ nfnl_unlock(subsys_id);
+ kfree_skb(skb);
+ goto replay;
+ } else if (status == NFNL_BATCH_DONE) {
ss->commit(oskb);
- else
+ } else {
ss->abort(oskb);
+ }
nfnl_err_deliver(&err_list, oskb);
nfnl_unlock(subsys_id);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 4ef1fae84..4670821b5 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -598,8 +598,6 @@ nla_put_failure:
return -1;
}
-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
-
static struct nf_loginfo default_loginfo = {
.type = NF_LOG_TYPE_ULOG,
.u = {
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 11c7682fa..685cc6a17 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -278,6 +278,23 @@ nla_put_failure:
return -1;
}
+static u32 nfqnl_get_sk_secctx(struct sk_buff *skb, char **secdata)
+{
+ u32 seclen = 0;
+#if IS_ENABLED(CONFIG_NETWORK_SECMARK)
+ if (!skb || !sk_fullsock(skb->sk))
+ return 0;
+
+ read_lock_bh(&skb->sk->sk_callback_lock);
+
+ if (skb->secmark)
+ security_secid_to_secctx(skb->secmark, secdata, &seclen);
+
+ read_unlock_bh(&skb->sk->sk_callback_lock);
+#endif
+ return seclen;
+}
+
static struct sk_buff *
nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
struct nf_queue_entry *entry,
@@ -297,6 +314,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
struct nf_conn *ct = NULL;
enum ip_conntrack_info uninitialized_var(ctinfo);
bool csum_verify;
+ char *secdata = NULL;
+ u32 seclen = 0;
size = nlmsg_total_size(sizeof(struct nfgenmsg))
+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
@@ -352,6 +371,12 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
+ nla_total_size(sizeof(u_int32_t))); /* gid */
}
+ if ((queue->flags & NFQA_CFG_F_SECCTX) && entskb->sk) {
+ seclen = nfqnl_get_sk_secctx(entskb, &secdata);
+ if (seclen)
+ size += nla_total_size(seclen);
+ }
+
skb = nfnetlink_alloc_skb(net, size, queue->peer_portid,
GFP_ATOMIC);
if (!skb) {
@@ -479,6 +504,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
nfqnl_put_sk_uidgid(skb, entskb->sk) < 0)
goto nla_put_failure;
+ if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata))
+ goto nla_put_failure;
+
if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
goto nla_put_failure;
@@ -806,8 +834,6 @@ nfqnl_dev_drop(struct net *net, int ifindex)
rcu_read_unlock();
}
-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
-
static int
nfqnl_rcv_dev_event(struct notifier_block *this,
unsigned long event, void *ptr)
@@ -824,6 +850,27 @@ static struct notifier_block nfqnl_dev_notifier = {
.notifier_call = nfqnl_rcv_dev_event,
};
+static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long ops_ptr)
+{
+ return entry->elem == (struct nf_hook_ops *)ops_ptr;
+}
+
+static void nfqnl_nf_hook_drop(struct net *net, struct nf_hook_ops *hook)
+{
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+ int i;
+
+ rcu_read_lock();
+ for (i = 0; i < INSTANCE_BUCKETS; i++) {
+ struct nfqnl_instance *inst;
+ struct hlist_head *head = &q->instance_table[i];
+
+ hlist_for_each_entry_rcu(inst, head, hlist)
+ nfqnl_flush(inst, nf_hook_cmp, (unsigned long)hook);
+ }
+ rcu_read_unlock();
+}
+
static int
nfqnl_rcv_nl_event(struct notifier_block *this,
unsigned long event, void *ptr)
@@ -1031,7 +1078,8 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
};
static const struct nf_queue_handler nfqh = {
- .outfn = &nfqnl_enqueue_packet,
+ .outfn = &nfqnl_enqueue_packet,
+ .nf_hook_drop = &nfqnl_nf_hook_drop,
};
static int
@@ -1142,7 +1190,12 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
ret = -EOPNOTSUPP;
goto err_out_unlock;
}
-
+#if !IS_ENABLED(CONFIG_NETWORK_SECMARK)
+ if (flags & mask & NFQA_CFG_F_SECCTX) {
+ ret = -EOPNOTSUPP;
+ goto err_out_unlock;
+ }
+#endif
spin_lock_bh(&queue->lock);
queue->flags &= ~mask;
queue->flags |= flags & mask;
@@ -1257,7 +1310,7 @@ static int seq_show(struct seq_file *s, void *v)
inst->copy_mode, inst->copy_range,
inst->queue_dropped, inst->queue_user_dropped,
inst->id_sequence, 1);
- return seq_has_overflowed(s);
+ return 0;
}
static const struct seq_operations nfqnl_seq_ops = {
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 7f29cfc76..66def315e 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -161,6 +161,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
par->hook_mask = 0;
}
par->family = ctx->afi->family;
+ par->nft_compat = true;
}
static void target_compat_from_user(struct xt_target *t, void *in, void *out)
@@ -377,6 +378,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
par->hook_mask = 0;
}
par->family = ctx->afi->family;
+ par->nft_compat = true;
}
static void match_compat_from_user(struct xt_match *m, void *in, void *out)
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 51a459c3c..d324fe712 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -658,35 +658,23 @@ EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
struct xt_table_info *xt_alloc_table_info(unsigned int size)
{
- struct xt_table_info *newinfo;
- int cpu;
+ struct xt_table_info *info = NULL;
+ size_t sz = sizeof(*info) + size;
/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
return NULL;
- newinfo = kzalloc(XT_TABLE_INFO_SZ, GFP_KERNEL);
- if (!newinfo)
- return NULL;
-
- newinfo->size = size;
-
- for_each_possible_cpu(cpu) {
- if (size <= PAGE_SIZE)
- newinfo->entries[cpu] = kmalloc_node(size,
- GFP_KERNEL,
- cpu_to_node(cpu));
- else
- newinfo->entries[cpu] = vmalloc_node(size,
- cpu_to_node(cpu));
-
- if (newinfo->entries[cpu] == NULL) {
- xt_free_table_info(newinfo);
+ if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
+ info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+ if (!info) {
+ info = vmalloc(sz);
+ if (!info)
return NULL;
- }
}
-
- return newinfo;
+ memset(info, 0, sizeof(*info));
+ info->size = size;
+ return info;
}
EXPORT_SYMBOL(xt_alloc_table_info);
@@ -694,9 +682,6 @@ void xt_free_table_info(struct xt_table_info *info)
{
int cpu;
- for_each_possible_cpu(cpu)
- kvfree(info->entries[cpu]);
-
if (info->jumpstack != NULL) {
for_each_possible_cpu(cpu)
kvfree(info->jumpstack[cpu]);
@@ -705,7 +690,7 @@ void xt_free_table_info(struct xt_table_info *info)
free_percpu(info->stackptr);
- kfree(info);
+ kvfree(info);
}
EXPORT_SYMBOL(xt_free_table_info);
@@ -947,11 +932,9 @@ static int xt_table_seq_show(struct seq_file *seq, void *v)
{
struct xt_table *table = list_entry(v, struct xt_table, list);
- if (strlen(table->name)) {
+ if (*table->name)
seq_printf(seq, "%s\n", table->name);
- return seq_has_overflowed(seq);
- } else
- return 0;
+ return 0;
}
static const struct seq_operations xt_table_seq_ops = {
@@ -1087,10 +1070,8 @@ static int xt_match_seq_show(struct seq_file *seq, void *v)
if (trav->curr == trav->head)
return 0;
match = list_entry(trav->curr, struct xt_match, list);
- if (*match->name == '\0')
- return 0;
- seq_printf(seq, "%s\n", match->name);
- return seq_has_overflowed(seq);
+ if (*match->name)
+ seq_printf(seq, "%s\n", match->name);
}
return 0;
}
@@ -1142,10 +1123,8 @@ static int xt_target_seq_show(struct seq_file *seq, void *v)
if (trav->curr == trav->head)
return 0;
target = list_entry(trav->curr, struct xt_target, list);
- if (*target->name == '\0')
- return 0;
- seq_printf(seq, "%s\n", target->name);
- return seq_has_overflowed(seq);
+ if (*target->name)
+ seq_printf(seq, "%s\n", target->name);
}
return 0;
}
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 75747aecd..f3377ce1f 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -184,7 +184,6 @@ out:
static int xt_ct_tg_check(const struct xt_tgchk_param *par,
struct xt_ct_target_info_v1 *info)
{
- struct nf_conntrack_tuple t;
struct nf_conn *ct;
int ret = -EOPNOTSUPP;
@@ -202,11 +201,11 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
if (ret < 0)
goto err1;
- memset(&t, 0, sizeof(t));
- ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL);
- ret = PTR_ERR(ct);
- if (IS_ERR(ct))
+ ct = nf_ct_tmpl_alloc(par->net, info->zone, GFP_KERNEL);
+ if (!ct) {
+ ret = -ENOMEM;
goto err2;
+ }
ret = 0;
if ((info->ct_events || info->exp_events) &&
@@ -227,14 +226,14 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
if (ret < 0)
goto err3;
}
-
- nf_conntrack_tmpl_insert(par->net, ct);
+ __set_bit(IPS_CONFIRMED_BIT, &ct->status);
+ nf_conntrack_get(&ct->ct_general);
out:
info->ct = ct;
return 0;
err3:
- nf_conntrack_free(ct);
+ nf_ct_tmpl_free(ct);
err2:
nf_ct_l3proto_module_put(par->family);
err1:
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index f407ebc13..29d2c31f4 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -126,6 +126,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
goto out;
}
+ sysfs_attr_init(&info->timer->attr.attr);
info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
if (!info->timer->attr.attr.name) {
ret = -ENOMEM;
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index e762de5ee..8c3190e2f 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -277,6 +277,9 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
"FORWARD, OUTPUT and POSTROUTING hooks\n");
return -EINVAL;
}
+ if (par->nft_compat)
+ return 0;
+
xt_ematch_foreach(ematch, e)
if (find_syn_match(ematch))
return 0;
@@ -299,6 +302,9 @@ static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
"FORWARD, OUTPUT and POSTROUTING hooks\n");
return -EINVAL;
}
+ if (par->nft_compat)
+ return 0;
+
xt_ematch_foreach(ematch, e)
if (find_syn_match(ematch))
return 0;
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 292934d23..a747eb475 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -152,6 +152,7 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
fl6.daddr = info->gw.in6;
fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
(iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
+ fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
dst_release(dst);
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index fab6eea1b..5b4743cc0 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -73,7 +73,7 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
if (dev == NULL && rt->rt6i_flags & RTF_LOCAL)
ret |= XT_ADDRTYPE_LOCAL;
- if (rt->rt6i_flags & RTF_ANYCAST)
+ if (ipv6_anycast_destination((struct dst_entry *)rt, addr))
ret |= XT_ADDRTYPE_ANYCAST;
dst_release(&rt->dst);
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 233452387..ebd41dc50 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -23,6 +23,7 @@ MODULE_ALIAS("ipt_mark");
MODULE_ALIAS("ip6t_mark");
MODULE_ALIAS("ipt_MARK");
MODULE_ALIAS("ip6t_MARK");
+MODULE_ALIAS("arpt_MARK");
static unsigned int
mark_tg(struct sk_buff *skb, const struct xt_action_param *par)
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 89045982e..5669e5b45 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -9,14 +9,16 @@
*/
/* Kernel module which implements the set match and SET target
- * for netfilter/iptables. */
+ * for netfilter/iptables.
+ */
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter/xt_set.h>
+#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_timeout.h>
+#include <uapi/linux/netfilter/xt_set.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -52,6 +54,7 @@ static bool
set_match_v0(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_set_info_match_v0 *info = par->matchinfo;
+
ADT_OPT(opt, par->family, info->match_set.u.compat.dim,
info->match_set.u.compat.flags, 0, UINT_MAX);
@@ -68,10 +71,10 @@ compat_flags(struct xt_set_info_v0 *info)
info->u.compat.dim = IPSET_DIM_ZERO;
if (info->u.flags[0] & IPSET_MATCH_INV)
info->u.compat.flags |= IPSET_INV_MATCH;
- for (i = 0; i < IPSET_DIM_MAX-1 && info->u.flags[i]; i++) {
+ for (i = 0; i < IPSET_DIM_MAX - 1 && info->u.flags[i]; i++) {
info->u.compat.dim++;
if (info->u.flags[i] & IPSET_SRC)
- info->u.compat.flags |= (1<<info->u.compat.dim);
+ info->u.compat.flags |= (1 << info->u.compat.dim);
}
}
@@ -88,7 +91,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par)
info->match_set.index);
return -ENOENT;
}
- if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) {
+ if (info->match_set.u.flags[IPSET_DIM_MAX - 1] != 0) {
pr_warn("Protocol error: set match dimension is over the limit!\n");
ip_set_nfnl_put(par->net, info->match_set.index);
return -ERANGE;
@@ -114,6 +117,7 @@ static bool
set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_set_info_match_v1 *info = par->matchinfo;
+
ADT_OPT(opt, par->family, info->match_set.dim,
info->match_set.flags, 0, UINT_MAX);
@@ -178,9 +182,10 @@ static bool
set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_set_info_match_v3 *info = par->matchinfo;
+ int ret;
+
ADT_OPT(opt, par->family, info->match_set.dim,
info->match_set.flags, info->flags, UINT_MAX);
- int ret;
if (info->packets.op != IPSET_COUNTER_NONE ||
info->bytes.op != IPSET_COUNTER_NONE)
@@ -224,9 +229,10 @@ static bool
set_match_v4(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_set_info_match_v4 *info = par->matchinfo;
+ int ret;
+
ADT_OPT(opt, par->family, info->match_set.dim,
info->match_set.flags, info->flags, UINT_MAX);
- int ret;
if (info->packets.op != IPSET_COUNTER_NONE ||
info->bytes.op != IPSET_COUNTER_NONE)
@@ -252,6 +258,7 @@ static unsigned int
set_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_set_info_target_v0 *info = par->targinfo;
+
ADT_OPT(add_opt, par->family, info->add_set.u.compat.dim,
info->add_set.u.compat.flags, 0, UINT_MAX);
ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim,
@@ -290,8 +297,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
return -ENOENT;
}
}
- if (info->add_set.u.flags[IPSET_DIM_MAX-1] != 0 ||
- info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) {
+ if (info->add_set.u.flags[IPSET_DIM_MAX - 1] != 0 ||
+ info->del_set.u.flags[IPSET_DIM_MAX - 1] != 0) {
pr_warn("Protocol error: SET target dimension is over the limit!\n");
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
@@ -324,6 +331,7 @@ static unsigned int
set_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_set_info_target_v1 *info = par->targinfo;
+
ADT_OPT(add_opt, par->family, info->add_set.dim,
info->add_set.flags, 0, UINT_MAX);
ADT_OPT(del_opt, par->family, info->del_set.dim,
@@ -392,6 +400,7 @@ static unsigned int
set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_set_info_target_v2 *info = par->targinfo;
+
ADT_OPT(add_opt, par->family, info->add_set.dim,
info->add_set.flags, info->flags, info->timeout);
ADT_OPT(del_opt, par->family, info->del_set.dim,
@@ -399,8 +408,8 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
/* Normalize to fit into jiffies */
if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
- add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC)
- add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC;
+ add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC)
+ add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC;
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_add(info->add_set.index, skb, par, &add_opt);
if (info->del_set.index != IPSET_INVALID_ID)
@@ -418,6 +427,8 @@ static unsigned int
set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_set_info_target_v3 *info = par->targinfo;
+ int ret;
+
ADT_OPT(add_opt, par->family, info->add_set.dim,
info->add_set.flags, info->flags, info->timeout);
ADT_OPT(del_opt, par->family, info->del_set.dim,
@@ -425,12 +436,10 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
ADT_OPT(map_opt, par->family, info->map_set.dim,
info->map_set.flags, 0, UINT_MAX);
- int ret;
-
/* Normalize to fit into jiffies */
if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
- add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC)
- add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC;
+ add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC)
+ add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC;
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_add(info->add_set.index, skb, par, &add_opt);
if (info->del_set.index != IPSET_INVALID_ID)
@@ -456,7 +465,6 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
-
static int
set_target_v3_checkentry(const struct xt_tgchk_param *par)
{
@@ -496,8 +504,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
!(par->hook_mask & (1 << NF_INET_FORWARD |
1 << NF_INET_LOCAL_OUT |
1 << NF_INET_POST_ROUTING))) {
- pr_warn("mapping of prio or/and queue is allowed only"
- "from OUTPUT/FORWARD/POSTROUTING chains\n");
+ pr_warn("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n");
return -EINVAL;
}
index = ip_set_nfnl_get_byindex(par->net,
@@ -518,8 +525,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
if (info->add_set.dim > IPSET_DIM_MAX ||
info->del_set.dim > IPSET_DIM_MAX ||
info->map_set.dim > IPSET_DIM_MAX) {
- pr_warn("Protocol error: SET target dimension "
- "is over the limit!\n");
+ pr_warn("Protocol error: SET target dimension is over the limit!\n");
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
if (info->del_set.index != IPSET_INVALID_ID)
@@ -545,7 +551,6 @@ set_target_v3_destroy(const struct xt_tgdtor_param *par)
ip_set_nfnl_put(par->net, info->map_set.index);
}
-
static struct xt_match set_matches[] __read_mostly = {
{
.name = "set",
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index e092cb046..43e26c881 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -205,6 +205,7 @@ static bool
socket_match(const struct sk_buff *skb, struct xt_action_param *par,
const struct xt_socket_mtinfo1 *info)
{
+ struct sk_buff *pskb = (struct sk_buff *)skb;
struct sock *sk = skb->sk;
if (!sk)
@@ -226,6 +227,10 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
if (info->flags & XT_SOCKET_TRANSPARENT)
transparent = xt_socket_sk_is_transparent(sk);
+ if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
+ transparent)
+ pskb->mark = sk->sk_mark;
+
if (sk != skb->sk)
sock_gen_put(sk);
@@ -247,7 +252,7 @@ socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par)
}
static bool
-socket_mt4_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
+socket_mt4_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
{
return socket_match(skb, par, par->matchinfo);
}
@@ -371,9 +376,10 @@ static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb,
}
static bool
-socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
+socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+ struct sk_buff *pskb = (struct sk_buff *)skb;
struct sock *sk = skb->sk;
if (!sk)
@@ -395,6 +401,10 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
if (info->flags & XT_SOCKET_TRANSPARENT)
transparent = xt_socket_sk_is_transparent(sk);
+ if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
+ transparent)
+ pskb->mark = sk->sk_mark;
+
if (sk != skb->sk)
sock_gen_put(sk);
@@ -428,6 +438,19 @@ static int socket_mt_v2_check(const struct xt_mtchk_param *par)
return 0;
}
+static int socket_mt_v3_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_socket_mtinfo3 *info =
+ (struct xt_socket_mtinfo3 *)par->matchinfo;
+
+ if (info->flags & ~XT_SOCKET_FLAGS_V3) {
+ pr_info("unknown flags 0x%x\n",
+ info->flags & ~XT_SOCKET_FLAGS_V3);
+ return -EINVAL;
+ }
+ return 0;
+}
+
static struct xt_match socket_mt_reg[] __read_mostly = {
{
.name = "socket",
@@ -442,7 +465,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
.name = "socket",
.revision = 1,
.family = NFPROTO_IPV4,
- .match = socket_mt4_v1_v2,
+ .match = socket_mt4_v1_v2_v3,
.checkentry = socket_mt_v1_check,
.matchsize = sizeof(struct xt_socket_mtinfo1),
.hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -454,7 +477,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
.name = "socket",
.revision = 1,
.family = NFPROTO_IPV6,
- .match = socket_mt6_v1_v2,
+ .match = socket_mt6_v1_v2_v3,
.checkentry = socket_mt_v1_check,
.matchsize = sizeof(struct xt_socket_mtinfo1),
.hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -466,7 +489,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
.name = "socket",
.revision = 2,
.family = NFPROTO_IPV4,
- .match = socket_mt4_v1_v2,
+ .match = socket_mt4_v1_v2_v3,
.checkentry = socket_mt_v2_check,
.matchsize = sizeof(struct xt_socket_mtinfo1),
.hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -478,7 +501,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
.name = "socket",
.revision = 2,
.family = NFPROTO_IPV6,
- .match = socket_mt6_v1_v2,
+ .match = socket_mt6_v1_v2_v3,
.checkentry = socket_mt_v2_check,
.matchsize = sizeof(struct xt_socket_mtinfo1),
.hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -486,6 +509,30 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
.me = THIS_MODULE,
},
#endif
+ {
+ .name = "socket",
+ .revision = 3,
+ .family = NFPROTO_IPV4,
+ .match = socket_mt4_v1_v2_v3,
+ .checkentry = socket_mt_v3_check,
+ .matchsize = sizeof(struct xt_socket_mtinfo1),
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ },
+#ifdef XT_SOCKET_HAVE_IPV6
+ {
+ .name = "socket",
+ .revision = 3,
+ .family = NFPROTO_IPV6,
+ .match = socket_mt6_v1_v2_v3,
+ .checkentry = socket_mt_v3_check,
+ .matchsize = sizeof(struct xt_socket_mtinfo1),
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ },
+#endif
};
static int __init socket_mt_init(void)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index bf6e76643..a77498548 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -76,17 +76,18 @@ struct listeners {
};
/* state bits */
-#define NETLINK_CONGESTED 0x0
+#define NETLINK_S_CONGESTED 0x0
/* flags */
-#define NETLINK_KERNEL_SOCKET 0x1
-#define NETLINK_RECV_PKTINFO 0x2
-#define NETLINK_BROADCAST_SEND_ERROR 0x4
-#define NETLINK_RECV_NO_ENOBUFS 0x8
+#define NETLINK_F_KERNEL_SOCKET 0x1
+#define NETLINK_F_RECV_PKTINFO 0x2
+#define NETLINK_F_BROADCAST_SEND_ERROR 0x4
+#define NETLINK_F_RECV_NO_ENOBUFS 0x8
+#define NETLINK_F_LISTEN_ALL_NSID 0x10
static inline int netlink_is_kernel(struct sock *sk)
{
- return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
+ return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET;
}
struct netlink_table *nl_table __read_mostly;
@@ -157,7 +158,7 @@ static int __netlink_remove_tap(struct netlink_tap *nt)
out:
spin_unlock(&netlink_tap_lock);
- if (found && nt->module)
+ if (found)
module_put(nt->module);
return found ? 0 : -ENODEV;
@@ -256,8 +257,9 @@ static void netlink_overrun(struct sock *sk)
{
struct netlink_sock *nlk = nlk_sk(sk);
- if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
- if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) {
+ if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) {
+ if (!test_and_set_bit(NETLINK_S_CONGESTED,
+ &nlk_sk(sk)->state)) {
sk->sk_err = ENOBUFS;
sk->sk_error_report(sk);
}
@@ -270,8 +272,8 @@ static void netlink_rcv_wake(struct sock *sk)
struct netlink_sock *nlk = nlk_sk(sk);
if (skb_queue_empty(&sk->sk_receive_queue))
- clear_bit(NETLINK_CONGESTED, &nlk->state);
- if (!test_bit(NETLINK_CONGESTED, &nlk->state))
+ clear_bit(NETLINK_S_CONGESTED, &nlk->state);
+ if (!test_bit(NETLINK_S_CONGESTED, &nlk->state))
wake_up_interruptible(&nlk->wait);
}
@@ -355,25 +357,52 @@ err1:
return NULL;
}
+
+static void
+__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
+ unsigned int order)
+{
+ struct netlink_sock *nlk = nlk_sk(sk);
+ struct sk_buff_head *queue;
+ struct netlink_ring *ring;
+
+ queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
+ ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+
+ spin_lock_bh(&queue->lock);
+
+ ring->frame_max = req->nm_frame_nr - 1;
+ ring->head = 0;
+ ring->frame_size = req->nm_frame_size;
+ ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
+
+ swap(ring->pg_vec_len, req->nm_block_nr);
+ swap(ring->pg_vec_order, order);
+ swap(ring->pg_vec, pg_vec);
+
+ __skb_queue_purge(queue);
+ spin_unlock_bh(&queue->lock);
+
+ WARN_ON(atomic_read(&nlk->mapped));
+
+ if (pg_vec)
+ free_pg_vec(pg_vec, order, req->nm_block_nr);
+}
+
static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
- bool closing, bool tx_ring)
+ bool tx_ring)
{
struct netlink_sock *nlk = nlk_sk(sk);
struct netlink_ring *ring;
- struct sk_buff_head *queue;
void **pg_vec = NULL;
unsigned int order = 0;
- int err;
ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
- queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
- if (!closing) {
- if (atomic_read(&nlk->mapped))
- return -EBUSY;
- if (atomic_read(&ring->pending))
- return -EBUSY;
- }
+ if (atomic_read(&nlk->mapped))
+ return -EBUSY;
+ if (atomic_read(&ring->pending))
+ return -EBUSY;
if (req->nm_block_nr) {
if (ring->pg_vec != NULL)
@@ -405,31 +434,19 @@ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
return -EINVAL;
}
- err = -EBUSY;
mutex_lock(&nlk->pg_vec_lock);
- if (closing || atomic_read(&nlk->mapped) == 0) {
- err = 0;
- spin_lock_bh(&queue->lock);
-
- ring->frame_max = req->nm_frame_nr - 1;
- ring->head = 0;
- ring->frame_size = req->nm_frame_size;
- ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
-
- swap(ring->pg_vec_len, req->nm_block_nr);
- swap(ring->pg_vec_order, order);
- swap(ring->pg_vec, pg_vec);
-
- __skb_queue_purge(queue);
- spin_unlock_bh(&queue->lock);
-
- WARN_ON(atomic_read(&nlk->mapped));
+ if (atomic_read(&nlk->mapped) == 0) {
+ __netlink_set_ring(sk, req, tx_ring, pg_vec, order);
+ mutex_unlock(&nlk->pg_vec_lock);
+ return 0;
}
+
mutex_unlock(&nlk->pg_vec_lock);
if (pg_vec)
free_pg_vec(pg_vec, order, req->nm_block_nr);
- return err;
+
+ return -EBUSY;
}
static void netlink_mm_open(struct vm_area_struct *vma)
@@ -898,10 +915,10 @@ static void netlink_sock_destruct(struct sock *sk)
memset(&req, 0, sizeof(req));
if (nlk->rx_ring.pg_vec)
- netlink_set_ring(sk, &req, true, false);
+ __netlink_set_ring(sk, &req, false, NULL, 0);
memset(&req, 0, sizeof(req));
if (nlk->tx_ring.pg_vec)
- netlink_set_ring(sk, &req, true, true);
+ __netlink_set_ring(sk, &req, true, NULL, 0);
}
#endif /* CONFIG_NETLINK_MMAP */
@@ -1079,6 +1096,11 @@ static int netlink_insert(struct sock *sk, u32 portid)
err = __netlink_insert(table, sk);
if (err) {
+ /* In case the hashtable backend returns with -EBUSY
+ * from here, it must not escape to the caller.
+ */
+ if (unlikely(err == -EBUSY))
+ err = -EOVERFLOW;
if (err == -EEXIST)
err = -EADDRINUSE;
nlk_sk(sk)->portid = 0;
@@ -1118,14 +1140,15 @@ static struct proto netlink_proto = {
};
static int __netlink_create(struct net *net, struct socket *sock,
- struct mutex *cb_mutex, int protocol)
+ struct mutex *cb_mutex, int protocol,
+ int kern)
{
struct sock *sk;
struct netlink_sock *nlk;
sock->ops = &netlink_ops;
- sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
+ sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern);
if (!sk)
return -ENOMEM;
@@ -1187,7 +1210,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
if (err < 0)
goto out;
- err = __netlink_create(net, sock, cb_mutex, protocol);
+ err = __netlink_create(net, sock, cb_mutex, protocol, kern);
if (err < 0)
goto out_module;
@@ -1297,20 +1320,24 @@ static int netlink_autobind(struct socket *sock)
struct netlink_table *table = &nl_table[sk->sk_protocol];
s32 portid = task_tgid_vnr(current);
int err;
- static s32 rover = -4097;
+ s32 rover = -4096;
+ bool ok;
retry:
cond_resched();
rcu_read_lock();
- if (__netlink_lookup(table, portid, net)) {
+ ok = !__netlink_lookup(table, portid, net);
+ rcu_read_unlock();
+ if (!ok) {
/* Bind collision, search negative portid values. */
- portid = rover--;
- if (rover > -4097)
+ if (rover == -4096)
+ /* rover will be in range [S32_MIN, -4097] */
+ rover = S32_MIN + prandom_u32_max(-4096 - S32_MIN);
+ else if (rover >= -4096)
rover = -4097;
- rcu_read_unlock();
+ portid = rover--;
goto retry;
}
- rcu_read_unlock();
err = netlink_insert(sk, portid);
if (err == -EADDRINUSE)
@@ -1657,7 +1684,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
nlk = nlk_sk(sk);
if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_CONGESTED, &nlk->state)) &&
+ test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
!netlink_skb_is_mmaped(skb)) {
DECLARE_WAITQUEUE(wait, current);
if (!*timeo) {
@@ -1672,7 +1699,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
add_wait_queue(&nlk->wait, &wait);
if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_CONGESTED, &nlk->state)) &&
+ test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
!sock_flag(sk, SOCK_DEAD))
*timeo = schedule_timeout(*timeo);
@@ -1896,7 +1923,7 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
struct netlink_sock *nlk = nlk_sk(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
- !test_bit(NETLINK_CONGESTED, &nlk->state)) {
+ !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
netlink_skb_set_owner_r(skb, sk);
__netlink_sendskb(sk, skb);
return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
@@ -1932,8 +1959,17 @@ static void do_one_broadcast(struct sock *sk,
!test_bit(p->group - 1, nlk->groups))
return;
- if (!net_eq(sock_net(sk), p->net))
- return;
+ if (!net_eq(sock_net(sk), p->net)) {
+ if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID))
+ return;
+
+ if (!peernet_has_id(sock_net(sk), p->net))
+ return;
+
+ if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns,
+ CAP_NET_BROADCAST))
+ return;
+ }
if (p->failure) {
netlink_overrun(sk);
@@ -1957,23 +1993,33 @@ static void do_one_broadcast(struct sock *sk,
netlink_overrun(sk);
/* Clone failed. Notify ALL listeners. */
p->failure = 1;
- if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
+ if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
p->delivery_failure = 1;
- } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
+ goto out;
+ }
+ if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
kfree_skb(p->skb2);
p->skb2 = NULL;
- } else if (sk_filter(sk, p->skb2)) {
+ goto out;
+ }
+ if (sk_filter(sk, p->skb2)) {
kfree_skb(p->skb2);
p->skb2 = NULL;
- } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
+ goto out;
+ }
+ NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net);
+ NETLINK_CB(p->skb2).nsid_is_set = true;
+ val = netlink_broadcast_deliver(sk, p->skb2);
+ if (val < 0) {
netlink_overrun(sk);
- if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
+ if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
p->delivery_failure = 1;
} else {
p->congested |= val;
p->delivered = 1;
p->skb2 = NULL;
}
+out:
sock_put(sk);
}
@@ -2058,7 +2104,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
!test_bit(p->group - 1, nlk->groups))
goto out;
- if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
+ if (p->code == ENOBUFS && nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) {
ret = 1;
goto out;
}
@@ -2077,7 +2123,7 @@ out:
* @code: error code, must be negative (as usual in kernelspace)
*
* This function returns the number of broadcast listeners that have set the
- * NETLINK_RECV_NO_ENOBUFS socket option.
+ * NETLINK_NO_ENOBUFS socket option.
*/
int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
{
@@ -2137,9 +2183,9 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case NETLINK_PKTINFO:
if (val)
- nlk->flags |= NETLINK_RECV_PKTINFO;
+ nlk->flags |= NETLINK_F_RECV_PKTINFO;
else
- nlk->flags &= ~NETLINK_RECV_PKTINFO;
+ nlk->flags &= ~NETLINK_F_RECV_PKTINFO;
err = 0;
break;
case NETLINK_ADD_MEMBERSHIP:
@@ -2168,18 +2214,18 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
}
case NETLINK_BROADCAST_ERROR:
if (val)
- nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
+ nlk->flags |= NETLINK_F_BROADCAST_SEND_ERROR;
else
- nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
+ nlk->flags &= ~NETLINK_F_BROADCAST_SEND_ERROR;
err = 0;
break;
case NETLINK_NO_ENOBUFS:
if (val) {
- nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
- clear_bit(NETLINK_CONGESTED, &nlk->state);
+ nlk->flags |= NETLINK_F_RECV_NO_ENOBUFS;
+ clear_bit(NETLINK_S_CONGESTED, &nlk->state);
wake_up_interruptible(&nlk->wait);
} else {
- nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
+ nlk->flags &= ~NETLINK_F_RECV_NO_ENOBUFS;
}
err = 0;
break;
@@ -2197,11 +2243,21 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
if (copy_from_user(&req, optval, sizeof(req)))
return -EFAULT;
- err = netlink_set_ring(sk, &req, false,
+ err = netlink_set_ring(sk, &req,
optname == NETLINK_TX_RING);
break;
}
#endif /* CONFIG_NETLINK_MMAP */
+ case NETLINK_LISTEN_ALL_NSID:
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
+ return -EPERM;
+
+ if (val)
+ nlk->flags |= NETLINK_F_LISTEN_ALL_NSID;
+ else
+ nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID;
+ err = 0;
+ break;
default:
err = -ENOPROTOOPT;
}
@@ -2228,7 +2284,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
if (len < sizeof(int))
return -EINVAL;
len = sizeof(int);
- val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
+ val = nlk->flags & NETLINK_F_RECV_PKTINFO ? 1 : 0;
if (put_user(len, optlen) ||
put_user(val, optval))
return -EFAULT;
@@ -2238,7 +2294,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
if (len < sizeof(int))
return -EINVAL;
len = sizeof(int);
- val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
+ val = nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR ? 1 : 0;
if (put_user(len, optlen) ||
put_user(val, optval))
return -EFAULT;
@@ -2248,12 +2304,34 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
if (len < sizeof(int))
return -EINVAL;
len = sizeof(int);
- val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
+ val = nlk->flags & NETLINK_F_RECV_NO_ENOBUFS ? 1 : 0;
if (put_user(len, optlen) ||
put_user(val, optval))
return -EFAULT;
err = 0;
break;
+ case NETLINK_LIST_MEMBERSHIPS: {
+ int pos, idx, shift;
+
+ err = 0;
+ netlink_table_grab();
+ for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) {
+ if (len - pos < sizeof(u32))
+ break;
+
+ idx = pos / sizeof(unsigned long);
+ shift = (pos % sizeof(unsigned long)) * 8;
+ if (put_user((u32)(nlk->groups[idx] >> shift),
+ (u32 __user *)(optval + pos))) {
+ err = -EFAULT;
+ break;
+ }
+ }
+ if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen))
+ err = -EFAULT;
+ netlink_table_ungrab();
+ break;
+ }
default:
err = -ENOPROTOOPT;
}
@@ -2268,6 +2346,16 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
}
+static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg,
+ struct sk_buff *skb)
+{
+ if (!NETLINK_CB(skb).nsid_is_set)
+ return;
+
+ put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int),
+ &NETLINK_CB(skb).nsid);
+}
+
static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
@@ -2313,7 +2401,7 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
* sendmsg(), but that's what we've got...
*/
if (netlink_tx_is_mmaped(sk) &&
- msg->msg_iter.type == ITER_IOVEC &&
+ iter_is_iovec(&msg->msg_iter) &&
msg->msg_iter.nr_segs == 1 &&
msg->msg_iter.iov->iov_base == NULL) {
err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
@@ -2419,8 +2507,10 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
msg->msg_namelen = sizeof(*addr);
}
- if (nlk->flags & NETLINK_RECV_PKTINFO)
+ if (nlk->flags & NETLINK_F_RECV_PKTINFO)
netlink_cmsg_recv_pktinfo(msg, skb);
+ if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
+ netlink_cmsg_listen_all_nsid(sk, msg, skb);
memset(&scm, 0, sizeof(scm));
scm.creds = *NETLINK_CREDS(skb);
@@ -2474,17 +2564,10 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
return NULL;
- /*
- * We have to just have a reference on the net from sk, but don't
- * get_net it. Besides, we cannot get and then put the net here.
- * So we create one inside init_net and the move it to net.
- */
-
- if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
+ if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0)
goto out_sock_release_nosk;
sk = sock->sk;
- sk_change_net(sk, net);
if (!cfg || cfg->groups < 32)
groups = 32;
@@ -2503,7 +2586,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
goto out_sock_release;
nlk = nlk_sk(sk);
- nlk->flags |= NETLINK_KERNEL_SOCKET;
+ nlk->flags |= NETLINK_F_KERNEL_SOCKET;
netlink_table_grab();
if (!nl_table[unit].registered) {
@@ -2540,7 +2623,10 @@ EXPORT_SYMBOL(__netlink_kernel_create);
void
netlink_kernel_release(struct sock *sk)
{
- sk_release_kernel(sk);
+ if (sk == NULL || sk->sk_socket == NULL)
+ return;
+
+ sock_release(sk->sk_socket);
}
EXPORT_SYMBOL(netlink_kernel_release);
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index b987fd56c..ed212ffc1 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -433,7 +433,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_SEQPACKET || protocol != 0)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto);
+ sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, kern);
if (sk == NULL)
return -ENOMEM;
@@ -476,7 +476,7 @@ static struct sock *nr_make_new(struct sock *osk)
if (osk->sk_type != SOCK_SEQPACKET)
return NULL;
- sk = sk_alloc(sock_net(osk), PF_NETROM, GFP_ATOMIC, osk->sk_prot);
+ sk = sk_alloc(sock_net(osk), PF_NETROM, GFP_ATOMIC, osk->sk_prot, 0);
if (sk == NULL)
return NULL;
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 96b64d2f6..d72a4f155 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -31,7 +31,6 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
-#include <linux/netfilter.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <net/netrom.h>
diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c
index 2277276f5..54e40fa47 100644
--- a/net/nfc/af_nfc.c
+++ b/net/nfc/af_nfc.c
@@ -40,7 +40,7 @@ static int nfc_sock_create(struct net *net, struct socket *sock, int proto,
read_lock(&proto_tab_lock);
if (proto_tab[proto] && try_module_get(proto_tab[proto]->owner)) {
- rc = proto_tab[proto]->create(net, sock, proto_tab[proto]);
+ rc = proto_tab[proto]->create(net, sock, proto_tab[proto], kern);
module_put(proto_tab[proto]->owner);
}
read_unlock(&proto_tab_lock);
diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h
index de1789e3c..1f68724d4 100644
--- a/net/nfc/llcp.h
+++ b/net/nfc/llcp.h
@@ -225,7 +225,7 @@ void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local,
struct sk_buff *skb, u8 direction);
/* Sock API */
-struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp);
+struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp, int kern);
void nfc_llcp_sock_free(struct nfc_llcp_sock *sock);
void nfc_llcp_accept_unlink(struct sock *sk);
void nfc_llcp_accept_enqueue(struct sock *parent, struct sock *sk);
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index b18f07ccb..98876274a 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -934,7 +934,7 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
sock->ssap = ssap;
}
- new_sk = nfc_llcp_sock_alloc(NULL, parent->sk_type, GFP_ATOMIC);
+ new_sk = nfc_llcp_sock_alloc(NULL, parent->sk_type, GFP_ATOMIC, 0);
if (new_sk == NULL) {
reason = LLCP_DM_REJ;
release_sock(&sock->sk);
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 9578bd6a4..b7de0da46 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -942,12 +942,12 @@ static void llcp_sock_destruct(struct sock *sk)
}
}
-struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp)
+struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp, int kern)
{
struct sock *sk;
struct nfc_llcp_sock *llcp_sock;
- sk = sk_alloc(&init_net, PF_NFC, gfp, &llcp_sock_proto);
+ sk = sk_alloc(&init_net, PF_NFC, gfp, &llcp_sock_proto, kern);
if (!sk)
return NULL;
@@ -993,7 +993,7 @@ void nfc_llcp_sock_free(struct nfc_llcp_sock *sock)
}
static int llcp_sock_create(struct net *net, struct socket *sock,
- const struct nfc_protocol *nfc_proto)
+ const struct nfc_protocol *nfc_proto, int kern)
{
struct sock *sk;
@@ -1009,7 +1009,7 @@ static int llcp_sock_create(struct net *net, struct socket *sock,
else
sock->ops = &llcp_sock_ops;
- sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC);
+ sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC, kern);
if (sk == NULL)
return -ENOMEM;
diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig
index a4f1e42e3..901c1ddba 100644
--- a/net/nfc/nci/Kconfig
+++ b/net/nfc/nci/Kconfig
@@ -19,3 +19,10 @@ config NFC_NCI_SPI
an NFC Controller (NFCC) and a Device Host (DH).
Say yes if you use an NCI driver that requires SPI link layer.
+
+config NFC_NCI_UART
+ depends on NFC_NCI && TTY
+ tristate "NCI over UART protocol support"
+ default n
+ help
+ Say yes if you use an NCI driver that requires UART link layer.
diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile
index 7ed894926..b4b85b82e 100644
--- a/net/nfc/nci/Makefile
+++ b/net/nfc/nci/Makefile
@@ -7,3 +7,6 @@ obj-$(CONFIG_NFC_NCI) += nci.o
nci-objs := core.o data.o lib.o ntf.o rsp.o hci.o
nci-$(CONFIG_NFC_NCI_SPI) += spi.o
+
+nci_uart-y += uart.o
+obj-$(CONFIG_NFC_NCI_UART) += nci_uart.o
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 49ff32106..95af2d24d 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -28,6 +28,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__
#include <linux/module.h>
+#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/completion.h>
@@ -73,6 +74,7 @@ void nci_req_complete(struct nci_dev *ndev, int result)
complete(&ndev->req_completion);
}
}
+EXPORT_SYMBOL(nci_req_complete);
static void nci_req_cancel(struct nci_dev *ndev, int err)
{
@@ -323,6 +325,32 @@ static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt)
sizeof(struct nci_rf_deactivate_cmd), &cmd);
}
+struct nci_prop_cmd_param {
+ __u16 opcode;
+ size_t len;
+ __u8 *payload;
+};
+
+static void nci_prop_cmd_req(struct nci_dev *ndev, unsigned long opt)
+{
+ struct nci_prop_cmd_param *param = (struct nci_prop_cmd_param *)opt;
+
+ nci_send_cmd(ndev, param->opcode, param->len, param->payload);
+}
+
+int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload)
+{
+ struct nci_prop_cmd_param param;
+
+ param.opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, oid);
+ param.len = len;
+ param.payload = payload;
+
+ return __nci_request(ndev, nci_prop_cmd_req, (unsigned long)&param,
+ msecs_to_jiffies(NCI_CMD_TIMEOUT));
+}
+EXPORT_SYMBOL(nci_prop_cmd);
+
static int nci_open_device(struct nci_dev *ndev)
{
int rc = 0;
@@ -343,11 +371,17 @@ static int nci_open_device(struct nci_dev *ndev)
set_bit(NCI_INIT, &ndev->flags);
- rc = __nci_request(ndev, nci_reset_req, 0,
- msecs_to_jiffies(NCI_RESET_TIMEOUT));
+ if (ndev->ops->init)
+ rc = ndev->ops->init(ndev);
- if (ndev->ops->setup)
- ndev->ops->setup(ndev);
+ if (!rc) {
+ rc = __nci_request(ndev, nci_reset_req, 0,
+ msecs_to_jiffies(NCI_RESET_TIMEOUT));
+ }
+
+ if (!rc && ndev->ops->setup) {
+ rc = ndev->ops->setup(ndev);
+ }
if (!rc) {
rc = __nci_request(ndev, nci_init_req, 0,
@@ -407,6 +441,12 @@ static int nci_close_device(struct nci_dev *ndev)
set_bit(NCI_INIT, &ndev->flags);
__nci_request(ndev, nci_reset_req, 0,
msecs_to_jiffies(NCI_RESET_TIMEOUT));
+
+ /* After this point our queues are empty
+ * and no works are scheduled.
+ */
+ ndev->ops->close(ndev);
+
clear_bit(NCI_INIT, &ndev->flags);
del_timer_sync(&ndev->cmd_timer);
@@ -414,10 +454,6 @@ static int nci_close_device(struct nci_dev *ndev)
/* Flush cmd wq */
flush_workqueue(ndev->cmd_wq);
- /* After this point our queues are empty
- * and no works are scheduled. */
- ndev->ops->close(ndev);
-
/* Clear flags */
ndev->flags = 0;
@@ -762,7 +798,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) {
nci_request(ndev, nci_rf_deactivate_req,
- NCI_DEACTIVATE_TYPE_SLEEP_MODE,
+ NCI_DEACTIVATE_TYPE_IDLE_MODE,
msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
}
}
@@ -961,6 +997,14 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops,
return NULL;
ndev->ops = ops;
+
+ if (ops->n_prop_ops > NCI_MAX_PROPRIETARY_CMD) {
+ pr_err("Too many proprietary commands: %zd\n",
+ ops->n_prop_ops);
+ ops->prop_ops = NULL;
+ ops->n_prop_ops = 0;
+ }
+
ndev->tx_headroom = tx_headroom;
ndev->tx_tailroom = tx_tailroom;
init_completion(&ndev->req_completion);
@@ -1165,6 +1209,49 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload)
return 0;
}
+/* Proprietary commands API */
+static struct nci_prop_ops *prop_cmd_lookup(struct nci_dev *ndev,
+ __u16 opcode)
+{
+ size_t i;
+ struct nci_prop_ops *prop_op;
+
+ if (!ndev->ops->prop_ops || !ndev->ops->n_prop_ops)
+ return NULL;
+
+ for (i = 0; i < ndev->ops->n_prop_ops; i++) {
+ prop_op = &ndev->ops->prop_ops[i];
+ if (prop_op->opcode == opcode)
+ return prop_op;
+ }
+
+ return NULL;
+}
+
+int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode,
+ struct sk_buff *skb)
+{
+ struct nci_prop_ops *prop_op;
+
+ prop_op = prop_cmd_lookup(ndev, rsp_opcode);
+ if (!prop_op || !prop_op->rsp)
+ return -ENOTSUPP;
+
+ return prop_op->rsp(ndev, skb);
+}
+
+int nci_prop_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode,
+ struct sk_buff *skb)
+{
+ struct nci_prop_ops *prop_op;
+
+ prop_op = prop_cmd_lookup(ndev, ntf_opcode);
+ if (!prop_op || !prop_op->ntf)
+ return -ENOTSUPP;
+
+ return prop_op->ntf(ndev, skb);
+}
+
/* ---- NCI TX Data worker thread ---- */
static void nci_tx_work(struct work_struct *work)
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index ed54ec533..af002df64 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -639,22 +639,19 @@ int nci_hci_dev_session_init(struct nci_dev *ndev)
ndev->hci_dev->init_data.gates[0].gate,
ndev->hci_dev->init_data.gates[0].pipe);
if (r < 0)
- goto exit;
+ return r;
r = nci_hci_get_param(ndev, NCI_HCI_ADMIN_GATE,
NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, &skb);
if (r < 0)
- goto exit;
+ return r;
if (skb->len &&
skb->len == strlen(ndev->hci_dev->init_data.session_id) &&
- memcmp(ndev->hci_dev->init_data.session_id,
- skb->data, skb->len) == 0 &&
+ !memcmp(ndev->hci_dev->init_data.session_id, skb->data, skb->len) &&
ndev->ops->hci_load_session) {
/* Restore gate<->pipe table from some proprietary location. */
r = ndev->ops->hci_load_session(ndev);
- if (r < 0)
- goto exit;
} else {
r = nci_hci_dev_connect_gates(ndev,
ndev->hci_dev->init_data.gate_count,
@@ -667,8 +664,6 @@ int nci_hci_dev_session_init(struct nci_dev *ndev)
ndev->hci_dev->init_data.session_id,
strlen(ndev->hci_dev->init_data.session_id));
}
- if (r == 0)
- goto exit;
exit:
kfree_skb(skb);
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 321807107..5d1c2e391 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -758,6 +758,15 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb)
/* strip the nci control header */
skb_pull(skb, NCI_CTRL_HDR_SIZE);
+ if (nci_opcode_gid(ntf_opcode) == NCI_GID_PROPRIETARY) {
+ if (nci_prop_ntf_packet(ndev, ntf_opcode, skb)) {
+ pr_err("unsupported ntf opcode 0x%x\n",
+ ntf_opcode);
+ }
+
+ goto end;
+ }
+
switch (ntf_opcode) {
case NCI_OP_CORE_CONN_CREDITS_NTF:
nci_core_conn_credits_ntf_packet(ndev, skb);
@@ -796,5 +805,6 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb)
break;
}
+end:
kfree_skb(skb);
}
diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c
index 02486bc2c..408bd8f85 100644
--- a/net/nfc/nci/rsp.c
+++ b/net/nfc/nci/rsp.c
@@ -296,6 +296,15 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
/* strip the nci control header */
skb_pull(skb, NCI_CTRL_HDR_SIZE);
+ if (nci_opcode_gid(rsp_opcode) == NCI_GID_PROPRIETARY) {
+ if (nci_prop_rsp_packet(ndev, rsp_opcode, skb) == -ENOTSUPP) {
+ pr_err("unsupported rsp opcode 0x%x\n",
+ rsp_opcode);
+ }
+
+ goto end;
+ }
+
switch (rsp_opcode) {
case NCI_OP_CORE_RESET_RSP:
nci_core_reset_rsp_packet(ndev, skb);
@@ -346,6 +355,7 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
break;
}
+end:
kfree_skb(skb);
/* trigger the next cmd */
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
new file mode 100644
index 000000000..21d887567
--- /dev/null
+++ b/net/nfc/nci/uart.c
@@ -0,0 +1,494 @@
+/*
+ * Copyright (C) 2015, Marvell International Ltd.
+ *
+ * This software file (the "File") is distributed by Marvell International
+ * Ltd. under the terms of the GNU General Public License Version 2, June 1991
+ * (the "License"). You may use, redistribute and/or modify this File in
+ * accordance with the terms and conditions of the License, a copy of which
+ * is available on the worldwide web at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
+ *
+ * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
+ * ARE EXPRESSLY DISCLAIMED. The License provides additional details about
+ * this warranty disclaimer.
+ */
+
+/* Inspired (hugely) by HCI LDISC implementation in Bluetooth.
+ *
+ * Copyright (C) 2000-2001 Qualcomm Incorporated
+ * Copyright (C) 2002-2003 Maxim Krasnyansky <maxk@qualcomm.com>
+ * Copyright (C) 2004-2005 Marcel Holtmann <marcel@holtmann.org>
+ */
+
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/interrupt.h>
+#include <linux/ptrace.h>
+#include <linux/poll.h>
+
+#include <linux/slab.h>
+#include <linux/tty.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/signal.h>
+#include <linux/ioctl.h>
+#include <linux/skbuff.h>
+
+#include <net/nfc/nci.h>
+#include <net/nfc/nci_core.h>
+
+/* TX states */
+#define NCI_UART_SENDING 1
+#define NCI_UART_TX_WAKEUP 2
+
+static struct nci_uart *nci_uart_drivers[NCI_UART_DRIVER_MAX];
+
+static inline struct sk_buff *nci_uart_dequeue(struct nci_uart *nu)
+{
+ struct sk_buff *skb = nu->tx_skb;
+
+ if (!skb)
+ skb = skb_dequeue(&nu->tx_q);
+ else
+ nu->tx_skb = NULL;
+
+ return skb;
+}
+
+static inline int nci_uart_queue_empty(struct nci_uart *nu)
+{
+ if (nu->tx_skb)
+ return 0;
+
+ return skb_queue_empty(&nu->tx_q);
+}
+
+static int nci_uart_tx_wakeup(struct nci_uart *nu)
+{
+ if (test_and_set_bit(NCI_UART_SENDING, &nu->tx_state)) {
+ set_bit(NCI_UART_TX_WAKEUP, &nu->tx_state);
+ return 0;
+ }
+
+ schedule_work(&nu->write_work);
+
+ return 0;
+}
+
+static void nci_uart_write_work(struct work_struct *work)
+{
+ struct nci_uart *nu = container_of(work, struct nci_uart, write_work);
+ struct tty_struct *tty = nu->tty;
+ struct sk_buff *skb;
+
+restart:
+ clear_bit(NCI_UART_TX_WAKEUP, &nu->tx_state);
+
+ if (nu->ops.tx_start)
+ nu->ops.tx_start(nu);
+
+ while ((skb = nci_uart_dequeue(nu))) {
+ int len;
+
+ set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+ len = tty->ops->write(tty, skb->data, skb->len);
+ skb_pull(skb, len);
+ if (skb->len) {
+ nu->tx_skb = skb;
+ break;
+ }
+ kfree_skb(skb);
+ }
+
+ if (test_bit(NCI_UART_TX_WAKEUP, &nu->tx_state))
+ goto restart;
+
+ if (nu->ops.tx_done && nci_uart_queue_empty(nu))
+ nu->ops.tx_done(nu);
+
+ clear_bit(NCI_UART_SENDING, &nu->tx_state);
+}
+
+static int nci_uart_set_driver(struct tty_struct *tty, unsigned int driver)
+{
+ struct nci_uart *nu = NULL;
+ int ret;
+
+ if (driver >= NCI_UART_DRIVER_MAX)
+ return -EINVAL;
+
+ if (!nci_uart_drivers[driver])
+ return -ENOENT;
+
+ nu = kzalloc(sizeof(*nu), GFP_KERNEL);
+ if (!nu)
+ return -ENOMEM;
+
+ memcpy(nu, nci_uart_drivers[driver], sizeof(struct nci_uart));
+ nu->tty = tty;
+ tty->disc_data = nu;
+ skb_queue_head_init(&nu->tx_q);
+ INIT_WORK(&nu->write_work, nci_uart_write_work);
+ spin_lock_init(&nu->rx_lock);
+
+ ret = nu->ops.open(nu);
+ if (ret) {
+ tty->disc_data = NULL;
+ kfree(nu);
+ } else if (!try_module_get(nu->owner)) {
+ nu->ops.close(nu);
+ tty->disc_data = NULL;
+ kfree(nu);
+ return -ENOENT;
+ }
+ return ret;
+}
+
+/* ------ LDISC part ------ */
+
+/* nci_uart_tty_open
+ *
+ * Called when line discipline changed to NCI_UART.
+ *
+ * Arguments:
+ * tty pointer to tty info structure
+ * Return Value:
+ * 0 if success, otherwise error code
+ */
+static int nci_uart_tty_open(struct tty_struct *tty)
+{
+ /* Error if the tty has no write op instead of leaving an exploitable
+ * hole
+ */
+ if (!tty->ops->write)
+ return -EOPNOTSUPP;
+
+ tty->disc_data = NULL;
+ tty->receive_room = 65536;
+
+ /* Flush any pending characters in the driver and line discipline. */
+
+ /* FIXME: why is this needed. Note don't use ldisc_ref here as the
+ * open path is before the ldisc is referencable.
+ */
+
+ if (tty->ldisc->ops->flush_buffer)
+ tty->ldisc->ops->flush_buffer(tty);
+ tty_driver_flush_buffer(tty);
+
+ return 0;
+}
+
+/* nci_uart_tty_close()
+ *
+ * Called when the line discipline is changed to something
+ * else, the tty is closed, or the tty detects a hangup.
+ */
+static void nci_uart_tty_close(struct tty_struct *tty)
+{
+ struct nci_uart *nu = (void *)tty->disc_data;
+
+ /* Detach from the tty */
+ tty->disc_data = NULL;
+
+ if (!nu)
+ return;
+
+ if (nu->tx_skb)
+ kfree_skb(nu->tx_skb);
+ if (nu->rx_skb)
+ kfree_skb(nu->rx_skb);
+
+ skb_queue_purge(&nu->tx_q);
+
+ nu->ops.close(nu);
+ nu->tty = NULL;
+ module_put(nu->owner);
+
+ cancel_work_sync(&nu->write_work);
+
+ kfree(nu);
+}
+
+/* nci_uart_tty_wakeup()
+ *
+ * Callback for transmit wakeup. Called when low level
+ * device driver can accept more send data.
+ *
+ * Arguments: tty pointer to associated tty instance data
+ * Return Value: None
+ */
+static void nci_uart_tty_wakeup(struct tty_struct *tty)
+{
+ struct nci_uart *nu = (void *)tty->disc_data;
+
+ if (!nu)
+ return;
+
+ clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+
+ if (tty != nu->tty)
+ return;
+
+ nci_uart_tx_wakeup(nu);
+}
+
+/* nci_uart_tty_receive()
+ *
+ * Called by tty low level driver when receive data is
+ * available.
+ *
+ * Arguments: tty pointer to tty isntance data
+ * data pointer to received data
+ * flags pointer to flags for data
+ * count count of received data in bytes
+ *
+ * Return Value: None
+ */
+static void nci_uart_tty_receive(struct tty_struct *tty, const u8 *data,
+ char *flags, int count)
+{
+ struct nci_uart *nu = (void *)tty->disc_data;
+
+ if (!nu || tty != nu->tty)
+ return;
+
+ spin_lock(&nu->rx_lock);
+ nu->ops.recv_buf(nu, (void *)data, flags, count);
+ spin_unlock(&nu->rx_lock);
+
+ tty_unthrottle(tty);
+}
+
+/* nci_uart_tty_ioctl()
+ *
+ * Process IOCTL system call for the tty device.
+ *
+ * Arguments:
+ *
+ * tty pointer to tty instance data
+ * file pointer to open file object for device
+ * cmd IOCTL command code
+ * arg argument for IOCTL call (cmd dependent)
+ *
+ * Return Value: Command dependent
+ */
+static int nci_uart_tty_ioctl(struct tty_struct *tty, struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ struct nci_uart *nu = (void *)tty->disc_data;
+ int err = 0;
+
+ switch (cmd) {
+ case NCIUARTSETDRIVER:
+ if (!nu)
+ return nci_uart_set_driver(tty, (unsigned int)arg);
+ else
+ return -EBUSY;
+ break;
+ default:
+ err = n_tty_ioctl_helper(tty, file, cmd, arg);
+ break;
+ }
+
+ return err;
+}
+
+/* We don't provide read/write/poll interface for user space. */
+static ssize_t nci_uart_tty_read(struct tty_struct *tty, struct file *file,
+ unsigned char __user *buf, size_t nr)
+{
+ return 0;
+}
+
+static ssize_t nci_uart_tty_write(struct tty_struct *tty, struct file *file,
+ const unsigned char *data, size_t count)
+{
+ return 0;
+}
+
+static unsigned int nci_uart_tty_poll(struct tty_struct *tty,
+ struct file *filp, poll_table *wait)
+{
+ return 0;
+}
+
+static int nci_uart_send(struct nci_uart *nu, struct sk_buff *skb)
+{
+ /* Queue TX packet */
+ skb_queue_tail(&nu->tx_q, skb);
+
+ /* Try to start TX (if possible) */
+ nci_uart_tx_wakeup(nu);
+
+ return 0;
+}
+
+/* -- Default recv_buf handler --
+ *
+ * This handler supposes that NCI frames are sent over UART link without any
+ * framing. It reads NCI header, retrieve the packet size and once all packet
+ * bytes are received it passes it to nci_uart driver for processing.
+ */
+static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data,
+ char *flags, int count)
+{
+ int chunk_len;
+
+ if (!nu->ndev) {
+ nfc_err(nu->tty->dev,
+ "receive data from tty but no NCI dev is attached yet, drop buffer\n");
+ return 0;
+ }
+
+ /* Decode all incoming data in packets
+ * and enqueue then for processing.
+ */
+ while (count > 0) {
+ /* If this is the first data of a packet, allocate a buffer */
+ if (!nu->rx_skb) {
+ nu->rx_packet_len = -1;
+ nu->rx_skb = nci_skb_alloc(nu->ndev,
+ NCI_MAX_PACKET_SIZE,
+ GFP_KERNEL);
+ if (!nu->rx_skb)
+ return -ENOMEM;
+ }
+
+ /* Eat byte after byte till full packet header is received */
+ if (nu->rx_skb->len < NCI_CTRL_HDR_SIZE) {
+ *skb_put(nu->rx_skb, 1) = *data++;
+ --count;
+ continue;
+ }
+
+ /* Header was received but packet len was not read */
+ if (nu->rx_packet_len < 0)
+ nu->rx_packet_len = NCI_CTRL_HDR_SIZE +
+ nci_plen(nu->rx_skb->data);
+
+ /* Compute how many bytes are missing and how many bytes can
+ * be consumed.
+ */
+ chunk_len = nu->rx_packet_len - nu->rx_skb->len;
+ if (count < chunk_len)
+ chunk_len = count;
+ memcpy(skb_put(nu->rx_skb, chunk_len), data, chunk_len);
+ data += chunk_len;
+ count -= chunk_len;
+
+ /* Chcek if packet is fully received */
+ if (nu->rx_packet_len == nu->rx_skb->len) {
+ /* Pass RX packet to driver */
+ if (nu->ops.recv(nu, nu->rx_skb) != 0)
+ nfc_err(nu->tty->dev, "corrupted RX packet\n");
+ /* Next packet will be a new one */
+ nu->rx_skb = NULL;
+ }
+ }
+
+ return 0;
+}
+
+/* -- Default recv handler -- */
+static int nci_uart_default_recv(struct nci_uart *nu, struct sk_buff *skb)
+{
+ return nci_recv_frame(nu->ndev, skb);
+}
+
+int nci_uart_register(struct nci_uart *nu)
+{
+ if (!nu || !nu->ops.open ||
+ !nu->ops.recv || !nu->ops.close)
+ return -EINVAL;
+
+ /* Set the send callback */
+ nu->ops.send = nci_uart_send;
+
+ /* Install default handlers if not overridden */
+ if (!nu->ops.recv_buf)
+ nu->ops.recv_buf = nci_uart_default_recv_buf;
+ if (!nu->ops.recv)
+ nu->ops.recv = nci_uart_default_recv;
+
+ /* Add this driver in the driver list */
+ if (nci_uart_drivers[nu->driver]) {
+ pr_err("driver %d is already registered\n", nu->driver);
+ return -EBUSY;
+ }
+ nci_uart_drivers[nu->driver] = nu;
+
+ pr_info("NCI uart driver '%s [%d]' registered\n", nu->name, nu->driver);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nci_uart_register);
+
+void nci_uart_unregister(struct nci_uart *nu)
+{
+ pr_info("NCI uart driver '%s [%d]' unregistered\n", nu->name,
+ nu->driver);
+
+ /* Remove this driver from the driver list */
+ nci_uart_drivers[nu->driver] = NULL;
+}
+EXPORT_SYMBOL_GPL(nci_uart_unregister);
+
+void nci_uart_set_config(struct nci_uart *nu, int baudrate, int flow_ctrl)
+{
+ struct ktermios new_termios;
+
+ if (!nu->tty)
+ return;
+
+ down_read(&nu->tty->termios_rwsem);
+ new_termios = nu->tty->termios;
+ up_read(&nu->tty->termios_rwsem);
+ tty_termios_encode_baud_rate(&new_termios, baudrate, baudrate);
+
+ if (flow_ctrl)
+ new_termios.c_cflag |= CRTSCTS;
+ else
+ new_termios.c_cflag &= ~CRTSCTS;
+
+ tty_set_termios(nu->tty, &new_termios);
+}
+EXPORT_SYMBOL_GPL(nci_uart_set_config);
+
+static struct tty_ldisc_ops nci_uart_ldisc = {
+ .magic = TTY_LDISC_MAGIC,
+ .owner = THIS_MODULE,
+ .name = "n_nci",
+ .open = nci_uart_tty_open,
+ .close = nci_uart_tty_close,
+ .read = nci_uart_tty_read,
+ .write = nci_uart_tty_write,
+ .poll = nci_uart_tty_poll,
+ .receive_buf = nci_uart_tty_receive,
+ .write_wakeup = nci_uart_tty_wakeup,
+ .ioctl = nci_uart_tty_ioctl,
+};
+
+static int __init nci_uart_init(void)
+{
+ memset(nci_uart_drivers, 0, sizeof(nci_uart_drivers));
+ return tty_register_ldisc(N_NCI, &nci_uart_ldisc);
+}
+
+static void __exit nci_uart_exit(void)
+{
+ tty_unregister_ldisc(N_NCI);
+}
+
+module_init(nci_uart_init);
+module_exit(nci_uart_exit);
+
+MODULE_AUTHOR("Marvell International Ltd.");
+MODULE_DESCRIPTION("NFC NCI UART driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_LDISC(N_NCI);
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 376303671..f85f37ed1 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -5,6 +5,12 @@
* Lauro Ramos Venancio <lauro.venancio@openbossa.org>
* Aloisio Almeida Jr <aloisio.almeida@openbossa.org>
*
+ * Vendor commands implementation based on net/wireless/nl80211.c
+ * which is:
+ *
+ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@@ -1489,6 +1495,50 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
}
+static int nfc_genl_vendor_cmd(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct nfc_dev *dev;
+ struct nfc_vendor_cmd *cmd;
+ u32 dev_idx, vid, subcmd;
+ u8 *data;
+ size_t data_len;
+ int i;
+
+ if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+ !info->attrs[NFC_ATTR_VENDOR_ID] ||
+ !info->attrs[NFC_ATTR_VENDOR_SUBCMD])
+ return -EINVAL;
+
+ dev_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
+ vid = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_ID]);
+ subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]);
+
+ dev = nfc_get_device(dev_idx);
+ if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds)
+ return -ENODEV;
+
+ data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]);
+ if (data) {
+ data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]);
+ if (data_len == 0)
+ return -EINVAL;
+ } else {
+ data_len = 0;
+ }
+
+ for (i = 0; i < dev->n_vendor_cmds; i++) {
+ cmd = &dev->vendor_cmds[i];
+
+ if (cmd->vendor_id != vid || cmd->subcmd != subcmd)
+ continue;
+
+ return cmd->doit(dev, data, data_len);
+ }
+
+ return -EOPNOTSUPP;
+}
+
static const struct genl_ops nfc_genl_ops[] = {
{
.cmd = NFC_CMD_GET_DEVICE,
@@ -1579,6 +1629,11 @@ static const struct genl_ops nfc_genl_ops[] = {
.doit = nfc_genl_activate_target,
.policy = nfc_genl_policy,
},
+ {
+ .cmd = NFC_CMD_VENDOR,
+ .doit = nfc_genl_vendor_cmd,
+ .policy = nfc_genl_policy,
+ },
};
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index a8ce80b47..5c93e8412 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -30,7 +30,7 @@ struct nfc_protocol {
struct proto *proto;
struct module *owner;
int (*create)(struct net *net, struct socket *sock,
- const struct nfc_protocol *nfc_proto);
+ const struct nfc_protocol *nfc_proto, int kern);
};
struct nfc_rawsock {
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 82b4e8024..e9a91488f 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -334,7 +334,7 @@ static void rawsock_destruct(struct sock *sk)
}
static int rawsock_create(struct net *net, struct socket *sock,
- const struct nfc_protocol *nfc_proto)
+ const struct nfc_protocol *nfc_proto, int kern)
{
struct sock *sk;
@@ -348,7 +348,7 @@ static int rawsock_create(struct net *net, struct socket *sock,
else
sock->ops = &rawsock_ops;
- sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto);
+ sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index ed6b0f8dd..15840401a 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -59,7 +59,7 @@ config OPENVSWITCH_VXLAN
config OPENVSWITCH_GENEVE
tristate "Open vSwitch Geneve tunneling support"
depends on OPENVSWITCH
- depends on GENEVE
+ depends on GENEVE_CORE
default OPENVSWITCH
---help---
If you say Y here, then the Open vSwitch will be able create geneve vport.
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index b491c1c29..ee34f474a 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -273,28 +273,36 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
return 0;
}
-static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
- __be32 *addr, __be32 new_addr)
+static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
+ __be32 addr, __be32 new_addr)
{
int transport_len = skb->len - skb_transport_offset(skb);
+ if (nh->frag_off & htons(IP_OFFSET))
+ return;
+
if (nh->protocol == IPPROTO_TCP) {
if (likely(transport_len >= sizeof(struct tcphdr)))
inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
- *addr, new_addr, 1);
+ addr, new_addr, 1);
} else if (nh->protocol == IPPROTO_UDP) {
if (likely(transport_len >= sizeof(struct udphdr))) {
struct udphdr *uh = udp_hdr(skb);
if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace4(&uh->check, skb,
- *addr, new_addr, 1);
+ addr, new_addr, 1);
if (!uh->check)
uh->check = CSUM_MANGLED_0;
}
}
}
+}
+static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
+ __be32 *addr, __be32 new_addr)
+{
+ update_ip_l4_checksum(skb, nh, *addr, new_addr);
csum_replace4(&nh->check, *addr, new_addr);
skb_clear_hash(skb);
*addr = new_addr;
@@ -608,17 +616,16 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
}
static int output_userspace(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *key, const struct nlattr *attr)
+ struct sw_flow_key *key, const struct nlattr *attr,
+ const struct nlattr *actions, int actions_len)
{
struct ovs_tunnel_info info;
struct dp_upcall_info upcall;
const struct nlattr *a;
int rem;
+ memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_ACTION;
- upcall.userdata = NULL;
- upcall.portid = 0;
- upcall.egress_tun_info = NULL;
for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
a = nla_next(a, &rem)) {
@@ -647,6 +654,13 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
break;
}
+ case OVS_USERSPACE_ATTR_ACTIONS: {
+ /* Include actions. */
+ upcall.actions = actions;
+ upcall.actions_len = actions_len;
+ break;
+ }
+
} /* End of switch. */
}
@@ -654,7 +668,8 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
}
static int sample(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *key, const struct nlattr *attr)
+ struct sw_flow_key *key, const struct nlattr *attr,
+ const struct nlattr *actions, int actions_len)
{
const struct nlattr *acts_list = NULL;
const struct nlattr *a;
@@ -688,7 +703,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
*/
if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
nla_is_last(a, rem)))
- return output_userspace(dp, skb, key, a);
+ return output_userspace(dp, skb, key, a, actions, actions_len);
skb = skb_clone(skb, GFP_ATOMIC);
if (!skb)
@@ -872,7 +887,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
break;
case OVS_ACTION_ATTR_USERSPACE:
- output_userspace(dp, skb, key, a);
+ output_userspace(dp, skb, key, a, attr, len);
break;
case OVS_ACTION_ATTR_HASH:
@@ -916,7 +931,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
break;
case OVS_ACTION_ATTR_SAMPLE:
- err = sample(dp, skb, key, a);
+ err = sample(dp, skb, key, a, attr, len);
break;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 096c6276e..ff8c4a4c1 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -272,10 +272,9 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
struct dp_upcall_info upcall;
int error;
+ memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_MISS;
- upcall.userdata = NULL;
upcall.portid = ovs_vport_find_upcall_portid(p, skb);
- upcall.egress_tun_info = NULL;
error = ovs_dp_upcall(dp, skb, key, &upcall);
if (unlikely(error))
kfree_skb(skb);
@@ -397,6 +396,10 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
if (upcall_info->egress_tun_info)
size += nla_total_size(ovs_tun_key_attr_size());
+ /* OVS_PACKET_ATTR_ACTIONS */
+ if (upcall_info->actions_len)
+ size += nla_total_size(upcall_info->actions_len);
+
return size;
}
@@ -478,6 +481,17 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
nla_nest_end(user_skb, nla);
}
+ if (upcall_info->actions_len) {
+ nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
+ err = ovs_nla_put_actions(upcall_info->actions,
+ upcall_info->actions_len,
+ user_skb);
+ if (!err)
+ nla_nest_end(user_skb, nla);
+ else
+ nla_nest_cancel(user_skb, nla);
+ }
+
/* Only reserve room for attribute header, packet data is added
* in skb_zerocopy() */
if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
@@ -545,7 +559,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
/* Normally, setting the skb 'protocol' field would be handled by a
* call to eth_type_trans(), but it assumes there's a sending
* device, which we may not have. */
- if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
+ if (eth_proto_is_802_3(eth->h_proto))
packet->protocol = eth->h_proto;
else
packet->protocol = htons(ETH_P_802_2);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 4ec4a480b..cd691e935 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -116,6 +116,8 @@ struct ovs_skb_cb {
struct dp_upcall_info {
const struct ovs_tunnel_info *egress_tun_info;
const struct nlattr *userdata;
+ const struct nlattr *actions;
+ int actions_len;
u32 portid;
u8 cmd;
};
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 2dacc7b5a..bc7b0aba9 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -332,7 +332,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
proto = *(__be16 *) skb->data;
__skb_pull(skb, sizeof(__be16));
- if (ntohs(proto) >= ETH_P_802_3_MIN)
+ if (eth_proto_is_802_3(proto))
return proto;
if (skb->len < sizeof(struct llc_snap_hdr))
@@ -349,7 +349,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
__skb_pull(skb, sizeof(struct llc_snap_hdr));
- if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN)
+ if (eth_proto_is_802_3(llc->ethertype))
return llc->ethertype;
return htons(ETH_P_802_2);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index c691b1a1e..624e41c42 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -816,7 +816,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
if (is_mask) {
/* Always exact match EtherType. */
eth_type = htons(0xffff);
- } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
+ } else if (!eth_proto_is_802_3(eth_type)) {
OVS_NLERR(log, "EtherType %x is less than min %x",
ntohs(eth_type), ETH_P_802_3_MIN);
return -EINVAL;
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 4613df8c8..65523948f 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -752,7 +752,7 @@ int ovs_flow_init(void)
BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
- + (num_possible_nodes()
+ + (nr_node_ids
* sizeof(struct flow_stats *)),
0, 0, NULL);
if (flow_cache == NULL)
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index bf02fd580..208c576bd 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -46,11 +46,6 @@ static inline struct geneve_port *geneve_vport(const struct vport *vport)
return vport_priv(vport);
}
-static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
-{
- return (struct genevehdr *)(udp_hdr(skb) + 1);
-}
-
/* Convert 64 bit tunnel ID to 24 bit VNI. */
static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
{
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index fe1610dde..ed458b315 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -543,15 +543,11 @@ static void prb_init_blk_timer(struct packet_sock *po,
pkc->retire_blk_timer.expires = jiffies;
}
-static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring)
+static void prb_setup_retire_blk_timer(struct packet_sock *po)
{
struct tpacket_kbdq_core *pkc;
- if (tx_ring)
- BUG();
-
- pkc = tx_ring ? GET_PBDQC_FROM_RB(&po->tx_ring) :
- GET_PBDQC_FROM_RB(&po->rx_ring);
+ pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired);
}
@@ -607,7 +603,7 @@ static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
static void init_prb_bdqc(struct packet_sock *po,
struct packet_ring_buffer *rb,
struct pgv *pg_vec,
- union tpacket_req_u *req_u, int tx_ring)
+ union tpacket_req_u *req_u)
{
struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb);
struct tpacket_block_desc *pbd;
@@ -634,7 +630,7 @@ static void init_prb_bdqc(struct packet_sock *po,
p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
prb_init_ft_ops(p1, req_u);
- prb_setup_retire_blk_timer(po, tx_ring);
+ prb_setup_retire_blk_timer(po);
prb_open_block(p1, pbd);
}
@@ -1234,27 +1230,81 @@ static void packet_free_pending(struct packet_sock *po)
free_percpu(po->tx_ring.pending_refcnt);
}
-static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
+#define ROOM_POW_OFF 2
+#define ROOM_NONE 0x0
+#define ROOM_LOW 0x1
+#define ROOM_NORMAL 0x2
+
+static bool __tpacket_has_room(struct packet_sock *po, int pow_off)
+{
+ int idx, len;
+
+ len = po->rx_ring.frame_max + 1;
+ idx = po->rx_ring.head;
+ if (pow_off)
+ idx += len >> pow_off;
+ if (idx >= len)
+ idx -= len;
+ return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
+}
+
+static bool __tpacket_v3_has_room(struct packet_sock *po, int pow_off)
+{
+ int idx, len;
+
+ len = po->rx_ring.prb_bdqc.knum_blocks;
+ idx = po->rx_ring.prb_bdqc.kactive_blk_num;
+ if (pow_off)
+ idx += len >> pow_off;
+ if (idx >= len)
+ idx -= len;
+ return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
+}
+
+static int __packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
{
struct sock *sk = &po->sk;
- bool has_room;
+ int ret = ROOM_NONE;
+
+ if (po->prot_hook.func != tpacket_rcv) {
+ int avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc)
+ - (skb ? skb->truesize : 0);
+ if (avail > (sk->sk_rcvbuf >> ROOM_POW_OFF))
+ return ROOM_NORMAL;
+ else if (avail > 0)
+ return ROOM_LOW;
+ else
+ return ROOM_NONE;
+ }
- if (po->prot_hook.func != tpacket_rcv)
- return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize)
- <= sk->sk_rcvbuf;
+ if (po->tp_version == TPACKET_V3) {
+ if (__tpacket_v3_has_room(po, ROOM_POW_OFF))
+ ret = ROOM_NORMAL;
+ else if (__tpacket_v3_has_room(po, 0))
+ ret = ROOM_LOW;
+ } else {
+ if (__tpacket_has_room(po, ROOM_POW_OFF))
+ ret = ROOM_NORMAL;
+ else if (__tpacket_has_room(po, 0))
+ ret = ROOM_LOW;
+ }
- spin_lock(&sk->sk_receive_queue.lock);
- if (po->tp_version == TPACKET_V3)
- has_room = prb_lookup_block(po, &po->rx_ring,
- po->rx_ring.prb_bdqc.kactive_blk_num,
- TP_STATUS_KERNEL);
- else
- has_room = packet_lookup_frame(po, &po->rx_ring,
- po->rx_ring.head,
- TP_STATUS_KERNEL);
- spin_unlock(&sk->sk_receive_queue.lock);
+ return ret;
+}
+
+static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
+{
+ int ret;
+ bool has_room;
+
+ spin_lock_bh(&po->sk.sk_receive_queue.lock);
+ ret = __packet_rcv_has_room(po, skb);
+ has_room = ret == ROOM_NORMAL;
+ if (po->pressure == has_room)
+ po->pressure = !has_room;
+ spin_unlock_bh(&po->sk.sk_receive_queue.lock);
- return has_room;
+ return ret;
}
static void packet_sock_destruct(struct sock *sk)
@@ -1272,6 +1322,20 @@ static void packet_sock_destruct(struct sock *sk)
sk_refcnt_debug_dec(sk);
}
+static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb)
+{
+ u32 rxhash;
+ int i, count = 0;
+
+ rxhash = skb_get_hash(skb);
+ for (i = 0; i < ROLLOVER_HLEN; i++)
+ if (po->rollover->history[i] == rxhash)
+ count++;
+
+ po->rollover->history[prandom_u32() % ROLLOVER_HLEN] = rxhash;
+ return count > (ROLLOVER_HLEN >> 1);
+}
+
static unsigned int fanout_demux_hash(struct packet_fanout *f,
struct sk_buff *skb,
unsigned int num)
@@ -1304,22 +1368,40 @@ static unsigned int fanout_demux_rnd(struct packet_fanout *f,
static unsigned int fanout_demux_rollover(struct packet_fanout *f,
struct sk_buff *skb,
- unsigned int idx, unsigned int skip,
+ unsigned int idx, bool try_self,
unsigned int num)
{
- unsigned int i, j;
+ struct packet_sock *po, *po_next, *po_skip = NULL;
+ unsigned int i, j, room = ROOM_NONE;
+
+ po = pkt_sk(f->arr[idx]);
- i = j = min_t(int, f->next[idx], num - 1);
+ if (try_self) {
+ room = packet_rcv_has_room(po, skb);
+ if (room == ROOM_NORMAL ||
+ (room == ROOM_LOW && !fanout_flow_is_huge(po, skb)))
+ return idx;
+ po_skip = po;
+ }
+
+ i = j = min_t(int, po->rollover->sock, num - 1);
do {
- if (i != skip && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) {
+ po_next = pkt_sk(f->arr[i]);
+ if (po_next != po_skip && !po_next->pressure &&
+ packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
if (i != j)
- f->next[idx] = i;
+ po->rollover->sock = i;
+ atomic_long_inc(&po->rollover->num);
+ if (room == ROOM_LOW)
+ atomic_long_inc(&po->rollover->num_huge);
return i;
}
+
if (++i == num)
i = 0;
} while (i != j);
+ atomic_long_inc(&po->rollover->num_failed);
return idx;
}
@@ -1372,17 +1454,14 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
idx = fanout_demux_qm(f, skb, num);
break;
case PACKET_FANOUT_ROLLOVER:
- idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num);
+ idx = fanout_demux_rollover(f, skb, 0, false, num);
break;
}
- po = pkt_sk(f->arr[idx]);
- if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER) &&
- unlikely(!packet_rcv_has_room(po, skb))) {
- idx = fanout_demux_rollover(f, skb, idx, idx, num);
- po = pkt_sk(f->arr[idx]);
- }
+ if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
+ idx = fanout_demux_rollover(f, skb, idx, true, num);
+ po = pkt_sk(f->arr[idx]);
return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
}
@@ -1453,6 +1532,16 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
if (po->fanout)
return -EALREADY;
+ if (type == PACKET_FANOUT_ROLLOVER ||
+ (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) {
+ po->rollover = kzalloc(sizeof(*po->rollover), GFP_KERNEL);
+ if (!po->rollover)
+ return -ENOMEM;
+ atomic_long_set(&po->rollover->num, 0);
+ atomic_long_set(&po->rollover->num_huge, 0);
+ atomic_long_set(&po->rollover->num_failed, 0);
+ }
+
mutex_lock(&fanout_mutex);
match = NULL;
list_for_each_entry(f, &fanout_list, list) {
@@ -1501,6 +1590,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
}
out:
mutex_unlock(&fanout_mutex);
+ if (err) {
+ kfree(po->rollover);
+ po->rollover = NULL;
+ }
return err;
}
@@ -1522,6 +1615,9 @@ static void fanout_release(struct sock *sk)
kfree(f);
}
mutex_unlock(&fanout_mutex);
+
+ if (po->rollover)
+ kfree_rcu(po->rollover, rcu);
}
static const struct proto_ops packet_ops;
@@ -2307,7 +2403,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
}
tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
addr, hlen);
- if (tp_len > dev->mtu + dev->hard_header_len) {
+ if (likely(tp_len >= 0) &&
+ tp_len > dev->mtu + dev->hard_header_len) {
struct ethhdr *ehdr;
/* Earlier code assumed this would be a VLAN pkt,
* double-check this now that we have the actual
@@ -2688,7 +2785,7 @@ static int packet_release(struct socket *sock)
static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
{
struct packet_sock *po = pkt_sk(sk);
- const struct net_device *dev_curr;
+ struct net_device *dev_curr;
__be16 proto_curr;
bool need_rehook;
@@ -2712,15 +2809,13 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
po->num = proto;
po->prot_hook.type = proto;
-
- if (po->prot_hook.dev)
- dev_put(po->prot_hook.dev);
-
po->prot_hook.dev = dev;
po->ifindex = dev ? dev->ifindex : 0;
packet_cached_dev_assign(po, dev);
}
+ if (dev_curr)
+ dev_put(dev_curr);
if (proto == 0 || !need_rehook)
goto out_unlock;
@@ -2821,7 +2916,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
sock->state = SS_UNCONNECTED;
err = -ENOBUFS;
- sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
+ sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern);
if (sk == NULL)
goto out;
@@ -2851,6 +2946,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
spin_lock_init(&po->bind_lock);
mutex_init(&po->pg_vec_lock);
+ po->rollover = NULL;
po->prot_hook.func = packet_rcv;
if (sock->type == SOCK_PACKET)
@@ -2928,6 +3024,9 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (skb == NULL)
goto out;
+ if (pkt_sk(sk)->pressure)
+ packet_rcv_has_room(pkt_sk(sk), NULL);
+
if (pkt_sk(sk)->has_vnet_hdr) {
struct virtio_net_hdr vnet_hdr = { 0 };
@@ -3471,6 +3570,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
struct packet_sock *po = pkt_sk(sk);
void *data = &val;
union tpacket_stats_u st;
+ struct tpacket_rollover_stats rstats;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
@@ -3546,6 +3646,15 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
((u32)po->fanout->flags << 24)) :
0);
break;
+ case PACKET_ROLLOVER_STATS:
+ if (!po->rollover)
+ return -EINVAL;
+ rstats.tp_all = atomic_long_read(&po->rollover->num);
+ rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
+ rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
+ data = &rstats;
+ lv = sizeof(rstats);
+ break;
case PACKET_TX_HAS_OFF:
val = po->tp_tx_has_off;
break;
@@ -3683,6 +3792,8 @@ static unsigned int packet_poll(struct file *file, struct socket *sock,
TP_STATUS_KERNEL))
mask |= POLLIN | POLLRDNORM;
}
+ if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
+ po->pressure = 0;
spin_unlock_bh(&sk->sk_receive_queue.lock);
spin_lock_bh(&sk->sk_write_queue.lock);
if (po->tx_ring.pg_vec) {
@@ -3872,7 +3983,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
* it above but just being paranoid
*/
if (!tx_ring)
- init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring);
+ init_prb_bdqc(po, rb, pg_vec, req_u);
break;
default:
break;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index fe6e20cae..e20b3e882 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -82,12 +82,21 @@ struct packet_fanout {
atomic_t rr_cur;
struct list_head list;
struct sock *arr[PACKET_FANOUT_MAX];
- int next[PACKET_FANOUT_MAX];
spinlock_t lock;
atomic_t sk_ref;
struct packet_type prot_hook ____cacheline_aligned_in_smp;
};
+struct packet_rollover {
+ int sock;
+ struct rcu_head rcu;
+ atomic_long_t num;
+ atomic_long_t num_huge;
+ atomic_long_t num_failed;
+#define ROLLOVER_HLEN (L1_CACHE_BYTES / sizeof(u32))
+ u32 history[ROLLOVER_HLEN] ____cacheline_aligned;
+} ____cacheline_aligned_in_smp;
+
struct packet_sock {
/* struct sock has to be the first member of packet_sock */
struct sock sk;
@@ -102,8 +111,10 @@ struct packet_sock {
auxdata:1,
origdev:1,
has_vnet_hdr:1;
+ int pressure;
int ifindex; /* bound device */
__be16 num;
+ struct packet_rollover *rollover;
struct packet_mclist *mclist;
atomic_t mapped;
enum tpacket_versions tp_version;
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 32ab87d34..10d42f322 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -97,7 +97,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol,
goto out;
}
- sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot);
+ sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot, kern);
if (sk == NULL) {
err = -ENOMEM;
goto out;
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 6de2aeb98..850a86cde 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -845,7 +845,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
}
/* Create a new to-be-accepted sock */
- newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot);
+ newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, 0);
if (!newsk) {
pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL);
err = -ENOBUFS;
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 10443377f..896834cd3 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -40,15 +40,6 @@
#include "rds.h"
-char *rds_str_array(char **array, size_t elements, size_t index)
-{
- if ((index < elements) && array[index])
- return array[index];
- else
- return "unknown";
-}
-EXPORT_SYMBOL(rds_str_array);
-
/* this is just used for stats gathering :/ */
static DEFINE_SPINLOCK(rds_sock_lock);
static unsigned long rds_sock_count;
@@ -270,6 +261,28 @@ static int rds_cong_monitor(struct rds_sock *rs, char __user *optval,
return ret;
}
+static int rds_set_transport(struct rds_sock *rs, char __user *optval,
+ int optlen)
+{
+ int t_type;
+
+ if (rs->rs_transport)
+ return -EOPNOTSUPP; /* previously attached to transport */
+
+ if (optlen != sizeof(int))
+ return -EINVAL;
+
+ if (copy_from_user(&t_type, (int __user *)optval, sizeof(t_type)))
+ return -EFAULT;
+
+ if (t_type < 0 || t_type >= RDS_TRANS_COUNT)
+ return -EINVAL;
+
+ rs->rs_transport = rds_trans_get(t_type);
+
+ return rs->rs_transport ? 0 : -ENOPROTOOPT;
+}
+
static int rds_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen)
{
@@ -300,6 +313,11 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
case RDS_CONG_MONITOR:
ret = rds_cong_monitor(rs, optval, optlen);
break;
+ case SO_RDS_TRANSPORT:
+ lock_sock(sock->sk);
+ ret = rds_set_transport(rs, optval, optlen);
+ release_sock(sock->sk);
+ break;
default:
ret = -ENOPROTOOPT;
}
@@ -312,6 +330,7 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
{
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
int ret = -ENOPROTOOPT, len;
+ int trans;
if (level != SOL_RDS)
goto out;
@@ -337,6 +356,19 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
else
ret = 0;
break;
+ case SO_RDS_TRANSPORT:
+ if (len < sizeof(int)) {
+ ret = -EINVAL;
+ break;
+ }
+ trans = (rs->rs_transport ? rs->rs_transport->t_type :
+ RDS_TRANS_NONE); /* unbound */
+ if (put_user(trans, (int __user *)optval) ||
+ put_user(sizeof(int), optlen))
+ ret = -EFAULT;
+ else
+ ret = 0;
+ break;
default:
break;
}
@@ -440,7 +472,7 @@ static int rds_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_SEQPACKET || protocol)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto);
+ sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/rds/bind.c b/net/rds/bind.c
index a2e6562da..4ebd29c12 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -181,6 +181,10 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (ret)
goto out;
+ if (rs->rs_transport) { /* previously bound */
+ ret = 0;
+ goto out;
+ }
trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
if (!trans) {
ret = -EADDRNOTAVAIL;
diff --git a/net/rds/ib.h b/net/rds/ib.h
index c36d71322..86d88ec5d 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -235,28 +235,34 @@ extern struct workqueue_struct *rds_ib_wq;
* doesn't define it.
*/
static inline void rds_ib_dma_sync_sg_for_cpu(struct ib_device *dev,
- struct scatterlist *sg, unsigned int sg_dma_len, int direction)
+ struct scatterlist *sglist,
+ unsigned int sg_dma_len,
+ int direction)
{
+ struct scatterlist *sg;
unsigned int i;
- for (i = 0; i < sg_dma_len; ++i) {
+ for_each_sg(sglist, sg, sg_dma_len, i) {
ib_dma_sync_single_for_cpu(dev,
- ib_sg_dma_address(dev, &sg[i]),
- ib_sg_dma_len(dev, &sg[i]),
+ ib_sg_dma_address(dev, sg),
+ ib_sg_dma_len(dev, sg),
direction);
}
}
#define ib_dma_sync_sg_for_cpu rds_ib_dma_sync_sg_for_cpu
static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev,
- struct scatterlist *sg, unsigned int sg_dma_len, int direction)
+ struct scatterlist *sglist,
+ unsigned int sg_dma_len,
+ int direction)
{
+ struct scatterlist *sg;
unsigned int i;
- for (i = 0; i < sg_dma_len; ++i) {
+ for_each_sg(sglist, sg, sg_dma_len, i) {
ib_dma_sync_single_for_device(dev,
- ib_sg_dma_address(dev, &sg[i]),
- ib_sg_dma_len(dev, &sg[i]),
+ ib_sg_dma_address(dev, sg),
+ ib_sg_dma_len(dev, sg),
direction);
}
}
@@ -339,7 +345,6 @@ u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest);
extern wait_queue_head_t rds_ib_ring_empty_wait;
/* ib_send.c */
-char *rds_ib_wc_status_str(enum ib_wc_status status);
void rds_ib_xmit_complete(struct rds_connection *conn);
int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
unsigned int hdr_off, unsigned int sg, unsigned int off);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 8a09ee7db..0da2a45b3 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -39,36 +39,6 @@
#include "rds.h"
#include "ib.h"
-static char *rds_ib_event_type_strings[] = {
-#define RDS_IB_EVENT_STRING(foo) \
- [IB_EVENT_##foo] = __stringify(IB_EVENT_##foo)
- RDS_IB_EVENT_STRING(CQ_ERR),
- RDS_IB_EVENT_STRING(QP_FATAL),
- RDS_IB_EVENT_STRING(QP_REQ_ERR),
- RDS_IB_EVENT_STRING(QP_ACCESS_ERR),
- RDS_IB_EVENT_STRING(COMM_EST),
- RDS_IB_EVENT_STRING(SQ_DRAINED),
- RDS_IB_EVENT_STRING(PATH_MIG),
- RDS_IB_EVENT_STRING(PATH_MIG_ERR),
- RDS_IB_EVENT_STRING(DEVICE_FATAL),
- RDS_IB_EVENT_STRING(PORT_ACTIVE),
- RDS_IB_EVENT_STRING(PORT_ERR),
- RDS_IB_EVENT_STRING(LID_CHANGE),
- RDS_IB_EVENT_STRING(PKEY_CHANGE),
- RDS_IB_EVENT_STRING(SM_CHANGE),
- RDS_IB_EVENT_STRING(SRQ_ERR),
- RDS_IB_EVENT_STRING(SRQ_LIMIT_REACHED),
- RDS_IB_EVENT_STRING(QP_LAST_WQE_REACHED),
- RDS_IB_EVENT_STRING(CLIENT_REREGISTER),
-#undef RDS_IB_EVENT_STRING
-};
-
-static char *rds_ib_event_str(enum ib_event_type type)
-{
- return rds_str_array(rds_ib_event_type_strings,
- ARRAY_SIZE(rds_ib_event_type_strings), type);
-};
-
/*
* Set the selected protocol version
*/
@@ -243,7 +213,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
static void rds_ib_cq_event_handler(struct ib_event *event, void *data)
{
rdsdebug("event %u (%s) data %p\n",
- event->event, rds_ib_event_str(event->event), data);
+ event->event, ib_event_msg(event->event), data);
}
static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
@@ -252,7 +222,7 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
struct rds_ib_connection *ic = conn->c_transport_data;
rdsdebug("conn %p ic %p event %u (%s)\n", conn, ic, event->event,
- rds_ib_event_str(event->event));
+ ib_event_msg(event->event));
switch (event->event) {
case IB_EVENT_COMM_EST:
@@ -261,7 +231,7 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
default:
rdsdebug("Fatal QP Event %u (%s) "
"- connection %pI4->%pI4, reconnecting\n",
- event->event, rds_ib_event_str(event->event),
+ event->event, ib_event_msg(event->event),
&conn->c_laddr, &conn->c_faddr);
rds_conn_drop(conn);
break;
@@ -277,6 +247,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
struct rds_ib_connection *ic = conn->c_transport_data;
struct ib_device *dev = ic->i_cm_id->device;
struct ib_qp_init_attr attr;
+ struct ib_cq_init_attr cq_attr = {};
struct rds_ib_device *rds_ibdev;
int ret;
@@ -300,9 +271,10 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
ic->i_pd = rds_ibdev->pd;
ic->i_mr = rds_ibdev->mr;
+ cq_attr.cqe = ic->i_send_ring.w_nr + 1;
ic->i_send_cq = ib_create_cq(dev, rds_ib_send_cq_comp_handler,
rds_ib_cq_event_handler, conn,
- ic->i_send_ring.w_nr + 1, 0);
+ &cq_attr);
if (IS_ERR(ic->i_send_cq)) {
ret = PTR_ERR(ic->i_send_cq);
ic->i_send_cq = NULL;
@@ -310,9 +282,10 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
goto out;
}
+ cq_attr.cqe = ic->i_recv_ring.w_nr;
ic->i_recv_cq = ib_create_cq(dev, rds_ib_recv_cq_comp_handler,
rds_ib_cq_event_handler, conn,
- ic->i_recv_ring.w_nr, 0);
+ &cq_attr);
if (IS_ERR(ic->i_recv_cq)) {
ret = PTR_ERR(ic->i_recv_cq);
ic->i_recv_cq = NULL;
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 1b981a4e4..cac5b4506 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -956,7 +956,7 @@ static inline void rds_poll_cq(struct rds_ib_connection *ic,
while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) {
rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
(unsigned long long)wc.wr_id, wc.status,
- rds_ib_wc_status_str(wc.status), wc.byte_len,
+ ib_wc_status_msg(wc.status), wc.byte_len,
be32_to_cpu(wc.ex.imm_data));
rds_ib_stats_inc(s_ib_rx_cq_event);
@@ -978,7 +978,7 @@ static inline void rds_poll_cq(struct rds_ib_connection *ic,
"status %u (%s), disconnecting and "
"reconnecting\n", &conn->c_faddr,
wc.status,
- rds_ib_wc_status_str(wc.status));
+ ib_wc_status_msg(wc.status));
}
/*
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index bd3825d38..5d0a704fa 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -39,40 +39,6 @@
#include "rds.h"
#include "ib.h"
-static char *rds_ib_wc_status_strings[] = {
-#define RDS_IB_WC_STATUS_STR(foo) \
- [IB_WC_##foo] = __stringify(IB_WC_##foo)
- RDS_IB_WC_STATUS_STR(SUCCESS),
- RDS_IB_WC_STATUS_STR(LOC_LEN_ERR),
- RDS_IB_WC_STATUS_STR(LOC_QP_OP_ERR),
- RDS_IB_WC_STATUS_STR(LOC_EEC_OP_ERR),
- RDS_IB_WC_STATUS_STR(LOC_PROT_ERR),
- RDS_IB_WC_STATUS_STR(WR_FLUSH_ERR),
- RDS_IB_WC_STATUS_STR(MW_BIND_ERR),
- RDS_IB_WC_STATUS_STR(BAD_RESP_ERR),
- RDS_IB_WC_STATUS_STR(LOC_ACCESS_ERR),
- RDS_IB_WC_STATUS_STR(REM_INV_REQ_ERR),
- RDS_IB_WC_STATUS_STR(REM_ACCESS_ERR),
- RDS_IB_WC_STATUS_STR(REM_OP_ERR),
- RDS_IB_WC_STATUS_STR(RETRY_EXC_ERR),
- RDS_IB_WC_STATUS_STR(RNR_RETRY_EXC_ERR),
- RDS_IB_WC_STATUS_STR(LOC_RDD_VIOL_ERR),
- RDS_IB_WC_STATUS_STR(REM_INV_RD_REQ_ERR),
- RDS_IB_WC_STATUS_STR(REM_ABORT_ERR),
- RDS_IB_WC_STATUS_STR(INV_EECN_ERR),
- RDS_IB_WC_STATUS_STR(INV_EEC_STATE_ERR),
- RDS_IB_WC_STATUS_STR(FATAL_ERR),
- RDS_IB_WC_STATUS_STR(RESP_TIMEOUT_ERR),
- RDS_IB_WC_STATUS_STR(GENERAL_ERR),
-#undef RDS_IB_WC_STATUS_STR
-};
-
-char *rds_ib_wc_status_str(enum ib_wc_status status)
-{
- return rds_str_array(rds_ib_wc_status_strings,
- ARRAY_SIZE(rds_ib_wc_status_strings), status);
-}
-
/*
* Convert IB-specific error message to RDS error message and call core
* completion handler.
@@ -293,7 +259,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
while (ib_poll_cq(cq, 1, &wc) > 0) {
rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
(unsigned long long)wc.wr_id, wc.status,
- rds_ib_wc_status_str(wc.status), wc.byte_len,
+ ib_wc_status_msg(wc.status), wc.byte_len,
be32_to_cpu(wc.ex.imm_data));
rds_ib_stats_inc(s_ib_tx_cq_event);
@@ -344,7 +310,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
rds_ib_conn_error(conn, "send completion on %pI4 had status "
"%u (%s), disconnecting and reconnecting\n",
&conn->c_faddr, wc.status,
- rds_ib_wc_status_str(wc.status));
+ ib_wc_status_msg(wc.status));
}
}
}
@@ -605,6 +571,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
}
rds_message_addref(rm);
+ rm->data.op_dmasg = 0;
+ rm->data.op_dmaoff = 0;
ic->i_data_op = &rm->data;
/* Finalize the header */
@@ -658,7 +626,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
send = &ic->i_sends[pos];
first = send;
prev = NULL;
- scat = &ic->i_data_op->op_sg[sg];
+ scat = &ic->i_data_op->op_sg[rm->data.op_dmasg];
i = 0;
do {
unsigned int len = 0;
@@ -680,17 +648,20 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
/* Set up the data, if present */
if (i < work_alloc
&& scat != &rm->data.op_sg[rm->data.op_count]) {
- len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off);
+ len = min(RDS_FRAG_SIZE,
+ ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
send->s_wr.num_sge = 2;
- send->s_sge[1].addr = ib_sg_dma_address(dev, scat) + off;
+ send->s_sge[1].addr = ib_sg_dma_address(dev, scat);
+ send->s_sge[1].addr += rm->data.op_dmaoff;
send->s_sge[1].length = len;
bytes_sent += len;
- off += len;
- if (off == ib_sg_dma_len(dev, scat)) {
+ rm->data.op_dmaoff += len;
+ if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) {
scat++;
- off = 0;
+ rm->data.op_dmasg++;
+ rm->data.op_dmaoff = 0;
}
}
diff --git a/net/rds/info.c b/net/rds/info.c
index 9a6b4f661..140a44a5f 100644
--- a/net/rds/info.c
+++ b/net/rds/info.c
@@ -176,7 +176,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
/* check for all kinds of wrapping and the like */
start = (unsigned long)optval;
- if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) {
+ if (len < 0 || len > INT_MAX - PAGE_SIZE + 1 || start + len < start) {
ret = -EINVAL;
goto out;
}
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index a6c2bea9f..8f486fa32 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -179,6 +179,7 @@ static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr,
void *context)
{
struct ib_device *dev = rds_iwdev->dev;
+ struct ib_cq_init_attr cq_attr = {};
unsigned int send_size, recv_size;
int ret;
@@ -198,9 +199,10 @@ static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr,
attr->sq_sig_type = IB_SIGNAL_REQ_WR;
attr->qp_type = IB_QPT_RC;
+ cq_attr.cqe = send_size;
attr->send_cq = ib_create_cq(dev, send_cq_handler,
rds_iw_cq_event_handler,
- context, send_size, 0);
+ context, &cq_attr);
if (IS_ERR(attr->send_cq)) {
ret = PTR_ERR(attr->send_cq);
attr->send_cq = NULL;
@@ -208,9 +210,10 @@ static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr,
goto out;
}
+ cq_attr.cqe = recv_size;
attr->recv_cq = ib_create_cq(dev, recv_cq_handler,
rds_iw_cq_event_handler,
- context, recv_size, 0);
+ context, &cq_attr);
if (IS_ERR(attr->recv_cq)) {
ret = PTR_ERR(attr->recv_cq);
attr->recv_cq = NULL;
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 13834780a..334fe98c5 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -581,6 +581,8 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
rds_message_addref(rm);
+ rm->data.op_dmasg = 0;
+ rm->data.op_dmaoff = 0;
ic->i_rm = rm;
/* Finalize the header */
@@ -622,7 +624,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
send = &ic->i_sends[pos];
first = send;
prev = NULL;
- scat = &rm->data.op_sg[sg];
+ scat = &rm->data.op_sg[rm->data.op_dmasg];
sent = 0;
i = 0;
@@ -656,10 +658,11 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
send = &ic->i_sends[pos];
- len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off);
+ len = min(RDS_FRAG_SIZE,
+ ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
rds_iw_xmit_populate_wr(ic, send, pos,
- ib_sg_dma_address(dev, scat) + off, len,
- send_flags);
+ ib_sg_dma_address(dev, scat) + rm->data.op_dmaoff, len,
+ send_flags);
/*
* We want to delay signaling completions just enough to get
@@ -687,10 +690,11 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
&send->s_wr, send->s_wr.num_sge, send->s_wr.next);
sent += len;
- off += len;
- if (off == ib_sg_dma_len(dev, scat)) {
+ rm->data.op_dmaoff += len;
+ if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) {
scat++;
- off = 0;
+ rm->data.op_dmaoff = 0;
+ rm->data.op_dmasg++;
}
add_header:
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 6cd9d1dea..208240836 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -37,34 +37,6 @@
static struct rdma_cm_id *rds_rdma_listen_id;
-static char *rds_cm_event_strings[] = {
-#define RDS_CM_EVENT_STRING(foo) \
- [RDMA_CM_EVENT_##foo] = __stringify(RDMA_CM_EVENT_##foo)
- RDS_CM_EVENT_STRING(ADDR_RESOLVED),
- RDS_CM_EVENT_STRING(ADDR_ERROR),
- RDS_CM_EVENT_STRING(ROUTE_RESOLVED),
- RDS_CM_EVENT_STRING(ROUTE_ERROR),
- RDS_CM_EVENT_STRING(CONNECT_REQUEST),
- RDS_CM_EVENT_STRING(CONNECT_RESPONSE),
- RDS_CM_EVENT_STRING(CONNECT_ERROR),
- RDS_CM_EVENT_STRING(UNREACHABLE),
- RDS_CM_EVENT_STRING(REJECTED),
- RDS_CM_EVENT_STRING(ESTABLISHED),
- RDS_CM_EVENT_STRING(DISCONNECTED),
- RDS_CM_EVENT_STRING(DEVICE_REMOVAL),
- RDS_CM_EVENT_STRING(MULTICAST_JOIN),
- RDS_CM_EVENT_STRING(MULTICAST_ERROR),
- RDS_CM_EVENT_STRING(ADDR_CHANGE),
- RDS_CM_EVENT_STRING(TIMEWAIT_EXIT),
-#undef RDS_CM_EVENT_STRING
-};
-
-static char *rds_cm_event_str(enum rdma_cm_event_type type)
-{
- return rds_str_array(rds_cm_event_strings,
- ARRAY_SIZE(rds_cm_event_strings), type);
-};
-
int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
@@ -74,7 +46,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
int ret = 0;
rdsdebug("conn %p id %p handling event %u (%s)\n", conn, cm_id,
- event->event, rds_cm_event_str(event->event));
+ event->event, rdma_event_msg(event->event));
if (cm_id->device->node_type == RDMA_NODE_RNIC)
trans = &rds_iw_transport;
@@ -139,7 +111,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
default:
/* things like device disconnect? */
printk(KERN_ERR "RDS: unknown event %u (%s)!\n",
- event->event, rds_cm_event_str(event->event));
+ event->event, rdma_event_msg(event->event));
break;
}
@@ -148,7 +120,7 @@ out:
mutex_unlock(&conn->c_cm_lock);
rdsdebug("id %p event %u (%s) handling ret %d\n", cm_id, event->event,
- rds_cm_event_str(event->event), ret);
+ rdma_event_msg(event->event), ret);
return ret;
}
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 0d41155a2..2260c1e43 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -363,6 +363,8 @@ struct rds_message {
unsigned int op_active:1;
unsigned int op_nents;
unsigned int op_count;
+ unsigned int op_dmasg;
+ unsigned int op_dmaoff;
struct scatterlist *op_sg;
} data;
};
@@ -408,11 +410,6 @@ struct rds_notifier {
* should try hard not to block.
*/
-#define RDS_TRANS_IB 0
-#define RDS_TRANS_IWARP 1
-#define RDS_TRANS_TCP 2
-#define RDS_TRANS_COUNT 3
-
struct rds_transport {
char t_name[TRANSNAMSIZ];
struct list_head t_item;
@@ -575,7 +572,6 @@ struct rds_statistics {
};
/* af_rds.c */
-char *rds_str_array(char **array, size_t elements, size_t index);
void rds_sock_addref(struct rds_sock *rs);
void rds_sock_put(struct rds_sock *rs);
void rds_wake_sk_sleep(struct rds_sock *rs);
@@ -803,6 +799,7 @@ struct rds_transport *rds_trans_get_preferred(__be32 addr);
void rds_trans_put(struct rds_transport *trans);
unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
unsigned int avail);
+struct rds_transport *rds_trans_get(int t_type);
int rds_trans_init(void);
void rds_trans_exit(void);
diff --git a/net/rds/transport.c b/net/rds/transport.c
index 7f2ac4fec..83498e1c7 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -73,7 +73,7 @@ EXPORT_SYMBOL_GPL(rds_trans_unregister);
void rds_trans_put(struct rds_transport *trans)
{
- if (trans && trans->t_owner)
+ if (trans)
module_put(trans->t_owner);
}
@@ -101,6 +101,27 @@ struct rds_transport *rds_trans_get_preferred(__be32 addr)
return ret;
}
+struct rds_transport *rds_trans_get(int t_type)
+{
+ struct rds_transport *ret = NULL;
+ struct rds_transport *trans;
+ unsigned int i;
+
+ down_read(&rds_trans_sem);
+ for (i = 0; i < RDS_TRANS_COUNT; i++) {
+ trans = transports[i];
+
+ if (trans && trans->t_type == t_type &&
+ (!trans->t_owner || try_module_get(trans->t_owner))) {
+ ret = trans;
+ break;
+ }
+ }
+ up_read(&rds_trans_sem);
+
+ return ret;
+}
+
/*
* This returns the number of stats entries in the snapshot and only
* copies them using the iter if there is enough space for them. The
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index fa7cd7927..f12149a29 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -794,7 +794,8 @@ void rfkill_resume_polling(struct rfkill *rfkill)
}
EXPORT_SYMBOL(rfkill_resume_polling);
-static int rfkill_suspend(struct device *dev, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int rfkill_suspend(struct device *dev)
{
struct rfkill *rfkill = to_rfkill(dev);
@@ -818,13 +819,18 @@ static int rfkill_resume(struct device *dev)
return 0;
}
+static SIMPLE_DEV_PM_OPS(rfkill_pm_ops, rfkill_suspend, rfkill_resume);
+#define RFKILL_PM_OPS (&rfkill_pm_ops)
+#else
+#define RFKILL_PM_OPS NULL
+#endif
+
static struct class rfkill_class = {
.name = "rfkill",
.dev_release = rfkill_release,
.dev_groups = rfkill_dev_groups,
.dev_uevent = rfkill_dev_uevent,
- .suspend = rfkill_suspend,
- .resume = rfkill_resume,
+ .pm = RFKILL_PM_OPS,
};
bool rfkill_blocked(struct rfkill *rfkill)
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index d978f2f46..d5d58d919 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -112,21 +112,17 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
rfkill->clk = devm_clk_get(&pdev->dev, NULL);
- gpio = devm_gpiod_get(&pdev->dev, "reset");
- if (!IS_ERR(gpio)) {
- ret = gpiod_direction_output(gpio, 0);
- if (ret)
- return ret;
- rfkill->reset_gpio = gpio;
- }
+ gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_OUT_LOW);
+ if (IS_ERR(gpio))
+ return PTR_ERR(gpio);
- gpio = devm_gpiod_get(&pdev->dev, "shutdown");
- if (!IS_ERR(gpio)) {
- ret = gpiod_direction_output(gpio, 0);
- if (ret)
- return ret;
- rfkill->shutdown_gpio = gpio;
- }
+ rfkill->reset_gpio = gpio;
+
+ gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_OUT_LOW);
+ if (IS_ERR(gpio))
+ return PTR_ERR(gpio);
+
+ rfkill->shutdown_gpio = gpio;
/* Make sure at-least one of the GPIO is defined and that
* a name is specified for this instance
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 8ae603069..129d357d2 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -192,7 +192,8 @@ static void rose_kill_by_device(struct net_device *dev)
if (rose->device == dev) {
rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0);
- rose->neighbour->use--;
+ if (rose->neighbour)
+ rose->neighbour->use--;
rose->device = NULL;
}
}
@@ -520,7 +521,7 @@ static int rose_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_SEQPACKET || protocol != 0)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto);
+ sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, kern);
if (sk == NULL)
return -ENOMEM;
@@ -559,7 +560,7 @@ static struct sock *rose_make_new(struct sock *osk)
if (osk->sk_type != SOCK_SEQPACKET)
return NULL;
- sk = sk_alloc(sock_net(osk), PF_ROSE, GFP_ATOMIC, &rose_proto);
+ sk = sk_alloc(sock_net(osk), PF_ROSE, GFP_ATOMIC, &rose_proto, 0);
if (sk == NULL)
return NULL;
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index e873d7d9f..c76638cc2 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -25,7 +25,6 @@
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
-#include <linux/netfilter.h>
#include <net/rose.h>
static void rose_ftimer_expiry(unsigned long);
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 40148932c..0fc76d845 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -31,7 +31,6 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
-#include <linux/netfilter.h>
#include <linux/init.h>
#include <net/rose.h>
#include <linux/seq_file.h>
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 0095b9a0b..25d60ed15 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -632,7 +632,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
sock->ops = &rxrpc_rpc_ops;
sock->state = SS_UNCONNECTED;
- sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto);
+ sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c
index ca904ed54..78483b460 100644
--- a/net/rxrpc/ar-local.c
+++ b/net/rxrpc/ar-local.c
@@ -73,8 +73,8 @@ static int rxrpc_create_local(struct rxrpc_local *local)
_enter("%p{%d}", local, local->srx.transport_type);
/* create a socket to represent the local endpoint */
- ret = sock_create_kern(PF_INET, local->srx.transport_type, IPPROTO_UDP,
- &local->socket);
+ ret = sock_create_kern(&init_net, PF_INET, local->srx.transport_type,
+ IPPROTO_UDP, &local->socket);
if (ret < 0) {
_leave(" = %d [socket]", ret);
return ret;
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2274e723a..daa33432b 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -312,6 +312,7 @@ config NET_SCH_PIE
config NET_SCH_INGRESS
tristate "Ingress Qdisc"
depends on NET_CLS_ACT
+ select NET_INGRESS
---help---
Say Y here if you want to use classifiers for incoming packets.
If unsure, say Y.
@@ -477,6 +478,16 @@ config NET_CLS_BPF
To compile this code as a module, choose M here: the module will
be called cls_bpf.
+config NET_CLS_FLOWER
+ tristate "Flower classifier"
+ select NET_CLS
+ ---help---
+ If you say Y here, you will be able to classify packets based on
+ a configurable combination of packet keys and masks.
+
+ To compile this code as a module, choose M here: the module will
+ be called cls_flower.
+
config NET_EMATCH
bool "Extended Matches"
select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 7ca7f4c1b..690c1689e 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o
obj-$(CONFIG_NET_CLS_BPF) += cls_bpf.o
+obj-$(CONFIG_NET_CLS_FLOWER) += cls_flower.o
obj-$(CONFIG_NET_EMATCH) += ematch.o
obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o
obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 3d43e4979..43ec92680 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -45,7 +45,7 @@ void tcf_hash_destroy(struct tc_action *a)
}
EXPORT_SYMBOL(tcf_hash_destroy);
-int tcf_hash_release(struct tc_action *a, int bind)
+int __tcf_hash_release(struct tc_action *a, bool bind, bool strict)
{
struct tcf_common *p = a->priv;
int ret = 0;
@@ -53,7 +53,7 @@ int tcf_hash_release(struct tc_action *a, int bind)
if (p) {
if (bind)
p->tcfc_bindcnt--;
- else if (p->tcfc_bindcnt > 0)
+ else if (strict && p->tcfc_bindcnt > 0)
return -EPERM;
p->tcfc_refcnt--;
@@ -64,9 +64,10 @@ int tcf_hash_release(struct tc_action *a, int bind)
ret = 1;
}
}
+
return ret;
}
-EXPORT_SYMBOL(tcf_hash_release);
+EXPORT_SYMBOL(__tcf_hash_release);
static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
struct tc_action *a)
@@ -136,7 +137,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a)
head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
hlist_for_each_entry_safe(p, n, head, tcfc_head) {
a->priv = p;
- ret = tcf_hash_release(a, 0);
+ ret = __tcf_hash_release(a, false, true);
if (ret == ACT_P_DELETED) {
module_put(a->ops->owner);
n_i++;
@@ -392,11 +393,6 @@ int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
list_for_each_entry(a, actions, list) {
repeat:
ret = a->ops->act(skb, a, res);
- if (TC_MUNGED & skb->tc_verd) {
- /* copied already, allow trampling */
- skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
- skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
- }
if (ret == TC_ACT_REPEAT)
goto repeat; /* we need a ttl - JHS */
if (ret != TC_ACT_PIPE)
@@ -413,7 +409,7 @@ int tcf_action_destroy(struct list_head *actions, int bind)
int ret = 0;
list_for_each_entry_safe(a, tmp, actions, list) {
- ret = tcf_hash_release(a, bind);
+ ret = __tcf_hash_release(a, bind, true);
if (ret == ACT_P_DELETED)
module_put(a->ops->owner);
else if (ret < 0)
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index dc6a2d324..d0edeb7a1 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -27,9 +27,10 @@
struct tcf_bpf_cfg {
struct bpf_prog *filter;
struct sock_filter *bpf_ops;
- char *bpf_name;
+ const char *bpf_name;
u32 bpf_fd;
u16 bpf_num_ops;
+ bool is_ebpf;
};
static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
@@ -37,6 +38,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
{
struct tcf_bpf *prog = act->priv;
int action, filter_res;
+ bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
if (unlikely(!skb_mac_header_was_set(skb)))
return TC_ACT_UNSPEC;
@@ -48,7 +50,13 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
/* Needed here for accessing maps. */
rcu_read_lock();
- filter_res = BPF_PROG_RUN(prog->filter, skb);
+ if (at_ingress) {
+ __skb_push(skb, skb->mac_len);
+ filter_res = BPF_PROG_RUN(prog->filter, skb);
+ __skb_pull(skb, skb->mac_len);
+ } else {
+ filter_res = BPF_PROG_RUN(prog->filter, skb);
+ }
rcu_read_unlock();
/* A BPF program may overwrite the default action opcode.
@@ -200,6 +208,7 @@ static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
cfg->bpf_ops = bpf_ops;
cfg->bpf_num_ops = bpf_num_ops;
cfg->filter = fp;
+ cfg->is_ebpf = false;
return 0;
}
@@ -234,18 +243,40 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
cfg->bpf_fd = bpf_fd;
cfg->bpf_name = name;
cfg->filter = fp;
+ cfg->is_ebpf = true;
return 0;
}
+static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg)
+{
+ if (cfg->is_ebpf)
+ bpf_prog_put(cfg->filter);
+ else
+ bpf_prog_destroy(cfg->filter);
+
+ kfree(cfg->bpf_ops);
+ kfree(cfg->bpf_name);
+}
+
+static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
+ struct tcf_bpf_cfg *cfg)
+{
+ cfg->is_ebpf = tcf_bpf_is_ebpf(prog);
+ cfg->filter = prog->filter;
+
+ cfg->bpf_ops = prog->bpf_ops;
+ cfg->bpf_name = prog->bpf_name;
+}
+
static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action *act,
int replace, int bind)
{
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
+ struct tcf_bpf_cfg cfg, old;
struct tc_act_bpf *parm;
struct tcf_bpf *prog;
- struct tcf_bpf_cfg cfg;
bool is_bpf, is_ebpf;
int ret;
@@ -294,6 +325,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
prog = to_bpf(act);
spin_lock_bh(&prog->tcf_lock);
+ if (ret != ACT_P_CREATED)
+ tcf_bpf_prog_fill_cfg(prog, &old);
+
prog->bpf_ops = cfg.bpf_ops;
prog->bpf_name = cfg.bpf_name;
@@ -309,29 +343,22 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (ret == ACT_P_CREATED)
tcf_hash_insert(act);
+ else
+ tcf_bpf_cfg_cleanup(&old);
return ret;
destroy_fp:
- if (is_ebpf)
- bpf_prog_put(cfg.filter);
- else
- bpf_prog_destroy(cfg.filter);
-
- kfree(cfg.bpf_ops);
- kfree(cfg.bpf_name);
-
+ tcf_bpf_cfg_cleanup(&cfg);
return ret;
}
static void tcf_bpf_cleanup(struct tc_action *act, int bind)
{
- const struct tcf_bpf *prog = act->priv;
+ struct tcf_bpf_cfg tmp;
- if (tcf_bpf_is_ebpf(prog))
- bpf_prog_put(prog->filter);
- else
- bpf_prog_destroy(prog->filter);
+ tcf_bpf_prog_fill_cfg(act->priv, &tmp);
+ tcf_bpf_cfg_cleanup(&tmp);
}
static struct tc_action_ops act_bpf_ops __read_mostly = {
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 3f63ceac8..268545050 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -98,6 +98,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
return ret;
ret = ACT_P_CREATED;
} else {
+ if (bind)
+ return 0;
if (!ovr) {
tcf_hash_release(a, bind);
return -EEXIST;
@@ -151,7 +153,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
}
at = G_TC_AT(skb->tc_verd);
- skb2 = skb_act_clone(skb, GFP_ATOMIC, m->tcf_action);
+ skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2 == NULL)
goto out;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 59649d588..ff8b466a7 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -68,13 +68,12 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
}
ret = ACT_P_CREATED;
} else {
- p = to_pedit(a);
- tcf_hash_release(a, bind);
if (bind)
return 0;
+ tcf_hash_release(a, bind);
if (!ovr)
return -EEXIST;
-
+ p = to_pedit(a);
if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) {
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL)
@@ -108,7 +107,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_pedit *p = a->priv;
- int i, munged = 0;
+ int i;
unsigned int off;
if (skb_unclone(skb, GFP_ATOMIC))
@@ -156,11 +155,8 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
*ptr = ((*ptr & tkey->mask) ^ tkey->val);
if (ptr == &_data)
skb_store_bits(skb, off + offset, ptr, 4);
- munged++;
}
- if (munged)
- skb->tc_verd = SET_TC_MUNGED(skb->tc_verd);
goto done;
} else
WARN(1, "pedit BUG: index %d\n", p->tcf_index);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 91bd9c194..e5168f8b9 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -64,6 +64,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
{
struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
struct cls_bpf_prog *prog;
+#ifdef CONFIG_NET_CLS_ACT
+ bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
+#else
+ bool at_ingress = false;
+#endif
int ret = -1;
if (unlikely(!skb_mac_header_was_set(skb)))
@@ -72,7 +77,16 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
/* Needed here for accessing maps. */
rcu_read_lock();
list_for_each_entry_rcu(prog, &head->plist, link) {
- int filter_res = BPF_PROG_RUN(prog->filter, skb);
+ int filter_res;
+
+ if (at_ingress) {
+ /* It is safe to push/pull even if skb_shared() */
+ __skb_push(skb, skb->mac_len);
+ filter_res = BPF_PROG_RUN(prog->filter, skb);
+ __skb_pull(skb, skb->mac_len);
+ } else {
+ filter_res = BPF_PROG_RUN(prog->filter, skb);
+ }
if (filter_res == 0)
continue;
@@ -364,7 +378,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
goto errout;
if (oldprog) {
- list_replace_rcu(&prog->link, &oldprog->link);
+ list_replace_rcu(&oldprog->link, &prog->link);
tcf_unbind_filter(tp, &oldprog->res);
call_rcu(&oldprog->rcu, __cls_bpf_delete_prog);
} else {
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index a620c4e28..bb2a0f529 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -26,7 +26,7 @@
#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/route.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack.h>
@@ -68,35 +68,41 @@ static inline u32 addr_fold(void *addr)
static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
- if (flow->src)
- return ntohl(flow->src);
+ __be32 src = flow_get_u32_src(flow);
+
+ if (src)
+ return ntohl(src);
+
return addr_fold(skb->sk);
}
static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
- if (flow->dst)
- return ntohl(flow->dst);
+ __be32 dst = flow_get_u32_dst(flow);
+
+ if (dst)
+ return ntohl(dst);
+
return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
}
static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
{
- return flow->ip_proto;
+ return flow->basic.ip_proto;
}
static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
- if (flow->ports)
- return ntohs(flow->port16[0]);
+ if (flow->ports.ports)
+ return ntohs(flow->ports.src);
return addr_fold(skb->sk);
}
static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
- if (flow->ports)
- return ntohs(flow->port16[1]);
+ if (flow->ports.ports)
+ return ntohs(flow->ports.dst);
return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
}
@@ -295,7 +301,7 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
keymask = f->keymask;
if (keymask & FLOW_KEYS_NEEDED)
- skb_flow_dissect(skb, &flow_keys);
+ skb_flow_dissect_flow_keys(skb, &flow_keys);
for (n = 0; n < f->nkeys; n++) {
key = ffs(keymask) - 1;
@@ -419,6 +425,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (!fnew)
goto err2;
+ tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+
fold = (struct flow_filter *)*arg;
if (fold) {
err = -EINVAL;
@@ -480,7 +488,6 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
fnew->mask = ~0U;
fnew->tp = tp;
get_random_bytes(&fnew->hashrnd, 4);
- tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
}
fnew->perturb_timer.function = flow_perturbation;
@@ -520,7 +527,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (*arg == 0)
list_add_tail_rcu(&fnew->list, &head->filters);
else
- list_replace_rcu(&fnew->list, &fold->list);
+ list_replace_rcu(&fold->list, &fnew->list);
*arg = (unsigned long)fnew;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
new file mode 100644
index 000000000..2f3d03f99
--- /dev/null
+++ b/net/sched/cls_flower.c
@@ -0,0 +1,691 @@
+/*
+ * net/sched/cls_flower.c Flower classifier
+ *
+ * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/rhashtable.h>
+
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+
+#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
+#include <net/ip.h>
+#include <net/flow_dissector.h>
+
+struct fl_flow_key {
+ int indev_ifindex;
+ struct flow_dissector_key_control control;
+ struct flow_dissector_key_basic basic;
+ struct flow_dissector_key_eth_addrs eth;
+ struct flow_dissector_key_addrs ipaddrs;
+ union {
+ struct flow_dissector_key_ipv4_addrs ipv4;
+ struct flow_dissector_key_ipv6_addrs ipv6;
+ };
+ struct flow_dissector_key_ports tp;
+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
+
+struct fl_flow_mask_range {
+ unsigned short int start;
+ unsigned short int end;
+};
+
+struct fl_flow_mask {
+ struct fl_flow_key key;
+ struct fl_flow_mask_range range;
+ struct rcu_head rcu;
+};
+
+struct cls_fl_head {
+ struct rhashtable ht;
+ struct fl_flow_mask mask;
+ struct flow_dissector dissector;
+ u32 hgen;
+ bool mask_assigned;
+ struct list_head filters;
+ struct rhashtable_params ht_params;
+ struct rcu_head rcu;
+};
+
+struct cls_fl_filter {
+ struct rhash_head ht_node;
+ struct fl_flow_key mkey;
+ struct tcf_exts exts;
+ struct tcf_result res;
+ struct fl_flow_key key;
+ struct list_head list;
+ u32 handle;
+ struct rcu_head rcu;
+};
+
+static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
+{
+ return mask->range.end - mask->range.start;
+}
+
+static void fl_mask_update_range(struct fl_flow_mask *mask)
+{
+ const u8 *bytes = (const u8 *) &mask->key;
+ size_t size = sizeof(mask->key);
+ size_t i, first = 0, last = size - 1;
+
+ for (i = 0; i < sizeof(mask->key); i++) {
+ if (bytes[i]) {
+ if (!first && i)
+ first = i;
+ last = i;
+ }
+ }
+ mask->range.start = rounddown(first, sizeof(long));
+ mask->range.end = roundup(last + 1, sizeof(long));
+}
+
+static void *fl_key_get_start(struct fl_flow_key *key,
+ const struct fl_flow_mask *mask)
+{
+ return (u8 *) key + mask->range.start;
+}
+
+static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
+ struct fl_flow_mask *mask)
+{
+ const long *lkey = fl_key_get_start(key, mask);
+ const long *lmask = fl_key_get_start(&mask->key, mask);
+ long *lmkey = fl_key_get_start(mkey, mask);
+ int i;
+
+ for (i = 0; i < fl_mask_range(mask); i += sizeof(long))
+ *lmkey++ = *lkey++ & *lmask++;
+}
+
+static void fl_clear_masked_range(struct fl_flow_key *key,
+ struct fl_flow_mask *mask)
+{
+ memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
+}
+
+static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ struct cls_fl_head *head = rcu_dereference_bh(tp->root);
+ struct cls_fl_filter *f;
+ struct fl_flow_key skb_key;
+ struct fl_flow_key skb_mkey;
+
+ fl_clear_masked_range(&skb_key, &head->mask);
+ skb_key.indev_ifindex = skb->skb_iif;
+ /* skb_flow_dissect() does not set n_proto in case an unknown protocol,
+ * so do it rather here.
+ */
+ skb_key.basic.n_proto = skb->protocol;
+ skb_flow_dissect(skb, &head->dissector, &skb_key);
+
+ fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
+
+ f = rhashtable_lookup_fast(&head->ht,
+ fl_key_get_start(&skb_mkey, &head->mask),
+ head->ht_params);
+ if (f) {
+ *res = f->res;
+ return tcf_exts_exec(skb, &f->exts, res);
+ }
+ return -1;
+}
+
+static int fl_init(struct tcf_proto *tp)
+{
+ struct cls_fl_head *head;
+
+ head = kzalloc(sizeof(*head), GFP_KERNEL);
+ if (!head)
+ return -ENOBUFS;
+
+ INIT_LIST_HEAD_RCU(&head->filters);
+ rcu_assign_pointer(tp->root, head);
+
+ return 0;
+}
+
+static void fl_destroy_filter(struct rcu_head *head)
+{
+ struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
+
+ tcf_exts_destroy(&f->exts);
+ kfree(f);
+}
+
+static bool fl_destroy(struct tcf_proto *tp, bool force)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f, *next;
+
+ if (!force && !list_empty(&head->filters))
+ return false;
+
+ list_for_each_entry_safe(f, next, &head->filters, list) {
+ list_del_rcu(&f->list);
+ call_rcu(&f->rcu, fl_destroy_filter);
+ }
+ RCU_INIT_POINTER(tp->root, NULL);
+ if (head->mask_assigned)
+ rhashtable_destroy(&head->ht);
+ kfree_rcu(head, rcu);
+ return true;
+}
+
+static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f;
+
+ list_for_each_entry(f, &head->filters, list)
+ if (f->handle == handle)
+ return (unsigned long) f;
+ return 0;
+}
+
+static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
+ [TCA_FLOWER_UNSPEC] = { .type = NLA_UNSPEC },
+ [TCA_FLOWER_CLASSID] = { .type = NLA_U32 },
+ [TCA_FLOWER_INDEV] = { .type = NLA_STRING,
+ .len = IFNAMSIZ },
+ [TCA_FLOWER_KEY_ETH_DST] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ETH_DST_MASK] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ETH_SRC] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ETH_SRC_MASK] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ETH_TYPE] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_IP_PROTO] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_IPV4_SRC] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_IPV4_DST] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_IPV6_DST] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_UDP_SRC] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_UDP_DST] = { .type = NLA_U16 },
+};
+
+static void fl_set_key_val(struct nlattr **tb,
+ void *val, int val_type,
+ void *mask, int mask_type, int len)
+{
+ if (!tb[val_type])
+ return;
+ memcpy(val, nla_data(tb[val_type]), len);
+ if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
+ memset(mask, 0xff, len);
+ else
+ memcpy(mask, nla_data(tb[mask_type]), len);
+}
+
+static int fl_set_key(struct net *net, struct nlattr **tb,
+ struct fl_flow_key *key, struct fl_flow_key *mask)
+{
+#ifdef CONFIG_NET_CLS_IND
+ if (tb[TCA_FLOWER_INDEV]) {
+ int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
+ if (err < 0)
+ return err;
+ key->indev_ifindex = err;
+ mask->indev_ifindex = 0xffffffff;
+ }
+#endif
+
+ fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
+ mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
+ sizeof(key->eth.dst));
+ fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
+ mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
+ sizeof(key->eth.src));
+ fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
+ &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.n_proto));
+ if (key->basic.n_proto == htons(ETH_P_IP) ||
+ key->basic.n_proto == htons(ETH_P_IPV6)) {
+ fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
+ &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.ip_proto));
+ }
+ if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
+ &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
+ sizeof(key->ipv4.src));
+ fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
+ &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
+ sizeof(key->ipv4.dst));
+ } else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
+ &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+ sizeof(key->ipv6.src));
+ fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
+ &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
+ sizeof(key->ipv6.dst));
+ }
+ if (key->basic.ip_proto == IPPROTO_TCP) {
+ fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
+ &mask->tp.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.src));
+ fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
+ &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.dst));
+ } else if (key->basic.ip_proto == IPPROTO_UDP) {
+ fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
+ &mask->tp.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.src));
+ fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
+ &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.dst));
+ }
+
+ return 0;
+}
+
+static bool fl_mask_eq(struct fl_flow_mask *mask1,
+ struct fl_flow_mask *mask2)
+{
+ const long *lmask1 = fl_key_get_start(&mask1->key, mask1);
+ const long *lmask2 = fl_key_get_start(&mask2->key, mask2);
+
+ return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) &&
+ !memcmp(lmask1, lmask2, fl_mask_range(mask1));
+}
+
+static const struct rhashtable_params fl_ht_params = {
+ .key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */
+ .head_offset = offsetof(struct cls_fl_filter, ht_node),
+ .automatic_shrinking = true,
+};
+
+static int fl_init_hashtable(struct cls_fl_head *head,
+ struct fl_flow_mask *mask)
+{
+ head->ht_params = fl_ht_params;
+ head->ht_params.key_len = fl_mask_range(mask);
+ head->ht_params.key_offset += mask->range.start;
+
+ return rhashtable_init(&head->ht, &head->ht_params);
+}
+
+#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
+#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))
+#define FL_KEY_MEMBER_END_OFFSET(member) \
+ (FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member))
+
+#define FL_KEY_IN_RANGE(mask, member) \
+ (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end && \
+ FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start)
+
+#define FL_KEY_SET(keys, cnt, id, member) \
+ do { \
+ keys[cnt].key_id = id; \
+ keys[cnt].offset = FL_KEY_MEMBER_OFFSET(member); \
+ cnt++; \
+ } while(0);
+
+#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member) \
+ do { \
+ if (FL_KEY_IN_RANGE(mask, member)) \
+ FL_KEY_SET(keys, cnt, id, member); \
+ } while(0);
+
+static void fl_init_dissector(struct cls_fl_head *head,
+ struct fl_flow_mask *mask)
+{
+ struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
+ size_t cnt = 0;
+
+ FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
+ FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
+ FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
+ FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+ FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
+ FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_PORTS, tp);
+
+ skb_flow_dissector_init(&head->dissector, keys, cnt);
+}
+
+static int fl_check_assign_mask(struct cls_fl_head *head,
+ struct fl_flow_mask *mask)
+{
+ int err;
+
+ if (head->mask_assigned) {
+ if (!fl_mask_eq(&head->mask, mask))
+ return -EINVAL;
+ else
+ return 0;
+ }
+
+ /* Mask is not assigned yet. So assign it and init hashtable
+ * according to that.
+ */
+ err = fl_init_hashtable(head, mask);
+ if (err)
+ return err;
+ memcpy(&head->mask, mask, sizeof(head->mask));
+ head->mask_assigned = true;
+
+ fl_init_dissector(head, mask);
+
+ return 0;
+}
+
+static int fl_set_parms(struct net *net, struct tcf_proto *tp,
+ struct cls_fl_filter *f, struct fl_flow_mask *mask,
+ unsigned long base, struct nlattr **tb,
+ struct nlattr *est, bool ovr)
+{
+ struct tcf_exts e;
+ int err;
+
+ tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_FLOWER_CLASSID]) {
+ f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
+ tcf_bind_filter(tp, &f->res, base);
+ }
+
+ err = fl_set_key(net, tb, &f->key, &mask->key);
+ if (err)
+ goto errout;
+
+ fl_mask_update_range(mask);
+ fl_set_masked_key(&f->mkey, &f->key, mask);
+
+ tcf_exts_change(tp, &f->exts, &e);
+
+ return 0;
+errout:
+ tcf_exts_destroy(&e);
+ return err;
+}
+
+static u32 fl_grab_new_handle(struct tcf_proto *tp,
+ struct cls_fl_head *head)
+{
+ unsigned int i = 0x80000000;
+ u32 handle;
+
+ do {
+ if (++head->hgen == 0x7FFFFFFF)
+ head->hgen = 1;
+ } while (--i > 0 && fl_get(tp, head->hgen));
+
+ if (unlikely(i == 0)) {
+ pr_err("Insufficient number of handles\n");
+ handle = 0;
+ } else {
+ handle = head->hgen;
+ }
+
+ return handle;
+}
+
+static int fl_change(struct net *net, struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base,
+ u32 handle, struct nlattr **tca,
+ unsigned long *arg, bool ovr)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
+ struct cls_fl_filter *fnew;
+ struct nlattr *tb[TCA_FLOWER_MAX + 1];
+ struct fl_flow_mask mask = {};
+ int err;
+
+ if (!tca[TCA_OPTIONS])
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy);
+ if (err < 0)
+ return err;
+
+ if (fold && handle && fold->handle != handle)
+ return -EINVAL;
+
+ fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
+ if (!fnew)
+ return -ENOBUFS;
+
+ tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
+
+ if (!handle) {
+ handle = fl_grab_new_handle(tp, head);
+ if (!handle) {
+ err = -EINVAL;
+ goto errout;
+ }
+ }
+ fnew->handle = handle;
+
+ err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
+ if (err)
+ goto errout;
+
+ err = fl_check_assign_mask(head, &mask);
+ if (err)
+ goto errout;
+
+ err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
+ head->ht_params);
+ if (err)
+ goto errout;
+ if (fold)
+ rhashtable_remove_fast(&head->ht, &fold->ht_node,
+ head->ht_params);
+
+ *arg = (unsigned long) fnew;
+
+ if (fold) {
+ list_replace_rcu(&fold->list, &fnew->list);
+ tcf_unbind_filter(tp, &fold->res);
+ call_rcu(&fold->rcu, fl_destroy_filter);
+ } else {
+ list_add_tail_rcu(&fnew->list, &head->filters);
+ }
+
+ return 0;
+
+errout:
+ kfree(fnew);
+ return err;
+}
+
+static int fl_delete(struct tcf_proto *tp, unsigned long arg)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f = (struct cls_fl_filter *) arg;
+
+ rhashtable_remove_fast(&head->ht, &f->ht_node,
+ head->ht_params);
+ list_del_rcu(&f->list);
+ tcf_unbind_filter(tp, &f->res);
+ call_rcu(&f->rcu, fl_destroy_filter);
+ return 0;
+}
+
+static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f;
+
+ list_for_each_entry_rcu(f, &head->filters, list) {
+ if (arg->count < arg->skip)
+ goto skip;
+ if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+skip:
+ arg->count++;
+ }
+}
+
+static int fl_dump_key_val(struct sk_buff *skb,
+ void *val, int val_type,
+ void *mask, int mask_type, int len)
+{
+ int err;
+
+ if (!memchr_inv(mask, 0, len))
+ return 0;
+ err = nla_put(skb, val_type, len, val);
+ if (err)
+ return err;
+ if (mask_type != TCA_FLOWER_UNSPEC) {
+ err = nla_put(skb, mask_type, len, mask);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+ struct sk_buff *skb, struct tcmsg *t)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f = (struct cls_fl_filter *) fh;
+ struct nlattr *nest;
+ struct fl_flow_key *key, *mask;
+
+ if (!f)
+ return skb->len;
+
+ t->tcm_handle = f->handle;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (!nest)
+ goto nla_put_failure;
+
+ if (f->res.classid &&
+ nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
+ goto nla_put_failure;
+
+ key = &f->key;
+ mask = &head->mask.key;
+
+ if (mask->indev_ifindex) {
+ struct net_device *dev;
+
+ dev = __dev_get_by_index(net, key->indev_ifindex);
+ if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name))
+ goto nla_put_failure;
+ }
+
+ if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
+ mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
+ sizeof(key->eth.dst)) ||
+ fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
+ mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
+ sizeof(key->eth.src)) ||
+ fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
+ &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.n_proto)))
+ goto nla_put_failure;
+ if ((key->basic.n_proto == htons(ETH_P_IP) ||
+ key->basic.n_proto == htons(ETH_P_IPV6)) &&
+ fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
+ &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.ip_proto)))
+ goto nla_put_failure;
+
+ if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
+ (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
+ &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
+ sizeof(key->ipv4.src)) ||
+ fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
+ &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
+ sizeof(key->ipv4.dst))))
+ goto nla_put_failure;
+ else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
+ (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
+ &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+ sizeof(key->ipv6.src)) ||
+ fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
+ &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
+ sizeof(key->ipv6.dst))))
+ goto nla_put_failure;
+
+ if (key->basic.ip_proto == IPPROTO_TCP &&
+ (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
+ &mask->tp.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.src)) ||
+ fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
+ &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.dst))))
+ goto nla_put_failure;
+ else if (key->basic.ip_proto == IPPROTO_UDP &&
+ (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
+ &mask->tp.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.src)) ||
+ fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
+ &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.dst))))
+ goto nla_put_failure;
+
+ if (tcf_exts_dump(skb, &f->exts))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ if (tcf_exts_dump_stats(skb, &f->exts) < 0)
+ goto nla_put_failure;
+
+ return skb->len;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static struct tcf_proto_ops cls_fl_ops __read_mostly = {
+ .kind = "flower",
+ .classify = fl_classify,
+ .init = fl_init,
+ .destroy = fl_destroy,
+ .get = fl_get,
+ .change = fl_change,
+ .delete = fl_delete,
+ .walk = fl_walk,
+ .dump = fl_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init cls_fl_init(void)
+{
+ return register_tcf_proto_ops(&cls_fl_ops);
+}
+
+static void __exit cls_fl_exit(void)
+{
+ unregister_tcf_proto_ops(&cls_fl_ops);
+}
+
+module_init(cls_fl_init);
+module_exit(cls_fl_exit);
+
+MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
+MODULE_DESCRIPTION("Flower classifier");
+MODULE_LICENSE("GPL v2");
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index cab9e9b43..4fbb67430 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -490,6 +490,19 @@ static bool u32_destroy(struct tcf_proto *tp, bool force)
return false;
}
}
+
+ if (tp_c->refcnt > 1)
+ return false;
+
+ if (tp_c->refcnt == 1) {
+ struct tc_u_hnode *ht;
+
+ for (ht = rtnl_dereference(tp_c->hlist);
+ ht;
+ ht = rtnl_dereference(ht->next))
+ if (!ht_empty(ht))
+ return false;
+ }
}
if (root_ht && --root_ht->refcnt == 0)
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index a3d79c8bf..df0328ba6 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -92,8 +92,8 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
rcu_read_lock();
- if (dev && skb->skb_iif)
- indev = dev_get_by_index_rcu(dev_net(dev), skb->skb_iif);
+ if (skb->skb_iif)
+ indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
acpar.in = indev ? indev : dev;
acpar.out = dev;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 1e1c89e51..f06aa01d6 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1818,13 +1818,8 @@ int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
continue;
err = tp->classify(skb, tp, res);
- if (err >= 0) {
-#ifdef CONFIG_NET_CLS_ACT
- if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
- skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
-#endif
+ if (err >= 0)
return err;
- }
}
return -1;
}
@@ -1836,23 +1831,22 @@ int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
int err = 0;
#ifdef CONFIG_NET_CLS_ACT
const struct tcf_proto *otp = tp;
+ int limit = 0;
reclassify:
#endif
err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
if (err == TC_ACT_RECLASSIFY) {
- u32 verd = G_TC_VERD(skb->tc_verd);
tp = otp;
- if (verd++ >= MAX_REC_LOOP) {
+ if (unlikely(limit++ >= MAX_REC_LOOP)) {
net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n",
tp->q->ops->id,
tp->prio & 0xffff,
ntohs(tp->protocol));
return TC_ACT_SHOT;
}
- skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
goto reclassify;
}
#endif
@@ -1885,13 +1879,10 @@ EXPORT_SYMBOL(tcf_destroy_chain);
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
- struct timespec ts;
-
- hrtimer_get_res(CLOCK_MONOTONIC, &ts);
seq_printf(seq, "%08x %08x %08x %08x\n",
(u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
1000000,
- (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
+ (u32)NSEC_PER_SEC / hrtimer_resolution);
return 0;
}
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index c009eb904..6a783afe4 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -18,7 +18,7 @@
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/red.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
/*
CHOKe stateless AQM for fair bandwidth allocation
@@ -133,16 +133,10 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
--sch->q.qlen;
}
-/* private part of skb->cb[] that a qdisc is allowed to use
- * is limited to QDISC_CB_PRIV_LEN bytes.
- * As a flow key might be too large, we store a part of it only.
- */
-#define CHOKE_K_LEN min_t(u32, sizeof(struct flow_keys), QDISC_CB_PRIV_LEN - 3)
-
struct choke_skb_cb {
u16 classid;
u8 keys_valid;
- u8 keys[QDISC_CB_PRIV_LEN - 3];
+ struct flow_keys_digest keys;
};
static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
@@ -176,19 +170,19 @@ static bool choke_match_flow(struct sk_buff *skb1,
if (!choke_skb_cb(skb1)->keys_valid) {
choke_skb_cb(skb1)->keys_valid = 1;
- skb_flow_dissect(skb1, &temp);
- memcpy(&choke_skb_cb(skb1)->keys, &temp, CHOKE_K_LEN);
+ skb_flow_dissect_flow_keys(skb1, &temp);
+ make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
}
if (!choke_skb_cb(skb2)->keys_valid) {
choke_skb_cb(skb2)->keys_valid = 1;
- skb_flow_dissect(skb2, &temp);
- memcpy(&choke_skb_cb(skb2)->keys, &temp, CHOKE_K_LEN);
+ skb_flow_dissect_flow_keys(skb2, &temp);
+ make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
}
return !memcmp(&choke_skb_cb(skb1)->keys,
&choke_skb_cb(skb2)->keys,
- CHOKE_K_LEN);
+ sizeof(choke_skb_cb(skb1)->keys));
}
/*
@@ -391,6 +385,19 @@ static void choke_reset(struct Qdisc *sch)
{
struct choke_sched_data *q = qdisc_priv(sch);
+ while (q->head != q->tail) {
+ struct sk_buff *skb = q->tab[q->head];
+
+ q->head = (q->head + 1) & q->tab_mask;
+ if (!skb)
+ continue;
+ qdisc_qstats_backlog_dec(sch, skb);
+ --sch->q.qlen;
+ qdisc_drop(skb, sch);
+ }
+
+ memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
+ q->head = q->tail = 0;
red_restart(&q->vars);
}
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 7a0bdb16a..535007d5f 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -6,7 +6,7 @@
*
* Implemented on linux by :
* Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
- * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -109,6 +109,7 @@ static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = {
[TCA_CODEL_LIMIT] = { .type = NLA_U32 },
[TCA_CODEL_INTERVAL] = { .type = NLA_U32 },
[TCA_CODEL_ECN] = { .type = NLA_U32 },
+ [TCA_CODEL_CE_THRESHOLD]= { .type = NLA_U32 },
};
static int codel_change(struct Qdisc *sch, struct nlattr *opt)
@@ -133,6 +134,12 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
}
+ if (tb[TCA_CODEL_CE_THRESHOLD]) {
+ u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
+
+ q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+ }
+
if (tb[TCA_CODEL_INTERVAL]) {
u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
@@ -201,7 +208,10 @@ static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_CODEL_ECN,
q->params.ecn))
goto nla_put_failure;
-
+ if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD &&
+ nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD,
+ codel_time_to_us(q->params.ce_threshold)))
+ goto nla_put_failure;
return nla_nest_end(skb, opts);
nla_put_failure:
@@ -220,6 +230,7 @@ static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
.ldelay = codel_time_to_us(q->vars.ldelay),
.dropping = q->vars.dropping,
.ecn_mark = q->stats.ecn_mark,
+ .ce_mark = q->stats.ce_mark,
};
if (q->vars.dropping) {
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index c244c45b7..a9ba03043 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -6,7 +6,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
*/
#include <linux/module.h>
@@ -23,7 +23,6 @@
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
-#include <net/flow_keys.h>
#include <net/codel.h>
/* Fair Queue CoDel.
@@ -68,15 +67,9 @@ struct fq_codel_sched_data {
};
static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
- const struct sk_buff *skb)
+ struct sk_buff *skb)
{
- struct flow_keys keys;
- unsigned int hash;
-
- skb_flow_dissect(skb, &keys);
- hash = jhash_3words((__force u32)keys.dst,
- (__force u32)keys.src ^ keys.ip_proto,
- (__force u32)keys.ports, q->perturbation);
+ u32 hash = skb_get_hash_perturb(skb, q->perturbation);
return reciprocal_scale(hash, q->flows_cnt);
}
@@ -162,14 +155,23 @@ static unsigned int fq_codel_drop(struct Qdisc *sch)
skb = dequeue_head(flow);
len = qdisc_pkt_len(skb);
q->backlogs[idx] -= len;
- kfree_skb(skb);
sch->q.qlen--;
qdisc_qstats_drop(sch);
qdisc_qstats_backlog_dec(sch, skb);
+ kfree_skb(skb);
flow->dropped++;
return idx;
}
+static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch)
+{
+ unsigned int prev_backlog;
+
+ prev_backlog = sch->qstats.backlog;
+ fq_codel_drop(sch);
+ return prev_backlog - sch->qstats.backlog;
+}
+
static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
@@ -286,10 +288,26 @@ begin:
static void fq_codel_reset(struct Qdisc *sch)
{
- struct sk_buff *skb;
+ struct fq_codel_sched_data *q = qdisc_priv(sch);
+ int i;
- while ((skb = fq_codel_dequeue(sch)) != NULL)
- kfree_skb(skb);
+ INIT_LIST_HEAD(&q->new_flows);
+ INIT_LIST_HEAD(&q->old_flows);
+ for (i = 0; i < q->flows_cnt; i++) {
+ struct fq_codel_flow *flow = q->flows + i;
+
+ while (flow->head) {
+ struct sk_buff *skb = dequeue_head(flow);
+
+ qdisc_qstats_backlog_dec(sch, skb);
+ kfree_skb(skb);
+ }
+
+ INIT_LIST_HEAD(&flow->flowchain);
+ codel_vars_init(&flow->cvars);
+ }
+ memset(q->backlogs, 0, q->flows_cnt * sizeof(u32));
+ sch->q.qlen = 0;
}
static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
@@ -299,6 +317,7 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
[TCA_FQ_CODEL_ECN] = { .type = NLA_U32 },
[TCA_FQ_CODEL_FLOWS] = { .type = NLA_U32 },
[TCA_FQ_CODEL_QUANTUM] = { .type = NLA_U32 },
+ [TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 },
};
static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
@@ -329,6 +348,12 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT;
}
+ if (tb[TCA_FQ_CODEL_CE_THRESHOLD]) {
+ u64 val = nla_get_u32(tb[TCA_FQ_CODEL_CE_THRESHOLD]);
+
+ q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+ }
+
if (tb[TCA_FQ_CODEL_INTERVAL]) {
u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]);
@@ -448,6 +473,11 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
q->flows_cnt))
goto nla_put_failure;
+ if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD &&
+ nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD,
+ codel_time_to_us(q->cparams.ce_threshold)))
+ goto nla_put_failure;
+
return nla_nest_end(skb, opts);
nla_put_failure:
@@ -466,6 +496,7 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
st.qdisc_stats.drop_overlimit = q->drop_overlimit;
st.qdisc_stats.ecn_mark = q->cstats.ecn_mark;
st.qdisc_stats.new_flow_count = q->new_flow_count;
+ st.qdisc_stats.ce_mark = q->cstats.ce_mark;
list_for_each(pos, &q->new_flows)
st.qdisc_stats.new_flows_len++;
@@ -598,7 +629,7 @@ static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
.enqueue = fq_codel_enqueue,
.dequeue = fq_codel_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = fq_codel_drop,
+ .drop = fq_codel_qdisc_drop,
.init = fq_codel_init,
.reset = fq_codel_reset,
.destroy = fq_codel_destroy,
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 634529e0c..abb9f2fec 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -165,7 +165,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
* if no default DP has been configured. This
* allows for DP flows to be left untouched.
*/
- if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len)
+ if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <=
+ sch->limit))
return qdisc_enqueue_tail(skb, sch);
else
goto drop;
@@ -397,7 +398,10 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
q->DP = dp;
q->prio = prio;
- q->limit = ctl->limit;
+ if (ctl->limit > sch->limit)
+ q->limit = sch->limit;
+ else
+ q->limit = ctl->limit;
if (q->backlog == 0)
red_end_of_idle_period(&q->vars);
@@ -414,6 +418,7 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
[TCA_GRED_STAB] = { .len = 256 },
[TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) },
[TCA_GRED_MAX_P] = { .type = NLA_U32 },
+ [TCA_GRED_LIMIT] = { .type = NLA_U32 },
};
static int gred_change(struct Qdisc *sch, struct nlattr *opt)
@@ -433,11 +438,15 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
if (err < 0)
return err;
- if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL)
+ if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) {
+ if (tb[TCA_GRED_LIMIT] != NULL)
+ sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
return gred_change_table_def(sch, opt);
+ }
if (tb[TCA_GRED_PARMS] == NULL ||
- tb[TCA_GRED_STAB] == NULL)
+ tb[TCA_GRED_STAB] == NULL ||
+ tb[TCA_GRED_LIMIT] != NULL)
return -EINVAL;
max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;
@@ -501,6 +510,14 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB])
return -EINVAL;
+ if (tb[TCA_GRED_LIMIT])
+ sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
+ else {
+ u32 qlen = qdisc_dev(sch)->tx_queue_len ? : 1;
+
+ sch->limit = qlen * psched_mtu(qdisc_dev(sch));
+ }
+
return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
}
@@ -531,6 +548,9 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p))
goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit))
+ goto nla_put_failure;
+
parms = nla_nest_start(skb, TCA_GRED_PARMS);
if (parms == NULL)
goto nla_put_failure;
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 15d3aabfe..9d15cb6b8 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -9,7 +9,6 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
-#include <net/flow_keys.h>
#include <net/pkt_sched.h>
#include <net/sock.h>
@@ -176,22 +175,6 @@ static u32 hhf_time_stamp(void)
return jiffies;
}
-static unsigned int skb_hash(const struct hhf_sched_data *q,
- const struct sk_buff *skb)
-{
- struct flow_keys keys;
- unsigned int hash;
-
- if (skb->sk && skb->sk->sk_hash)
- return skb->sk->sk_hash;
-
- skb_flow_dissect(skb, &keys);
- hash = jhash_3words((__force u32)keys.dst,
- (__force u32)keys.src ^ keys.ip_proto,
- (__force u32)keys.ports, q->perturbation);
- return hash;
-}
-
/* Looks up a heavy-hitter flow in a chaining list of table T. */
static struct hh_flow_state *seek_list(const u32 hash,
struct list_head *head,
@@ -280,7 +263,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch)
}
/* Get hashed flow-id of the skb. */
- hash = skb_hash(q, skb);
+ hash = skb_get_hash_perturb(skb, q->perturbation);
/* Check if this packet belongs to an already established HH flow. */
flow_pos = hash & HHF_BIT_MASK;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 4cdbfb856..e7c648fa9 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -12,16 +12,10 @@
#include <linux/list.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
+
#include <net/netlink.h>
#include <net/pkt_sched.h>
-
-struct ingress_qdisc_data {
- struct tcf_proto __rcu *filter_list;
-};
-
-/* ------------------------- Class/flow operations ------------------------- */
-
static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
{
return NULL;
@@ -49,57 +43,24 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch,
unsigned long cl)
{
- struct ingress_qdisc_data *p = qdisc_priv(sch);
-
- return &p->filter_list;
-}
-
-/* --------------------------- Qdisc operations ---------------------------- */
+ struct net_device *dev = qdisc_dev(sch);
-static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
-{
- struct ingress_qdisc_data *p = qdisc_priv(sch);
- struct tcf_result res;
- struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
- int result;
-
- result = tc_classify(skb, fl, &res);
-
- qdisc_bstats_update(sch, skb);
- switch (result) {
- case TC_ACT_SHOT:
- result = TC_ACT_SHOT;
- qdisc_qstats_drop(sch);
- break;
- case TC_ACT_STOLEN:
- case TC_ACT_QUEUED:
- result = TC_ACT_STOLEN;
- break;
- case TC_ACT_RECLASSIFY:
- case TC_ACT_OK:
- skb->tc_index = TC_H_MIN(res.classid);
- default:
- result = TC_ACT_OK;
- break;
- }
-
- return result;
+ return &dev->ingress_cl_list;
}
-/* ------------------------------------------------------------- */
-
static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
{
net_inc_ingress_queue();
+ sch->flags |= TCQ_F_CPUSTATS;
return 0;
}
static void ingress_destroy(struct Qdisc *sch)
{
- struct ingress_qdisc_data *p = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
- tcf_destroy_chain(&p->filter_list);
+ tcf_destroy_chain(&dev->ingress_cl_list);
net_dec_ingress_queue();
}
@@ -110,6 +71,7 @@ static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
+
return nla_nest_end(skb, nest);
nla_put_failure:
@@ -130,8 +92,6 @@ static const struct Qdisc_class_ops ingress_class_ops = {
static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
.cl_ops = &ingress_class_ops,
.id = "ingress",
- .priv_size = sizeof(struct ingress_qdisc_data),
- .enqueue = ingress_enqueue,
.init = ingress_init,
.destroy = ingress_destroy,
.dump = ingress_dump,
@@ -148,6 +108,7 @@ static void __exit ingress_module_exit(void)
unregister_qdisc(&ingress_qdisc_ops);
}
-module_init(ingress_module_init)
-module_exit(ingress_module_exit)
+module_init(ingress_module_init);
+module_exit(ingress_module_exit);
+
MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 956ead2ca..5abd1d9de 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -440,9 +440,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
struct Qdisc *rootq = qdisc_root(sch);
u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
- q->duplicate = 0;
- qdisc_enqueue_root(skb2, rootq);
+ q->duplicate = 0;
+ rootq->enqueue(skb2, rootq);
q->duplicate = dupsave;
}
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index 89f8fcf73..ade9445a5 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -216,6 +216,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
.peek = qdisc_peek_head,
.init = plug_init,
.change = plug_change,
+ .reset = qdisc_reset_queue,
.owner = THIS_MODULE,
};
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 3ec7e88a4..b8d73bca6 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -339,8 +339,7 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *);
static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
{
- if (!hlist_unhashed(&agg->nonfull_next))
- hlist_del_init(&agg->nonfull_next);
+ hlist_del_init(&agg->nonfull_next);
q->wsum -= agg->class_weight;
if (q->wsum != 0)
q->iwsum = ONE_FP / q->wsum;
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 5819dd826..4b8151933 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -26,7 +26,6 @@
#include <net/ip.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
-#include <net/flow_keys.h>
/*
* SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level)
@@ -285,9 +284,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
int i;
u32 p_min = ~0;
u32 minqlen = ~0;
- u32 r, slot, salt, sfbhash;
+ u32 r, sfbhash;
+ u32 slot = q->slot;
int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- struct flow_keys keys;
if (unlikely(sch->q.qlen >= q->limit)) {
qdisc_qstats_overlimit(sch);
@@ -309,22 +308,17 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
fl = rcu_dereference_bh(q->filter_list);
if (fl) {
+ u32 salt;
+
/* If using external classifiers, get result and record it. */
if (!sfb_classify(skb, fl, &ret, &salt))
goto other_drop;
- keys.src = salt;
- keys.dst = 0;
- keys.ports = 0;
+ sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
} else {
- skb_flow_dissect(skb, &keys);
+ sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation);
}
- slot = q->slot;
- sfbhash = jhash_3words((__force u32)keys.dst,
- (__force u32)keys.src,
- (__force u32)keys.ports,
- q->bins[slot].perturbation);
if (!sfbhash)
sfbhash = 1;
sfb_skb_cb(skb)->hashes[slot] = sfbhash;
@@ -356,10 +350,8 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (unlikely(p_min >= SFB_MAX_PROB)) {
/* Inelastic flow */
if (q->double_buffering) {
- sfbhash = jhash_3words((__force u32)keys.dst,
- (__force u32)keys.src,
- (__force u32)keys.ports,
- q->bins[slot].perturbation);
+ sfbhash = skb_get_hash_perturb(skb,
+ q->bins[slot].perturbation);
if (!sfbhash)
sfbhash = 1;
sfb_skb_cb(skb)->hashes[slot] = sfbhash;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index b877140be..52f75a547 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -23,7 +23,6 @@
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
-#include <net/flow_keys.h>
#include <net/red.h>
@@ -156,30 +155,10 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
return &q->dep[val - SFQ_MAX_FLOWS];
}
-/*
- * In order to be able to quickly rehash our queue when timer changes
- * q->perturbation, we store flow_keys in skb->cb[]
- */
-struct sfq_skb_cb {
- struct flow_keys keys;
-};
-
-static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
-{
- qdisc_cb_private_validate(skb, sizeof(struct sfq_skb_cb));
- return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
-}
-
static unsigned int sfq_hash(const struct sfq_sched_data *q,
const struct sk_buff *skb)
{
- const struct flow_keys *keys = &sfq_skb_cb(skb)->keys;
- unsigned int hash;
-
- hash = jhash_3words((__force u32)keys->dst,
- (__force u32)keys->src ^ keys->ip_proto,
- (__force u32)keys->ports, q->perturbation);
- return hash & (q->divisor - 1);
+ return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1);
}
static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
@@ -196,10 +175,8 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
return TC_H_MIN(skb->priority);
fl = rcu_dereference_bh(q->filter_list);
- if (!fl) {
- skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
+ if (!fl)
return sfq_hash(q, skb) + 1;
- }
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
result = tc_classify(skb, fl, &res);
@@ -329,10 +306,10 @@ drop:
len = qdisc_pkt_len(skb);
slot->backlog -= len;
sfq_dec(q, x);
- kfree_skb(skb);
sch->q.qlen--;
qdisc_qstats_drop(sch);
qdisc_qstats_backlog_dec(sch, skb);
+ kfree_skb(skb);
return len;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0e4198ee2..e917d2732 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -331,8 +331,9 @@ out:
rt = (struct rt6_info *)dst;
t->dst = dst;
- t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
- pr_debug("rt6_dst:%pI6 rt6_src:%pI6\n", &rt->rt6i_dst.addr,
+ t->dst_cookie = rt6_get_cookie(rt);
+ pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n",
+ &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
&fl6->saddr);
} else {
t->dst = NULL;
@@ -635,7 +636,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
struct sctp6_sock *newsctp6sk;
- newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot);
+ newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0);
if (!newsk)
goto out;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 53b7acde9..59e803566 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -550,7 +550,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
struct sctp_association *asoc)
{
struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL,
- sk->sk_prot);
+ sk->sk_prot, 0);
struct inet_sock *newinet;
if (!newsk)
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 06320c8c1..a655ddc3f 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3132,11 +3132,18 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
case SCTP_PARAM_IPV4_ADDRESS:
if (length != sizeof(sctp_ipv4addr_param_t))
return false;
+ /* ensure there is only one addr param and it's in the
+ * beginning of addip_hdr params, or we reject it.
+ */
+ if (param.v != addip->addip_hdr.params)
+ return false;
addr_param_seen = true;
break;
case SCTP_PARAM_IPV6_ADDRESS:
if (length != sizeof(sctp_ipv6addr_param_t))
return false;
+ if (param.v != addip->addip_hdr.params)
+ return false;
addr_param_seen = true;
break;
case SCTP_PARAM_ADD_IP:
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index fef2acdf4..85e6f03ae 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -702,7 +702,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
* outstanding data and rely on the retransmission limit be reached
* to shutdown the association.
*/
- if (t->asoc->state != SCTP_STATE_SHUTDOWN_PENDING)
+ if (t->asoc->state < SCTP_STATE_SHUTDOWN_PENDING)
t->asoc->overall_error_count = 0;
/* Clear the hb_sent flag to signal that we had a good
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 5f6c4e613..17bef01b9 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2121,12 +2121,6 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (sp->subscribe.sctp_data_io_event)
sctp_ulpevent_read_sndrcvinfo(event, msg);
-#if 0
- /* FIXME: we should be calling IP/IPv6 layers. */
- if (sk->sk_protinfo.af_inet.cmsg_flags)
- ip_cmsg_recv(msg, skb);
-#endif
-
err = copied;
/* If skb's length exceeds the user's buffer, update the skb and
@@ -2206,12 +2200,6 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
if (copy_from_user(&sctp_sk(sk)->subscribe, optval, optlen))
return -EFAULT;
- if (sctp_sk(sk)->subscribe.sctp_data_io_event)
- pr_warn_ratelimited(DEPRECATED "%s (pid %d) "
- "Requested SCTP_SNDRCVINFO event.\n"
- "Use SCTP_RCVINFO through SCTP_RECVRCVINFO option instead.\n",
- current->comm, task_pid_nr(current));
-
/* At the time when a user app subscribes to SCTP_SENDER_DRY_EVENT,
* if there is no data to be sent or retransmit, the stack will
* immediately send up this notification.
diff --git a/net/socket.c b/net/socket.c
index 884e32997..9963a0b53 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -576,9 +576,6 @@ void sock_release(struct socket *sock)
if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
pr_err("%s: fasync list not empty!\n", __func__);
- if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
- return;
-
this_cpu_sub(sockets_in_use, 1);
if (!sock->file) {
iput(SOCK_INODE(sock));
@@ -1213,9 +1210,9 @@ int sock_create(int family, int type, int protocol, struct socket **res)
}
EXPORT_SYMBOL(sock_create);
-int sock_create_kern(int family, int type, int protocol, struct socket **res)
+int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
{
- return __sock_create(&init_net, family, type, protocol, res, 1);
+ return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 9068e72aa..04ce2c0b6 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -48,28 +48,16 @@ config SUNRPC_DEBUG
If unsure, say Y.
-config SUNRPC_XPRT_RDMA_CLIENT
- tristate "RPC over RDMA Client Support"
+config SUNRPC_XPRT_RDMA
+ tristate "RPC-over-RDMA transport"
depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
default SUNRPC && INFINIBAND
help
- This option allows the NFS client to support an RDMA-enabled
- transport.
+ This option allows the NFS client and server to use RDMA
+ transports (InfiniBand, iWARP, or RoCE).
- To compile RPC client RDMA transport support as a module,
- choose M here: the module will be called xprtrdma.
+ To compile this support as a module, choose M. The module
+ will be called rpcrdma.ko.
- If unsure, say N.
-
-config SUNRPC_XPRT_RDMA_SERVER
- tristate "RPC over RDMA Server Support"
- depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
- default SUNRPC && INFINIBAND
- help
- This option allows the NFS server to support an RDMA-enabled
- transport.
-
- To compile RPC server RDMA transport support as a module,
- choose M here: the module will be called svcrdma.
-
- If unsure, say N.
+ If unsure, or you know there is no RDMA capability on your
+ hardware platform, say N.
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 15e6f6c23..b512fbd9d 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -5,8 +5,7 @@
obj-$(CONFIG_SUNRPC) += sunrpc.o
obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
-
-obj-y += xprtrdma/
+obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
auth.o auth_null.o auth_unix.o auth_generic.o \
@@ -15,6 +14,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
sunrpc_syms.o cache.o rpc_pipe.o \
svc_xprt.o
sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o
-sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o
+sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o
sunrpc-$(CONFIG_PROC_FS) += stats.o
sunrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 47f38be41..02f53674d 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -72,7 +72,7 @@ static int param_get_hashtbl_sz(char *buffer, const struct kernel_param *kp)
#define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int);
-static struct kernel_param_ops param_ops_hashtbl_sz = {
+static const struct kernel_param_ops param_ops_hashtbl_sz = {
.set = param_set_hashtbl_sz,
.get = param_get_hashtbl_sz,
};
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index b5408e8a3..fee3c15a4 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -881,9 +881,7 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
if (err)
goto out_err;
- sg_init_table(sg, 1);
- sg_set_buf(sg, &zeroconstant, 4);
-
+ sg_init_one(sg, &zeroconstant, 4);
err = crypto_hash_digest(&desc, sg, 4, Kseq);
if (err)
goto out_err;
@@ -951,9 +949,7 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
if (err)
goto out_err;
- sg_init_table(sg, 1);
- sg_set_buf(sg, zeroconstant, 4);
-
+ sg_init_one(sg, zeroconstant, 4);
err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
if (err)
goto out_err;
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 28504dfd3..6255d1411 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -37,16 +37,18 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
static inline int xprt_need_to_requeue(struct rpc_xprt *xprt)
{
- return xprt->bc_alloc_count > 0;
+ return xprt->bc_alloc_count < atomic_read(&xprt->bc_free_slots);
}
static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n)
{
+ atomic_add(n, &xprt->bc_free_slots);
xprt->bc_alloc_count += n;
}
static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n)
{
+ atomic_sub(n, &xprt->bc_free_slots);
return xprt->bc_alloc_count -= n;
}
@@ -67,6 +69,55 @@ static void xprt_free_allocation(struct rpc_rqst *req)
kfree(req);
}
+static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags)
+{
+ struct page *page;
+ /* Preallocate one XDR receive buffer */
+ page = alloc_page(gfp_flags);
+ if (page == NULL)
+ return -ENOMEM;
+ buf->head[0].iov_base = page_address(page);
+ buf->head[0].iov_len = PAGE_SIZE;
+ buf->tail[0].iov_base = NULL;
+ buf->tail[0].iov_len = 0;
+ buf->page_len = 0;
+ buf->len = 0;
+ buf->buflen = PAGE_SIZE;
+ return 0;
+}
+
+static
+struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags)
+{
+ struct rpc_rqst *req;
+
+ /* Pre-allocate one backchannel rpc_rqst */
+ req = kzalloc(sizeof(*req), gfp_flags);
+ if (req == NULL)
+ return NULL;
+
+ req->rq_xprt = xprt;
+ INIT_LIST_HEAD(&req->rq_list);
+ INIT_LIST_HEAD(&req->rq_bc_list);
+
+ /* Preallocate one XDR receive buffer */
+ if (xprt_alloc_xdr_buf(&req->rq_rcv_buf, gfp_flags) < 0) {
+ printk(KERN_ERR "Failed to create bc receive xbuf\n");
+ goto out_free;
+ }
+ req->rq_rcv_buf.len = PAGE_SIZE;
+
+ /* Preallocate one XDR send buffer */
+ if (xprt_alloc_xdr_buf(&req->rq_snd_buf, gfp_flags) < 0) {
+ printk(KERN_ERR "Failed to create bc snd xbuf\n");
+ goto out_free;
+ }
+ return req;
+out_free:
+ xprt_free_allocation(req);
+ return NULL;
+}
+
/*
* Preallocate up to min_reqs structures and related buffers for use
* by the backchannel. This function can be called multiple times
@@ -87,9 +138,7 @@ static void xprt_free_allocation(struct rpc_rqst *req)
*/
int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
{
- struct page *page_rcv = NULL, *page_snd = NULL;
- struct xdr_buf *xbufp = NULL;
- struct rpc_rqst *req, *tmp;
+ struct rpc_rqst *req;
struct list_head tmp_list;
int i;
@@ -106,7 +155,7 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
INIT_LIST_HEAD(&tmp_list);
for (i = 0; i < min_reqs; i++) {
/* Pre-allocate one backchannel rpc_rqst */
- req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL);
+ req = xprt_alloc_bc_req(xprt, GFP_KERNEL);
if (req == NULL) {
printk(KERN_ERR "Failed to create bc rpc_rqst\n");
goto out_free;
@@ -115,41 +164,6 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
/* Add the allocated buffer to the tmp list */
dprintk("RPC: adding req= %p\n", req);
list_add(&req->rq_bc_pa_list, &tmp_list);
-
- req->rq_xprt = xprt;
- INIT_LIST_HEAD(&req->rq_list);
- INIT_LIST_HEAD(&req->rq_bc_list);
-
- /* Preallocate one XDR receive buffer */
- page_rcv = alloc_page(GFP_KERNEL);
- if (page_rcv == NULL) {
- printk(KERN_ERR "Failed to create bc receive xbuf\n");
- goto out_free;
- }
- xbufp = &req->rq_rcv_buf;
- xbufp->head[0].iov_base = page_address(page_rcv);
- xbufp->head[0].iov_len = PAGE_SIZE;
- xbufp->tail[0].iov_base = NULL;
- xbufp->tail[0].iov_len = 0;
- xbufp->page_len = 0;
- xbufp->len = PAGE_SIZE;
- xbufp->buflen = PAGE_SIZE;
-
- /* Preallocate one XDR send buffer */
- page_snd = alloc_page(GFP_KERNEL);
- if (page_snd == NULL) {
- printk(KERN_ERR "Failed to create bc snd xbuf\n");
- goto out_free;
- }
-
- xbufp = &req->rq_snd_buf;
- xbufp->head[0].iov_base = page_address(page_snd);
- xbufp->head[0].iov_len = 0;
- xbufp->tail[0].iov_base = NULL;
- xbufp->tail[0].iov_len = 0;
- xbufp->page_len = 0;
- xbufp->len = 0;
- xbufp->buflen = PAGE_SIZE;
}
/*
@@ -167,7 +181,10 @@ out_free:
/*
* Memory allocation failed, free the temporary list
*/
- list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) {
+ while (!list_empty(&tmp_list)) {
+ req = list_first_entry(&tmp_list,
+ struct rpc_rqst,
+ rq_bc_pa_list);
list_del(&req->rq_bc_pa_list);
xprt_free_allocation(req);
}
@@ -217,9 +234,15 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
struct rpc_rqst *req = NULL;
dprintk("RPC: allocate a backchannel request\n");
- if (list_empty(&xprt->bc_pa_list))
+ if (atomic_read(&xprt->bc_free_slots) <= 0)
goto not_found;
-
+ if (list_empty(&xprt->bc_pa_list)) {
+ req = xprt_alloc_bc_req(xprt, GFP_ATOMIC);
+ if (!req)
+ goto not_found;
+ list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
+ xprt->bc_alloc_count++;
+ }
req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
rq_bc_pa_list);
req->rq_reply_bytes_recvd = 0;
@@ -245,11 +268,21 @@ void xprt_free_bc_request(struct rpc_rqst *req)
req->rq_connect_cookie = xprt->connect_cookie - 1;
smp_mb__before_atomic();
- WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
smp_mb__after_atomic();
- if (!xprt_need_to_requeue(xprt)) {
+ /*
+ * Return it to the list of preallocations so that it
+ * may be reused by a new callback request.
+ */
+ spin_lock_bh(&xprt->bc_pa_lock);
+ if (xprt_need_to_requeue(xprt)) {
+ list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
+ xprt->bc_alloc_count++;
+ req = NULL;
+ }
+ spin_unlock_bh(&xprt->bc_pa_lock);
+ if (req != NULL) {
/*
* The last remaining session was destroyed while this
* entry was in use. Free the entry and don't attempt
@@ -260,14 +293,6 @@ void xprt_free_bc_request(struct rpc_rqst *req)
xprt_free_allocation(req);
return;
}
-
- /*
- * Return it to the list of preallocations so that it
- * may be reused by a new callback request.
- */
- spin_lock_bh(&xprt->bc_pa_lock);
- list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
- spin_unlock_bh(&xprt->bc_pa_lock);
}
/*
@@ -311,6 +336,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
spin_lock(&xprt->bc_pa_lock);
list_del(&req->rq_bc_pa_list);
+ xprt_dec_alloc_count(xprt, 1);
spin_unlock(&xprt->bc_pa_lock);
req->rq_private_buf.len = copied;
diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c
deleted file mode 100644
index 15c7a8a1c..000000000
--- a/net/sunrpc/bc_svc.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/******************************************************************************
-
-(c) 2007 Network Appliance, Inc. All Rights Reserved.
-(c) 2009 NetApp. All Rights Reserved.
-
-NetApp provides this source code under the GPL v2 License.
-The GPL v2 license is available at
-http://opensource.org/licenses/gpl-license.php.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-******************************************************************************/
-
-/*
- * The NFSv4.1 callback service helper routines.
- * They implement the transport level processing required to send the
- * reply over an existing open connection previously established by the client.
- */
-
-#include <linux/module.h>
-
-#include <linux/sunrpc/xprt.h>
-#include <linux/sunrpc/sched.h>
-#include <linux/sunrpc/bc_xprt.h>
-
-#define RPCDBG_FACILITY RPCDBG_SVCDSP
-
-/* Empty callback ops */
-static const struct rpc_call_ops nfs41_callback_ops = {
-};
-
-
-/*
- * Send the callback reply
- */
-int bc_send(struct rpc_rqst *req)
-{
- struct rpc_task *task;
- int ret;
-
- dprintk("RPC: bc_send req= %p\n", req);
- task = rpc_run_bc_task(req, &nfs41_callback_ops);
- if (IS_ERR(task))
- ret = PTR_ERR(task);
- else {
- WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
- ret = task->tk_status;
- rpc_put_task(task);
- }
- dprintk("RPC: bc_send ret= %d\n", ret);
- return ret;
-}
-
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index e6ce15173..23608eb0d 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -891,15 +891,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
task->tk_flags |= RPC_TASK_SOFT;
if (clnt->cl_noretranstimeo)
task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
- if (sk_memalloc_socks()) {
- struct rpc_xprt *xprt;
-
- rcu_read_lock();
- xprt = rcu_dereference(clnt->cl_xprt);
- if (xprt->swapper)
- task->tk_flags |= RPC_TASK_SWAPPER;
- rcu_read_unlock();
- }
+ if (atomic_read(&clnt->cl_swapper))
+ task->tk_flags |= RPC_TASK_SWAPPER;
/* Add to the client's list of all tasks */
spin_lock(&clnt->cl_lock);
list_add_tail(&task->tk_task, &clnt->cl_tasks);
@@ -1031,15 +1024,14 @@ EXPORT_SYMBOL_GPL(rpc_call_async);
* rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
* rpc_execute against it
* @req: RPC request
- * @tk_ops: RPC call ops
*/
-struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
- const struct rpc_call_ops *tk_ops)
+struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
{
struct rpc_task *task;
struct xdr_buf *xbufp = &req->rq_snd_buf;
struct rpc_task_setup task_setup_data = {
- .callback_ops = tk_ops,
+ .callback_ops = &rpc_default_ops,
+ .flags = RPC_TASK_SOFTCONN,
};
dprintk("RPC: rpc_run_bc_task req= %p\n", req);
@@ -1614,6 +1606,7 @@ call_allocate(struct rpc_task *task)
req->rq_callsize + req->rq_rcvsize);
if (req->rq_buffer != NULL)
return;
+ xprt_inject_disconnect(xprt);
dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
@@ -1909,6 +1902,7 @@ call_transmit_status(struct rpc_task *task)
switch (task->tk_status) {
case -EAGAIN:
+ case -ENOBUFS:
break;
default:
dprint_status(task);
@@ -1935,7 +1929,6 @@ call_transmit_status(struct rpc_task *task)
case -ECONNABORTED:
case -EADDRINUSE:
case -ENOTCONN:
- case -ENOBUFS:
case -EPIPE:
rpc_task_force_reencode(task);
}
@@ -1951,33 +1944,36 @@ call_bc_transmit(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
- if (!xprt_prepare_transmit(task)) {
- /*
- * Could not reserve the transport. Try again after the
- * transport is released.
- */
- task->tk_status = 0;
- task->tk_action = call_bc_transmit;
- return;
- }
+ if (!xprt_prepare_transmit(task))
+ goto out_retry;
- task->tk_action = rpc_exit_task;
if (task->tk_status < 0) {
printk(KERN_NOTICE "RPC: Could not send backchannel reply "
"error: %d\n", task->tk_status);
- return;
+ goto out_done;
}
+ if (req->rq_connect_cookie != req->rq_xprt->connect_cookie)
+ req->rq_bytes_sent = 0;
xprt_transmit(task);
+
+ if (task->tk_status == -EAGAIN)
+ goto out_nospace;
+
xprt_end_transmit(task);
dprint_status(task);
switch (task->tk_status) {
case 0:
/* Success */
- break;
case -EHOSTDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
+ case -ECONNRESET:
+ case -ECONNREFUSED:
+ case -EADDRINUSE:
+ case -ENOTCONN:
+ case -EPIPE:
+ break;
case -ETIMEDOUT:
/*
* Problem reaching the server. Disconnect and let the
@@ -2002,6 +1998,13 @@ call_bc_transmit(struct rpc_task *task)
break;
}
rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
+out_done:
+ task->tk_action = rpc_exit_task;
+ return;
+out_nospace:
+ req->rq_connect_cookie = req->rq_xprt->connect_cookie;
+out_retry:
+ task->tk_status = 0;
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
@@ -2054,12 +2057,13 @@ call_status(struct rpc_task *task)
case -ECONNABORTED:
rpc_force_rebind(clnt);
case -EADDRINUSE:
- case -ENOBUFS:
rpc_delay(task, 3*HZ);
case -EPIPE:
case -ENOTCONN:
task->tk_action = call_bind;
break;
+ case -ENOBUFS:
+ rpc_delay(task, HZ>>2);
case -EAGAIN:
task->tk_action = call_transmit;
break;
@@ -2476,3 +2480,59 @@ void rpc_show_tasks(struct net *net)
spin_unlock(&sn->rpc_client_lock);
}
#endif
+
+#if IS_ENABLED(CONFIG_SUNRPC_SWAP)
+int
+rpc_clnt_swap_activate(struct rpc_clnt *clnt)
+{
+ int ret = 0;
+ struct rpc_xprt *xprt;
+
+ if (atomic_inc_return(&clnt->cl_swapper) == 1) {
+retry:
+ rcu_read_lock();
+ xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
+ rcu_read_unlock();
+ if (!xprt) {
+ /*
+ * If we didn't get a reference, then we likely are
+ * racing with a migration event. Wait for a grace
+ * period and try again.
+ */
+ synchronize_rcu();
+ goto retry;
+ }
+
+ ret = xprt_enable_swap(xprt);
+ xprt_put(xprt);
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_swap_activate);
+
+void
+rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
+{
+ struct rpc_xprt *xprt;
+
+ if (atomic_dec_if_positive(&clnt->cl_swapper) == 0) {
+retry:
+ rcu_read_lock();
+ xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
+ rcu_read_unlock();
+ if (!xprt) {
+ /*
+ * If we didn't get a reference, then we likely are
+ * racing with a migration event. Wait for a grace
+ * period and try again.
+ */
+ synchronize_rcu();
+ goto retry;
+ }
+
+ xprt_disable_swap(xprt);
+ xprt_put(xprt);
+ }
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate);
+#endif /* CONFIG_SUNRPC_SWAP */
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 82962f7e6..e7b4d9356 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -10,9 +10,12 @@
#include "netns.h"
static struct dentry *topdir;
+static struct dentry *rpc_fault_dir;
static struct dentry *rpc_clnt_dir;
static struct dentry *rpc_xprt_dir;
+unsigned int rpc_inject_disconnect;
+
struct rpc_clnt_iter {
struct rpc_clnt *clnt;
loff_t pos;
@@ -257,6 +260,8 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
debugfs_remove_recursive(xprt->debugfs);
xprt->debugfs = NULL;
}
+
+ atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect);
}
void
@@ -266,11 +271,79 @@ rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt)
xprt->debugfs = NULL;
}
+static int
+fault_open(struct inode *inode, struct file *filp)
+{
+ filp->private_data = kmalloc(128, GFP_KERNEL);
+ if (!filp->private_data)
+ return -ENOMEM;
+ return 0;
+}
+
+static int
+fault_release(struct inode *inode, struct file *filp)
+{
+ kfree(filp->private_data);
+ return 0;
+}
+
+static ssize_t
+fault_disconnect_read(struct file *filp, char __user *user_buf,
+ size_t len, loff_t *offset)
+{
+ char *buffer = (char *)filp->private_data;
+ size_t size;
+
+ size = sprintf(buffer, "%u\n", rpc_inject_disconnect);
+ return simple_read_from_buffer(user_buf, len, offset, buffer, size);
+}
+
+static ssize_t
+fault_disconnect_write(struct file *filp, const char __user *user_buf,
+ size_t len, loff_t *offset)
+{
+ char buffer[16];
+
+ if (len >= sizeof(buffer))
+ len = sizeof(buffer) - 1;
+ if (copy_from_user(buffer, user_buf, len))
+ return -EFAULT;
+ buffer[len] = '\0';
+ if (kstrtouint(buffer, 10, &rpc_inject_disconnect))
+ return -EINVAL;
+ return len;
+}
+
+static const struct file_operations fault_disconnect_fops = {
+ .owner = THIS_MODULE,
+ .open = fault_open,
+ .read = fault_disconnect_read,
+ .write = fault_disconnect_write,
+ .release = fault_release,
+};
+
+static struct dentry *
+inject_fault_dir(struct dentry *topdir)
+{
+ struct dentry *faultdir;
+
+ faultdir = debugfs_create_dir("inject_fault", topdir);
+ if (!faultdir)
+ return NULL;
+
+ if (!debugfs_create_file("disconnect", S_IFREG | S_IRUSR, faultdir,
+ NULL, &fault_disconnect_fops))
+ return NULL;
+
+ return faultdir;
+}
+
void __exit
sunrpc_debugfs_exit(void)
{
debugfs_remove_recursive(topdir);
topdir = NULL;
+ rpc_fault_dir = NULL;
rpc_clnt_dir = NULL;
rpc_xprt_dir = NULL;
}
@@ -282,6 +355,10 @@ sunrpc_debugfs_init(void)
if (!topdir)
return;
+ rpc_fault_dir = inject_fault_dir(topdir);
+ if (!rpc_fault_dir)
+ goto out_remove;
+
rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir);
if (!rpc_clnt_dir)
goto out_remove;
@@ -294,5 +371,6 @@ sunrpc_debugfs_init(void)
out_remove:
debugfs_remove_recursive(topdir);
topdir = NULL;
+ rpc_fault_dir = NULL;
rpc_clnt_dir = NULL;
}
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 78974e4d9..5a16d8d8c 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1290,7 +1290,6 @@ err_bad:
svc_putnl(resv, ntohl(rpc_stat));
goto sendit;
}
-EXPORT_SYMBOL_GPL(svc_process);
/*
* Process the RPC request.
@@ -1338,6 +1337,7 @@ out_drop:
svc_drop(rqstp);
return 0;
}
+EXPORT_SYMBOL_GPL(svc_process);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
@@ -1350,6 +1350,11 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
+ struct rpc_task *task;
+ int proc_error;
+ int error;
+
+ dprintk("svc: %s(%p)\n", __func__, req);
/* Build the svc_rqst used by the common processing routine */
rqstp->rq_xprt = serv->sv_bc_xprt;
@@ -1372,21 +1377,36 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
/*
* Skip the next two words because they've already been
- * processed in the trasport
+ * processed in the transport
*/
svc_getu32(argv); /* XID */
svc_getnl(argv); /* CALLDIR */
- /* Returns 1 for send, 0 for drop */
- if (svc_process_common(rqstp, argv, resv)) {
- memcpy(&req->rq_snd_buf, &rqstp->rq_res,
- sizeof(req->rq_snd_buf));
- return bc_send(req);
- } else {
- /* drop request */
+ /* Parse and execute the bc call */
+ proc_error = svc_process_common(rqstp, argv, resv);
+
+ atomic_inc(&req->rq_xprt->bc_free_slots);
+ if (!proc_error) {
+ /* Processing error: drop the request */
xprt_free_bc_request(req);
return 0;
}
+
+ /* Finally, send the reply synchronously */
+ memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
+ task = rpc_run_bc_task(req);
+ if (IS_ERR(task)) {
+ error = PTR_ERR(task);
+ goto out;
+ }
+
+ WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
+ error = task->tk_status;
+ rpc_put_task(task);
+
+out:
+ dprintk("svc: %s(), error=%d\n", __func__, error);
+ return error;
}
EXPORT_SYMBOL_GPL(bc_svc_process);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 1d4fe24af..ab5dd621a 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -68,6 +68,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net);
static void xprt_request_init(struct rpc_task *, struct rpc_xprt *);
static void xprt_connect_status(struct rpc_task *task);
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
+static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *);
static void xprt_destroy(struct rpc_xprt *xprt);
static DEFINE_SPINLOCK(xprt_list_lock);
@@ -250,6 +251,8 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
}
xprt_clear_locked(xprt);
out_sleep:
+ if (req)
+ __xprt_put_cong(xprt, req);
dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
@@ -608,8 +611,8 @@ static void xprt_autoclose(struct work_struct *work)
struct rpc_xprt *xprt =
container_of(work, struct rpc_xprt, task_cleanup);
- xprt->ops->close(xprt);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ xprt->ops->close(xprt);
xprt_release_write(xprt, NULL);
}
@@ -967,6 +970,7 @@ void xprt_transmit(struct rpc_task *task)
task->tk_status = status;
return;
}
+ xprt_inject_disconnect(xprt);
dprintk("RPC: %5u xmit complete\n", task->tk_pid);
task->tk_flags |= RPC_TASK_SENT;
@@ -1285,6 +1289,7 @@ void xprt_release(struct rpc_task *task)
spin_unlock_bh(&xprt->transport_lock);
if (req->rq_buffer)
xprt->ops->buf_free(req->rq_buffer);
+ xprt_inject_disconnect(xprt);
if (req->rq_cred != NULL)
put_rpccred(req->rq_cred);
task->tk_rqstp = NULL;
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 579f72bbc..48913de24 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,9 +1,7 @@
-obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o
+obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
-xprtrdma-y := transport.o rpc_rdma.o verbs.o \
- fmr_ops.o frwr_ops.o physical_ops.o
-
-obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o
-
-svcrdma-y := svc_rdma.o svc_rdma_transport.o \
- svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o
+rpcrdma-y := transport.o rpc_rdma.o verbs.o \
+ fmr_ops.o frwr_ops.o physical_ops.o \
+ svc_rdma.o svc_rdma_transport.o \
+ svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
+ module.o
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 302d4ebf6..f1e8dafbd 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -11,6 +11,21 @@
* can take tens of usecs to complete.
*/
+/* Normal operation
+ *
+ * A Memory Region is prepared for RDMA READ or WRITE using the
+ * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
+ * finished, the Memory Region is unmapped using the ib_unmap_fmr
+ * verb (fmr_op_unmap).
+ */
+
+/* Transport recovery
+ *
+ * After a transport reconnect, fmr_op_map re-uses the MR already
+ * allocated for the RPC, but generates a fresh rkey then maps the
+ * MR again. This process is synchronous.
+ */
+
#include "xprt_rdma.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -50,19 +65,28 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_mw *r;
int i, rc;
+ spin_lock_init(&buf->rb_mwlock);
INIT_LIST_HEAD(&buf->rb_mws);
INIT_LIST_HEAD(&buf->rb_all);
- i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
- dprintk("RPC: %s: initializing %d FMRs\n", __func__, i);
+ i = max_t(int, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES, 1);
+ i += 2; /* head + tail */
+ i *= buf->rb_max_requests; /* one set for each RPC slot */
+ dprintk("RPC: %s: initalizing %d FMRs\n", __func__, i);
+ rc = -ENOMEM;
while (i--) {
r = kzalloc(sizeof(*r), GFP_KERNEL);
if (!r)
- return -ENOMEM;
+ goto out;
- r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
- if (IS_ERR(r->r.fmr))
+ r->r.fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES *
+ sizeof(u64), GFP_KERNEL);
+ if (!r->r.fmr.physaddrs)
+ goto out_free;
+
+ r->r.fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
+ if (IS_ERR(r->r.fmr.fmr))
goto out_fmr_err;
list_add(&r->mw_list, &buf->rb_mws);
@@ -71,12 +95,24 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
return 0;
out_fmr_err:
- rc = PTR_ERR(r->r.fmr);
+ rc = PTR_ERR(r->r.fmr.fmr);
dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc);
+ kfree(r->r.fmr.physaddrs);
+out_free:
kfree(r);
+out:
return rc;
}
+static int
+__fmr_unmap(struct rpcrdma_mw *r)
+{
+ LIST_HEAD(l);
+
+ list_add(&r->r.fmr.fmr->list, &l);
+ return ib_unmap_fmr(&l);
+}
+
/* Use the ib_map_phys_fmr() verb to register a memory region
* for remote access via RDMA READ or RDMA WRITE.
*/
@@ -85,12 +121,24 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
int nsegs, bool writing)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- struct ib_device *device = ia->ri_id->device;
+ struct ib_device *device = ia->ri_device;
enum dma_data_direction direction = rpcrdma_data_dir(writing);
struct rpcrdma_mr_seg *seg1 = seg;
- struct rpcrdma_mw *mw = seg1->rl_mw;
- u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
int len, pageoff, i, rc;
+ struct rpcrdma_mw *mw;
+
+ mw = seg1->rl_mw;
+ seg1->rl_mw = NULL;
+ if (!mw) {
+ mw = rpcrdma_get_mw(r_xprt);
+ if (!mw)
+ return -ENOMEM;
+ } else {
+ /* this is a retransmit; generate a fresh rkey */
+ rc = __fmr_unmap(mw);
+ if (rc)
+ return rc;
+ }
pageoff = offset_in_page(seg1->mr_offset);
seg1->mr_offset -= pageoff; /* start of page */
@@ -100,7 +148,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
nsegs = RPCRDMA_MAX_FMR_SGES;
for (i = 0; i < nsegs;) {
rpcrdma_map_one(device, seg, direction);
- physaddrs[i] = seg->mr_dma;
+ mw->r.fmr.physaddrs[i] = seg->mr_dma;
len += seg->mr_len;
++seg;
++i;
@@ -110,11 +158,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
break;
}
- rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma);
+ rc = ib_map_phys_fmr(mw->r.fmr.fmr, mw->r.fmr.physaddrs,
+ i, seg1->mr_dma);
if (rc)
goto out_maperr;
- seg1->mr_rkey = mw->r.fmr->rkey;
+ seg1->rl_mw = mw;
+ seg1->mr_rkey = mw->r.fmr.fmr->rkey;
seg1->mr_base = seg1->mr_dma + pageoff;
seg1->mr_nsegs = i;
seg1->mr_len = len;
@@ -137,48 +187,28 @@ fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_mr_seg *seg1 = seg;
- struct ib_device *device;
+ struct rpcrdma_mw *mw = seg1->rl_mw;
int rc, nsegs = seg->mr_nsegs;
- LIST_HEAD(l);
- list_add(&seg1->rl_mw->r.fmr->list, &l);
- rc = ib_unmap_fmr(&l);
- read_lock(&ia->ri_qplock);
- device = ia->ri_id->device;
+ dprintk("RPC: %s: FMR %p\n", __func__, mw);
+
+ seg1->rl_mw = NULL;
while (seg1->mr_nsegs--)
- rpcrdma_unmap_one(device, seg++);
- read_unlock(&ia->ri_qplock);
+ rpcrdma_unmap_one(ia->ri_device, seg++);
+ rc = __fmr_unmap(mw);
if (rc)
goto out_err;
+ rpcrdma_put_mw(r_xprt, mw);
return nsegs;
out_err:
+ /* The FMR is abandoned, but remains in rb_all. fmr_op_destroy
+ * will attempt to release it when the transport is destroyed.
+ */
dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc);
return nsegs;
}
-/* After a disconnect, unmap all FMRs.
- *
- * This is invoked only in the transport connect worker in order
- * to serialize with rpcrdma_register_fmr_external().
- */
-static void
-fmr_op_reset(struct rpcrdma_xprt *r_xprt)
-{
- struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
- struct rpcrdma_mw *r;
- LIST_HEAD(list);
- int rc;
-
- list_for_each_entry(r, &buf->rb_all, mw_all)
- list_add(&r->r.fmr->list, &list);
-
- rc = ib_unmap_fmr(&list);
- if (rc)
- dprintk("RPC: %s: ib_unmap_fmr failed %i\n",
- __func__, rc);
-}
-
static void
fmr_op_destroy(struct rpcrdma_buffer *buf)
{
@@ -188,10 +218,13 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
while (!list_empty(&buf->rb_all)) {
r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
list_del(&r->mw_all);
- rc = ib_dealloc_fmr(r->r.fmr);
+ kfree(r->r.fmr.physaddrs);
+
+ rc = ib_dealloc_fmr(r->r.fmr.fmr);
if (rc)
dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
__func__, rc);
+
kfree(r);
}
}
@@ -202,7 +235,6 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_open = fmr_op_open,
.ro_maxpages = fmr_op_maxpages,
.ro_init = fmr_op_init,
- .ro_reset = fmr_op_reset,
.ro_destroy = fmr_op_destroy,
.ro_displayname = "fmr",
};
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index dff0481db..04ea91420 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -11,12 +11,136 @@
* but most complex memory registration mode.
*/
+/* Normal operation
+ *
+ * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
+ * Work Request (frmr_op_map). When the RDMA operation is finished, this
+ * Memory Region is invalidated using a LOCAL_INV Work Request
+ * (frmr_op_unmap).
+ *
+ * Typically these Work Requests are not signaled, and neither are RDMA
+ * SEND Work Requests (with the exception of signaling occasionally to
+ * prevent provider work queue overflows). This greatly reduces HCA
+ * interrupt workload.
+ *
+ * As an optimization, frwr_op_unmap marks MRs INVALID before the
+ * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
+ * rb_mws immediately so that no work (like managing a linked list
+ * under a spinlock) is needed in the completion upcall.
+ *
+ * But this means that frwr_op_map() can occasionally encounter an MR
+ * that is INVALID but the LOCAL_INV WR has not completed. Work Queue
+ * ordering prevents a subsequent FAST_REG WR from executing against
+ * that MR while it is still being invalidated.
+ */
+
+/* Transport recovery
+ *
+ * ->op_map and the transport connect worker cannot run at the same
+ * time, but ->op_unmap can fire while the transport connect worker
+ * is running. Thus MR recovery is handled in ->op_map, to guarantee
+ * that recovered MRs are owned by a sending RPC, and not one where
+ * ->op_unmap could fire at the same time transport reconnect is
+ * being done.
+ *
+ * When the underlying transport disconnects, MRs are left in one of
+ * three states:
+ *
+ * INVALID: The MR was not in use before the QP entered ERROR state.
+ * (Or, the LOCAL_INV WR has not completed or flushed yet).
+ *
+ * STALE: The MR was being registered or unregistered when the QP
+ * entered ERROR state, and the pending WR was flushed.
+ *
+ * VALID: The MR was registered before the QP entered ERROR state.
+ *
+ * When frwr_op_map encounters STALE and VALID MRs, they are recovered
+ * with ib_dereg_mr and then are re-initialized. Beause MR recovery
+ * allocates fresh resources, it is deferred to a workqueue, and the
+ * recovered MRs are placed back on the rb_mws list when recovery is
+ * complete. frwr_op_map allocates another MR for the current RPC while
+ * the broken MR is reset.
+ *
+ * To ensure that frwr_op_map doesn't encounter an MR that is marked
+ * INVALID but that is about to be flushed due to a previous transport
+ * disconnect, the transport connect worker attempts to drain all
+ * pending send queue WRs before the transport is reconnected.
+ */
+
#include "xprt_rdma.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
+static struct workqueue_struct *frwr_recovery_wq;
+
+#define FRWR_RECOVERY_WQ_FLAGS (WQ_UNBOUND | WQ_MEM_RECLAIM)
+
+int
+frwr_alloc_recovery_wq(void)
+{
+ frwr_recovery_wq = alloc_workqueue("frwr_recovery",
+ FRWR_RECOVERY_WQ_FLAGS, 0);
+ return !frwr_recovery_wq ? -ENOMEM : 0;
+}
+
+void
+frwr_destroy_recovery_wq(void)
+{
+ struct workqueue_struct *wq;
+
+ if (!frwr_recovery_wq)
+ return;
+
+ wq = frwr_recovery_wq;
+ frwr_recovery_wq = NULL;
+ destroy_workqueue(wq);
+}
+
+/* Deferred reset of a single FRMR. Generate a fresh rkey by
+ * replacing the MR.
+ *
+ * There's no recovery if this fails. The FRMR is abandoned, but
+ * remains in rb_all. It will be cleaned up when the transport is
+ * destroyed.
+ */
+static void
+__frwr_recovery_worker(struct work_struct *work)
+{
+ struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw,
+ r.frmr.fr_work);
+ struct rpcrdma_xprt *r_xprt = r->r.frmr.fr_xprt;
+ unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
+ struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
+
+ if (ib_dereg_mr(r->r.frmr.fr_mr))
+ goto out_fail;
+
+ r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(pd, depth);
+ if (IS_ERR(r->r.frmr.fr_mr))
+ goto out_fail;
+
+ dprintk("RPC: %s: recovered FRMR %p\n", __func__, r);
+ r->r.frmr.fr_state = FRMR_IS_INVALID;
+ rpcrdma_put_mw(r_xprt, r);
+ return;
+
+out_fail:
+ pr_warn("RPC: %s: FRMR %p unrecovered\n",
+ __func__, r);
+}
+
+/* A broken MR was discovered in a context that can't sleep.
+ * Defer recovery to the recovery worker.
+ */
+static void
+__frwr_queue_recovery(struct rpcrdma_mw *r)
+{
+ INIT_WORK(&r->r.frmr.fr_work, __frwr_recovery_worker);
+ queue_work(frwr_recovery_wq, &r->r.frmr.fr_work);
+}
+
static int
__frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
unsigned int depth)
@@ -128,8 +252,8 @@ frwr_sendcompletion(struct ib_wc *wc)
/* WARNING: Only wr_id and status are reliable at this point */
r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
- dprintk("RPC: %s: frmr %p (stale), status %d\n",
- __func__, r, wc->status);
+ pr_warn("RPC: %s: frmr %p flushed, status %s (%d)\n",
+ __func__, r, ib_wc_status_msg(wc->status), wc->status);
r->r.frmr.fr_state = FRMR_IS_STALE;
}
@@ -137,16 +261,19 @@ static int
frwr_op_init(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
- struct ib_device *device = r_xprt->rx_ia.ri_id->device;
+ struct ib_device *device = r_xprt->rx_ia.ri_device;
unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
int i;
+ spin_lock_init(&buf->rb_mwlock);
INIT_LIST_HEAD(&buf->rb_mws);
INIT_LIST_HEAD(&buf->rb_all);
- i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
- dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i);
+ i = max_t(int, RPCRDMA_MAX_DATA_SEGS / depth, 1);
+ i += 2; /* head + tail */
+ i *= buf->rb_max_requests; /* one set for each RPC slot */
+ dprintk("RPC: %s: initalizing %d FRMRs\n", __func__, i);
while (i--) {
struct rpcrdma_mw *r;
@@ -165,6 +292,7 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt)
list_add(&r->mw_list, &buf->rb_mws);
list_add(&r->mw_all, &buf->rb_all);
r->mw_sendcompletion = frwr_sendcompletion;
+ r->r.frmr.fr_xprt = r_xprt;
}
return 0;
@@ -178,12 +306,12 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
int nsegs, bool writing)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- struct ib_device *device = ia->ri_id->device;
+ struct ib_device *device = ia->ri_device;
enum dma_data_direction direction = rpcrdma_data_dir(writing);
struct rpcrdma_mr_seg *seg1 = seg;
- struct rpcrdma_mw *mw = seg1->rl_mw;
- struct rpcrdma_frmr *frmr = &mw->r.frmr;
- struct ib_mr *mr = frmr->fr_mr;
+ struct rpcrdma_mw *mw;
+ struct rpcrdma_frmr *frmr;
+ struct ib_mr *mr;
struct ib_send_wr fastreg_wr, *bad_wr;
u8 key;
int len, pageoff;
@@ -192,12 +320,25 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
u64 pa;
int page_no;
+ mw = seg1->rl_mw;
+ seg1->rl_mw = NULL;
+ do {
+ if (mw)
+ __frwr_queue_recovery(mw);
+ mw = rpcrdma_get_mw(r_xprt);
+ if (!mw)
+ return -ENOMEM;
+ } while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
+ frmr = &mw->r.frmr;
+ frmr->fr_state = FRMR_IS_VALID;
+
pageoff = offset_in_page(seg1->mr_offset);
seg1->mr_offset -= pageoff; /* start of page */
seg1->mr_len += pageoff;
len = -pageoff;
if (nsegs > ia->ri_max_frmr_depth)
nsegs = ia->ri_max_frmr_depth;
+
for (page_no = i = 0; i < nsegs;) {
rpcrdma_map_one(device, seg, direction);
pa = seg->mr_dma;
@@ -216,8 +357,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n",
__func__, mw, i, len);
- frmr->fr_state = FRMR_IS_VALID;
-
memset(&fastreg_wr, 0, sizeof(fastreg_wr));
fastreg_wr.wr_id = (unsigned long)(void *)mw;
fastreg_wr.opcode = IB_WR_FAST_REG_MR;
@@ -229,6 +368,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
fastreg_wr.wr.fast_reg.access_flags = writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ;
+ mr = frmr->fr_mr;
key = (u8)(mr->rkey & 0x000000FF);
ib_update_fast_reg_key(mr, ++key);
fastreg_wr.wr.fast_reg.rkey = mr->rkey;
@@ -238,6 +378,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
if (rc)
goto out_senderr;
+ seg1->rl_mw = mw;
seg1->mr_rkey = mr->rkey;
seg1->mr_base = seg1->mr_dma + pageoff;
seg1->mr_nsegs = i;
@@ -246,10 +387,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
out_senderr:
dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc);
- ib_update_fast_reg_key(mr, --key);
- frmr->fr_state = FRMR_IS_INVALID;
while (i--)
rpcrdma_unmap_one(device, --seg);
+ __frwr_queue_recovery(mw);
return rc;
}
@@ -261,78 +401,46 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
{
struct rpcrdma_mr_seg *seg1 = seg;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ struct rpcrdma_mw *mw = seg1->rl_mw;
struct ib_send_wr invalidate_wr, *bad_wr;
int rc, nsegs = seg->mr_nsegs;
- struct ib_device *device;
- seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
+ dprintk("RPC: %s: FRMR %p\n", __func__, mw);
+
+ seg1->rl_mw = NULL;
+ mw->r.frmr.fr_state = FRMR_IS_INVALID;
memset(&invalidate_wr, 0, sizeof(invalidate_wr));
- invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
+ invalidate_wr.wr_id = (unsigned long)(void *)mw;
invalidate_wr.opcode = IB_WR_LOCAL_INV;
- invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
+ invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey;
DECR_CQCOUNT(&r_xprt->rx_ep);
- read_lock(&ia->ri_qplock);
- device = ia->ri_id->device;
while (seg1->mr_nsegs--)
- rpcrdma_unmap_one(device, seg++);
+ rpcrdma_unmap_one(ia->ri_device, seg++);
+ read_lock(&ia->ri_qplock);
rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
read_unlock(&ia->ri_qplock);
if (rc)
goto out_err;
+
+ rpcrdma_put_mw(r_xprt, mw);
return nsegs;
out_err:
- /* Force rpcrdma_buffer_get() to retry */
- seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc);
+ __frwr_queue_recovery(mw);
return nsegs;
}
-/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
- * an unusable state. Find FRMRs in this state and dereg / reg
- * each. FRMRs that are VALID and attached to an rpcrdma_req are
- * also torn down.
- *
- * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
- *
- * This is invoked only in the transport connect worker in order
- * to serialize with rpcrdma_register_frmr_external().
- */
-static void
-frwr_op_reset(struct rpcrdma_xprt *r_xprt)
-{
- struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
- struct ib_device *device = r_xprt->rx_ia.ri_id->device;
- unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
- struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
- struct rpcrdma_mw *r;
- int rc;
-
- list_for_each_entry(r, &buf->rb_all, mw_all) {
- if (r->r.frmr.fr_state == FRMR_IS_INVALID)
- continue;
-
- __frwr_release(r);
- rc = __frwr_init(r, pd, device, depth);
- if (rc) {
- dprintk("RPC: %s: mw %p left %s\n",
- __func__, r,
- (r->r.frmr.fr_state == FRMR_IS_STALE ?
- "stale" : "valid"));
- continue;
- }
-
- r->r.frmr.fr_state = FRMR_IS_INVALID;
- }
-}
-
static void
frwr_op_destroy(struct rpcrdma_buffer *buf)
{
struct rpcrdma_mw *r;
+ /* Ensure stale MWs for "buf" are no longer in flight */
+ flush_workqueue(frwr_recovery_wq);
+
while (!list_empty(&buf->rb_all)) {
r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
list_del(&r->mw_all);
@@ -347,7 +455,6 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_open = frwr_op_open,
.ro_maxpages = frwr_op_maxpages,
.ro_init = frwr_op_init,
- .ro_reset = frwr_op_reset,
.ro_destroy = frwr_op_destroy,
.ro_displayname = "frwr",
};
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
new file mode 100644
index 000000000..560712bd9
--- /dev/null
+++ b/net/sunrpc/xprtrdma/module.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2015 Oracle. All rights reserved.
+ */
+
+/* rpcrdma.ko module initialization
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sunrpc/svc_rdma.h>
+#include "xprt_rdma.h"
+
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define RPCDBG_FACILITY RPCDBG_TRANS
+#endif
+
+MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
+MODULE_DESCRIPTION("RPC/RDMA Transport");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS("svcrdma");
+MODULE_ALIAS("xprtrdma");
+
+static void __exit rpc_rdma_cleanup(void)
+{
+ xprt_rdma_cleanup();
+ svc_rdma_cleanup();
+}
+
+static int __init rpc_rdma_init(void)
+{
+ int rc;
+
+ rc = svc_rdma_init();
+ if (rc)
+ goto out;
+
+ rc = xprt_rdma_init();
+ if (rc)
+ svc_rdma_cleanup();
+
+out:
+ return rc;
+}
+
+module_init(rpc_rdma_init);
+module_exit(rpc_rdma_cleanup);
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index ba518af16..41985d07f 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -50,8 +50,7 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- rpcrdma_map_one(ia->ri_id->device, seg,
- rpcrdma_data_dir(writing));
+ rpcrdma_map_one(ia->ri_device, seg, rpcrdma_data_dir(writing));
seg->mr_rkey = ia->ri_bind_mem->rkey;
seg->mr_base = seg->mr_dma;
seg->mr_nsegs = 1;
@@ -65,19 +64,11 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- read_lock(&ia->ri_qplock);
- rpcrdma_unmap_one(ia->ri_id->device, seg);
- read_unlock(&ia->ri_qplock);
-
+ rpcrdma_unmap_one(ia->ri_device, seg);
return 1;
}
static void
-physical_op_reset(struct rpcrdma_xprt *r_xprt)
-{
-}
-
-static void
physical_op_destroy(struct rpcrdma_buffer *buf)
{
}
@@ -88,7 +79,6 @@ const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
.ro_open = physical_op_open,
.ro_maxpages = physical_op_maxpages,
.ro_init = physical_op_init,
- .ro_reset = physical_op_reset,
.ro_destroy = physical_op_destroy,
.ro_displayname = "physical",
};
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 2c53ea9e1..84ea37dae 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -284,9 +284,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
return (unsigned char *)iptr - (unsigned char *)headerp;
out:
- if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
- return n;
-
for (pos = 0; nchunks--;)
pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt,
&req->rl_segments[pos]);
@@ -732,8 +729,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
struct rpcrdma_msg *headerp;
struct rpcrdma_req *req;
struct rpc_rqst *rqst;
- struct rpc_xprt *xprt = rep->rr_xprt;
- struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
__be32 *iptr;
int rdmalen, status;
unsigned long cwnd;
@@ -770,7 +767,6 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
rep->rr_len);
repost:
r_xprt->rx_stats.bad_reply_count++;
- rep->rr_func = rpcrdma_reply_handler;
if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
rpcrdma_recv_buffer_put(rep);
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index c1b627026..2cd252f02 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -38,8 +38,7 @@
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
-#include <linux/module.h>
-#include <linux/init.h>
+
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
@@ -295,8 +294,3 @@ int svc_rdma_init(void)
destroy_workqueue(svc_rdma_wq);
return -ENOMEM;
}
-MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
-MODULE_DESCRIPTION("SVC RDMA Transport");
-MODULE_LICENSE("Dual BSD/GPL");
-module_init(svc_rdma_init);
-module_exit(svc_rdma_cleanup);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index b681855cf..e2fca7617 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -50,12 +50,12 @@
/*
* Decodes a read chunk list. The expected format is as follows:
* descrim : xdr_one
- * position : u32 offset into XDR stream
- * handle : u32 RKEY
+ * position : __be32 offset into XDR stream
+ * handle : __be32 RKEY
* . . .
* end-of-list: xdr_zero
*/
-static u32 *decode_read_list(u32 *va, u32 *vaend)
+static __be32 *decode_read_list(__be32 *va, __be32 *vaend)
{
struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
@@ -67,20 +67,20 @@ static u32 *decode_read_list(u32 *va, u32 *vaend)
}
ch++;
}
- return (u32 *)&ch->rc_position;
+ return &ch->rc_position;
}
/*
* Decodes a write chunk list. The expected format is as follows:
* descrim : xdr_one
* nchunks : <count>
- * handle : u32 RKEY ---+
- * length : u32 <len of segment> |
+ * handle : __be32 RKEY ---+
+ * length : __be32 <len of segment> |
* offset : remove va + <count>
* . . . |
* ---+
*/
-static u32 *decode_write_list(u32 *va, u32 *vaend)
+static __be32 *decode_write_list(__be32 *va, __be32 *vaend)
{
unsigned long start, end;
int nchunks;
@@ -90,14 +90,14 @@ static u32 *decode_write_list(u32 *va, u32 *vaend)
/* Check for not write-array */
if (ary->wc_discrim == xdr_zero)
- return (u32 *)&ary->wc_nchunks;
+ return &ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
- nchunks = ntohl(ary->wc_nchunks);
+ nchunks = be32_to_cpu(ary->wc_nchunks);
start = (unsigned long)&ary->wc_array[0];
end = (unsigned long)vaend;
@@ -112,10 +112,10 @@ static u32 *decode_write_list(u32 *va, u32 *vaend)
* rs_length is the 2nd 4B field in wc_target and taking its
* address skips the list terminator
*/
- return (u32 *)&ary->wc_array[nchunks].wc_target.rs_length;
+ return &ary->wc_array[nchunks].wc_target.rs_length;
}
-static u32 *decode_reply_array(u32 *va, u32 *vaend)
+static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
{
unsigned long start, end;
int nchunks;
@@ -124,14 +124,14 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend)
/* Check for no reply-array */
if (ary->wc_discrim == xdr_zero)
- return (u32 *)&ary->wc_nchunks;
+ return &ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
- nchunks = ntohl(ary->wc_nchunks);
+ nchunks = be32_to_cpu(ary->wc_nchunks);
start = (unsigned long)&ary->wc_array[0];
end = (unsigned long)vaend;
@@ -142,15 +142,14 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend)
ary, nchunks, vaend);
return NULL;
}
- return (u32 *)&ary->wc_array[nchunks];
+ return (__be32 *)&ary->wc_array[nchunks];
}
int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
struct svc_rqst *rqstp)
{
struct rpcrdma_msg *rmsgp = NULL;
- u32 *va;
- u32 *vaend;
+ __be32 *va, *vaend;
u32 hdr_len;
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
@@ -162,22 +161,17 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
return -EINVAL;
}
- /* Decode the header */
- rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
- rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
- rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
- rmsgp->rm_type = ntohl(rmsgp->rm_type);
-
- if (rmsgp->rm_vers != RPCRDMA_VERSION)
+ if (rmsgp->rm_vers != rpcrdma_version)
return -ENOSYS;
/* Pull in the extra for the padded case and bump our pointer */
- if (rmsgp->rm_type == RDMA_MSGP) {
+ if (rmsgp->rm_type == rdma_msgp) {
int hdrlen;
+
rmsgp->rm_body.rm_padded.rm_align =
- ntohl(rmsgp->rm_body.rm_padded.rm_align);
+ be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align);
rmsgp->rm_body.rm_padded.rm_thresh =
- ntohl(rmsgp->rm_body.rm_padded.rm_thresh);
+ be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh);
va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
rqstp->rq_arg.head[0].iov_base = va;
@@ -192,7 +186,7 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
* chunk list and a reply chunk list.
*/
va = &rmsgp->rm_body.rm_chunks[0];
- vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
+ vaend = (__be32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
va = decode_read_list(va, vaend);
if (!va)
return -EINVAL;
@@ -211,76 +205,20 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
return hdr_len;
}
-int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
-{
- struct rpcrdma_msg *rmsgp = NULL;
- struct rpcrdma_read_chunk *ch;
- struct rpcrdma_write_array *ary;
- u32 *va;
- u32 hdrlen;
-
- dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
- rqstp);
- rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
-
- /* Pull in the extra for the padded case and bump our pointer */
- if (rmsgp->rm_type == RDMA_MSGP) {
- va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
- rqstp->rq_arg.head[0].iov_base = va;
- hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
- rqstp->rq_arg.head[0].iov_len -= hdrlen;
- return hdrlen;
- }
-
- /*
- * Skip all chunks to find RPC msg. These were previously processed
- */
- va = &rmsgp->rm_body.rm_chunks[0];
-
- /* Skip read-list */
- for (ch = (struct rpcrdma_read_chunk *)va;
- ch->rc_discrim != xdr_zero; ch++);
- va = (u32 *)&ch->rc_position;
-
- /* Skip write-list */
- ary = (struct rpcrdma_write_array *)va;
- if (ary->wc_discrim == xdr_zero)
- va = (u32 *)&ary->wc_nchunks;
- else
- /*
- * rs_length is the 2nd 4B field in wc_target and taking its
- * address skips the list terminator
- */
- va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;
-
- /* Skip reply-array */
- ary = (struct rpcrdma_write_array *)va;
- if (ary->wc_discrim == xdr_zero)
- va = (u32 *)&ary->wc_nchunks;
- else
- va = (u32 *)&ary->wc_array[ary->wc_nchunks];
-
- rqstp->rq_arg.head[0].iov_base = va;
- hdrlen = (unsigned long)va - (unsigned long)rmsgp;
- rqstp->rq_arg.head[0].iov_len -= hdrlen;
-
- return hdrlen;
-}
-
int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rmsgp,
- enum rpcrdma_errcode err, u32 *va)
+ enum rpcrdma_errcode err, __be32 *va)
{
- u32 *startp = va;
+ __be32 *startp = va;
- *va++ = htonl(rmsgp->rm_xid);
- *va++ = htonl(rmsgp->rm_vers);
- *va++ = htonl(xprt->sc_max_requests);
- *va++ = htonl(RDMA_ERROR);
- *va++ = htonl(err);
+ *va++ = rmsgp->rm_xid;
+ *va++ = rmsgp->rm_vers;
+ *va++ = cpu_to_be32(xprt->sc_max_requests);
+ *va++ = rdma_error;
+ *va++ = cpu_to_be32(err);
if (err == ERR_VERS) {
- *va++ = htonl(RPCRDMA_VERSION);
- *va++ = htonl(RPCRDMA_VERSION);
+ *va++ = rpcrdma_version;
+ *va++ = rpcrdma_version;
}
return (int)((unsigned long)va - (unsigned long)startp);
@@ -297,7 +235,7 @@ int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
&rmsgp->rm_body.rm_chunks[1];
if (wr_ary->wc_discrim)
wr_ary = (struct rpcrdma_write_array *)
- &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)].
+ &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)].
wc_target.rs_length;
else
wr_ary = (struct rpcrdma_write_array *)
@@ -306,7 +244,7 @@ int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
/* skip reply array */
if (wr_ary->wc_discrim)
wr_ary = (struct rpcrdma_write_array *)
- &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)];
+ &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)];
else
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_nchunks;
@@ -325,7 +263,7 @@ void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
ary = (struct rpcrdma_write_array *)
&rmsgp->rm_body.rm_chunks[1];
ary->wc_discrim = xdr_one;
- ary->wc_nchunks = htonl(chunks);
+ ary->wc_nchunks = cpu_to_be32(chunks);
/* write-list terminator */
ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
@@ -338,7 +276,7 @@ void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
int chunks)
{
ary->wc_discrim = xdr_one;
- ary->wc_nchunks = htonl(chunks);
+ ary->wc_nchunks = cpu_to_be32(chunks);
}
void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
@@ -350,7 +288,7 @@ void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
seg->rs_handle = rs_handle;
seg->rs_offset = rs_offset;
- seg->rs_length = htonl(write_len);
+ seg->rs_length = cpu_to_be32(write_len);
}
void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
@@ -358,10 +296,10 @@ void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_resp,
enum rpcrdma_proc rdma_type)
{
- rdma_resp->rm_xid = htonl(rdma_argp->rm_xid);
- rdma_resp->rm_vers = htonl(rdma_argp->rm_vers);
- rdma_resp->rm_credit = htonl(xprt->sc_max_requests);
- rdma_resp->rm_type = htonl(rdma_type);
+ rdma_resp->rm_xid = rdma_argp->rm_xid;
+ rdma_resp->rm_vers = rdma_argp->rm_vers;
+ rdma_resp->rm_credit = cpu_to_be32(xprt->sc_max_requests);
+ rdma_resp->rm_type = cpu_to_be32(rdma_type);
/* Encode <nul> chunks lists */
rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index f9f13a32d..2e1348bde 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -85,7 +85,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
/* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
- if (be32_to_cpu(rmsgp->rm_type) == RDMA_NOMSG)
+ if (rmsgp->rm_type == rdma_nomsg)
rqstp->rq_arg.pages = &rqstp->rq_pages[0];
else
rqstp->rq_arg.pages = &rqstp->rq_pages[1];
@@ -117,8 +117,8 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
{
- if (rdma_node_get_transport(xprt->sc_cm_id->device->node_type) ==
- RDMA_TRANSPORT_IWARP)
+ if (!rdma_cap_read_multi_sge(xprt->sc_cm_id->device,
+ xprt->sc_cm_id->port_num))
return 1;
else
return min_t(int, sge_count, xprt->sc_max_sge);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 7de33d1af..d25cd430f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -240,6 +240,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
u32 xdr_off;
int chunk_off;
int chunk_no;
+ int nchunks;
struct rpcrdma_write_array *arg_ary;
struct rpcrdma_write_array *res_ary;
int ret;
@@ -251,14 +252,15 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
&rdma_resp->rm_body.rm_chunks[1];
/* Write chunks start at the pagelist */
+ nchunks = be32_to_cpu(arg_ary->wc_nchunks);
for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
- xfer_len && chunk_no < arg_ary->wc_nchunks;
+ xfer_len && chunk_no < nchunks;
chunk_no++) {
struct rpcrdma_segment *arg_ch;
u64 rs_offset;
arg_ch = &arg_ary->wc_array[chunk_no].wc_target;
- write_len = min(xfer_len, ntohl(arg_ch->rs_length));
+ write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length));
/* Prepare the response chunk given the length actually
* written */
@@ -270,7 +272,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
chunk_off = 0;
while (write_len) {
ret = send_write(xprt, rqstp,
- ntohl(arg_ch->rs_handle),
+ be32_to_cpu(arg_ch->rs_handle),
rs_offset + chunk_off,
xdr_off,
write_len,
@@ -318,13 +320,13 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
&rdma_resp->rm_body.rm_chunks[2];
/* xdr offset starts at RPC message */
- nchunks = ntohl(arg_ary->wc_nchunks);
+ nchunks = be32_to_cpu(arg_ary->wc_nchunks);
for (xdr_off = 0, chunk_no = 0;
xfer_len && chunk_no < nchunks;
chunk_no++) {
u64 rs_offset;
ch = &arg_ary->wc_array[chunk_no].wc_target;
- write_len = min(xfer_len, htonl(ch->rs_length));
+ write_len = min(xfer_len, be32_to_cpu(ch->rs_length));
/* Prepare the reply chunk given the length actually
* written */
@@ -335,7 +337,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
chunk_off = 0;
while (write_len) {
ret = send_write(xprt, rqstp,
- ntohl(ch->rs_handle),
+ be32_to_cpu(ch->rs_handle),
rs_offset + chunk_off,
xdr_off,
write_len,
@@ -515,7 +517,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
inline_bytes = rqstp->rq_res.len;
/* Create the RDMA response header */
- res_page = svc_rdma_get_page();
+ res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
rdma_resp = page_address(res_page);
reply_ary = svc_rdma_get_reply_array(rdma_argp);
if (reply_ary)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index f609c1c2d..6b36279e4 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = {
.xcl_name = "rdma",
.xcl_owner = THIS_MODULE,
.xcl_ops = &svc_rdma_ops,
- .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA,
+ .xcl_max_payload = RPCRDMA_MAXPAYLOAD,
.xcl_ident = XPRT_TRANSPORT_RDMA,
};
@@ -99,12 +99,8 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
{
struct svc_rdma_op_ctxt *ctxt;
- while (1) {
- ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL);
- if (ctxt)
- break;
- schedule_timeout_uninterruptible(msecs_to_jiffies(500));
- }
+ ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep,
+ GFP_KERNEL | __GFP_NOFAIL);
ctxt->xprt = xprt;
INIT_LIST_HEAD(&ctxt->dto_q);
ctxt->count = 0;
@@ -156,12 +152,8 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
struct svc_rdma_req_map *svc_rdma_get_req_map(void)
{
struct svc_rdma_req_map *map;
- while (1) {
- map = kmem_cache_alloc(svc_rdma_map_cachep, GFP_KERNEL);
- if (map)
- break;
- schedule_timeout_uninterruptible(msecs_to_jiffies(500));
- }
+ map = kmem_cache_alloc(svc_rdma_map_cachep,
+ GFP_KERNEL | __GFP_NOFAIL);
map->count = 0;
return map;
}
@@ -175,8 +167,8 @@ void svc_rdma_put_req_map(struct svc_rdma_req_map *map)
static void cq_event_handler(struct ib_event *event, void *context)
{
struct svc_xprt *xprt = context;
- dprintk("svcrdma: received CQ event id=%d, context=%p\n",
- event->event, context);
+ dprintk("svcrdma: received CQ event %s (%d), context=%p\n",
+ ib_event_msg(event->event), event->event, context);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
}
@@ -191,8 +183,9 @@ static void qp_event_handler(struct ib_event *event, void *context)
case IB_EVENT_COMM_EST:
case IB_EVENT_SQ_DRAINED:
case IB_EVENT_QP_LAST_WQE_REACHED:
- dprintk("svcrdma: QP event %d received for QP=%p\n",
- event->event, event->element.qp);
+ dprintk("svcrdma: QP event %s (%d) received for QP=%p\n",
+ ib_event_msg(event->event), event->event,
+ event->element.qp);
break;
/* These are considered fatal events */
case IB_EVENT_PATH_MIG_ERR:
@@ -201,9 +194,10 @@ static void qp_event_handler(struct ib_event *event, void *context)
case IB_EVENT_QP_ACCESS_ERR:
case IB_EVENT_DEVICE_FATAL:
default:
- dprintk("svcrdma: QP ERROR event %d received for QP=%p, "
+ dprintk("svcrdma: QP ERROR event %s (%d) received for QP=%p, "
"closing transport\n",
- event->event, event->element.qp);
+ ib_event_msg(event->event), event->event,
+ event->element.qp);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
break;
}
@@ -402,7 +396,8 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
for (i = 0; i < ret; i++) {
wc = &wc_a[i];
if (wc->status != IB_WC_SUCCESS) {
- dprintk("svcrdma: sq wc err status %d\n",
+ dprintk("svcrdma: sq wc err status %s (%d)\n",
+ ib_wc_status_msg(wc->status),
wc->status);
/* Close the transport */
@@ -490,18 +485,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
return cma_xprt;
}
-struct page *svc_rdma_get_page(void)
-{
- struct page *page;
-
- while ((page = alloc_page(GFP_KERNEL)) == NULL) {
- /* If we can't get memory, wait a bit and try again */
- printk(KERN_INFO "svcrdma: out of memory...retrying in 1s\n");
- schedule_timeout_uninterruptible(msecs_to_jiffies(1000));
- }
- return page;
-}
-
int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
{
struct ib_recv_wr recv_wr, *bad_recv_wr;
@@ -520,7 +503,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
pr_err("svcrdma: Too many sges (%d)\n", sge_no);
goto err_put_ctxt;
}
- page = svc_rdma_get_page();
+ page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
ctxt->pages[sge_no] = page;
pa = ib_dma_map_page(xprt->sc_cm_id->device,
page, 0, PAGE_SIZE,
@@ -616,7 +599,8 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
switch (event->event) {
case RDMA_CM_EVENT_CONNECT_REQUEST:
dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
- "event=%d\n", cma_id, cma_id->context, event->event);
+ "event = %s (%d)\n", cma_id, cma_id->context,
+ rdma_event_msg(event->event), event->event);
handle_connect_req(cma_id,
event->param.conn.initiator_depth);
break;
@@ -636,7 +620,8 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
default:
dprintk("svcrdma: Unexpected event on listening endpoint %p, "
- "event=%d\n", cma_id, event->event);
+ "event = %s (%d)\n", cma_id,
+ rdma_event_msg(event->event), event->event);
break;
}
@@ -669,7 +654,8 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, "
- "event=%d\n", cma_id, xprt, event->event);
+ "event = %s (%d)\n", cma_id, xprt,
+ rdma_event_msg(event->event), event->event);
if (xprt) {
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
@@ -677,7 +663,8 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
break;
default:
dprintk("svcrdma: Unexpected event on DTO endpoint %p, "
- "event=%d\n", cma_id, event->event);
+ "event = %s (%d)\n", cma_id,
+ rdma_event_msg(event->event), event->event);
break;
}
return 0;
@@ -848,10 +835,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct svcxprt_rdma *listen_rdma;
struct svcxprt_rdma *newxprt = NULL;
struct rdma_conn_param conn_param;
+ struct ib_cq_init_attr cq_attr = {};
struct ib_qp_init_attr qp_attr;
struct ib_device_attr devattr;
int uninitialized_var(dma_mr_acc);
- int need_dma_mr;
+ int need_dma_mr = 0;
int ret;
int i;
@@ -900,22 +888,22 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk("svcrdma: error creating PD for connect request\n");
goto errout;
}
+ cq_attr.cqe = newxprt->sc_sq_depth;
newxprt->sc_sq_cq = ib_create_cq(newxprt->sc_cm_id->device,
sq_comp_handler,
cq_event_handler,
newxprt,
- newxprt->sc_sq_depth,
- 0);
+ &cq_attr);
if (IS_ERR(newxprt->sc_sq_cq)) {
dprintk("svcrdma: error creating SQ CQ for connect request\n");
goto errout;
}
+ cq_attr.cqe = newxprt->sc_max_requests;
newxprt->sc_rq_cq = ib_create_cq(newxprt->sc_cm_id->device,
rq_comp_handler,
cq_event_handler,
newxprt,
- newxprt->sc_max_requests,
- 0);
+ &cq_attr);
if (IS_ERR(newxprt->sc_rq_cq)) {
dprintk("svcrdma: error creating RQ CQ for connect request\n");
goto errout;
@@ -985,35 +973,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
/*
* Determine if a DMA MR is required and if so, what privs are required
*/
- switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) {
- case RDMA_TRANSPORT_IWARP:
- newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
- if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
- need_dma_mr = 1;
- dma_mr_acc =
- (IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE);
- } else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
- need_dma_mr = 1;
- dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
- } else
- need_dma_mr = 0;
- break;
- case RDMA_TRANSPORT_IB:
- if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
- need_dma_mr = 1;
- dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
- } else if (!(devattr.device_cap_flags &
- IB_DEVICE_LOCAL_DMA_LKEY)) {
- need_dma_mr = 1;
- dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
- } else
- need_dma_mr = 0;
- break;
- default:
+ if (!rdma_protocol_iwarp(newxprt->sc_cm_id->device,
+ newxprt->sc_cm_id->port_num) &&
+ !rdma_ib_or_roce(newxprt->sc_cm_id->device,
+ newxprt->sc_cm_id->port_num))
goto errout;
+
+ if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) ||
+ !(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+ need_dma_mr = 1;
+ dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+ if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
+ newxprt->sc_cm_id->port_num) &&
+ !(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG))
+ dma_mr_acc |= IB_ACCESS_REMOTE_WRITE;
}
+ if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
+ newxprt->sc_cm_id->port_num))
+ newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
+
/* Create the DMA MR if needed, otherwise, use the DMA LKEY */
if (need_dma_mr) {
/* Register all of physical memory */
@@ -1319,11 +1298,11 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
struct ib_send_wr err_wr;
struct page *p;
struct svc_rdma_op_ctxt *ctxt;
- u32 *va;
+ __be32 *va;
int length;
int ret;
- p = svc_rdma_get_page();
+ p = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
va = page_address(p);
/* XDR encode error */
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 54f23b1be..680f888a9 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -48,7 +48,6 @@
*/
#include <linux/module.h>
-#include <linux/init.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/sunrpc/addr.h>
@@ -59,11 +58,6 @@
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
-MODULE_LICENSE("Dual BSD/GPL");
-
-MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS");
-MODULE_AUTHOR("Network Appliance, Inc.");
-
/*
* tunables
*/
@@ -246,6 +240,16 @@ xprt_rdma_connect_worker(struct work_struct *work)
xprt_clear_connecting(xprt);
}
+static void
+xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
+{
+ struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
+ rx_xprt);
+
+ pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt);
+ rdma_disconnect(r_xprt->rx_ia.ri_id);
+}
+
/*
* xprt_rdma_destroy
*
@@ -618,12 +622,6 @@ xprt_rdma_send_request(struct rpc_task *task)
if (req->rl_reply == NULL) /* e.g. reconnection */
rpcrdma_recv_buffer_get(req);
- if (req->rl_reply) {
- req->rl_reply->rr_func = rpcrdma_reply_handler;
- /* this need only be done once, but... */
- req->rl_reply->rr_xprt = xprt;
- }
-
/* Must suppress retransmit to maintain credits */
if (req->rl_connect_cookie == xprt->connect_cookie)
goto drop_connection;
@@ -682,6 +680,17 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
r_xprt->rx_stats.bad_reply_count);
}
+static int
+xprt_rdma_enable_swap(struct rpc_xprt *xprt)
+{
+ return -EINVAL;
+}
+
+static void
+xprt_rdma_disable_swap(struct rpc_xprt *xprt)
+{
+}
+
/*
* Plumbing for rpc transport switch and kernel module
*/
@@ -700,7 +709,10 @@ static struct rpc_xprt_ops xprt_rdma_procs = {
.send_request = xprt_rdma_send_request,
.close = xprt_rdma_close,
.destroy = xprt_rdma_destroy,
- .print_stats = xprt_rdma_print_stats
+ .print_stats = xprt_rdma_print_stats,
+ .enable_swap = xprt_rdma_enable_swap,
+ .disable_swap = xprt_rdma_disable_swap,
+ .inject_disconnect = xprt_rdma_inject_disconnect
};
static struct xprt_class xprt_rdma = {
@@ -711,7 +723,7 @@ static struct xprt_class xprt_rdma = {
.setup = xprt_setup_rdma,
};
-static void __exit xprt_rdma_cleanup(void)
+void xprt_rdma_cleanup(void)
{
int rc;
@@ -726,17 +738,24 @@ static void __exit xprt_rdma_cleanup(void)
if (rc)
dprintk("RPC: %s: xprt_unregister returned %i\n",
__func__, rc);
+
+ frwr_destroy_recovery_wq();
}
-static int __init xprt_rdma_init(void)
+int xprt_rdma_init(void)
{
int rc;
- rc = xprt_register_transport(&xprt_rdma);
-
+ rc = frwr_alloc_recovery_wq();
if (rc)
return rc;
+ rc = xprt_register_transport(&xprt_rdma);
+ if (rc) {
+ frwr_destroy_recovery_wq();
+ return rc;
+ }
+
dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
dprintk("Defaults:\n");
@@ -753,6 +772,3 @@ static int __init xprt_rdma_init(void)
#endif
return 0;
}
-
-module_init(xprt_rdma_init);
-module_exit(xprt_rdma_cleanup);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 4870d272e..891c4ede2 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -80,7 +80,6 @@ static void
rpcrdma_run_tasklet(unsigned long data)
{
struct rpcrdma_rep *rep;
- void (*func)(struct rpcrdma_rep *);
unsigned long flags;
data = data;
@@ -89,14 +88,9 @@ rpcrdma_run_tasklet(unsigned long data)
rep = list_entry(rpcrdma_tasklets_g.next,
struct rpcrdma_rep, rr_list);
list_del(&rep->rr_list);
- func = rep->rr_func;
- rep->rr_func = NULL;
spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
- if (func)
- func(rep);
- else
- rpcrdma_recv_buffer_put(rep);
+ rpcrdma_reply_handler(rep);
spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
}
@@ -105,32 +99,6 @@ rpcrdma_run_tasklet(unsigned long data)
static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
-static const char * const async_event[] = {
- "CQ error",
- "QP fatal error",
- "QP request error",
- "QP access error",
- "communication established",
- "send queue drained",
- "path migration successful",
- "path mig error",
- "device fatal error",
- "port active",
- "port error",
- "LID change",
- "P_key change",
- "SM change",
- "SRQ error",
- "SRQ limit reached",
- "last WQE reached",
- "client reregister",
- "GID change",
-};
-
-#define ASYNC_MSG(status) \
- ((status) < ARRAY_SIZE(async_event) ? \
- async_event[(status)] : "unknown async error")
-
static void
rpcrdma_schedule_tasklet(struct list_head *sched_list)
{
@@ -148,7 +116,7 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
struct rpcrdma_ep *ep = context;
pr_err("RPC: %s: %s on device %s ep %p\n",
- __func__, ASYNC_MSG(event->event),
+ __func__, ib_event_msg(event->event),
event->device->name, context);
if (ep->rep_connected == 1) {
ep->rep_connected = -EIO;
@@ -163,7 +131,7 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
struct rpcrdma_ep *ep = context;
pr_err("RPC: %s: %s on device %s ep %p\n",
- __func__, ASYNC_MSG(event->event),
+ __func__, ib_event_msg(event->event),
event->device->name, context);
if (ep->rep_connected == 1) {
ep->rep_connected = -EIO;
@@ -172,35 +140,6 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
}
}
-static const char * const wc_status[] = {
- "success",
- "local length error",
- "local QP operation error",
- "local EE context operation error",
- "local protection error",
- "WR flushed",
- "memory management operation error",
- "bad response error",
- "local access error",
- "remote invalid request error",
- "remote access error",
- "remote operation error",
- "transport retry counter exceeded",
- "RNR retry counter exceeded",
- "local RDD violation error",
- "remove invalid RD request",
- "operation aborted",
- "invalid EE context number",
- "invalid EE context state",
- "fatal error",
- "response timeout error",
- "general error",
-};
-
-#define COMPLETION_MSG(status) \
- ((status) < ARRAY_SIZE(wc_status) ? \
- wc_status[(status)] : "unexpected completion error")
-
static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
@@ -209,7 +148,7 @@ rpcrdma_sendcq_process_wc(struct ib_wc *wc)
if (wc->status != IB_WC_SUCCESS &&
wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("RPC: %s: SEND: %s\n",
- __func__, COMPLETION_MSG(wc->status));
+ __func__, ib_wc_status_msg(wc->status));
} else {
struct rpcrdma_mw *r;
@@ -291,7 +230,7 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
__func__, rep, wc->byte_len);
rep->rr_len = wc->byte_len;
- ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
+ ib_dma_sync_single_for_cpu(rep->rr_device,
rdmab_addr(rep->rr_rdmabuf),
rep->rr_len, DMA_FROM_DEVICE);
prefetch(rdmab_to_msg(rep->rr_rdmabuf));
@@ -302,7 +241,7 @@ out_schedule:
out_fail:
if (wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("RPC: %s: rep %p: %s\n",
- __func__, rep, COMPLETION_MSG(wc->status));
+ __func__, rep, ib_wc_status_msg(wc->status));
rep->rr_len = ~0U;
goto out_schedule;
}
@@ -386,31 +325,6 @@ rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
rpcrdma_sendcq_process_wc(&wc);
}
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-static const char * const conn[] = {
- "address resolved",
- "address error",
- "route resolved",
- "route error",
- "connect request",
- "connect response",
- "connect error",
- "unreachable",
- "rejected",
- "established",
- "disconnected",
- "device removal",
- "multicast join",
- "multicast error",
- "address change",
- "timewait exit",
-};
-
-#define CONNECTION_MSG(status) \
- ((status) < ARRAY_SIZE(conn) ? \
- conn[(status)] : "unrecognized connection error")
-#endif
-
static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
@@ -476,7 +390,7 @@ connected:
default:
dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n",
__func__, sap, rpc_get_port(sap), ep,
- CONNECTION_MSG(event->event));
+ rdma_event_msg(event->event));
break;
}
@@ -487,7 +401,7 @@ connected:
pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
sap, rpc_get_port(sap),
- ia->ri_id->device->name,
+ ia->ri_device->name,
ia->ri_ops->ro_displayname,
xprt->rx_buf.rb_max_requests,
ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
@@ -588,8 +502,9 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
rc = PTR_ERR(ia->ri_id);
goto out1;
}
+ ia->ri_device = ia->ri_id->device;
- ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
+ ia->ri_pd = ib_alloc_pd(ia->ri_device);
if (IS_ERR(ia->ri_pd)) {
rc = PTR_ERR(ia->ri_pd);
dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
@@ -597,7 +512,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
goto out2;
}
- rc = ib_query_device(ia->ri_id->device, devattr);
+ rc = ib_query_device(ia->ri_device, devattr);
if (rc) {
dprintk("RPC: %s: ib_query_device failed %d\n",
__func__, rc);
@@ -606,7 +521,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
ia->ri_have_dma_lkey = 1;
- ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
+ ia->ri_dma_lkey = ia->ri_device->local_dma_lkey;
}
if (memreg == RPCRDMA_FRMR) {
@@ -621,7 +536,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
}
}
if (memreg == RPCRDMA_MTHCAFMR) {
- if (!ia->ri_id->device->alloc_fmr) {
+ if (!ia->ri_device->alloc_fmr) {
dprintk("RPC: %s: MTHCAFMR registration "
"not supported by HCA\n", __func__);
memreg = RPCRDMA_ALLPHYSICAL;
@@ -670,9 +585,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
dprintk("RPC: %s: memory registration strategy is '%s'\n",
__func__, ia->ri_ops->ro_displayname);
- /* Else will do memory reg/dereg for each chunk */
- ia->ri_memreg_strategy = memreg;
-
rwlock_init(&ia->ri_qplock);
return 0;
@@ -702,17 +614,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
dprintk("RPC: %s: ib_dereg_mr returned %i\n",
__func__, rc);
}
+
if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
if (ia->ri_id->qp)
rdma_destroy_qp(ia->ri_id);
rdma_destroy_id(ia->ri_id);
ia->ri_id = NULL;
}
- if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
- rc = ib_dealloc_pd(ia->ri_pd);
- dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
- __func__, rc);
- }
+
+ /* If the pd is still busy, xprtrdma missed freeing a resource */
+ if (ia->ri_pd && !IS_ERR(ia->ri_pd))
+ WARN_ON(ib_dealloc_pd(ia->ri_pd));
}
/*
@@ -724,6 +636,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
{
struct ib_device_attr *devattr = &ia->ri_devattr;
struct ib_cq *sendcq, *recvcq;
+ struct ib_cq_init_attr cq_attr = {};
int rc, err;
/* check provider's send/recv wr limits */
@@ -771,9 +684,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
init_waitqueue_head(&ep->rep_connect_wait);
INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
- sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
- rpcrdma_cq_async_error_upcall, ep,
- ep->rep_attr.cap.max_send_wr + 1, 0);
+ cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1;
+ sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall,
+ rpcrdma_cq_async_error_upcall, ep, &cq_attr);
if (IS_ERR(sendcq)) {
rc = PTR_ERR(sendcq);
dprintk("RPC: %s: failed to create send CQ: %i\n",
@@ -788,9 +701,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
goto out2;
}
- recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
- rpcrdma_cq_async_error_upcall, ep,
- ep->rep_attr.cap.max_recv_wr + 1, 0);
+ cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1;
+ recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall,
+ rpcrdma_cq_async_error_upcall, ep, &cq_attr);
if (IS_ERR(recvcq)) {
rc = PTR_ERR(recvcq);
dprintk("RPC: %s: failed to create recv CQ: %i\n",
@@ -896,8 +809,6 @@ retry:
rpcrdma_flush_cqs(ep);
xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
- ia->ri_ops->ro_reset(xprt);
-
id = rpcrdma_create_id(xprt, ia,
(struct sockaddr *)&xprt->rx_data.addr);
if (IS_ERR(id)) {
@@ -911,7 +822,7 @@ retry:
* More stuff I haven't thought of!
* Rrrgh!
*/
- if (ia->ri_id->device != id->device) {
+ if (ia->ri_device != id->device) {
printk("RPC: %s: can't reconnect on "
"different device!\n", __func__);
rdma_destroy_id(id);
@@ -1053,7 +964,8 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
goto out_free;
}
- rep->rr_buffer = &r_xprt->rx_buf;
+ rep->rr_device = ia->ri_device;
+ rep->rr_rxprt = r_xprt;
return rep;
out_free:
@@ -1177,31 +1089,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
kfree(buf->rb_pool);
}
-/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
- * some req segments uninitialized.
- */
-static void
-rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
+struct rpcrdma_mw *
+rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
{
- if (*mw) {
- list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
- *mw = NULL;
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ struct rpcrdma_mw *mw = NULL;
+
+ spin_lock(&buf->rb_mwlock);
+ if (!list_empty(&buf->rb_mws)) {
+ mw = list_first_entry(&buf->rb_mws,
+ struct rpcrdma_mw, mw_list);
+ list_del_init(&mw->mw_list);
}
+ spin_unlock(&buf->rb_mwlock);
+
+ if (!mw)
+ pr_err("RPC: %s: no MWs available\n", __func__);
+ return mw;
}
-/* Cycle mw's back in reverse order, and "spin" them.
- * This delays and scrambles reuse as much as possible.
- */
-static void
-rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
+void
+rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
{
- struct rpcrdma_mr_seg *seg = req->rl_segments;
- struct rpcrdma_mr_seg *seg1 = seg;
- int i;
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
- for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
- rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
- rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
+ spin_lock(&buf->rb_mwlock);
+ list_add_tail(&mw->mw_list, &buf->rb_mws);
+ spin_unlock(&buf->rb_mwlock);
}
static void
@@ -1211,115 +1125,10 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
req->rl_niovs = 0;
if (req->rl_reply) {
buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
- req->rl_reply->rr_func = NULL;
req->rl_reply = NULL;
}
}
-/* rpcrdma_unmap_one() was already done during deregistration.
- * Redo only the ib_post_send().
- */
-static void
-rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
-{
- struct rpcrdma_xprt *r_xprt =
- container_of(ia, struct rpcrdma_xprt, rx_ia);
- struct ib_send_wr invalidate_wr, *bad_wr;
- int rc;
-
- dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);
-
- /* When this FRMR is re-inserted into rb_mws, it is no longer stale */
- r->r.frmr.fr_state = FRMR_IS_INVALID;
-
- memset(&invalidate_wr, 0, sizeof(invalidate_wr));
- invalidate_wr.wr_id = (unsigned long)(void *)r;
- invalidate_wr.opcode = IB_WR_LOCAL_INV;
- invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
- DECR_CQCOUNT(&r_xprt->rx_ep);
-
- dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
- __func__, r, r->r.frmr.fr_mr->rkey);
-
- read_lock(&ia->ri_qplock);
- rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
- read_unlock(&ia->ri_qplock);
- if (rc) {
- /* Force rpcrdma_buffer_get() to retry */
- r->r.frmr.fr_state = FRMR_IS_STALE;
- dprintk("RPC: %s: ib_post_send failed, %i\n",
- __func__, rc);
- }
-}
-
-static void
-rpcrdma_retry_flushed_linv(struct list_head *stale,
- struct rpcrdma_buffer *buf)
-{
- struct rpcrdma_ia *ia = rdmab_to_ia(buf);
- struct list_head *pos;
- struct rpcrdma_mw *r;
- unsigned long flags;
-
- list_for_each(pos, stale) {
- r = list_entry(pos, struct rpcrdma_mw, mw_list);
- rpcrdma_retry_local_inv(r, ia);
- }
-
- spin_lock_irqsave(&buf->rb_lock, flags);
- list_splice_tail(stale, &buf->rb_mws);
- spin_unlock_irqrestore(&buf->rb_lock, flags);
-}
-
-static struct rpcrdma_req *
-rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
- struct list_head *stale)
-{
- struct rpcrdma_mw *r;
- int i;
-
- i = RPCRDMA_MAX_SEGS - 1;
- while (!list_empty(&buf->rb_mws)) {
- r = list_entry(buf->rb_mws.next,
- struct rpcrdma_mw, mw_list);
- list_del(&r->mw_list);
- if (r->r.frmr.fr_state == FRMR_IS_STALE) {
- list_add(&r->mw_list, stale);
- continue;
- }
- req->rl_segments[i].rl_mw = r;
- if (unlikely(i-- == 0))
- return req; /* Success */
- }
-
- /* Not enough entries on rb_mws for this req */
- rpcrdma_buffer_put_sendbuf(req, buf);
- rpcrdma_buffer_put_mrs(req, buf);
- return NULL;
-}
-
-static struct rpcrdma_req *
-rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
-{
- struct rpcrdma_mw *r;
- int i;
-
- i = RPCRDMA_MAX_SEGS - 1;
- while (!list_empty(&buf->rb_mws)) {
- r = list_entry(buf->rb_mws.next,
- struct rpcrdma_mw, mw_list);
- list_del(&r->mw_list);
- req->rl_segments[i].rl_mw = r;
- if (unlikely(i-- == 0))
- return req; /* Success */
- }
-
- /* Not enough entries on rb_mws for this req */
- rpcrdma_buffer_put_sendbuf(req, buf);
- rpcrdma_buffer_put_mrs(req, buf);
- return NULL;
-}
-
/*
* Get a set of request/reply buffers.
*
@@ -1332,12 +1141,11 @@ rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
- struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
- struct list_head stale;
struct rpcrdma_req *req;
unsigned long flags;
spin_lock_irqsave(&buffers->rb_lock, flags);
+
if (buffers->rb_send_index == buffers->rb_max_requests) {
spin_unlock_irqrestore(&buffers->rb_lock, flags);
dprintk("RPC: %s: out of request buffers\n", __func__);
@@ -1356,20 +1164,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
}
buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
- INIT_LIST_HEAD(&stale);
- switch (ia->ri_memreg_strategy) {
- case RPCRDMA_FRMR:
- req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
- break;
- case RPCRDMA_MTHCAFMR:
- req = rpcrdma_buffer_get_fmrs(req, buffers);
- break;
- default:
- break;
- }
spin_unlock_irqrestore(&buffers->rb_lock, flags);
- if (!list_empty(&stale))
- rpcrdma_retry_flushed_linv(&stale, buffers);
return req;
}
@@ -1381,19 +1176,10 @@ void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
struct rpcrdma_buffer *buffers = req->rl_buffer;
- struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
unsigned long flags;
spin_lock_irqsave(&buffers->rb_lock, flags);
rpcrdma_buffer_put_sendbuf(req, buffers);
- switch (ia->ri_memreg_strategy) {
- case RPCRDMA_FRMR:
- case RPCRDMA_MTHCAFMR:
- rpcrdma_buffer_put_mrs(req, buffers);
- break;
- default:
- break;
- }
spin_unlock_irqrestore(&buffers->rb_lock, flags);
}
@@ -1423,10 +1209,9 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
- struct rpcrdma_buffer *buffers = rep->rr_buffer;
+ struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
unsigned long flags;
- rep->rr_func = NULL;
spin_lock_irqsave(&buffers->rb_lock, flags);
buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
spin_unlock_irqrestore(&buffers->rb_lock, flags);
@@ -1455,9 +1240,9 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
/*
* All memory passed here was kmalloc'ed, therefore phys-contiguous.
*/
- iov->addr = ib_dma_map_single(ia->ri_id->device,
+ iov->addr = ib_dma_map_single(ia->ri_device,
va, len, DMA_BIDIRECTIONAL);
- if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
+ if (ib_dma_mapping_error(ia->ri_device, iov->addr))
return -ENOMEM;
iov->length = len;
@@ -1501,8 +1286,8 @@ rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
{
int rc;
- ib_dma_unmap_single(ia->ri_id->device,
- iov->addr, iov->length, DMA_BIDIRECTIONAL);
+ ib_dma_unmap_single(ia->ri_device,
+ iov->addr, iov->length, DMA_BIDIRECTIONAL);
if (NULL == mr)
return 0;
@@ -1595,15 +1380,18 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
send_wr.num_sge = req->rl_niovs;
send_wr.opcode = IB_WR_SEND;
if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
- ib_dma_sync_single_for_device(ia->ri_id->device,
- req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
- DMA_TO_DEVICE);
- ib_dma_sync_single_for_device(ia->ri_id->device,
- req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
- DMA_TO_DEVICE);
- ib_dma_sync_single_for_device(ia->ri_id->device,
- req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
- DMA_TO_DEVICE);
+ ib_dma_sync_single_for_device(ia->ri_device,
+ req->rl_send_iov[3].addr,
+ req->rl_send_iov[3].length,
+ DMA_TO_DEVICE);
+ ib_dma_sync_single_for_device(ia->ri_device,
+ req->rl_send_iov[1].addr,
+ req->rl_send_iov[1].length,
+ DMA_TO_DEVICE);
+ ib_dma_sync_single_for_device(ia->ri_device,
+ req->rl_send_iov[0].addr,
+ req->rl_send_iov[0].length,
+ DMA_TO_DEVICE);
if (DECR_CQCOUNT(ep) > 0)
send_wr.send_flags = 0;
@@ -1636,7 +1424,7 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
recv_wr.num_sge = 1;
- ib_dma_sync_single_for_cpu(ia->ri_id->device,
+ ib_dma_sync_single_for_cpu(ia->ri_device,
rdmab_addr(rep->rr_rdmabuf),
rdmab_length(rep->rr_rdmabuf),
DMA_BIDIRECTIONAL);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 78e0b8bea..f49dd8b38 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -62,6 +62,7 @@
struct rpcrdma_ia {
const struct rpcrdma_memreg_ops *ri_ops;
rwlock_t ri_qplock;
+ struct ib_device *ri_device;
struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd;
struct ib_mr *ri_bind_mem;
@@ -69,7 +70,6 @@ struct rpcrdma_ia {
int ri_have_dma_lkey;
struct completion ri_done;
int ri_async_rc;
- enum rpcrdma_memreg ri_memreg_strategy;
unsigned int ri_max_frmr_depth;
struct ib_device_attr ri_devattr;
struct ib_qp_attr ri_qp_attr;
@@ -173,9 +173,8 @@ struct rpcrdma_buffer;
struct rpcrdma_rep {
unsigned int rr_len;
- struct rpcrdma_buffer *rr_buffer;
- struct rpc_xprt *rr_xprt;
- void (*rr_func)(struct rpcrdma_rep *);
+ struct ib_device *rr_device;
+ struct rpcrdma_xprt *rr_rxprt;
struct list_head rr_list;
struct rpcrdma_regbuf *rr_rdmabuf;
};
@@ -203,11 +202,18 @@ struct rpcrdma_frmr {
struct ib_fast_reg_page_list *fr_pgl;
struct ib_mr *fr_mr;
enum rpcrdma_frmr_state fr_state;
+ struct work_struct fr_work;
+ struct rpcrdma_xprt *fr_xprt;
+};
+
+struct rpcrdma_fmr {
+ struct ib_fmr *fmr;
+ u64 *physaddrs;
};
struct rpcrdma_mw {
union {
- struct ib_fmr *fmr;
+ struct rpcrdma_fmr fmr;
struct rpcrdma_frmr frmr;
} r;
void (*mw_sendcompletion)(struct ib_wc *);
@@ -281,15 +287,17 @@ rpcr_to_rdmar(struct rpc_rqst *rqst)
* One of these is associated with a transport instance
*/
struct rpcrdma_buffer {
- spinlock_t rb_lock; /* protects indexes */
- u32 rb_max_requests;/* client max requests */
- struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */
- struct list_head rb_all;
- int rb_send_index;
+ spinlock_t rb_mwlock; /* protect rb_mws list */
+ struct list_head rb_mws;
+ struct list_head rb_all;
+ char *rb_pool;
+
+ spinlock_t rb_lock; /* protect buf arrays */
+ u32 rb_max_requests;
+ int rb_send_index;
+ int rb_recv_index;
struct rpcrdma_req **rb_send_bufs;
- int rb_recv_index;
struct rpcrdma_rep **rb_recv_bufs;
- char *rb_pool;
};
#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
@@ -350,7 +358,6 @@ struct rpcrdma_memreg_ops {
struct rpcrdma_create_data_internal *);
size_t (*ro_maxpages)(struct rpcrdma_xprt *);
int (*ro_init)(struct rpcrdma_xprt *);
- void (*ro_reset)(struct rpcrdma_xprt *);
void (*ro_destroy)(struct rpcrdma_buffer *);
const char *ro_displayname;
};
@@ -413,6 +420,8 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
+struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
+void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
@@ -425,6 +434,9 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *,
unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
+int frwr_alloc_recovery_wq(void);
+void frwr_destroy_recovery_wq(void);
+
/*
* Wrappers for chunk registration, shared by read/write chunk code.
*/
@@ -480,6 +492,11 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
*/
int rpcrdma_marshal_req(struct rpc_rqst *);
+/* RPC/RDMA module init - xprtrdma/transport.c
+ */
+int xprt_rdma_init(void);
+void xprt_rdma_cleanup(void);
+
/* Temporary NFS request map cache. Created in svc_rdma.c */
extern struct kmem_cache *svc_rdma_map_cachep;
/* WR context cache. Created in svc_rdma.c */
@@ -487,10 +504,4 @@ extern struct kmem_cache *svc_rdma_ctxt_cachep;
/* Workqueue created in svc_rdma.c */
extern struct workqueue_struct *svc_rdma_wq;
-#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
-#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
-#else
-#define RPCSVC_MAXPAYLOAD_RDMA (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
-#endif
-
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 66891e32c..003037632 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -527,6 +527,10 @@ static int xs_local_send_request(struct rpc_task *task)
true, &sent);
dprintk("RPC: %s(%u) = %d\n",
__func__, xdr->len - req->rq_bytes_sent, status);
+
+ if (status == -EAGAIN && sock_writeable(transport->inet))
+ status = -ENOBUFS;
+
if (likely(sent > 0) || status == 0) {
req->rq_bytes_sent += sent;
req->rq_xmit_bytes_sent += sent;
@@ -539,6 +543,7 @@ static int xs_local_send_request(struct rpc_task *task)
switch (status) {
case -ENOBUFS:
+ break;
case -EAGAIN:
status = xs_nospace(task);
break;
@@ -589,6 +594,9 @@ static int xs_udp_send_request(struct rpc_task *task)
if (status == -EPERM)
goto process_status;
+ if (status == -EAGAIN && sock_writeable(transport->inet))
+ status = -ENOBUFS;
+
if (sent > 0 || status == 0) {
req->rq_xmit_bytes_sent += sent;
if (sent >= req->rq_slen)
@@ -623,24 +631,6 @@ process_status:
}
/**
- * xs_tcp_shutdown - gracefully shut down a TCP socket
- * @xprt: transport
- *
- * Initiates a graceful shutdown of the TCP socket by calling the
- * equivalent of shutdown(SHUT_RDWR);
- */
-static void xs_tcp_shutdown(struct rpc_xprt *xprt)
-{
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
- struct socket *sock = transport->sock;
-
- if (sock != NULL) {
- kernel_sock_shutdown(sock, SHUT_RDWR);
- trace_rpc_socket_shutdown(xprt, sock);
- }
-}
-
-/**
* xs_tcp_send_request - write an RPC request to a TCP socket
* @task: address of RPC task that manages the state of an RPC request
*
@@ -687,9 +677,6 @@ static int xs_tcp_send_request(struct rpc_task *task)
dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
xdr->len - req->rq_bytes_sent, status);
- if (unlikely(sent == 0 && status < 0))
- break;
-
/* If we've sent the entire packet, immediately
* reset the count of bytes sent. */
req->rq_bytes_sent += sent;
@@ -699,18 +686,21 @@ static int xs_tcp_send_request(struct rpc_task *task)
return 0;
}
- if (sent != 0)
- continue;
- status = -EAGAIN;
- break;
+ if (status < 0)
+ break;
+ if (sent == 0) {
+ status = -EAGAIN;
+ break;
+ }
}
+ if (status == -EAGAIN && sk_stream_is_writeable(transport->inet))
+ status = -ENOBUFS;
switch (status) {
case -ENOTSOCK:
status = -ENOTCONN;
/* Should we call xs_close() here? */
break;
- case -ENOBUFS:
case -EAGAIN:
status = xs_nospace(task);
break;
@@ -721,6 +711,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
case -ECONNREFUSED:
case -ENOTCONN:
case -EADDRINUSE:
+ case -ENOBUFS:
case -EPIPE:
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
}
@@ -786,6 +777,7 @@ static void xs_sock_mark_closed(struct rpc_xprt *xprt)
xs_sock_reset_connection_flags(xprt);
/* Mark transport as closed and wake up all pending tasks */
xprt_disconnect_done(xprt);
+ xprt_force_disconnect(xprt);
}
/**
@@ -827,6 +819,9 @@ static void xs_reset_transport(struct sock_xprt *transport)
if (sk == NULL)
return;
+ if (atomic_read(&transport->xprt.swapper))
+ sk_clear_memalloc(sk);
+
write_lock_bh(&sk->sk_callback_lock);
transport->inet = NULL;
transport->sock = NULL;
@@ -863,6 +858,13 @@ static void xs_close(struct rpc_xprt *xprt)
xprt_disconnect_done(xprt);
}
+static void xs_inject_disconnect(struct rpc_xprt *xprt)
+{
+ dprintk("RPC: injecting transport disconnect on xprt=%p\n",
+ xprt);
+ xprt_disconnect_done(xprt);
+}
+
static void xs_xprt_free(struct rpc_xprt *xprt)
{
xs_free_peer_addresses(xprt);
@@ -901,7 +903,6 @@ static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
/**
* xs_local_data_ready - "data ready" callback for AF_LOCAL sockets
* @sk: socket with data to read
- * @len: how much data to read
*
* Currently this assumes we can read the whole reply in a single gulp.
*/
@@ -965,7 +966,6 @@ static void xs_local_data_ready(struct sock *sk)
/**
* xs_udp_data_ready - "data ready" callback for UDP sockets
* @sk: socket with data to read
- * @len: how much data to read
*
*/
static void xs_udp_data_ready(struct sock *sk)
@@ -1389,7 +1389,6 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
/**
* xs_tcp_data_ready - "data ready" callback for TCP sockets
* @sk: socket with data to read
- * @bytes: how much data to read
*
*/
static void xs_tcp_data_ready(struct sock *sk)
@@ -1886,9 +1885,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
/**
* xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint
- * @xprt: RPC transport to connect
* @transport: socket transport to connect
- * @create_sock: function to create a socket of the correct type
*/
static int xs_local_setup_socket(struct sock_xprt *transport)
{
@@ -1960,43 +1957,84 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
msleep_interruptible(15000);
}
-#ifdef CONFIG_SUNRPC_SWAP
+#if IS_ENABLED(CONFIG_SUNRPC_SWAP)
+/*
+ * Note that this should be called with XPRT_LOCKED held (or when we otherwise
+ * know that we have exclusive access to the socket), to guard against
+ * races with xs_reset_transport.
+ */
static void xs_set_memalloc(struct rpc_xprt *xprt)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
xprt);
- if (xprt->swapper)
+ /*
+ * If there's no sock, then we have nothing to set. The
+ * reconnecting process will get it for us.
+ */
+ if (!transport->inet)
+ return;
+ if (atomic_read(&xprt->swapper))
sk_set_memalloc(transport->inet);
}
/**
- * xs_swapper - Tag this transport as being used for swap.
+ * xs_enable_swap - Tag this transport as being used for swap.
* @xprt: transport to tag
- * @enable: enable/disable
*
+ * Take a reference to this transport on behalf of the rpc_clnt, and
+ * optionally mark it for swapping if it wasn't already.
*/
-int xs_swapper(struct rpc_xprt *xprt, int enable)
+static int
+xs_enable_swap(struct rpc_xprt *xprt)
{
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
- xprt);
- int err = 0;
+ struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt);
- if (enable) {
- xprt->swapper++;
- xs_set_memalloc(xprt);
- } else if (xprt->swapper) {
- xprt->swapper--;
- sk_clear_memalloc(transport->inet);
- }
+ if (atomic_inc_return(&xprt->swapper) != 1)
+ return 0;
+ if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE))
+ return -ERESTARTSYS;
+ if (xs->inet)
+ sk_set_memalloc(xs->inet);
+ xprt_release_xprt(xprt, NULL);
+ return 0;
+}
- return err;
+/**
+ * xs_disable_swap - Untag this transport as being used for swap.
+ * @xprt: transport to tag
+ *
+ * Drop a "swapper" reference to this xprt on behalf of the rpc_clnt. If the
+ * swapper refcount goes to 0, untag the socket as a memalloc socket.
+ */
+static void
+xs_disable_swap(struct rpc_xprt *xprt)
+{
+ struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt);
+
+ if (!atomic_dec_and_test(&xprt->swapper))
+ return;
+ if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE))
+ return;
+ if (xs->inet)
+ sk_clear_memalloc(xs->inet);
+ xprt_release_xprt(xprt, NULL);
}
-EXPORT_SYMBOL_GPL(xs_swapper);
#else
static void xs_set_memalloc(struct rpc_xprt *xprt)
{
}
+
+static int
+xs_enable_swap(struct rpc_xprt *xprt)
+{
+ return -EINVAL;
+}
+
+static void
+xs_disable_swap(struct rpc_xprt *xprt)
+{
+}
#endif
static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
@@ -2057,6 +2095,27 @@ out:
xprt_wake_pending_tasks(xprt, status);
}
+/**
+ * xs_tcp_shutdown - gracefully shut down a TCP socket
+ * @xprt: transport
+ *
+ * Initiates a graceful shutdown of the TCP socket by calling the
+ * equivalent of shutdown(SHUT_RDWR);
+ */
+static void xs_tcp_shutdown(struct rpc_xprt *xprt)
+{
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ struct socket *sock = transport->sock;
+
+ if (sock == NULL)
+ return;
+ if (xprt_connected(xprt)) {
+ kernel_sock_shutdown(sock, SHUT_RDWR);
+ trace_rpc_socket_shutdown(xprt, sock);
+ } else
+ xs_reset_transport(transport);
+}
+
static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2067,6 +2126,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
unsigned int keepidle = xprt->timeout->to_initval / HZ;
unsigned int keepcnt = xprt->timeout->to_retries + 1;
unsigned int opt_on = 1;
+ unsigned int timeo;
/* TCP Keepalive options */
kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
@@ -2078,6 +2138,12 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
(char *)&keepcnt, sizeof(keepcnt));
+ /* TCP user timeout (see RFC5482) */
+ timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
+ (xprt->timeout->to_retries + 1);
+ kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
+ (char *)&timeo, sizeof(timeo));
+
write_lock_bh(&sk->sk_callback_lock);
xs_save_old_callbacks(transport, sk);
@@ -2125,9 +2191,6 @@ out:
/**
* xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
- * @xprt: RPC transport to connect
- * @transport: socket transport to connect
- * @create_sock: function to create a socket of the correct type
*
* Invoked by a work queue tasklet.
*/
@@ -2463,6 +2526,8 @@ static struct rpc_xprt_ops xs_local_ops = {
.close = xs_close,
.destroy = xs_destroy,
.print_stats = xs_local_print_stats,
+ .enable_swap = xs_enable_swap,
+ .disable_swap = xs_disable_swap,
};
static struct rpc_xprt_ops xs_udp_ops = {
@@ -2482,6 +2547,9 @@ static struct rpc_xprt_ops xs_udp_ops = {
.close = xs_close,
.destroy = xs_destroy,
.print_stats = xs_udp_print_stats,
+ .enable_swap = xs_enable_swap,
+ .disable_swap = xs_disable_swap,
+ .inject_disconnect = xs_inject_disconnect,
};
static struct rpc_xprt_ops xs_tcp_ops = {
@@ -2498,6 +2566,9 @@ static struct rpc_xprt_ops xs_tcp_ops = {
.close = xs_tcp_shutdown,
.destroy = xs_destroy,
.print_stats = xs_tcp_print_stats,
+ .enable_swap = xs_enable_swap,
+ .disable_swap = xs_disable_swap,
+ .inject_disconnect = xs_inject_disconnect,
};
/*
@@ -2515,6 +2586,9 @@ static struct rpc_xprt_ops bc_tcp_ops = {
.close = bc_close,
.destroy = bc_destroy,
.print_stats = xs_tcp_print_stats,
+ .enable_swap = xs_enable_swap,
+ .disable_swap = xs_disable_swap,
+ .inject_disconnect = xs_inject_disconnect,
};
static int xs_init_anyaddr(const int family, struct sockaddr *sap)
@@ -2982,7 +3056,7 @@ static int param_set_portnr(const char *val, const struct kernel_param *kp)
RPC_MAX_RESVPORT);
}
-static struct kernel_param_ops param_ops_portnr = {
+static const struct kernel_param_ops param_ops_portnr = {
.set = param_set_portnr,
.get = param_get_uint,
};
@@ -3001,7 +3075,7 @@ static int param_set_slot_table_size(const char *val,
RPC_MAX_SLOT_TABLE);
}
-static struct kernel_param_ops param_ops_slot_table_size = {
+static const struct kernel_param_ops param_ops_slot_table_size = {
.set = param_set_slot_table_size,
.get = param_get_uint,
};
@@ -3017,7 +3091,7 @@ static int param_set_max_slot_table_size(const char *val,
RPC_MAX_SLOT_TABLE_LIMIT);
}
-static struct kernel_param_ops param_ops_max_slot_table_size = {
+static const struct kernel_param_ops param_ops_max_slot_table_size = {
.set = param_set_max_slot_table_size,
.get = param_get_uint,
};
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 055453d48..9f2add3cb 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -15,97 +15,366 @@
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
+#include <linux/if_bridge.h>
#include <net/ip_fib.h>
#include <net/switchdev.h>
/**
- * netdev_switch_parent_id_get - Get ID of a switch
+ * switchdev_port_attr_get - Get port attribute
+ *
* @dev: port device
- * @psid: switch ID
+ * @attr: attribute to get
+ */
+int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
+{
+ const struct switchdev_ops *ops = dev->switchdev_ops;
+ struct net_device *lower_dev;
+ struct list_head *iter;
+ struct switchdev_attr first = {
+ .id = SWITCHDEV_ATTR_UNDEFINED
+ };
+ int err = -EOPNOTSUPP;
+
+ if (ops && ops->switchdev_port_attr_get)
+ return ops->switchdev_port_attr_get(dev, attr);
+
+ if (attr->flags & SWITCHDEV_F_NO_RECURSE)
+ return err;
+
+ /* Switch device port(s) may be stacked under
+ * bond/team/vlan dev, so recurse down to get attr on
+ * each port. Return -ENODATA if attr values don't
+ * compare across ports.
+ */
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ err = switchdev_port_attr_get(lower_dev, attr);
+ if (err)
+ break;
+ if (first.id == SWITCHDEV_ATTR_UNDEFINED)
+ first = *attr;
+ else if (memcmp(&first, attr, sizeof(*attr)))
+ return -ENODATA;
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
+
+static int __switchdev_port_attr_set(struct net_device *dev,
+ struct switchdev_attr *attr)
+{
+ const struct switchdev_ops *ops = dev->switchdev_ops;
+ struct net_device *lower_dev;
+ struct list_head *iter;
+ int err = -EOPNOTSUPP;
+
+ if (ops && ops->switchdev_port_attr_set)
+ return ops->switchdev_port_attr_set(dev, attr);
+
+ if (attr->flags & SWITCHDEV_F_NO_RECURSE)
+ return err;
+
+ /* Switch device port(s) may be stacked under
+ * bond/team/vlan dev, so recurse down to set attr on
+ * each port.
+ */
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ err = __switchdev_port_attr_set(lower_dev, attr);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+struct switchdev_attr_set_work {
+ struct work_struct work;
+ struct net_device *dev;
+ struct switchdev_attr attr;
+};
+
+static void switchdev_port_attr_set_work(struct work_struct *work)
+{
+ struct switchdev_attr_set_work *asw =
+ container_of(work, struct switchdev_attr_set_work, work);
+ int err;
+
+ rtnl_lock();
+ err = switchdev_port_attr_set(asw->dev, &asw->attr);
+ if (err && err != -EOPNOTSUPP)
+ netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n",
+ err, asw->attr.id);
+ rtnl_unlock();
+
+ dev_put(asw->dev);
+ kfree(work);
+}
+
+static int switchdev_port_attr_set_defer(struct net_device *dev,
+ struct switchdev_attr *attr)
+{
+ struct switchdev_attr_set_work *asw;
+
+ asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
+ if (!asw)
+ return -ENOMEM;
+
+ INIT_WORK(&asw->work, switchdev_port_attr_set_work);
+
+ dev_hold(dev);
+ asw->dev = dev;
+ memcpy(&asw->attr, attr, sizeof(asw->attr));
+
+ schedule_work(&asw->work);
+
+ return 0;
+}
+
+/**
+ * switchdev_port_attr_set - Set port attribute
+ *
+ * @dev: port device
+ * @attr: attribute to set
*
- * Get ID of a switch this port is part of.
+ * Use a 2-phase prepare-commit transaction model to ensure
+ * system is not left in a partially updated state due to
+ * failure from driver/device.
*/
-int netdev_switch_parent_id_get(struct net_device *dev,
- struct netdev_phys_item_id *psid)
+int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
{
- const struct swdev_ops *ops = dev->swdev_ops;
+ int err;
+
+ if (!rtnl_is_locked()) {
+ /* Running prepare-commit transaction across stacked
+ * devices requires nothing moves, so if rtnl_lock is
+ * not held, schedule a worker thread to hold rtnl_lock
+ * while setting attr.
+ */
+
+ return switchdev_port_attr_set_defer(dev, attr);
+ }
+
+ /* Phase I: prepare for attr set. Driver/device should fail
+ * here if there are going to be issues in the commit phase,
+ * such as lack of resources or support. The driver/device
+ * should reserve resources needed for the commit phase here,
+ * but should not commit the attr.
+ */
- if (!ops || !ops->swdev_parent_id_get)
- return -EOPNOTSUPP;
- return ops->swdev_parent_id_get(dev, psid);
+ attr->trans = SWITCHDEV_TRANS_PREPARE;
+ err = __switchdev_port_attr_set(dev, attr);
+ if (err) {
+ /* Prepare phase failed: abort the transaction. Any
+ * resources reserved in the prepare phase are
+ * released.
+ */
+
+ if (err != -EOPNOTSUPP) {
+ attr->trans = SWITCHDEV_TRANS_ABORT;
+ __switchdev_port_attr_set(dev, attr);
+ }
+
+ return err;
+ }
+
+ /* Phase II: commit attr set. This cannot fail as a fault
+ * of driver/device. If it does, it's a bug in the driver/device
+ * because the driver said everythings was OK in phase I.
+ */
+
+ attr->trans = SWITCHDEV_TRANS_COMMIT;
+ err = __switchdev_port_attr_set(dev, attr);
+ WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
+ dev->name, attr->id);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
+
+static int __switchdev_port_obj_add(struct net_device *dev,
+ struct switchdev_obj *obj)
+{
+ const struct switchdev_ops *ops = dev->switchdev_ops;
+ struct net_device *lower_dev;
+ struct list_head *iter;
+ int err = -EOPNOTSUPP;
+
+ if (ops && ops->switchdev_port_obj_add)
+ return ops->switchdev_port_obj_add(dev, obj);
+
+ /* Switch device port(s) may be stacked under
+ * bond/team/vlan dev, so recurse down to add object on
+ * each port.
+ */
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ err = __switchdev_port_obj_add(lower_dev, obj);
+ if (err)
+ break;
+ }
+
+ return err;
}
-EXPORT_SYMBOL_GPL(netdev_switch_parent_id_get);
/**
- * netdev_switch_port_stp_update - Notify switch device port of STP
- * state change
+ * switchdev_port_obj_add - Add port object
+ *
* @dev: port device
- * @state: port STP state
+ * @obj: object to add
+ *
+ * Use a 2-phase prepare-commit transaction model to ensure
+ * system is not left in a partially updated state due to
+ * failure from driver/device.
+ *
+ * rtnl_lock must be held.
+ */
+int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
+{
+ int err;
+
+ ASSERT_RTNL();
+
+ /* Phase I: prepare for obj add. Driver/device should fail
+ * here if there are going to be issues in the commit phase,
+ * such as lack of resources or support. The driver/device
+ * should reserve resources needed for the commit phase here,
+ * but should not commit the obj.
+ */
+
+ obj->trans = SWITCHDEV_TRANS_PREPARE;
+ err = __switchdev_port_obj_add(dev, obj);
+ if (err) {
+ /* Prepare phase failed: abort the transaction. Any
+ * resources reserved in the prepare phase are
+ * released.
+ */
+
+ if (err != -EOPNOTSUPP) {
+ obj->trans = SWITCHDEV_TRANS_ABORT;
+ __switchdev_port_obj_add(dev, obj);
+ }
+
+ return err;
+ }
+
+ /* Phase II: commit obj add. This cannot fail as a fault
+ * of driver/device. If it does, it's a bug in the driver/device
+ * because the driver said everythings was OK in phase I.
+ */
+
+ obj->trans = SWITCHDEV_TRANS_COMMIT;
+ err = __switchdev_port_obj_add(dev, obj);
+ WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
+
+/**
+ * switchdev_port_obj_del - Delete port object
*
- * Notify switch device port of bridge port STP state change.
+ * @dev: port device
+ * @obj: object to delete
*/
-int netdev_switch_port_stp_update(struct net_device *dev, u8 state)
+int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj)
{
- const struct swdev_ops *ops = dev->swdev_ops;
+ const struct switchdev_ops *ops = dev->switchdev_ops;
struct net_device *lower_dev;
struct list_head *iter;
int err = -EOPNOTSUPP;
- if (ops && ops->swdev_port_stp_update)
- return ops->swdev_port_stp_update(dev, state);
+ if (ops && ops->switchdev_port_obj_del)
+ return ops->switchdev_port_obj_del(dev, obj);
+
+ /* Switch device port(s) may be stacked under
+ * bond/team/vlan dev, so recurse down to delete object on
+ * each port.
+ */
netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = netdev_switch_port_stp_update(lower_dev, state);
- if (err && err != -EOPNOTSUPP)
- return err;
+ err = switchdev_port_obj_del(lower_dev, obj);
+ if (err)
+ break;
}
return err;
}
-EXPORT_SYMBOL_GPL(netdev_switch_port_stp_update);
+EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
-static DEFINE_MUTEX(netdev_switch_mutex);
-static RAW_NOTIFIER_HEAD(netdev_switch_notif_chain);
+/**
+ * switchdev_port_obj_dump - Dump port objects
+ *
+ * @dev: port device
+ * @obj: object to dump
+ */
+int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj)
+{
+ const struct switchdev_ops *ops = dev->switchdev_ops;
+ struct net_device *lower_dev;
+ struct list_head *iter;
+ int err = -EOPNOTSUPP;
+
+ if (ops && ops->switchdev_port_obj_dump)
+ return ops->switchdev_port_obj_dump(dev, obj);
+
+ /* Switch device port(s) may be stacked under
+ * bond/team/vlan dev, so recurse down to dump objects on
+ * first port at bottom of stack.
+ */
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ err = switchdev_port_obj_dump(lower_dev, obj);
+ break;
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
+
+static DEFINE_MUTEX(switchdev_mutex);
+static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
/**
- * register_netdev_switch_notifier - Register notifier
+ * register_switchdev_notifier - Register notifier
* @nb: notifier_block
*
* Register switch device notifier. This should be used by code
* which needs to monitor events happening in particular device.
* Return values are same as for atomic_notifier_chain_register().
*/
-int register_netdev_switch_notifier(struct notifier_block *nb)
+int register_switchdev_notifier(struct notifier_block *nb)
{
int err;
- mutex_lock(&netdev_switch_mutex);
- err = raw_notifier_chain_register(&netdev_switch_notif_chain, nb);
- mutex_unlock(&netdev_switch_mutex);
+ mutex_lock(&switchdev_mutex);
+ err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
+ mutex_unlock(&switchdev_mutex);
return err;
}
-EXPORT_SYMBOL_GPL(register_netdev_switch_notifier);
+EXPORT_SYMBOL_GPL(register_switchdev_notifier);
/**
- * unregister_netdev_switch_notifier - Unregister notifier
+ * unregister_switchdev_notifier - Unregister notifier
* @nb: notifier_block
*
* Unregister switch device notifier.
* Return values are same as for atomic_notifier_chain_unregister().
*/
-int unregister_netdev_switch_notifier(struct notifier_block *nb)
+int unregister_switchdev_notifier(struct notifier_block *nb)
{
int err;
- mutex_lock(&netdev_switch_mutex);
- err = raw_notifier_chain_unregister(&netdev_switch_notif_chain, nb);
- mutex_unlock(&netdev_switch_mutex);
+ mutex_lock(&switchdev_mutex);
+ err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
+ mutex_unlock(&switchdev_mutex);
return err;
}
-EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier);
+EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
/**
- * call_netdev_switch_notifiers - Call notifiers
+ * call_switchdev_notifiers - Call notifiers
* @val: value passed unmodified to notifier function
* @dev: port device
* @info: notifier information data
@@ -114,146 +383,502 @@ EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier);
* when it needs to propagate hardware event.
* Return values are same as for atomic_notifier_call_chain().
*/
-int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev,
- struct netdev_switch_notifier_info *info)
+int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
+ struct switchdev_notifier_info *info)
{
int err;
info->dev = dev;
- mutex_lock(&netdev_switch_mutex);
- err = raw_notifier_call_chain(&netdev_switch_notif_chain, val, info);
- mutex_unlock(&netdev_switch_mutex);
+ mutex_lock(&switchdev_mutex);
+ err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
+ mutex_unlock(&switchdev_mutex);
+ return err;
+}
+EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
+
+struct switchdev_vlan_dump {
+ struct switchdev_obj obj;
+ struct sk_buff *skb;
+ u32 filter_mask;
+ u16 flags;
+ u16 begin;
+ u16 end;
+};
+
+static int switchdev_port_vlan_dump_put(struct net_device *dev,
+ struct switchdev_vlan_dump *dump)
+{
+ struct bridge_vlan_info vinfo;
+
+ vinfo.flags = dump->flags;
+
+ if (dump->begin == 0 && dump->end == 0) {
+ return 0;
+ } else if (dump->begin == dump->end) {
+ vinfo.vid = dump->begin;
+ if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
+ sizeof(vinfo), &vinfo))
+ return -EMSGSIZE;
+ } else {
+ vinfo.vid = dump->begin;
+ vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
+ if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
+ sizeof(vinfo), &vinfo))
+ return -EMSGSIZE;
+ vinfo.vid = dump->end;
+ vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
+ vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
+ if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
+ sizeof(vinfo), &vinfo))
+ return -EMSGSIZE;
+ }
+
+ return 0;
+}
+
+static int switchdev_port_vlan_dump_cb(struct net_device *dev,
+ struct switchdev_obj *obj)
+{
+ struct switchdev_vlan_dump *dump =
+ container_of(obj, struct switchdev_vlan_dump, obj);
+ struct switchdev_obj_vlan *vlan = &dump->obj.u.vlan;
+ int err = 0;
+
+ if (vlan->vid_begin > vlan->vid_end)
+ return -EINVAL;
+
+ if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
+ dump->flags = vlan->flags;
+ for (dump->begin = dump->end = vlan->vid_begin;
+ dump->begin <= vlan->vid_end;
+ dump->begin++, dump->end++) {
+ err = switchdev_port_vlan_dump_put(dev, dump);
+ if (err)
+ return err;
+ }
+ } else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
+ if (dump->begin > vlan->vid_begin &&
+ dump->begin >= vlan->vid_end) {
+ if ((dump->begin - 1) == vlan->vid_end &&
+ dump->flags == vlan->flags) {
+ /* prepend */
+ dump->begin = vlan->vid_begin;
+ } else {
+ err = switchdev_port_vlan_dump_put(dev, dump);
+ dump->flags = vlan->flags;
+ dump->begin = vlan->vid_begin;
+ dump->end = vlan->vid_end;
+ }
+ } else if (dump->end <= vlan->vid_begin &&
+ dump->end < vlan->vid_end) {
+ if ((dump->end + 1) == vlan->vid_begin &&
+ dump->flags == vlan->flags) {
+ /* append */
+ dump->end = vlan->vid_end;
+ } else {
+ err = switchdev_port_vlan_dump_put(dev, dump);
+ dump->flags = vlan->flags;
+ dump->begin = vlan->vid_begin;
+ dump->end = vlan->vid_end;
+ }
+ } else {
+ err = -EINVAL;
+ }
+ }
+
return err;
}
-EXPORT_SYMBOL_GPL(call_netdev_switch_notifiers);
+
+static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
+ u32 filter_mask)
+{
+ struct switchdev_vlan_dump dump = {
+ .obj = {
+ .id = SWITCHDEV_OBJ_PORT_VLAN,
+ .cb = switchdev_port_vlan_dump_cb,
+ },
+ .skb = skb,
+ .filter_mask = filter_mask,
+ };
+ int err = 0;
+
+ if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
+ (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
+ err = switchdev_port_obj_dump(dev, &dump.obj);
+ if (err)
+ goto err_out;
+ if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
+ /* last one */
+ err = switchdev_port_vlan_dump_put(dev, &dump);
+ }
+
+err_out:
+ return err == -EOPNOTSUPP ? 0 : err;
+}
/**
- * netdev_switch_port_bridge_setlink - Notify switch device port of bridge
- * port attributes
+ * switchdev_port_bridge_getlink - Get bridge port attributes
*
* @dev: port device
- * @nlh: netlink msg with bridge port attributes
- * @flags: bridge setlink flags
*
- * Notify switch device port of bridge port attributes
+ * Called for SELF on rtnl_bridge_getlink to get bridge port
+ * attributes.
*/
-int netdev_switch_port_bridge_setlink(struct net_device *dev,
- struct nlmsghdr *nlh, u16 flags)
+int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+ struct net_device *dev, u32 filter_mask,
+ int nlflags)
{
- const struct net_device_ops *ops = dev->netdev_ops;
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
+ };
+ u16 mode = BRIDGE_MODE_UNDEF;
+ u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
+ int err;
- if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD))
- return 0;
+ err = switchdev_port_attr_get(dev, &attr);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
+ attr.u.brport_flags, mask, nlflags,
+ filter_mask, switchdev_port_vlan_fill);
+}
+EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);
+
+static int switchdev_port_br_setflag(struct net_device *dev,
+ struct nlattr *nlattr,
+ unsigned long brport_flag)
+{
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
+ };
+ u8 flag = nla_get_u8(nlattr);
+ int err;
+
+ err = switchdev_port_attr_get(dev, &attr);
+ if (err)
+ return err;
+
+ if (flag)
+ attr.u.brport_flags |= brport_flag;
+ else
+ attr.u.brport_flags &= ~brport_flag;
+
+ return switchdev_port_attr_set(dev, &attr);
+}
+
+static const struct nla_policy
+switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
+ [IFLA_BRPORT_STATE] = { .type = NLA_U8 },
+ [IFLA_BRPORT_COST] = { .type = NLA_U32 },
+ [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 },
+ [IFLA_BRPORT_MODE] = { .type = NLA_U8 },
+ [IFLA_BRPORT_GUARD] = { .type = NLA_U8 },
+ [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 },
+ [IFLA_BRPORT_FAST_LEAVE] = { .type = NLA_U8 },
+ [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 },
+ [IFLA_BRPORT_LEARNING_SYNC] = { .type = NLA_U8 },
+ [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 },
+};
+
+static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
+ struct nlattr *protinfo)
+{
+ struct nlattr *attr;
+ int rem;
+ int err;
+
+ err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
+ switchdev_port_bridge_policy);
+ if (err)
+ return err;
+
+ nla_for_each_nested(attr, protinfo, rem) {
+ switch (nla_type(attr)) {
+ case IFLA_BRPORT_LEARNING:
+ err = switchdev_port_br_setflag(dev, attr,
+ BR_LEARNING);
+ break;
+ case IFLA_BRPORT_LEARNING_SYNC:
+ err = switchdev_port_br_setflag(dev, attr,
+ BR_LEARNING_SYNC);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int switchdev_port_br_afspec(struct net_device *dev,
+ struct nlattr *afspec,
+ int (*f)(struct net_device *dev,
+ struct switchdev_obj *obj))
+{
+ struct nlattr *attr;
+ struct bridge_vlan_info *vinfo;
+ struct switchdev_obj obj = {
+ .id = SWITCHDEV_OBJ_PORT_VLAN,
+ };
+ struct switchdev_obj_vlan *vlan = &obj.u.vlan;
+ int rem;
+ int err;
- if (!ops->ndo_bridge_setlink)
- return -EOPNOTSUPP;
+ nla_for_each_nested(attr, afspec, rem) {
+ if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
+ continue;
+ if (nla_len(attr) != sizeof(struct bridge_vlan_info))
+ return -EINVAL;
+ vinfo = nla_data(attr);
+ vlan->flags = vinfo->flags;
+ if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
+ if (vlan->vid_begin)
+ return -EINVAL;
+ vlan->vid_begin = vinfo->vid;
+ } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
+ if (!vlan->vid_begin)
+ return -EINVAL;
+ vlan->vid_end = vinfo->vid;
+ if (vlan->vid_end <= vlan->vid_begin)
+ return -EINVAL;
+ err = f(dev, &obj);
+ if (err)
+ return err;
+ memset(vlan, 0, sizeof(*vlan));
+ } else {
+ if (vlan->vid_begin)
+ return -EINVAL;
+ vlan->vid_begin = vinfo->vid;
+ vlan->vid_end = vinfo->vid;
+ err = f(dev, &obj);
+ if (err)
+ return err;
+ memset(vlan, 0, sizeof(*vlan));
+ }
+ }
- return ops->ndo_bridge_setlink(dev, nlh, flags);
+ return 0;
}
-EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_setlink);
/**
- * netdev_switch_port_bridge_dellink - Notify switch device port of bridge
- * port attribute delete
+ * switchdev_port_bridge_setlink - Set bridge port attributes
*
* @dev: port device
- * @nlh: netlink msg with bridge port attributes
- * @flags: bridge setlink flags
+ * @nlh: netlink header
+ * @flags: netlink flags
*
- * Notify switch device port of bridge port attribute delete
+ * Called for SELF on rtnl_bridge_setlink to set bridge port
+ * attributes.
*/
-int netdev_switch_port_bridge_dellink(struct net_device *dev,
- struct nlmsghdr *nlh, u16 flags)
+int switchdev_port_bridge_setlink(struct net_device *dev,
+ struct nlmsghdr *nlh, u16 flags)
{
- const struct net_device_ops *ops = dev->netdev_ops;
+ struct nlattr *protinfo;
+ struct nlattr *afspec;
+ int err = 0;
- if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD))
- return 0;
+ protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
+ IFLA_PROTINFO);
+ if (protinfo) {
+ err = switchdev_port_br_setlink_protinfo(dev, protinfo);
+ if (err)
+ return err;
+ }
- if (!ops->ndo_bridge_dellink)
- return -EOPNOTSUPP;
+ afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
+ IFLA_AF_SPEC);
+ if (afspec)
+ err = switchdev_port_br_afspec(dev, afspec,
+ switchdev_port_obj_add);
- return ops->ndo_bridge_dellink(dev, nlh, flags);
+ return err;
}
-EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_dellink);
+EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);
/**
- * ndo_dflt_netdev_switch_port_bridge_setlink - default ndo bridge setlink
- * op for master devices
+ * switchdev_port_bridge_dellink - Set bridge port attributes
*
* @dev: port device
- * @nlh: netlink msg with bridge port attributes
- * @flags: bridge setlink flags
+ * @nlh: netlink header
+ * @flags: netlink flags
*
- * Notify master device slaves of bridge port attributes
+ * Called for SELF on rtnl_bridge_dellink to set bridge port
+ * attributes.
*/
-int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev,
- struct nlmsghdr *nlh, u16 flags)
+int switchdev_port_bridge_dellink(struct net_device *dev,
+ struct nlmsghdr *nlh, u16 flags)
{
- struct net_device *lower_dev;
- struct list_head *iter;
- int ret = 0, err = 0;
+ struct nlattr *afspec;
- if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD))
- return ret;
+ afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
+ IFLA_AF_SPEC);
+ if (afspec)
+ return switchdev_port_br_afspec(dev, afspec,
+ switchdev_port_obj_del);
- netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = netdev_switch_port_bridge_setlink(lower_dev, nlh, flags);
- if (err && err != -EOPNOTSUPP)
- ret = err;
- }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);
+
+/**
+ * switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
+ *
+ * @ndmsg: netlink hdr
+ * @nlattr: netlink attributes
+ * @dev: port device
+ * @addr: MAC address to add
+ * @vid: VLAN to add
+ *
+ * Add FDB entry to switch device.
+ */
+int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev, const unsigned char *addr,
+ u16 vid, u16 nlm_flags)
+{
+ struct switchdev_obj obj = {
+ .id = SWITCHDEV_OBJ_PORT_FDB,
+ .u.fdb = {
+ .addr = addr,
+ .vid = vid,
+ },
+ };
- return ret;
+ return switchdev_port_obj_add(dev, &obj);
}
-EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_setlink);
+EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
/**
- * ndo_dflt_netdev_switch_port_bridge_dellink - default ndo bridge dellink
- * op for master devices
+ * switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
*
+ * @ndmsg: netlink hdr
+ * @nlattr: netlink attributes
* @dev: port device
- * @nlh: netlink msg with bridge port attributes
- * @flags: bridge dellink flags
+ * @addr: MAC address to delete
+ * @vid: VLAN to delete
*
- * Notify master device slaves of bridge port attribute deletes
+ * Delete FDB entry from switch device.
*/
-int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev,
- struct nlmsghdr *nlh, u16 flags)
+int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev, const unsigned char *addr,
+ u16 vid)
{
- struct net_device *lower_dev;
- struct list_head *iter;
- int ret = 0, err = 0;
+ struct switchdev_obj obj = {
+ .id = SWITCHDEV_OBJ_PORT_FDB,
+ .u.fdb = {
+ .addr = addr,
+ .vid = vid,
+ },
+ };
- if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD))
- return ret;
+ return switchdev_port_obj_del(dev, &obj);
+}
+EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
- netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = netdev_switch_port_bridge_dellink(lower_dev, nlh, flags);
- if (err && err != -EOPNOTSUPP)
- ret = err;
- }
+struct switchdev_fdb_dump {
+ struct switchdev_obj obj;
+ struct sk_buff *skb;
+ struct netlink_callback *cb;
+ int idx;
+};
+
+static int switchdev_port_fdb_dump_cb(struct net_device *dev,
+ struct switchdev_obj *obj)
+{
+ struct switchdev_fdb_dump *dump =
+ container_of(obj, struct switchdev_fdb_dump, obj);
+ u32 portid = NETLINK_CB(dump->cb->skb).portid;
+ u32 seq = dump->cb->nlh->nlmsg_seq;
+ struct nlmsghdr *nlh;
+ struct ndmsg *ndm;
+
+ if (dump->idx < dump->cb->args[0])
+ goto skip;
+
+ nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
+ sizeof(*ndm), NLM_F_MULTI);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ ndm = nlmsg_data(nlh);
+ ndm->ndm_family = AF_BRIDGE;
+ ndm->ndm_pad1 = 0;
+ ndm->ndm_pad2 = 0;
+ ndm->ndm_flags = NTF_SELF;
+ ndm->ndm_type = 0;
+ ndm->ndm_ifindex = dev->ifindex;
+ ndm->ndm_state = NUD_REACHABLE;
+
+ if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr))
+ goto nla_put_failure;
+
+ if (obj->u.fdb.vid && nla_put_u16(dump->skb, NDA_VLAN, obj->u.fdb.vid))
+ goto nla_put_failure;
+
+ nlmsg_end(dump->skb, nlh);
+
+skip:
+ dump->idx++;
+ return 0;
+
+nla_put_failure:
+ nlmsg_cancel(dump->skb, nlh);
+ return -EMSGSIZE;
+}
- return ret;
+/**
+ * switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
+ *
+ * @skb: netlink skb
+ * @cb: netlink callback
+ * @dev: port device
+ * @filter_dev: filter device
+ * @idx:
+ *
+ * Delete FDB entry from switch device.
+ */
+int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct net_device *dev,
+ struct net_device *filter_dev, int idx)
+{
+ struct switchdev_fdb_dump dump = {
+ .obj = {
+ .id = SWITCHDEV_OBJ_PORT_FDB,
+ .cb = switchdev_port_fdb_dump_cb,
+ },
+ .skb = skb,
+ .cb = cb,
+ .idx = idx,
+ };
+ int err;
+
+ err = switchdev_port_obj_dump(dev, &dump.obj);
+ if (err)
+ return err;
+
+ return dump.idx;
}
-EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_dellink);
+EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
-static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev)
+static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
{
- const struct swdev_ops *ops = dev->swdev_ops;
+ const struct switchdev_ops *ops = dev->switchdev_ops;
struct net_device *lower_dev;
struct net_device *port_dev;
struct list_head *iter;
/* Recusively search down until we find a sw port dev.
- * (A sw port dev supports swdev_parent_id_get).
+ * (A sw port dev supports switchdev_port_attr_get).
*/
- if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD &&
- ops && ops->swdev_parent_id_get)
+ if (ops && ops->switchdev_port_attr_get)
return dev;
netdev_for_each_lower_dev(dev, lower_dev, iter) {
- port_dev = netdev_switch_get_lowest_dev(lower_dev);
+ port_dev = switchdev_get_lowest_dev(lower_dev);
if (port_dev)
return port_dev;
}
@@ -261,10 +886,12 @@ static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev)
return NULL;
}
-static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi)
+static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
{
- struct netdev_phys_item_id psid;
- struct netdev_phys_item_id prev_psid;
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ };
+ struct switchdev_attr prev_attr;
struct net_device *dev = NULL;
int nhsel;
@@ -276,28 +903,29 @@ static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi)
if (!nh->nh_dev)
return NULL;
- dev = netdev_switch_get_lowest_dev(nh->nh_dev);
+ dev = switchdev_get_lowest_dev(nh->nh_dev);
if (!dev)
return NULL;
- if (netdev_switch_parent_id_get(dev, &psid))
+ if (switchdev_port_attr_get(dev, &attr))
return NULL;
if (nhsel > 0) {
- if (prev_psid.id_len != psid.id_len)
+ if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len)
return NULL;
- if (memcmp(prev_psid.id, psid.id, psid.id_len))
+ if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id,
+ attr.u.ppid.id_len))
return NULL;
}
- prev_psid = psid;
+ prev_attr = attr;
}
return dev;
}
/**
- * netdev_switch_fib_ipv4_add - Add IPv4 route entry to switch
+ * switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry
*
* @dst: route's IPv4 destination address
* @dst_len: destination address length (prefix length)
@@ -307,13 +935,24 @@ static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi)
* @nlflags: netlink flags passed in (NLM_F_*)
* @tb_id: route table ID
*
- * Add IPv4 route entry to switch device.
+ * Add/modify switch IPv4 route entry.
*/
-int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
- u8 tos, u8 type, u32 nlflags, u32 tb_id)
+int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
+ u8 tos, u8 type, u32 nlflags, u32 tb_id)
{
+ struct switchdev_obj fib_obj = {
+ .id = SWITCHDEV_OBJ_IPV4_FIB,
+ .u.ipv4_fib = {
+ .dst = dst,
+ .dst_len = dst_len,
+ .fi = fi,
+ .tos = tos,
+ .type = type,
+ .nlflags = nlflags,
+ .tb_id = tb_id,
+ },
+ };
struct net_device *dev;
- const struct swdev_ops *ops;
int err = 0;
/* Don't offload route if using custom ip rules or if
@@ -328,25 +967,20 @@ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
if (fi->fib_net->ipv4.fib_offload_disabled)
return 0;
- dev = netdev_switch_get_dev_by_nhs(fi);
+ dev = switchdev_get_dev_by_nhs(fi);
if (!dev)
return 0;
- ops = dev->swdev_ops;
-
- if (ops->swdev_fib_ipv4_add) {
- err = ops->swdev_fib_ipv4_add(dev, htonl(dst), dst_len,
- fi, tos, type, nlflags,
- tb_id);
- if (!err)
- fi->fib_flags |= RTNH_F_OFFLOAD;
- }
- return err;
+ err = switchdev_port_obj_add(dev, &fib_obj);
+ if (!err)
+ fi->fib_flags |= RTNH_F_OFFLOAD;
+
+ return err == -EOPNOTSUPP ? 0 : err;
}
-EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_add);
+EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);
/**
- * netdev_switch_fib_ipv4_del - Delete IPv4 route entry from switch
+ * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
*
* @dst: route's IPv4 destination address
* @dst_len: destination address length (prefix length)
@@ -357,38 +991,45 @@ EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_add);
*
* Delete IPv4 route entry from switch device.
*/
-int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
- u8 tos, u8 type, u32 tb_id)
+int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
+ u8 tos, u8 type, u32 tb_id)
{
+ struct switchdev_obj fib_obj = {
+ .id = SWITCHDEV_OBJ_IPV4_FIB,
+ .u.ipv4_fib = {
+ .dst = dst,
+ .dst_len = dst_len,
+ .fi = fi,
+ .tos = tos,
+ .type = type,
+ .nlflags = 0,
+ .tb_id = tb_id,
+ },
+ };
struct net_device *dev;
- const struct swdev_ops *ops;
int err = 0;
if (!(fi->fib_flags & RTNH_F_OFFLOAD))
return 0;
- dev = netdev_switch_get_dev_by_nhs(fi);
+ dev = switchdev_get_dev_by_nhs(fi);
if (!dev)
return 0;
- ops = dev->swdev_ops;
- if (ops->swdev_fib_ipv4_del) {
- err = ops->swdev_fib_ipv4_del(dev, htonl(dst), dst_len,
- fi, tos, type, tb_id);
- if (!err)
- fi->fib_flags &= ~RTNH_F_OFFLOAD;
- }
+ err = switchdev_port_obj_del(dev, &fib_obj);
+ if (!err)
+ fi->fib_flags &= ~RTNH_F_OFFLOAD;
- return err;
+ return err == -EOPNOTSUPP ? 0 : err;
}
-EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_del);
+EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del);
/**
- * netdev_switch_fib_ipv4_abort - Abort an IPv4 FIB operation
+ * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
*
* @fi: route FIB info structure
*/
-void netdev_switch_fib_ipv4_abort(struct fib_info *fi)
+void switchdev_fib_ipv4_abort(struct fib_info *fi)
{
/* There was a problem installing this route to the offload
* device. For now, until we come up with more refined
@@ -401,4 +1042,4 @@ void netdev_switch_fib_ipv4_abort(struct fib_info *fi)
fib_flush_external(fi->fib_net);
fi->fib_net->ipv4.fib_offload_disabled = true;
}
-EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_abort);
+EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index ba7daa864..48fd3b5a7 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -38,13 +38,6 @@
#include "addr.h"
#include "core.h"
-u32 tipc_own_addr(struct net *net)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
-
- return tn->own_addr;
-}
-
/**
* in_own_cluster - test for cluster inclusion; <0.0.0> always matches
*/
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 7ba6d5c8a..93f7c983b 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -41,10 +41,18 @@
#include <linux/tipc.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include "core.h"
#define TIPC_ZONE_MASK 0xff000000u
#define TIPC_CLUSTER_MASK 0xfffff000u
+static inline u32 tipc_own_addr(struct net *net)
+{
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+
+ return tn->own_addr;
+}
+
static inline u32 tipc_zone_mask(u32 addr)
{
return addr & TIPC_ZONE_MASK;
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index c5cbdcb1f..a816382fc 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -108,6 +108,11 @@ void tipc_bclink_remove_node(struct net *net, u32 addr)
tipc_bclink_lock(net);
tipc_nmap_remove(&tn->bclink->bcast_nodes, addr);
+
+ /* Last node? => reset backlog queue */
+ if (!tn->bclink->bcast_nodes.count)
+ tipc_link_purge_backlog(&tn->bclink->link);
+
tipc_bclink_unlock(net);
}
@@ -115,19 +120,15 @@ static void bclink_set_last_sent(struct net *net)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_link *bcl = tn->bcl;
- struct sk_buff *skb = skb_peek(&bcl->backlogq);
- if (skb)
- bcl->fsm_msg_cnt = mod(buf_seqno(skb) - 1);
- else
- bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1);
+ bcl->silent_intv_cnt = mod(bcl->snd_nxt - 1);
}
u32 tipc_bclink_get_last_sent(struct net *net)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
- return tn->bcl->fsm_msg_cnt;
+ return tn->bcl->silent_intv_cnt;
}
static void bclink_update_last_sent(struct tipc_node *node, u32 seqno)
@@ -212,16 +213,16 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
* or both sent and unsent messages (otherwise)
*/
if (tn->bclink->bcast_nodes.count)
- acked = tn->bcl->fsm_msg_cnt;
+ acked = tn->bcl->silent_intv_cnt;
else
- acked = tn->bcl->next_out_no;
+ acked = tn->bcl->snd_nxt;
} else {
/*
* Bail out if specified sequence number does not correspond
* to a message that has been sent and not yet acknowledged
*/
if (less(acked, buf_seqno(skb)) ||
- less(tn->bcl->fsm_msg_cnt, acked) ||
+ less(tn->bcl->silent_intv_cnt, acked) ||
less_eq(acked, n_ptr->bclink.acked))
goto exit;
}
@@ -803,9 +804,9 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
goto attr_msg_full;
if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name))
goto attr_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->next_in_no))
+ if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->rcv_nxt))
goto attr_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->next_out_no))
+ if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->snd_nxt))
goto attr_msg_full;
prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP);
@@ -866,6 +867,27 @@ int tipc_bclink_set_queue_limits(struct net *net, u32 limit)
return 0;
}
+int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[])
+{
+ int err;
+ u32 win;
+ struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
+
+ if (!attrs[TIPC_NLA_LINK_PROP])
+ return -EINVAL;
+
+ err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], props);
+ if (err)
+ return err;
+
+ if (!props[TIPC_NLA_PROP_WIN])
+ return -EOPNOTSUPP;
+
+ win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+
+ return tipc_bclink_set_queue_limits(net, win);
+}
+
int tipc_bclink_init(struct net *net)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
@@ -893,7 +915,7 @@ int tipc_bclink_init(struct net *net)
__skb_queue_head_init(&bcl->backlogq);
__skb_queue_head_init(&bcl->deferdq);
skb_queue_head_init(&bcl->wakeupq);
- bcl->next_out_no = 1;
+ bcl->snd_nxt = 1;
spin_lock_init(&bclink->node.lock);
__skb_queue_head_init(&bclink->arrvq);
skb_queue_head_init(&bclink->inputq);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 4bdc12277..3c290a48f 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -131,6 +131,7 @@ uint tipc_bclink_get_mtu(void);
int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list);
void tipc_bclink_wakeup_users(struct net *net);
int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
+int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
void tipc_bclink_input(struct net *net);
#endif
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 70e3dacbf..00bc0e620 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -71,8 +71,7 @@ static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = {
[TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED }
};
-static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr,
- bool shutting_down);
+static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr);
/**
* tipc_media_find - locates specified media object by name
@@ -324,7 +323,7 @@ restart:
res = tipc_disc_create(net, b_ptr, &b_ptr->bcast_addr);
if (res) {
- bearer_disable(net, b_ptr, false);
+ bearer_disable(net, b_ptr);
pr_warn("Bearer <%s> rejected, discovery object creation failed\n",
name);
return -EINVAL;
@@ -344,7 +343,7 @@ restart:
static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr)
{
pr_info("Resetting bearer <%s>\n", b_ptr->name);
- tipc_link_reset_list(net, b_ptr->identity);
+ tipc_link_delete_list(net, b_ptr->identity);
tipc_disc_reset(net, b_ptr);
return 0;
}
@@ -354,8 +353,7 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr)
*
* Note: This routine assumes caller holds RTNL lock.
*/
-static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr,
- bool shutting_down)
+static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
u32 i;
@@ -363,7 +361,7 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr,
pr_info("Disabling bearer <%s>\n", b_ptr->name);
b_ptr->media->disable_media(b_ptr);
- tipc_link_delete_list(net, b_ptr->identity, shutting_down);
+ tipc_link_delete_list(net, b_ptr->identity);
if (b_ptr->link_req)
tipc_disc_delete(b_ptr->link_req);
@@ -541,7 +539,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
break;
case NETDEV_UNREGISTER:
case NETDEV_CHANGENAME:
- bearer_disable(dev_net(dev), b_ptr, false);
+ bearer_disable(dev_net(dev), b_ptr);
break;
}
return NOTIFY_OK;
@@ -583,7 +581,7 @@ void tipc_bearer_stop(struct net *net)
for (i = 0; i < MAX_BEARERS; i++) {
b_ptr = rtnl_dereference(tn->bearer_list[i]);
if (b_ptr) {
- bearer_disable(net, b_ptr, true);
+ bearer_disable(net, b_ptr);
tn->bearer_list[i] = NULL;
}
}
@@ -747,7 +745,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
- bearer_disable(net, bearer, false);
+ bearer_disable(net, bearer);
rtnl_unlock();
return 0;
@@ -812,7 +810,7 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
char *name;
struct tipc_bearer *b;
struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
- struct net *net = genl_info_net(info);
+ struct net *net = sock_net(skb->sk);
if (!info->attrs[TIPC_NLA_BEARER])
return -EINVAL;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 5cad243ee..dc714d977 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -38,9 +38,9 @@
#define _TIPC_BEARER_H
#include "netlink.h"
+#include "core.h"
#include <net/genetlink.h>
-#define MAX_BEARERS 2
#define MAX_MEDIA 3
#define MAX_NODES 4096
#define WSIZE 32
diff --git a/net/tipc/core.c b/net/tipc/core.c
index be1c9fa60..005ba5eb0 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -68,7 +68,7 @@ static int __net_init tipc_init_net(struct net *net)
if (err)
goto out_nametbl;
- err = tipc_subscr_start(net);
+ err = tipc_topsrv_start(net);
if (err)
goto out_subscr;
return 0;
@@ -83,7 +83,7 @@ out_sk_rht:
static void __net_exit tipc_exit_net(struct net *net)
{
- tipc_subscr_stop(net);
+ tipc_topsrv_stop(net);
tipc_net_stop(net);
tipc_nametbl_stop(net);
tipc_sk_rht_destroy(net);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 3dc68c7a9..0fcf133d5 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -60,16 +60,19 @@
#include <net/netns/generic.h>
#include <linux/rhashtable.h>
-#include "node.h"
-#include "bearer.h"
-#include "bcast.h"
-#include "netlink.h"
-#include "link.h"
-#include "node.h"
-#include "msg.h"
+struct tipc_node;
+struct tipc_bearer;
+struct tipc_bcbearer;
+struct tipc_bclink;
+struct tipc_link;
+struct tipc_name_table;
+struct tipc_server;
#define TIPC_MOD_VER "2.0.0"
+#define NODE_HTABLE_SIZE 512
+#define MAX_BEARERS 3
+
extern int tipc_net_id __read_mostly;
extern int sysctl_tipc_rmem[3] __read_mostly;
extern int sysctl_tipc_named_timeout __read_mostly;
@@ -106,6 +109,26 @@ struct tipc_net {
atomic_t subscription_count;
};
+static inline u16 mod(u16 x)
+{
+ return x & 0xffffu;
+}
+
+static inline int less_eq(u16 left, u16 right)
+{
+ return mod(right - left) < 32768u;
+}
+
+static inline int more(u16 left, u16 right)
+{
+ return !less_eq(left, right);
+}
+
+static inline int less(u16 left, u16 right)
+{
+ return less_eq(left, right) && (mod(right) != mod(left));
+}
+
#ifdef CONFIG_SYSCTL
int tipc_register_sysctl(void);
void tipc_unregister_sysctl(void);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 43a515dc9..eaa9fe54b 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -86,7 +86,7 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
*/
#define STARTING_EVT 856384768 /* link processing trigger */
#define TRAFFIC_MSG_EVT 560815u /* rx'd ??? */
-#define TIMEOUT_EVT 560817u /* link timer expired */
+#define SILENCE_EVT 560817u /* timer dicovered silence from peer */
/*
* State value stored in 'failover_pkts'
@@ -106,6 +106,7 @@ static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf);
static void tipc_link_input(struct tipc_link *l, struct sk_buff *skb);
static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb);
static bool tipc_link_failover_rcv(struct tipc_link *l, struct sk_buff **skb);
+static void link_set_timer(struct tipc_link *link, unsigned long time);
/*
* Simple link routines
*/
@@ -197,11 +198,12 @@ static void link_timeout(unsigned long data)
}
/* do all other link processing performed on a periodic basis */
- link_state_event(l_ptr, TIMEOUT_EVT);
-
+ if (l_ptr->silent_intv_cnt || tipc_bclink_acks_missing(l_ptr->owner))
+ link_state_event(l_ptr, SILENCE_EVT);
+ l_ptr->silent_intv_cnt++;
if (skb_queue_len(&l_ptr->backlogq))
tipc_link_push_packets(l_ptr);
-
+ link_set_timer(l_ptr, l_ptr->keepalive_intv);
tipc_node_unlock(l_ptr->owner);
tipc_link_put(l_ptr);
}
@@ -233,8 +235,8 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
if (n_ptr->link_cnt >= MAX_BEARERS) {
tipc_addr_string_fill(addr_string, n_ptr->addr);
- pr_err("Attempt to establish %uth link to %s. Max %u allowed.\n",
- n_ptr->link_cnt, addr_string, MAX_BEARERS);
+ pr_err("Cannot establish %uth link to %s. Max %u allowed.\n",
+ n_ptr->link_cnt, addr_string, MAX_BEARERS);
return NULL;
}
@@ -261,7 +263,6 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
/* note: peer i/f name is updated by reset/activate message */
memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr));
l_ptr->owner = n_ptr;
- l_ptr->checkpoint = 1;
l_ptr->peer_session = INVALID_SESSION;
l_ptr->bearer_id = b_ptr->identity;
link_set_supervision_props(l_ptr, b_ptr->tolerance);
@@ -280,7 +281,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
l_ptr->mtu = l_ptr->advertised_mtu;
l_ptr->priority = b_ptr->priority;
tipc_link_set_queue_limits(l_ptr, b_ptr->window);
- l_ptr->next_out_no = 1;
+ l_ptr->snd_nxt = 1;
__skb_queue_head_init(&l_ptr->transmq);
__skb_queue_head_init(&l_ptr->backlogq);
__skb_queue_head_init(&l_ptr->deferdq);
@@ -311,8 +312,7 @@ void tipc_link_delete(struct tipc_link *l)
tipc_link_put(l);
}
-void tipc_link_delete_list(struct net *net, unsigned int bearer_id,
- bool shutting_down)
+void tipc_link_delete_list(struct net *net, unsigned int bearer_id)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_link *link;
@@ -404,7 +404,7 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr)
l_ptr->reasm_buf = NULL;
}
-static void tipc_link_purge_backlog(struct tipc_link *l)
+void tipc_link_purge_backlog(struct tipc_link *l)
{
__skb_queue_purge(&l->backlogq);
l->backlog[TIPC_LOW_IMPORTANCE].len = 0;
@@ -451,9 +451,9 @@ void tipc_link_reset(struct tipc_link *l_ptr)
if (was_active_link && tipc_node_is_up(l_ptr->owner) && (pl != l_ptr)) {
l_ptr->flags |= LINK_FAILINGOVER;
- l_ptr->failover_checkpt = l_ptr->next_in_no;
+ l_ptr->failover_checkpt = l_ptr->rcv_nxt;
pl->failover_pkts = FIRST_FAILOVER;
- pl->failover_checkpt = l_ptr->next_in_no;
+ pl->failover_checkpt = l_ptr->rcv_nxt;
pl->failover_skb = l_ptr->reasm_buf;
} else {
kfree_skb(l_ptr->reasm_buf);
@@ -469,36 +469,19 @@ void tipc_link_reset(struct tipc_link *l_ptr)
tipc_link_purge_backlog(l_ptr);
l_ptr->reasm_buf = NULL;
l_ptr->rcv_unacked = 0;
- l_ptr->checkpoint = 1;
- l_ptr->next_out_no = 1;
- l_ptr->fsm_msg_cnt = 0;
+ l_ptr->snd_nxt = 1;
+ l_ptr->silent_intv_cnt = 0;
l_ptr->stale_count = 0;
link_reset_statistics(l_ptr);
}
-void tipc_link_reset_list(struct net *net, unsigned int bearer_id)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_link *l_ptr;
- struct tipc_node *n_ptr;
-
- rcu_read_lock();
- list_for_each_entry_rcu(n_ptr, &tn->node_list, list) {
- tipc_node_lock(n_ptr);
- l_ptr = n_ptr->links[bearer_id];
- if (l_ptr)
- tipc_link_reset(l_ptr);
- tipc_node_unlock(n_ptr);
- }
- rcu_read_unlock();
-}
-
static void link_activate(struct tipc_link *link)
{
struct tipc_node *node = link->owner;
- link->next_in_no = 1;
+ link->rcv_nxt = 1;
link->stats.recv_info = 1;
+ link->silent_intv_cnt = 0;
tipc_node_link_up(node, link);
tipc_bearer_add_dest(node->net, link->bearer_id, link->addr);
}
@@ -511,7 +494,7 @@ static void link_activate(struct tipc_link *link)
static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
{
struct tipc_link *other;
- unsigned long cont_intv = l_ptr->cont_intv;
+ unsigned long timer_intv = l_ptr->keepalive_intv;
if (l_ptr->flags & LINK_STOPPED)
return;
@@ -519,45 +502,33 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
if (!(l_ptr->flags & LINK_STARTED) && (event != STARTING_EVT))
return; /* Not yet. */
- if (l_ptr->flags & LINK_FAILINGOVER) {
- if (event == TIMEOUT_EVT)
- link_set_timer(l_ptr, cont_intv);
+ if (l_ptr->flags & LINK_FAILINGOVER)
return;
- }
switch (l_ptr->state) {
case WORKING_WORKING:
switch (event) {
case TRAFFIC_MSG_EVT:
case ACTIVATE_MSG:
+ l_ptr->silent_intv_cnt = 0;
break;
- case TIMEOUT_EVT:
- if (l_ptr->next_in_no != l_ptr->checkpoint) {
- l_ptr->checkpoint = l_ptr->next_in_no;
- if (tipc_bclink_acks_missing(l_ptr->owner)) {
+ case SILENCE_EVT:
+ if (!l_ptr->silent_intv_cnt) {
+ if (tipc_bclink_acks_missing(l_ptr->owner))
tipc_link_proto_xmit(l_ptr, STATE_MSG,
0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- }
- link_set_timer(l_ptr, cont_intv);
break;
}
l_ptr->state = WORKING_UNKNOWN;
- l_ptr->fsm_msg_cnt = 0;
tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- link_set_timer(l_ptr, cont_intv / 4);
break;
case RESET_MSG:
pr_debug("%s<%s>, requested by peer\n",
link_rst_msg, l_ptr->name);
tipc_link_reset(l_ptr);
l_ptr->state = RESET_RESET;
- l_ptr->fsm_msg_cnt = 0;
tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- link_set_timer(l_ptr, cont_intv);
break;
default:
pr_debug("%s%u in WW state\n", link_unk_evt, event);
@@ -568,46 +539,33 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
case TRAFFIC_MSG_EVT:
case ACTIVATE_MSG:
l_ptr->state = WORKING_WORKING;
- l_ptr->fsm_msg_cnt = 0;
- link_set_timer(l_ptr, cont_intv);
+ l_ptr->silent_intv_cnt = 0;
break;
case RESET_MSG:
pr_debug("%s<%s>, requested by peer while probing\n",
link_rst_msg, l_ptr->name);
tipc_link_reset(l_ptr);
l_ptr->state = RESET_RESET;
- l_ptr->fsm_msg_cnt = 0;
tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- link_set_timer(l_ptr, cont_intv);
break;
- case TIMEOUT_EVT:
- if (l_ptr->next_in_no != l_ptr->checkpoint) {
+ case SILENCE_EVT:
+ if (!l_ptr->silent_intv_cnt) {
l_ptr->state = WORKING_WORKING;
- l_ptr->fsm_msg_cnt = 0;
- l_ptr->checkpoint = l_ptr->next_in_no;
- if (tipc_bclink_acks_missing(l_ptr->owner)) {
+ if (tipc_bclink_acks_missing(l_ptr->owner))
tipc_link_proto_xmit(l_ptr, STATE_MSG,
0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- }
- link_set_timer(l_ptr, cont_intv);
- } else if (l_ptr->fsm_msg_cnt < l_ptr->abort_limit) {
+ } else if (l_ptr->silent_intv_cnt <
+ l_ptr->abort_limit) {
tipc_link_proto_xmit(l_ptr, STATE_MSG,
1, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- link_set_timer(l_ptr, cont_intv / 4);
} else { /* Link has failed */
pr_debug("%s<%s>, peer not responding\n",
link_rst_msg, l_ptr->name);
tipc_link_reset(l_ptr);
l_ptr->state = RESET_UNKNOWN;
- l_ptr->fsm_msg_cnt = 0;
tipc_link_proto_xmit(l_ptr, RESET_MSG,
0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- link_set_timer(l_ptr, cont_intv);
}
break;
default:
@@ -623,31 +581,22 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
if (other && link_working_unknown(other))
break;
l_ptr->state = WORKING_WORKING;
- l_ptr->fsm_msg_cnt = 0;
link_activate(l_ptr);
tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
if (l_ptr->owner->working_links == 1)
tipc_link_sync_xmit(l_ptr);
- link_set_timer(l_ptr, cont_intv);
break;
case RESET_MSG:
l_ptr->state = RESET_RESET;
- l_ptr->fsm_msg_cnt = 0;
tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
1, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- link_set_timer(l_ptr, cont_intv);
break;
case STARTING_EVT:
l_ptr->flags |= LINK_STARTED;
- l_ptr->fsm_msg_cnt++;
- link_set_timer(l_ptr, cont_intv);
+ link_set_timer(l_ptr, timer_intv);
break;
- case TIMEOUT_EVT:
+ case SILENCE_EVT:
tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- link_set_timer(l_ptr, cont_intv);
break;
default:
pr_err("%s%u in RU state\n", link_unk_evt, event);
@@ -661,21 +610,16 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
if (other && link_working_unknown(other))
break;
l_ptr->state = WORKING_WORKING;
- l_ptr->fsm_msg_cnt = 0;
link_activate(l_ptr);
tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
if (l_ptr->owner->working_links == 1)
tipc_link_sync_xmit(l_ptr);
- link_set_timer(l_ptr, cont_intv);
break;
case RESET_MSG:
break;
- case TIMEOUT_EVT:
+ case SILENCE_EVT:
tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- link_set_timer(l_ptr, cont_intv);
break;
default:
pr_err("%s%u in RR state\n", link_unk_evt, event);
@@ -701,53 +645,58 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link,
{
struct tipc_msg *msg = buf_msg(skb_peek(list));
unsigned int maxwin = link->window;
- unsigned int imp = msg_importance(msg);
+ unsigned int i, imp = msg_importance(msg);
uint mtu = link->mtu;
- uint ack = mod(link->next_in_no - 1);
- uint seqno = link->next_out_no;
- uint bc_last_in = link->owner->bclink.last_in;
+ u16 ack = mod(link->rcv_nxt - 1);
+ u16 seqno = link->snd_nxt;
+ u16 bc_last_in = link->owner->bclink.last_in;
struct tipc_media_addr *addr = &link->media_addr;
struct sk_buff_head *transmq = &link->transmq;
struct sk_buff_head *backlogq = &link->backlogq;
- struct sk_buff *skb, *tmp;
-
- /* Match backlog limit against msg importance: */
- if (unlikely(link->backlog[imp].len >= link->backlog[imp].limit))
- return link_schedule_user(link, list);
+ struct sk_buff *skb, *bskb;
+ /* Match msg importance against this and all higher backlog limits: */
+ for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) {
+ if (unlikely(link->backlog[i].len >= link->backlog[i].limit))
+ return link_schedule_user(link, list);
+ }
if (unlikely(msg_size(msg) > mtu)) {
__skb_queue_purge(list);
return -EMSGSIZE;
}
/* Prepare each packet for sending, and add to relevant queue: */
- skb_queue_walk_safe(list, skb, tmp) {
- __skb_unlink(skb, list);
+ while (skb_queue_len(list)) {
+ skb = skb_peek(list);
msg = buf_msg(skb);
msg_set_seqno(msg, seqno);
msg_set_ack(msg, ack);
msg_set_bcast_ack(msg, bc_last_in);
if (likely(skb_queue_len(transmq) < maxwin)) {
+ __skb_dequeue(list);
__skb_queue_tail(transmq, skb);
tipc_bearer_send(net, link->bearer_id, skb, addr);
link->rcv_unacked = 0;
seqno++;
continue;
}
- if (tipc_msg_bundle(skb_peek_tail(backlogq), skb, mtu)) {
+ if (tipc_msg_bundle(skb_peek_tail(backlogq), msg, mtu)) {
+ kfree_skb(__skb_dequeue(list));
link->stats.sent_bundled++;
continue;
}
- if (tipc_msg_make_bundle(&skb, mtu, link->addr)) {
+ if (tipc_msg_make_bundle(&bskb, msg, mtu, link->addr)) {
+ kfree_skb(__skb_dequeue(list));
+ __skb_queue_tail(backlogq, bskb);
+ link->backlog[msg_importance(buf_msg(bskb))].len++;
link->stats.sent_bundled++;
link->stats.sent_bundles++;
- imp = msg_importance(buf_msg(skb));
+ continue;
}
- __skb_queue_tail(backlogq, skb);
- link->backlog[imp].len++;
- seqno++;
+ link->backlog[imp].len += skb_queue_len(list);
+ skb_queue_splice_tail_init(list, backlogq);
}
- link->next_out_no = seqno;
+ link->snd_nxt = seqno;
return 0;
}
@@ -877,7 +826,8 @@ void tipc_link_push_packets(struct tipc_link *link)
{
struct sk_buff *skb;
struct tipc_msg *msg;
- unsigned int ack = mod(link->next_in_no - 1);
+ u16 seqno = link->snd_nxt;
+ u16 ack = mod(link->rcv_nxt - 1);
while (skb_queue_len(&link->transmq) < link->window) {
skb = __skb_dequeue(&link->backlogq);
@@ -886,12 +836,15 @@ void tipc_link_push_packets(struct tipc_link *link)
msg = buf_msg(skb);
link->backlog[msg_importance(msg)].len--;
msg_set_ack(msg, ack);
+ msg_set_seqno(msg, seqno);
+ seqno = mod(seqno + 1);
msg_set_bcast_ack(msg, link->owner->bclink.last_in);
link->rcv_unacked = 0;
__skb_queue_tail(&link->transmq, skb);
tipc_bearer_send(link->owner->net, link->bearer_id,
skb, &link->media_addr);
}
+ link->snd_nxt = seqno;
}
void tipc_link_reset_all(struct tipc_node *node)
@@ -964,13 +917,13 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb,
msg = buf_msg(skb);
/* Detect repeated retransmit failures */
- if (l_ptr->last_retransmitted == msg_seqno(msg)) {
+ if (l_ptr->last_retransm == msg_seqno(msg)) {
if (++l_ptr->stale_count > 100) {
link_retransmit_failure(l_ptr, skb);
return;
}
} else {
- l_ptr->last_retransmitted = msg_seqno(msg);
+ l_ptr->last_retransm = msg_seqno(msg);
l_ptr->stale_count = 1;
}
@@ -978,7 +931,7 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb,
if (!retransmits)
break;
msg = buf_msg(skb);
- msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
+ msg_set_ack(msg, mod(l_ptr->rcv_nxt - 1));
msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, skb,
&l_ptr->media_addr);
@@ -1001,11 +954,11 @@ static bool link_synch(struct tipc_link *l)
goto synched;
/* Was last pre-synch packet added to input queue ? */
- if (less_eq(pl->next_in_no, l->synch_point))
+ if (less_eq(pl->rcv_nxt, l->synch_point))
return false;
/* Is it still in the input queue ? */
- post_synch = mod(pl->next_in_no - l->synch_point) - 1;
+ post_synch = mod(pl->rcv_nxt - l->synch_point) - 1;
if (skb_queue_len(&pl->inputq) > post_synch)
return false;
synched:
@@ -1016,13 +969,13 @@ synched:
static void link_retrieve_defq(struct tipc_link *link,
struct sk_buff_head *list)
{
- u32 seq_no;
+ u16 seq_no;
if (skb_queue_empty(&link->deferdq))
return;
seq_no = buf_seqno(skb_peek(&link->deferdq));
- if (seq_no == mod(link->next_in_no))
+ if (seq_no == link->rcv_nxt)
skb_queue_splice_tail_init(&link->deferdq, list);
}
@@ -1043,8 +996,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr)
struct tipc_link *l_ptr;
struct sk_buff *skb1, *tmp;
struct tipc_msg *msg;
- u32 seq_no;
- u32 ackd;
+ u16 seq_no;
+ u16 ackd;
u32 released;
skb2list(skb, &head);
@@ -1137,18 +1090,20 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr)
}
/* Link is now in state WORKING_WORKING */
- if (unlikely(seq_no != mod(l_ptr->next_in_no))) {
+ if (unlikely(seq_no != l_ptr->rcv_nxt)) {
link_handle_out_of_seq_msg(l_ptr, skb);
link_retrieve_defq(l_ptr, &head);
skb = NULL;
goto unlock;
}
+ l_ptr->silent_intv_cnt = 0;
+
/* Synchronize with parallel link if applicable */
if (unlikely((l_ptr->flags & LINK_SYNCHING) && !msg_dup(msg))) {
if (!link_synch(l_ptr))
goto unlock;
}
- l_ptr->next_in_no++;
+ l_ptr->rcv_nxt++;
if (unlikely(!skb_queue_empty(&l_ptr->deferdq)))
link_retrieve_defq(l_ptr, &head);
if (unlikely(++l_ptr->rcv_unacked >= TIPC_MIN_LINK_WIN)) {
@@ -1268,7 +1223,7 @@ static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb)
u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb)
{
struct sk_buff *skb1;
- u32 seq_no = buf_seqno(skb);
+ u16 seq_no = buf_seqno(skb);
/* Empty queue ? */
if (skb_queue_empty(list)) {
@@ -1284,7 +1239,7 @@ u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb)
/* Locate insertion point in queue, then insert; discard if duplicate */
skb_queue_walk(list, skb1) {
- u32 curr_seqno = buf_seqno(skb1);
+ u16 curr_seqno = buf_seqno(skb1);
if (seq_no == curr_seqno) {
kfree_skb(skb);
@@ -1312,14 +1267,14 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
return;
}
- /* Record OOS packet arrival (force mismatch on next timeout) */
- l_ptr->checkpoint--;
+ /* Record OOS packet arrival */
+ l_ptr->silent_intv_cnt = 0;
/*
* Discard packet if a duplicate; otherwise add it to deferred queue
* and notify peer of gap as per protocol specification
*/
- if (less(seq_no, mod(l_ptr->next_in_no))) {
+ if (less(seq_no, l_ptr->rcv_nxt)) {
l_ptr->stats.duplicates++;
kfree_skb(buf);
return;
@@ -1344,6 +1299,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg,
struct tipc_msg *msg = l_ptr->pmsg;
u32 msg_size = sizeof(l_ptr->proto_msg);
int r_flag;
+ u16 last_rcv;
/* Don't send protocol message during link failover */
if (l_ptr->flags & LINK_FAILINGOVER)
@@ -1360,16 +1316,14 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg,
msg_set_last_bcast(msg, tipc_bclink_get_last_sent(l_ptr->owner->net));
if (msg_typ == STATE_MSG) {
- u32 next_sent = mod(l_ptr->next_out_no);
+ u16 next_sent = l_ptr->snd_nxt;
if (!tipc_link_is_up(l_ptr))
return;
- if (skb_queue_len(&l_ptr->backlogq))
- next_sent = buf_seqno(skb_peek(&l_ptr->backlogq));
msg_set_next_sent(msg, next_sent);
if (!skb_queue_empty(&l_ptr->deferdq)) {
- u32 rec = buf_seqno(skb_peek(&l_ptr->deferdq));
- gap = mod(rec - mod(l_ptr->next_in_no));
+ last_rcv = buf_seqno(skb_peek(&l_ptr->deferdq));
+ gap = mod(last_rcv - l_ptr->rcv_nxt);
}
msg_set_seq_gap(msg, gap);
if (gap)
@@ -1377,7 +1331,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg,
msg_set_link_tolerance(msg, tolerance);
msg_set_linkprio(msg, priority);
msg_set_max_pkt(msg, l_ptr->mtu);
- msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
+ msg_set_ack(msg, mod(l_ptr->rcv_nxt - 1));
msg_set_probe(msg, probe_msg != 0);
if (probe_msg)
l_ptr->stats.sent_probes++;
@@ -1397,7 +1351,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg,
msg_set_linkprio(msg, l_ptr->priority);
msg_set_size(msg, msg_size);
- msg_set_seqno(msg, mod(l_ptr->next_out_no + (0xffff/2)));
+ msg_set_seqno(msg, mod(l_ptr->snd_nxt + (0xffff / 2)));
buf = tipc_buf_acquire(msg_size);
if (!buf)
@@ -1496,17 +1450,15 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr,
}
/* Record reception; force mismatch at next timeout: */
- l_ptr->checkpoint--;
+ l_ptr->silent_intv_cnt = 0;
link_state_event(l_ptr, TRAFFIC_MSG_EVT);
l_ptr->stats.recv_states++;
if (link_reset_unknown(l_ptr))
break;
- if (less_eq(mod(l_ptr->next_in_no), msg_next_sent(msg))) {
- rec_gap = mod(msg_next_sent(msg) -
- mod(l_ptr->next_in_no));
- }
+ if (less_eq(l_ptr->rcv_nxt, msg_next_sent(msg)))
+ rec_gap = mod(msg_next_sent(msg) - l_ptr->rcv_nxt);
if (msg_probe(msg))
l_ptr->stats.recv_probes++;
@@ -1580,6 +1532,11 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr)
tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, TUNNEL_PROTOCOL,
FAILOVER_MSG, INT_H_SIZE, l_ptr->addr);
+
+ skb_queue_walk(&l_ptr->backlogq, skb) {
+ msg_set_seqno(buf_msg(skb), l_ptr->snd_nxt);
+ l_ptr->snd_nxt = mod(l_ptr->snd_nxt + 1);
+ }
skb_queue_splice_tail_init(&l_ptr->backlogq, &l_ptr->transmq);
tipc_link_purge_backlog(l_ptr);
msgcount = skb_queue_len(&l_ptr->transmq);
@@ -1640,6 +1597,7 @@ void tipc_link_dup_queue_xmit(struct tipc_link *link,
struct tipc_msg tnl_hdr;
struct sk_buff_head *queue = &link->transmq;
int mcnt;
+ u16 seqno;
tipc_msg_init(link_own_addr(link), &tnl_hdr, TUNNEL_PROTOCOL,
SYNCH_MSG, INT_H_SIZE, link->addr);
@@ -1653,7 +1611,7 @@ tunnel_queue:
struct tipc_msg *msg = buf_msg(skb);
u32 len = msg_size(msg);
- msg_set_ack(msg, mod(link->next_in_no - 1));
+ msg_set_ack(msg, mod(link->rcv_nxt - 1));
msg_set_bcast_ack(msg, link->owner->bclink.last_in);
msg_set_size(&tnl_hdr, len + INT_H_SIZE);
outskb = tipc_buf_acquire(len + INT_H_SIZE);
@@ -1671,6 +1629,11 @@ tunnel_queue:
}
if (queue == &link->backlogq)
return;
+ seqno = link->snd_nxt;
+ skb_queue_walk(&link->backlogq, skb) {
+ msg_set_seqno(buf_msg(skb), seqno);
+ seqno = mod(seqno + 1);
+ }
queue = &link->backlogq;
goto tunnel_queue;
}
@@ -1742,8 +1705,8 @@ static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol)
return;
l_ptr->tolerance = tol;
- l_ptr->cont_intv = msecs_to_jiffies(intv);
- l_ptr->abort_limit = tol / (jiffies_to_msecs(l_ptr->cont_intv) / 4);
+ l_ptr->keepalive_intv = msecs_to_jiffies(intv);
+ l_ptr->abort_limit = tol / (jiffies_to_msecs(l_ptr->keepalive_intv));
}
void tipc_link_set_queue_limits(struct tipc_link *l, u32 win)
@@ -1803,8 +1766,8 @@ static struct tipc_node *tipc_link_find_owner(struct net *net,
static void link_reset_statistics(struct tipc_link *l_ptr)
{
memset(&l_ptr->stats, 0, sizeof(l_ptr->stats));
- l_ptr->stats.sent_info = l_ptr->next_out_no;
- l_ptr->stats.recv_info = l_ptr->next_in_no;
+ l_ptr->stats.sent_info = l_ptr->snd_nxt;
+ l_ptr->stats.recv_info = l_ptr->rcv_nxt;
}
static void link_print(struct tipc_link *l_ptr, const char *str)
@@ -1893,6 +1856,9 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info)
name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
+ if (strcmp(name, tipc_bclink_name) == 0)
+ return tipc_nl_bc_link_set(net, attrs);
+
node = tipc_link_find_owner(net, name, &bearer_id);
if (!node)
return -EINVAL;
@@ -2034,9 +2000,9 @@ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
goto attr_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu))
goto attr_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->next_in_no))
+ if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->rcv_nxt))
goto attr_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, link->next_out_no))
+ if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, link->snd_nxt))
goto attr_msg_full;
if (tipc_link_is_up(link))
@@ -2175,50 +2141,53 @@ out:
int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
- struct sk_buff *ans_skb;
struct tipc_nl_msg msg;
- struct tipc_link *link;
- struct tipc_node *node;
char *name;
- int bearer_id;
int err;
+ msg.portid = info->snd_portid;
+ msg.seq = info->snd_seq;
+
if (!info->attrs[TIPC_NLA_LINK_NAME])
return -EINVAL;
-
name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]);
- node = tipc_link_find_owner(net, name, &bearer_id);
- if (!node)
- return -EINVAL;
- ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!ans_skb)
+ msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!msg.skb)
return -ENOMEM;
- msg.skb = ans_skb;
- msg.portid = info->snd_portid;
- msg.seq = info->snd_seq;
-
- tipc_node_lock(node);
- link = node->links[bearer_id];
- if (!link) {
- err = -EINVAL;
- goto err_out;
- }
-
- err = __tipc_nl_add_link(net, &msg, link, 0);
- if (err)
- goto err_out;
+ if (strcmp(name, tipc_bclink_name) == 0) {
+ err = tipc_nl_add_bc_link(net, &msg);
+ if (err) {
+ nlmsg_free(msg.skb);
+ return err;
+ }
+ } else {
+ int bearer_id;
+ struct tipc_node *node;
+ struct tipc_link *link;
- tipc_node_unlock(node);
+ node = tipc_link_find_owner(net, name, &bearer_id);
+ if (!node)
+ return -EINVAL;
- return genlmsg_reply(ans_skb, info);
+ tipc_node_lock(node);
+ link = node->links[bearer_id];
+ if (!link) {
+ tipc_node_unlock(node);
+ nlmsg_free(msg.skb);
+ return -EINVAL;
+ }
-err_out:
- tipc_node_unlock(node);
- nlmsg_free(ans_skb);
+ err = __tipc_nl_add_link(net, &msg, link, 0);
+ tipc_node_unlock(node);
+ if (err) {
+ nlmsg_free(msg.skb);
+ return err;
+ }
+ }
- return err;
+ return genlmsg_reply(msg.skb, info);
}
int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info)
diff --git a/net/tipc/link.h b/net/tipc/link.h
index b5b4e3554..ae0a0ea57 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -107,30 +107,29 @@ struct tipc_stats {
* @owner: pointer to peer node
* @refcnt: reference counter for permanent references (owner node & timer)
* @flags: execution state flags for link endpoint instance
- * @checkpoint: reference point for triggering link continuity checking
* @peer_session: link session # being used by peer end of link
* @peer_bearer_id: bearer id used by link's peer endpoint
* @bearer_id: local bearer id used by link
* @tolerance: minimum link continuity loss needed to reset link [in ms]
- * @cont_intv: link continuity testing interval
+ * @keepalive_intv: link keepalive timer interval
* @abort_limit: # of unacknowledged continuity probes needed to reset link
* @state: current state of link FSM
- * @fsm_msg_cnt: # of protocol messages link FSM has sent in current state
+ * @silent_intv_cnt: # of timer intervals without any reception from peer
* @proto_msg: template for control messages generated by link
* @pmsg: convenience pointer to "proto_msg" field
* @priority: current link priority
* @net_plane: current link network plane ('A' through 'H')
* @backlog_limit: backlog queue congestion thresholds (indexed by importance)
* @exp_msg_count: # of tunnelled messages expected during link changeover
- * @reset_checkpoint: seq # of last acknowledged message at time of link reset
+ * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset
* @mtu: current maximum packet size for this link
* @advertised_mtu: advertised own mtu when link is being established
* @transmitq: queue for sent, non-acked messages
* @backlogq: queue for messages waiting to be sent
- * @next_out_no: next sequence number to use for outbound messages
+ * @snt_nxt: next sequence number to use for outbound messages
* @last_retransmitted: sequence number of most recently retransmitted message
* @stale_count: # of identical retransmit requests made by peer
- * @next_in_no: next sequence number to expect for inbound messages
+ * @rcv_nxt: next sequence number to expect for inbound messages
* @deferred_queue: deferred queue saved OOS b'cast message received from node
* @unacked_window: # of inbound messages rx'd without ack'ing back to peer
* @inputq: buffer queue for messages to be delivered upwards
@@ -151,15 +150,14 @@ struct tipc_link {
/* Management and link supervision data */
unsigned int flags;
- u32 checkpoint;
u32 peer_session;
u32 peer_bearer_id;
u32 bearer_id;
u32 tolerance;
- unsigned long cont_intv;
+ unsigned long keepalive_intv;
u32 abort_limit;
int state;
- u32 fsm_msg_cnt;
+ u32 silent_intv_cnt;
struct {
unchar hdr[INT_H_SIZE];
unchar body[TIPC_MAX_IF_NAME];
@@ -185,13 +183,13 @@ struct tipc_link {
u16 len;
u16 limit;
} backlog[5];
- u32 next_out_no;
+ u16 snd_nxt;
+ u16 last_retransm;
u32 window;
- u32 last_retransmitted;
u32 stale_count;
/* Reception */
- u32 next_in_no;
+ u16 rcv_nxt;
u32 rcv_unacked;
struct sk_buff_head deferdq;
struct sk_buff_head inputq;
@@ -213,17 +211,16 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
struct tipc_bearer *b_ptr,
const struct tipc_media_addr *media_addr);
void tipc_link_delete(struct tipc_link *link);
-void tipc_link_delete_list(struct net *net, unsigned int bearer_id,
- bool shutting_down);
+void tipc_link_delete_list(struct net *net, unsigned int bearer_id);
void tipc_link_failover_send_queue(struct tipc_link *l_ptr);
void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct tipc_link *dest);
void tipc_link_reset_fragments(struct tipc_link *l_ptr);
int tipc_link_is_up(struct tipc_link *l_ptr);
int tipc_link_is_active(struct tipc_link *l_ptr);
void tipc_link_purge_queues(struct tipc_link *l_ptr);
+void tipc_link_purge_backlog(struct tipc_link *l);
void tipc_link_reset_all(struct tipc_node *node);
void tipc_link_reset(struct tipc_link *l_ptr);
-void tipc_link_reset_list(struct net *net, unsigned int bearer_id);
int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
u32 selector);
int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest,
@@ -247,39 +244,6 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]);
void link_prepare_wakeup(struct tipc_link *l);
-/*
- * Link sequence number manipulation routines (uses modulo 2**16 arithmetic)
- */
-static inline u32 buf_seqno(struct sk_buff *buf)
-{
- return msg_seqno(buf_msg(buf));
-}
-
-static inline u32 mod(u32 x)
-{
- return x & 0xffffu;
-}
-
-static inline int less_eq(u32 left, u32 right)
-{
- return mod(right - left) < 32768u;
-}
-
-static inline int more(u32 left, u32 right)
-{
- return !less_eq(left, right);
-}
-
-static inline int less(u32 left, u32 right)
-{
- return less_eq(left, right) && (mod(right) != mod(left));
-}
-
-static inline u32 lesser(u32 left, u32 right)
-{
- return less_eq(left, right) ? left : right;
-}
-
static inline u32 link_own_addr(struct tipc_link *l)
{
return msg_prevnode(l->pmsg);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index c3e96e815..08b4cc7d4 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -331,16 +331,15 @@ error:
/**
* tipc_msg_bundle(): Append contents of a buffer to tail of an existing one
- * @bskb: the buffer to append to ("bundle")
- * @skb: buffer to be appended
+ * @skb: the buffer to append to ("bundle")
+ * @msg: message to be appended
* @mtu: max allowable size for the bundle buffer
* Consumes buffer if successful
* Returns true if bundling could be performed, otherwise false
*/
-bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu)
+bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu)
{
struct tipc_msg *bmsg;
- struct tipc_msg *msg = buf_msg(skb);
unsigned int bsz;
unsigned int msz = msg_size(msg);
u32 start, pad;
@@ -348,9 +347,9 @@ bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu)
if (likely(msg_user(msg) == MSG_FRAGMENTER))
return false;
- if (!bskb)
+ if (!skb)
return false;
- bmsg = buf_msg(bskb);
+ bmsg = buf_msg(skb);
bsz = msg_size(bmsg);
start = align(bsz);
pad = start - bsz;
@@ -359,18 +358,20 @@ bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu)
return false;
if (unlikely(msg_user(msg) == BCAST_PROTOCOL))
return false;
- if (likely(msg_user(bmsg) != MSG_BUNDLER))
+ if (unlikely(msg_user(bmsg) != MSG_BUNDLER))
return false;
- if (unlikely(skb_tailroom(bskb) < (pad + msz)))
+ if (unlikely(skb_tailroom(skb) < (pad + msz)))
return false;
if (unlikely(max < (start + msz)))
return false;
+ if ((msg_importance(msg) < TIPC_SYSTEM_IMPORTANCE) &&
+ (msg_importance(bmsg) == TIPC_SYSTEM_IMPORTANCE))
+ return false;
- skb_put(bskb, pad + msz);
- skb_copy_to_linear_data_offset(bskb, start, skb->data, msz);
+ skb_put(skb, pad + msz);
+ skb_copy_to_linear_data_offset(skb, start, msg, msz);
msg_set_size(bmsg, start + msz);
msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1);
- kfree_skb(skb);
return true;
}
@@ -416,18 +417,18 @@ none:
/**
* tipc_msg_make_bundle(): Create bundle buf and append message to its tail
- * @list: the buffer chain
- * @skb: buffer to be appended and replaced
+ * @list: the buffer chain, where head is the buffer to replace/append
+ * @skb: buffer to be created, appended to and returned in case of success
+ * @msg: message to be appended
* @mtu: max allowable size for the bundle buffer, inclusive header
* @dnode: destination node for message. (Not always present in header)
- * Replaces buffer if successful
* Returns true if success, otherwise false
*/
-bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode)
+bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
+ u32 mtu, u32 dnode)
{
- struct sk_buff *bskb;
+ struct sk_buff *_skb;
struct tipc_msg *bmsg;
- struct tipc_msg *msg = buf_msg(*skb);
u32 msz = msg_size(msg);
u32 max = mtu - INT_H_SIZE;
@@ -440,19 +441,23 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode)
if (msz > (max / 2))
return false;
- bskb = tipc_buf_acquire(max);
- if (!bskb)
+ _skb = tipc_buf_acquire(max);
+ if (!_skb)
return false;
- skb_trim(bskb, INT_H_SIZE);
- bmsg = buf_msg(bskb);
+ skb_trim(_skb, INT_H_SIZE);
+ bmsg = buf_msg(_skb);
tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0,
INT_H_SIZE, dnode);
+ if (msg_isdata(msg))
+ msg_set_importance(bmsg, TIPC_CRITICAL_IMPORTANCE);
+ else
+ msg_set_importance(bmsg, TIPC_SYSTEM_IMPORTANCE);
msg_set_seqno(bmsg, msg_seqno(msg));
msg_set_ack(bmsg, msg_ack(msg));
msg_set_bcast_ack(bmsg, msg_bcast_ack(msg));
- tipc_msg_bundle(bskb, *skb, mtu);
- *skb = bskb;
+ tipc_msg_bundle(_skb, msg, mtu);
+ *skb = _skb;
return true;
}
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index e1d3595e2..19c45fb66 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -313,12 +313,12 @@ static inline void msg_set_lookup_scope(struct tipc_msg *m, u32 n)
msg_set_bits(m, 1, 19, 0x3, n);
}
-static inline u32 msg_bcast_ack(struct tipc_msg *m)
+static inline u16 msg_bcast_ack(struct tipc_msg *m)
{
return msg_bits(m, 1, 0, 0xffff);
}
-static inline void msg_set_bcast_ack(struct tipc_msg *m, u32 n)
+static inline void msg_set_bcast_ack(struct tipc_msg *m, u16 n)
{
msg_set_bits(m, 1, 0, 0xffff, n);
}
@@ -327,22 +327,22 @@ static inline void msg_set_bcast_ack(struct tipc_msg *m, u32 n)
/*
* Word 2
*/
-static inline u32 msg_ack(struct tipc_msg *m)
+static inline u16 msg_ack(struct tipc_msg *m)
{
return msg_bits(m, 2, 16, 0xffff);
}
-static inline void msg_set_ack(struct tipc_msg *m, u32 n)
+static inline void msg_set_ack(struct tipc_msg *m, u16 n)
{
msg_set_bits(m, 2, 16, 0xffff, n);
}
-static inline u32 msg_seqno(struct tipc_msg *m)
+static inline u16 msg_seqno(struct tipc_msg *m)
{
return msg_bits(m, 2, 0, 0xffff);
}
-static inline void msg_set_seqno(struct tipc_msg *m, u32 n)
+static inline void msg_set_seqno(struct tipc_msg *m, u16 n)
{
msg_set_bits(m, 2, 0, 0xffff, n);
}
@@ -352,18 +352,22 @@ static inline void msg_set_seqno(struct tipc_msg *m, u32 n)
*/
static inline u32 msg_importance(struct tipc_msg *m)
{
- if (unlikely(msg_user(m) == MSG_FRAGMENTER))
+ int usr = msg_user(m);
+
+ if (likely((usr <= TIPC_CRITICAL_IMPORTANCE) && !msg_errcode(m)))
+ return usr;
+ if ((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER))
return msg_bits(m, 5, 13, 0x7);
- if (likely(msg_isdata(m) && !msg_errcode(m)))
- return msg_user(m);
return TIPC_SYSTEM_IMPORTANCE;
}
static inline void msg_set_importance(struct tipc_msg *m, u32 i)
{
- if (unlikely(msg_user(m) == MSG_FRAGMENTER))
+ int usr = msg_user(m);
+
+ if (likely((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER)))
msg_set_bits(m, 5, 13, 0x7, i);
- else if (likely(i < TIPC_SYSTEM_IMPORTANCE))
+ else if (i < TIPC_SYSTEM_IMPORTANCE)
msg_set_user(m, i);
else
pr_warn("Trying to set illegal importance in message\n");
@@ -772,9 +776,9 @@ struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz,
uint data_sz, u32 dnode, u32 onode,
u32 dport, u32 oport, int errcode);
int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf);
-bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu);
-
-bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode);
+bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu);
+bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
+ u32 mtu, u32 dnode);
bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
int offset, int dsz, int mtu, struct sk_buff_head *list);
@@ -782,6 +786,11 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, u32 *dnode,
int *err);
struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list);
+static inline u16 buf_seqno(struct sk_buff *skb)
+{
+ return msg_seqno(buf_msg(skb));
+}
+
/* tipc_skb_peek(): peek and reserve first buffer in list
* @list: list to be peeked in
* Returns pointer to first buffer in list, if any
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index ab0ac62a1..0f47f08bf 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -330,13 +330,9 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
/* Any subscriptions waiting for notification? */
list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
- tipc_subscr_report_overlap(s,
- publ->lower,
- publ->upper,
- TIPC_PUBLISHED,
- publ->ref,
- publ->node,
- created_subseq);
+ tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
+ TIPC_PUBLISHED, publ->ref,
+ publ->node, created_subseq);
}
return publ;
}
@@ -404,13 +400,9 @@ found:
/* Notify any waiting subscriptions */
list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
- tipc_subscr_report_overlap(s,
- publ->lower,
- publ->upper,
- TIPC_WITHDRAWN,
- publ->ref,
- publ->node,
- removed_subseq);
+ tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
+ TIPC_WITHDRAWN, publ->ref,
+ publ->node, removed_subseq);
}
return publ;
@@ -432,19 +424,17 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
return;
while (sseq != &nseq->sseqs[nseq->first_free]) {
- if (tipc_subscr_overlap(s, sseq->lower, sseq->upper)) {
+ if (tipc_subscrp_check_overlap(s, sseq->lower, sseq->upper)) {
struct publication *crs;
struct name_info *info = sseq->info;
int must_report = 1;
list_for_each_entry(crs, &info->zone_list, zone_list) {
- tipc_subscr_report_overlap(s,
- sseq->lower,
- sseq->upper,
- TIPC_PUBLISHED,
- crs->ref,
- crs->node,
- must_report);
+ tipc_subscrp_report_overlap(s, sseq->lower,
+ sseq->upper,
+ TIPC_PUBLISHED,
+ crs->ref, crs->node,
+ must_report);
must_report = 0;
}
}
diff --git a/net/tipc/net.c b/net/tipc/net.c
index a54f3cbe2..d6d1399ae 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -40,6 +40,7 @@
#include "subscr.h"
#include "socket.h"
#include "node.h"
+#include "bcast.h"
static const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = {
[TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC },
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index ce9121e8e..53e0fee80 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -55,6 +55,7 @@ struct tipc_nl_compat_msg {
int rep_type;
int rep_size;
int req_type;
+ struct net *net;
struct sk_buff *rep;
struct tlv_desc *req;
struct sock *dst_sk;
@@ -68,7 +69,8 @@ struct tipc_nl_compat_cmd_dump {
struct tipc_nl_compat_cmd_doit {
int (*doit)(struct sk_buff *skb, struct genl_info *info);
- int (*transcode)(struct sk_buff *skb, struct tipc_nl_compat_msg *msg);
+ int (*transcode)(struct tipc_nl_compat_cmd_doit *cmd,
+ struct sk_buff *skb, struct tipc_nl_compat_msg *msg);
};
static int tipc_skb_tailroom(struct sk_buff *skb)
@@ -281,7 +283,7 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd,
if (!trans_buf)
return -ENOMEM;
- err = (*cmd->transcode)(trans_buf, msg);
+ err = (*cmd->transcode)(cmd, trans_buf, msg);
if (err)
goto trans_out;
@@ -353,7 +355,8 @@ static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg,
nla_len(bearer[TIPC_NLA_BEARER_NAME]));
}
-static int tipc_nl_compat_bearer_enable(struct sk_buff *skb,
+static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd,
+ struct sk_buff *skb,
struct tipc_nl_compat_msg *msg)
{
struct nlattr *prop;
@@ -385,7 +388,8 @@ static int tipc_nl_compat_bearer_enable(struct sk_buff *skb,
return 0;
}
-static int tipc_nl_compat_bearer_disable(struct sk_buff *skb,
+static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd,
+ struct sk_buff *skb,
struct tipc_nl_compat_msg *msg)
{
char *name;
@@ -576,11 +580,81 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
&link_info, sizeof(link_info));
}
-static int tipc_nl_compat_link_set(struct sk_buff *skb,
- struct tipc_nl_compat_msg *msg)
+static int __tipc_add_link_prop(struct sk_buff *skb,
+ struct tipc_nl_compat_msg *msg,
+ struct tipc_link_config *lc)
+{
+ switch (msg->cmd) {
+ case TIPC_CMD_SET_LINK_PRI:
+ return nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(lc->value));
+ case TIPC_CMD_SET_LINK_TOL:
+ return nla_put_u32(skb, TIPC_NLA_PROP_TOL, ntohl(lc->value));
+ case TIPC_CMD_SET_LINK_WINDOW:
+ return nla_put_u32(skb, TIPC_NLA_PROP_WIN, ntohl(lc->value));
+ }
+
+ return -EINVAL;
+}
+
+static int tipc_nl_compat_media_set(struct sk_buff *skb,
+ struct tipc_nl_compat_msg *msg)
{
- struct nlattr *link;
struct nlattr *prop;
+ struct nlattr *media;
+ struct tipc_link_config *lc;
+
+ lc = (struct tipc_link_config *)TLV_DATA(msg->req);
+
+ media = nla_nest_start(skb, TIPC_NLA_MEDIA);
+ if (!media)
+ return -EMSGSIZE;
+
+ if (nla_put_string(skb, TIPC_NLA_MEDIA_NAME, lc->name))
+ return -EMSGSIZE;
+
+ prop = nla_nest_start(skb, TIPC_NLA_MEDIA_PROP);
+ if (!prop)
+ return -EMSGSIZE;
+
+ __tipc_add_link_prop(skb, msg, lc);
+ nla_nest_end(skb, prop);
+ nla_nest_end(skb, media);
+
+ return 0;
+}
+
+static int tipc_nl_compat_bearer_set(struct sk_buff *skb,
+ struct tipc_nl_compat_msg *msg)
+{
+ struct nlattr *prop;
+ struct nlattr *bearer;
+ struct tipc_link_config *lc;
+
+ lc = (struct tipc_link_config *)TLV_DATA(msg->req);
+
+ bearer = nla_nest_start(skb, TIPC_NLA_BEARER);
+ if (!bearer)
+ return -EMSGSIZE;
+
+ if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, lc->name))
+ return -EMSGSIZE;
+
+ prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP);
+ if (!prop)
+ return -EMSGSIZE;
+
+ __tipc_add_link_prop(skb, msg, lc);
+ nla_nest_end(skb, prop);
+ nla_nest_end(skb, bearer);
+
+ return 0;
+}
+
+static int __tipc_nl_compat_link_set(struct sk_buff *skb,
+ struct tipc_nl_compat_msg *msg)
+{
+ struct nlattr *prop;
+ struct nlattr *link;
struct tipc_link_config *lc;
lc = (struct tipc_link_config *)TLV_DATA(msg->req);
@@ -596,24 +670,40 @@ static int tipc_nl_compat_link_set(struct sk_buff *skb,
if (!prop)
return -EMSGSIZE;
- if (msg->cmd == TIPC_CMD_SET_LINK_PRI) {
- if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(lc->value)))
- return -EMSGSIZE;
- } else if (msg->cmd == TIPC_CMD_SET_LINK_TOL) {
- if (nla_put_u32(skb, TIPC_NLA_PROP_TOL, ntohl(lc->value)))
- return -EMSGSIZE;
- } else if (msg->cmd == TIPC_CMD_SET_LINK_WINDOW) {
- if (nla_put_u32(skb, TIPC_NLA_PROP_WIN, ntohl(lc->value)))
- return -EMSGSIZE;
- }
-
+ __tipc_add_link_prop(skb, msg, lc);
nla_nest_end(skb, prop);
nla_nest_end(skb, link);
return 0;
}
-static int tipc_nl_compat_link_reset_stats(struct sk_buff *skb,
+static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd,
+ struct sk_buff *skb,
+ struct tipc_nl_compat_msg *msg)
+{
+ struct tipc_link_config *lc;
+ struct tipc_bearer *bearer;
+ struct tipc_media *media;
+
+ lc = (struct tipc_link_config *)TLV_DATA(msg->req);
+
+ media = tipc_media_find(lc->name);
+ if (media) {
+ cmd->doit = &tipc_nl_media_set;
+ return tipc_nl_compat_media_set(skb, msg);
+ }
+
+ bearer = tipc_bearer_find(msg->net, lc->name);
+ if (bearer) {
+ cmd->doit = &tipc_nl_bearer_set;
+ return tipc_nl_compat_bearer_set(skb, msg);
+ }
+
+ return __tipc_nl_compat_link_set(skb, msg);
+}
+
+static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd,
+ struct sk_buff *skb,
struct tipc_nl_compat_msg *msg)
{
char *name;
@@ -851,7 +941,8 @@ static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg,
sizeof(node_info));
}
-static int tipc_nl_compat_net_set(struct sk_buff *skb,
+static int tipc_nl_compat_net_set(struct tipc_nl_compat_cmd_doit *cmd,
+ struct sk_buff *skb,
struct tipc_nl_compat_msg *msg)
{
u32 val;
@@ -1007,7 +1098,6 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
struct nlmsghdr *req_nlh;
struct nlmsghdr *rep_nlh;
struct tipc_genlmsghdr *req_userhdr = info->userhdr;
- struct net *net = genl_info_net(info);
memset(&msg, 0, sizeof(msg));
@@ -1015,6 +1105,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN;
msg.cmd = req_userhdr->cmd;
msg.dst_sk = info->dst_sk;
+ msg.net = genl_info_net(info);
if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) {
msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_NET_ADMIN);
@@ -1030,7 +1121,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
}
err = tipc_nl_compat_handle(&msg);
- if (err == -EOPNOTSUPP)
+ if ((err == -EOPNOTSUPP) || (err == -EPERM))
msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED);
else if (err == -EINVAL)
msg.rep = tipc_get_err_tlv(TIPC_CFG_TLV_ERROR);
@@ -1043,7 +1134,7 @@ send:
rep_nlh = nlmsg_hdr(msg.rep);
memcpy(rep_nlh, info->nlhdr, len);
rep_nlh->nlmsg_len = msg.rep->len;
- genlmsg_unicast(net, msg.rep, NETLINK_CB(skb).portid);
+ genlmsg_unicast(msg.net, msg.rep, NETLINK_CB(skb).portid);
return err;
}
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 22c059ad2..0b1d61a5f 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1,7 +1,7 @@
/*
* net/tipc/node.c: TIPC node management routines
*
- * Copyright (c) 2000-2006, 2012-2014, Ericsson AB
+ * Copyright (c) 2000-2006, 2012-2015, Ericsson AB
* Copyright (c) 2005-2006, 2010-2014, Wind River Systems
* All rights reserved.
*
@@ -39,6 +39,7 @@
#include "node.h"
#include "name_distr.h"
#include "socket.h"
+#include "bcast.h"
static void node_lost_contact(struct tipc_node *n_ptr);
static void node_established_contact(struct tipc_node *n_ptr);
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 02d5c20dc..5a834cf14 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -45,8 +45,6 @@
/* Out-of-range value for node signature */
#define INVALID_NODE_SIG 0x10000
-#define NODE_HTABLE_SIZE 512
-
/* Flags used to take different actions according to flag type
* TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down
* TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 77ff03ed1..922e04a43 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -309,6 +309,10 @@ static int tipc_accept_from_sock(struct tipc_conn *con)
/* Notify that new connection is incoming */
newcon->usr_data = s->tipc_conn_new(newcon->conid);
+ if (!newcon->usr_data) {
+ sock_release(newsock);
+ return -ENOMEM;
+ }
/* Wake up receive process in case of 'SYN+' message */
newsock->sk->sk_data_ready(newsock->sk);
@@ -321,7 +325,7 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
struct socket *sock = NULL;
int ret;
- ret = __sock_create(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock, 1);
+ ret = sock_create_kern(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock);
if (ret < 0)
return NULL;
ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index f485600c4..3a7567f69 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -41,6 +41,7 @@
#include "link.h"
#include "name_distr.h"
#include "socket.h"
+#include "bcast.h"
#define SS_LISTENING -1 /* socket is listening */
#define SS_READY -2 /* socket is connectionless */
@@ -342,7 +343,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
}
/* Allocate socket's protocol area */
- sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto);
+ sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern);
if (sk == NULL)
return -ENOMEM;
@@ -409,7 +410,7 @@ static int tipc_release(struct socket *sock)
struct net *net;
struct tipc_sock *tsk;
struct sk_buff *skb;
- u32 dnode, probing_state;
+ u32 dnode;
/*
* Exit if socket isn't fully initialized (occurs when a failed accept()
@@ -447,10 +448,7 @@ static int tipc_release(struct socket *sock)
}
tipc_sk_withdraw(tsk, 0, NULL);
- probing_state = tsk->probing_state;
- if (del_timer_sync(&sk->sk_timer) &&
- probing_state != TIPC_CONN_PROBING)
- sock_put(sk);
+ sk_stop_timer(sk, &sk->sk_timer);
tipc_sk_remove(tsk);
if (tsk->connected) {
skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
@@ -2009,6 +2007,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1);
if (res)
goto exit;
+ security_sk_clone(sock->sk, new_sock->sk);
new_sk = new_sock->sk;
new_tsock = tipc_sk(new_sk);
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 1c147c869..350cca33e 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -40,16 +40,21 @@
/**
* struct tipc_subscriber - TIPC network topology subscriber
+ * @kref: reference counter to tipc_subscription object
* @conid: connection identifier to server connecting to subscriber
* @lock: control access to subscriber
- * @subscription_list: list of subscription objects for this subscriber
+ * @subscrp_list: list of subscription objects for this subscriber
*/
struct tipc_subscriber {
+ struct kref kref;
int conid;
spinlock_t lock;
- struct list_head subscription_list;
+ struct list_head subscrp_list;
};
+static void tipc_subscrp_delete(struct tipc_subscription *sub);
+static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
+
/**
* htohl - convert value to endianness used by destination
* @in: value to convert
@@ -62,9 +67,9 @@ static u32 htohl(u32 in, int swap)
return swap ? swab32(in) : in;
}
-static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower,
- u32 found_upper, u32 event, u32 port_ref,
- u32 node)
+static void tipc_subscrp_send_event(struct tipc_subscription *sub,
+ u32 found_lower, u32 found_upper,
+ u32 event, u32 port_ref, u32 node)
{
struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
struct tipc_subscriber *subscriber = sub->subscriber;
@@ -82,12 +87,13 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower,
}
/**
- * tipc_subscr_overlap - test for subscription overlap with the given values
+ * tipc_subscrp_check_overlap - test for subscription overlap with the
+ * given values
*
* Returns 1 if there is overlap, otherwise 0.
*/
-int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower,
- u32 found_upper)
+int tipc_subscrp_check_overlap(struct tipc_subscription *sub, u32 found_lower,
+ u32 found_upper)
{
if (found_lower < sub->seq.lower)
found_lower = sub->seq.lower;
@@ -98,138 +104,121 @@ int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower,
return 1;
}
-/**
- * tipc_subscr_report_overlap - issue event if there is subscription overlap
- *
- * Protected by nameseq.lock in name_table.c
- */
-void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower,
- u32 found_upper, u32 event, u32 port_ref,
- u32 node, int must)
+void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
+ u32 found_upper, u32 event, u32 port_ref,
+ u32 node, int must)
{
- if (!tipc_subscr_overlap(sub, found_lower, found_upper))
+ if (!tipc_subscrp_check_overlap(sub, found_lower, found_upper))
return;
if (!must && !(sub->filter & TIPC_SUB_PORTS))
return;
- subscr_send_event(sub, found_lower, found_upper, event, port_ref, node);
+ tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
+ node);
}
-static void subscr_timeout(unsigned long data)
+static void tipc_subscrp_timeout(unsigned long data)
{
struct tipc_subscription *sub = (struct tipc_subscription *)data;
struct tipc_subscriber *subscriber = sub->subscriber;
- struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
- /* The spin lock per subscriber is used to protect its members */
- spin_lock_bh(&subscriber->lock);
+ /* Notify subscriber of timeout */
+ tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
+ TIPC_SUBSCR_TIMEOUT, 0, 0);
- /* Validate timeout (in case subscription is being cancelled) */
- if (sub->timeout == TIPC_WAIT_FOREVER) {
- spin_unlock_bh(&subscriber->lock);
- return;
- }
+ spin_lock_bh(&subscriber->lock);
+ tipc_subscrp_delete(sub);
+ spin_unlock_bh(&subscriber->lock);
- /* Unlink subscription from name table */
- tipc_nametbl_unsubscribe(sub);
+ tipc_subscrb_put(subscriber);
+}
- /* Unlink subscription from subscriber */
- list_del(&sub->subscription_list);
+static void tipc_subscrb_kref_release(struct kref *kref)
+{
+ struct tipc_subscriber *subcriber = container_of(kref,
+ struct tipc_subscriber, kref);
- spin_unlock_bh(&subscriber->lock);
+ kfree(subcriber);
+}
- /* Notify subscriber of timeout */
- subscr_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
- TIPC_SUBSCR_TIMEOUT, 0, 0);
+static void tipc_subscrb_put(struct tipc_subscriber *subscriber)
+{
+ kref_put(&subscriber->kref, tipc_subscrb_kref_release);
+}
- /* Now destroy subscription */
- kfree(sub);
- atomic_dec(&tn->subscription_count);
+static void tipc_subscrb_get(struct tipc_subscriber *subscriber)
+{
+ kref_get(&subscriber->kref);
}
-/**
- * subscr_del - delete a subscription within a subscription list
- *
- * Called with subscriber lock held.
- */
-static void subscr_del(struct tipc_subscription *sub)
+static struct tipc_subscriber *tipc_subscrb_create(int conid)
{
- struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
+ struct tipc_subscriber *subscriber;
- tipc_nametbl_unsubscribe(sub);
- list_del(&sub->subscription_list);
- kfree(sub);
- atomic_dec(&tn->subscription_count);
+ subscriber = kzalloc(sizeof(*subscriber), GFP_ATOMIC);
+ if (!subscriber) {
+ pr_warn("Subscriber rejected, no memory\n");
+ return NULL;
+ }
+ kref_init(&subscriber->kref);
+ INIT_LIST_HEAD(&subscriber->subscrp_list);
+ subscriber->conid = conid;
+ spin_lock_init(&subscriber->lock);
+
+ return subscriber;
}
-static void subscr_release(struct tipc_subscriber *subscriber)
+static void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
{
- struct tipc_subscription *sub;
- struct tipc_subscription *sub_temp;
+ struct tipc_subscription *sub, *temp;
spin_lock_bh(&subscriber->lock);
-
/* Destroy any existing subscriptions for subscriber */
- list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list,
- subscription_list) {
- if (sub->timeout != TIPC_WAIT_FOREVER) {
- spin_unlock_bh(&subscriber->lock);
- del_timer_sync(&sub->timer);
- spin_lock_bh(&subscriber->lock);
+ list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list,
+ subscrp_list) {
+ if (del_timer(&sub->timer)) {
+ tipc_subscrp_delete(sub);
+ tipc_subscrb_put(subscriber);
}
- subscr_del(sub);
}
spin_unlock_bh(&subscriber->lock);
- /* Now destroy subscriber */
- kfree(subscriber);
+ tipc_subscrb_put(subscriber);
}
-/**
- * subscr_cancel - handle subscription cancellation request
- *
- * Called with subscriber lock held. Routine must temporarily release lock
- * to enable the subscription timeout routine to finish without deadlocking;
- * the lock is then reclaimed to allow caller to release it upon return.
- *
- * Note that fields of 's' use subscriber's endianness!
- */
-static void subscr_cancel(struct tipc_subscr *s,
- struct tipc_subscriber *subscriber)
+static void tipc_subscrp_delete(struct tipc_subscription *sub)
{
- struct tipc_subscription *sub;
- struct tipc_subscription *sub_temp;
- int found = 0;
+ struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
+
+ tipc_nametbl_unsubscribe(sub);
+ list_del(&sub->subscrp_list);
+ kfree(sub);
+ atomic_dec(&tn->subscription_count);
+}
+static void tipc_subscrp_cancel(struct tipc_subscr *s,
+ struct tipc_subscriber *subscriber)
+{
+ struct tipc_subscription *sub, *temp;
+
+ spin_lock_bh(&subscriber->lock);
/* Find first matching subscription, exit if not found */
- list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list,
- subscription_list) {
+ list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list,
+ subscrp_list) {
if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) {
- found = 1;
+ if (del_timer(&sub->timer)) {
+ tipc_subscrp_delete(sub);
+ tipc_subscrb_put(subscriber);
+ }
break;
}
}
- if (!found)
- return;
-
- /* Cancel subscription timer (if used), then delete subscription */
- if (sub->timeout != TIPC_WAIT_FOREVER) {
- sub->timeout = TIPC_WAIT_FOREVER;
- spin_unlock_bh(&subscriber->lock);
- del_timer_sync(&sub->timer);
- spin_lock_bh(&subscriber->lock);
- }
- subscr_del(sub);
+ spin_unlock_bh(&subscriber->lock);
}
-/**
- * subscr_subscribe - create subscription for subscriber
- *
- * Called with subscriber lock held.
- */
-static int subscr_subscribe(struct net *net, struct tipc_subscr *s,
- struct tipc_subscriber *subscriber,
- struct tipc_subscription **sub_p)
+static int tipc_subscrp_create(struct net *net, struct tipc_subscr *s,
+ struct tipc_subscriber *subscriber,
+ struct tipc_subscription **sub_p)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_subscription *sub;
@@ -241,7 +230,7 @@ static int subscr_subscribe(struct net *net, struct tipc_subscr *s,
/* Detect & process a subscription cancellation request */
if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
- subscr_cancel(s, subscriber);
+ tipc_subscrp_cancel(s, subscriber);
return 0;
}
@@ -273,62 +262,51 @@ static int subscr_subscribe(struct net *net, struct tipc_subscr *s,
kfree(sub);
return -EINVAL;
}
- list_add(&sub->subscription_list, &subscriber->subscription_list);
+ spin_lock_bh(&subscriber->lock);
+ list_add(&sub->subscrp_list, &subscriber->subscrp_list);
+ spin_unlock_bh(&subscriber->lock);
sub->subscriber = subscriber;
sub->swap = swap;
- memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr));
+ memcpy(&sub->evt.s, s, sizeof(*s));
atomic_inc(&tn->subscription_count);
- if (sub->timeout != TIPC_WAIT_FOREVER) {
- setup_timer(&sub->timer, subscr_timeout, (unsigned long)sub);
- mod_timer(&sub->timer, jiffies + sub->timeout);
- }
+ setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub);
+ if (sub->timeout != TIPC_WAIT_FOREVER)
+ sub->timeout += jiffies;
+ if (!mod_timer(&sub->timer, sub->timeout))
+ tipc_subscrb_get(subscriber);
*sub_p = sub;
return 0;
}
/* Handle one termination request for the subscriber */
-static void subscr_conn_shutdown_event(int conid, void *usr_data)
+static void tipc_subscrb_shutdown_cb(int conid, void *usr_data)
{
- subscr_release((struct tipc_subscriber *)usr_data);
+ tipc_subscrb_delete((struct tipc_subscriber *)usr_data);
}
/* Handle one request to create a new subscription for the subscriber */
-static void subscr_conn_msg_event(struct net *net, int conid,
- struct sockaddr_tipc *addr, void *usr_data,
- void *buf, size_t len)
+static void tipc_subscrb_rcv_cb(struct net *net, int conid,
+ struct sockaddr_tipc *addr, void *usr_data,
+ void *buf, size_t len)
{
struct tipc_subscriber *subscriber = usr_data;
struct tipc_subscription *sub = NULL;
struct tipc_net *tn = net_generic(net, tipc_net_id);
- spin_lock_bh(&subscriber->lock);
- subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, &sub);
+ tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscriber, &sub);
if (sub)
tipc_nametbl_subscribe(sub);
else
tipc_conn_terminate(tn->topsrv, subscriber->conid);
- spin_unlock_bh(&subscriber->lock);
}
/* Handle one request to establish a new subscriber */
-static void *subscr_named_msg_event(int conid)
+static void *tipc_subscrb_connect_cb(int conid)
{
- struct tipc_subscriber *subscriber;
-
- /* Create subscriber object */
- subscriber = kzalloc(sizeof(struct tipc_subscriber), GFP_ATOMIC);
- if (subscriber == NULL) {
- pr_warn("Subscriber rejected, no memory\n");
- return NULL;
- }
- INIT_LIST_HEAD(&subscriber->subscription_list);
- subscriber->conid = conid;
- spin_lock_init(&subscriber->lock);
-
- return (void *)subscriber;
+ return (void *)tipc_subscrb_create(conid);
}
-int tipc_subscr_start(struct net *net)
+int tipc_topsrv_start(struct net *net)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
const char name[] = "topology_server";
@@ -355,9 +333,9 @@ int tipc_subscr_start(struct net *net)
topsrv->imp = TIPC_CRITICAL_IMPORTANCE;
topsrv->type = SOCK_SEQPACKET;
topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr);
- topsrv->tipc_conn_recvmsg = subscr_conn_msg_event;
- topsrv->tipc_conn_new = subscr_named_msg_event;
- topsrv->tipc_conn_shutdown = subscr_conn_shutdown_event;
+ topsrv->tipc_conn_recvmsg = tipc_subscrb_rcv_cb;
+ topsrv->tipc_conn_new = tipc_subscrb_connect_cb;
+ topsrv->tipc_conn_shutdown = tipc_subscrb_shutdown_cb;
strncpy(topsrv->name, name, strlen(name) + 1);
tn->topsrv = topsrv;
@@ -366,7 +344,7 @@ int tipc_subscr_start(struct net *net)
return tipc_server_start(topsrv);
}
-void tipc_subscr_stop(struct net *net)
+void tipc_topsrv_stop(struct net *net)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_server *topsrv = tn->topsrv;
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 33488bd9f..92ee18cc5 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -54,7 +54,7 @@ struct tipc_subscriber;
* @filter: event filtering to be done for subscription
* @timer: timer governing subscription duration (optional)
* @nameseq_list: adjacent subscriptions in name sequence's subscription list
- * @subscription_list: adjacent subscriptions in subscriber's subscription list
+ * @subscrp_list: adjacent subscriptions in subscriber's subscription list
* @server_ref: object reference of server port associated with subscription
* @swap: indicates if subscriber uses opposite endianness in its messages
* @evt: template for events generated by subscription
@@ -67,17 +67,17 @@ struct tipc_subscription {
u32 filter;
struct timer_list timer;
struct list_head nameseq_list;
- struct list_head subscription_list;
+ struct list_head subscrp_list;
int swap;
struct tipc_event evt;
};
-int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower,
- u32 found_upper);
-void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower,
- u32 found_upper, u32 event, u32 port_ref,
- u32 node, int must);
-int tipc_subscr_start(struct net *net);
-void tipc_subscr_stop(struct net *net);
+int tipc_subscrp_check_overlap(struct tipc_subscription *sub, u32 found_lower,
+ u32 found_upper);
+void tipc_subscrp_report_overlap(struct tipc_subscription *sub,
+ u32 found_lower, u32 found_upper, u32 event,
+ u32 port_ref, u32 node, int must);
+int tipc_topsrv_start(struct net *net);
+void tipc_topsrv_stop(struct net *net);
#endif
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 06430598c..03ee4d359 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -140,12 +140,17 @@ static struct hlist_head *unix_sockets_unbound(void *addr)
#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
- memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
+ UNIXCB(skb).secid = scm->secid;
}
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
- scm->secid = *UNIXSID(skb);
+ scm->secid = UNIXCB(skb).secid;
+}
+
+static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
@@ -153,6 +158,11 @@ static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
+
+static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ return true;
+}
#endif /* CONFIG_SECURITY_NETWORK */
/*
@@ -518,6 +528,11 @@ static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
+static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
+ size_t size, int flags);
+static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
+ struct pipe_inode_info *, size_t size,
+ unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
@@ -558,7 +573,8 @@ static const struct proto_ops unix_stream_ops = {
.sendmsg = unix_stream_sendmsg,
.recvmsg = unix_stream_recvmsg,
.mmap = sock_no_mmap,
- .sendpage = sock_no_sendpage,
+ .sendpage = unix_stream_sendpage,
+ .splice_read = unix_stream_splice_read,
.set_peek_off = unix_set_peek_off,
};
@@ -620,7 +636,7 @@ static struct proto unix_proto = {
*/
static struct lock_class_key af_unix_sk_receive_queue_lock_key;
-static struct sock *unix_create1(struct net *net, struct socket *sock)
+static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
struct sock *sk = NULL;
struct unix_sock *u;
@@ -629,7 +645,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
goto out;
- sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
+ sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
if (!sk)
goto out;
@@ -688,7 +704,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
return -ESOCKTNOSUPPORT;
}
- return unix_create1(net, sock) ? 0 : -ENOMEM;
+ return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}
static int unix_release(struct socket *sock)
@@ -1088,7 +1104,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
err = -ENOMEM;
/* create new sock for complete connection */
- newsk = unix_create1(sock_net(sk), NULL);
+ newsk = unix_create1(sock_net(sk), NULL, 0);
if (newsk == NULL)
goto out;
@@ -1408,6 +1424,7 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
UNIXCB(skb).uid = scm->creds.uid;
UNIXCB(skb).gid = scm->creds.gid;
UNIXCB(skb).fp = NULL;
+ unix_get_secdata(scm, skb);
if (scm->fp && send_fds)
err = unix_attach_fds(scm, skb);
@@ -1503,7 +1520,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
if (err < 0)
goto out_free;
max_level = err + 1;
- unix_get_secdata(&scm, skb);
skb_put(skb, len - data_len);
skb->data_len = data_len;
@@ -1720,6 +1736,101 @@ out_err:
return sent ? : err;
}
+static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
+ int offset, size_t size, int flags)
+{
+ int err = 0;
+ bool send_sigpipe = true;
+ struct sock *other, *sk = socket->sk;
+ struct sk_buff *skb, *newskb = NULL, *tail = NULL;
+
+ if (flags & MSG_OOB)
+ return -EOPNOTSUPP;
+
+ other = unix_peer(sk);
+ if (!other || sk->sk_state != TCP_ESTABLISHED)
+ return -ENOTCONN;
+
+ if (false) {
+alloc_skb:
+ unix_state_unlock(other);
+ mutex_unlock(&unix_sk(other)->readlock);
+ newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
+ &err, 0);
+ if (!newskb)
+ return err;
+ }
+
+ /* we must acquire readlock as we modify already present
+ * skbs in the sk_receive_queue and mess with skb->len
+ */
+ err = mutex_lock_interruptible(&unix_sk(other)->readlock);
+ if (err) {
+ err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
+ send_sigpipe = false;
+ goto err;
+ }
+
+ if (sk->sk_shutdown & SEND_SHUTDOWN) {
+ err = -EPIPE;
+ goto err_unlock;
+ }
+
+ unix_state_lock(other);
+
+ if (sock_flag(other, SOCK_DEAD) ||
+ other->sk_shutdown & RCV_SHUTDOWN) {
+ err = -EPIPE;
+ goto err_state_unlock;
+ }
+
+ skb = skb_peek_tail(&other->sk_receive_queue);
+ if (tail && tail == skb) {
+ skb = newskb;
+ } else if (!skb) {
+ if (newskb)
+ skb = newskb;
+ else
+ goto alloc_skb;
+ } else if (newskb) {
+ /* this is fast path, we don't necessarily need to
+ * call to kfree_skb even though with newskb == NULL
+ * this - does no harm
+ */
+ consume_skb(newskb);
+ }
+
+ if (skb_append_pagefrags(skb, page, offset, size)) {
+ tail = skb;
+ goto alloc_skb;
+ }
+
+ skb->len += size;
+ skb->data_len += size;
+ skb->truesize += size;
+ atomic_add(size, &sk->sk_wmem_alloc);
+
+ if (newskb)
+ __skb_queue_tail(&other->sk_receive_queue, newskb);
+
+ unix_state_unlock(other);
+ mutex_unlock(&unix_sk(other)->readlock);
+
+ other->sk_data_ready(other);
+
+ return size;
+
+err_state_unlock:
+ unix_state_unlock(other);
+err_unlock:
+ mutex_unlock(&unix_sk(other)->readlock);
+err:
+ kfree_skb(newskb);
+ if (send_sigpipe && !(flags & MSG_NOSIGNAL))
+ send_sig(SIGPIPE, current, 0);
+ return err;
+}
+
static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
@@ -1860,8 +1971,9 @@ out:
* Sleep until more data has arrived. But check for races..
*/
static long unix_stream_data_wait(struct sock *sk, long timeo,
- struct sk_buff *last)
+ struct sk_buff *last, unsigned int last_len)
{
+ struct sk_buff *tail;
DEFINE_WAIT(wait);
unix_state_lock(sk);
@@ -1869,7 +1981,9 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- if (skb_peek_tail(&sk->sk_receive_queue) != last ||
+ tail = skb_peek_tail(&sk->sk_receive_queue);
+ if (tail != last ||
+ (tail && tail->len != last_len) ||
sk->sk_err ||
(sk->sk_shutdown & RCV_SHUTDOWN) ||
signal_pending(current) ||
@@ -1897,38 +2011,50 @@ static unsigned int unix_skb_len(const struct sk_buff *skb)
return skb->len - UNIXCB(skb).consumed;
}
-static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
- size_t size, int flags)
+struct unix_stream_read_state {
+ int (*recv_actor)(struct sk_buff *, int, int,
+ struct unix_stream_read_state *);
+ struct socket *socket;
+ struct msghdr *msg;
+ struct pipe_inode_info *pipe;
+ size_t size;
+ int flags;
+ unsigned int splice_flags;
+};
+
+static int unix_stream_read_generic(struct unix_stream_read_state *state)
{
struct scm_cookie scm;
+ struct socket *sock = state->socket;
struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk);
- DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
int copied = 0;
+ int flags = state->flags;
int noblock = flags & MSG_DONTWAIT;
- int check_creds = 0;
+ bool check_creds = false;
int target;
int err = 0;
long timeo;
int skip;
+ size_t size = state->size;
+ unsigned int last_len;
err = -EINVAL;
if (sk->sk_state != TCP_ESTABLISHED)
goto out;
err = -EOPNOTSUPP;
- if (flags&MSG_OOB)
+ if (flags & MSG_OOB)
goto out;
- target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
+ target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
timeo = sock_rcvtimeo(sk, noblock);
+ memset(&scm, 0, sizeof(scm));
+
/* Lock the socket to prevent queue disordering
* while sleeps in memcpy_tomsg
*/
-
- memset(&scm, 0, sizeof(scm));
-
err = mutex_lock_interruptible(&u->readlock);
if (unlikely(err)) {
/* recvmsg() in non blocking mode is supposed to return -EAGAIN
@@ -1948,6 +2074,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
goto unlock;
}
last = skb = skb_peek(&sk->sk_receive_queue);
+ last_len = last ? last->len : 0;
again:
if (skb == NULL) {
unix_sk(sk)->recursion_level = 0;
@@ -1970,16 +2097,17 @@ again:
break;
mutex_unlock(&u->readlock);
- timeo = unix_stream_data_wait(sk, timeo, last);
+ timeo = unix_stream_data_wait(sk, timeo, last,
+ last_len);
- if (signal_pending(current)
- || mutex_lock_interruptible(&u->readlock)) {
+ if (signal_pending(current) ||
+ mutex_lock_interruptible(&u->readlock)) {
err = sock_intr_errno(timeo);
goto out;
}
continue;
- unlock:
+unlock:
unix_state_unlock(sk);
break;
}
@@ -1988,6 +2116,7 @@ again:
while (skip >= unix_skb_len(skb)) {
skip -= unix_skb_len(skb);
last = skb;
+ last_len = skb->len;
skb = skb_peek_next(skb, &sk->sk_receive_queue);
if (!skb)
goto again;
@@ -1999,23 +2128,27 @@ again:
/* Never glue messages from different writers */
if ((UNIXCB(skb).pid != scm.pid) ||
!uid_eq(UNIXCB(skb).uid, scm.creds.uid) ||
- !gid_eq(UNIXCB(skb).gid, scm.creds.gid))
+ !gid_eq(UNIXCB(skb).gid, scm.creds.gid) ||
+ !unix_secdata_eq(&scm, skb))
break;
} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
/* Copy credentials */
scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
- check_creds = 1;
+ unix_set_secdata(&scm, skb);
+ check_creds = true;
}
/* Copy address just once */
- if (sunaddr) {
- unix_copy_addr(msg, skb->sk);
+ if (state->msg && state->msg->msg_name) {
+ DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
+ state->msg->msg_name);
+ unix_copy_addr(state->msg, skb->sk);
sunaddr = NULL;
}
chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
- if (skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
- msg, chunk)) {
+ chunk = state->recv_actor(skb, skip, chunk, state);
+ if (chunk < 0) {
if (copied == 0)
copied = -EFAULT;
break;
@@ -2053,11 +2186,85 @@ again:
} while (size);
mutex_unlock(&u->readlock);
- scm_recv(sock, msg, &scm, flags);
+ if (state->msg)
+ scm_recv(sock, state->msg, &scm, flags);
+ else
+ scm_destroy(&scm);
out:
return copied ? : err;
}
+static int unix_stream_read_actor(struct sk_buff *skb,
+ int skip, int chunk,
+ struct unix_stream_read_state *state)
+{
+ int ret;
+
+ ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
+ state->msg, chunk);
+ return ret ?: chunk;
+}
+
+static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t size, int flags)
+{
+ struct unix_stream_read_state state = {
+ .recv_actor = unix_stream_read_actor,
+ .socket = sock,
+ .msg = msg,
+ .size = size,
+ .flags = flags
+ };
+
+ return unix_stream_read_generic(&state);
+}
+
+static ssize_t skb_unix_socket_splice(struct sock *sk,
+ struct pipe_inode_info *pipe,
+ struct splice_pipe_desc *spd)
+{
+ int ret;
+ struct unix_sock *u = unix_sk(sk);
+
+ mutex_unlock(&u->readlock);
+ ret = splice_to_pipe(pipe, spd);
+ mutex_lock(&u->readlock);
+
+ return ret;
+}
+
+static int unix_stream_splice_actor(struct sk_buff *skb,
+ int skip, int chunk,
+ struct unix_stream_read_state *state)
+{
+ return skb_splice_bits(skb, state->socket->sk,
+ UNIXCB(skb).consumed + skip,
+ state->pipe, chunk, state->splice_flags,
+ skb_unix_socket_splice);
+}
+
+static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t size, unsigned int flags)
+{
+ struct unix_stream_read_state state = {
+ .recv_actor = unix_stream_splice_actor,
+ .socket = sock,
+ .pipe = pipe,
+ .size = size,
+ .splice_flags = flags,
+ };
+
+ if (unlikely(*ppos))
+ return -ESPIPE;
+
+ if (sock->file->f_flags & O_NONBLOCK ||
+ flags & SPLICE_F_NONBLOCK)
+ state.flags = MSG_DONTWAIT;
+
+ return unix_stream_read_generic(&state);
+}
+
static int unix_shutdown(struct socket *sock, int mode)
{
struct sock *sk = sock->sk;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 2ec86e652..df5fc6b34 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -581,13 +581,14 @@ struct sock *__vsock_create(struct net *net,
struct socket *sock,
struct sock *parent,
gfp_t priority,
- unsigned short type)
+ unsigned short type,
+ int kern)
{
struct sock *sk;
struct vsock_sock *psk;
struct vsock_sock *vsk;
- sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto);
+ sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto, kern);
if (!sk)
return NULL;
@@ -1866,7 +1867,7 @@ static int vsock_create(struct net *net, struct socket *sock,
sock->state = SS_UNCONNECTED;
- return __vsock_create(net, sock, NULL, GFP_KERNEL, 0) ? 0 : -ENOMEM;
+ return __vsock_create(net, sock, NULL, GFP_KERNEL, 0, kern) ? 0 : -ENOMEM;
}
static const struct net_proto_family vsock_family_ops = {
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index c294da095..1f63daff3 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1022,7 +1022,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
}
pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
- sk->sk_type);
+ sk->sk_type, 0);
if (!pending) {
vmci_transport_send_reset(sk, pkt);
return -ENOMEM;
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 7aaf7415d..59cabc9bc 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -698,19 +698,20 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
EXPORT_SYMBOL(cfg80211_chandef_usable);
/*
- * For GO only, check if the channel can be used under permissive conditions
- * mandated by the some regulatory bodies, i.e., the channel is marked with
- * IEEE80211_CHAN_GO_CONCURRENT and there is an additional station interface
+ * Check if the channel can be used under permissive conditions mandated by
+ * some regulatory bodies, i.e., the channel is marked with
+ * IEEE80211_CHAN_IR_CONCURRENT and there is an additional station interface
* associated to an AP on the same channel or on the same UNII band
* (assuming that the AP is an authorized master).
- * In addition allow the GO to operate on a channel on which indoor operation is
+ * In addition allow operation on a channel on which indoor operation is
* allowed, iff we are currently operating in an indoor environment.
*/
-static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev,
+static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy,
+ enum nl80211_iftype iftype,
struct ieee80211_channel *chan)
{
- struct wireless_dev *wdev_iter;
- struct wiphy *wiphy = wiphy_idx_to_wiphy(rdev->wiphy_idx);
+ struct wireless_dev *wdev;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
ASSERT_RTNL();
@@ -718,32 +719,48 @@ static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev,
!(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR))
return false;
+ /* only valid for GO and TDLS off-channel (station/p2p-CL) */
+ if (iftype != NL80211_IFTYPE_P2P_GO &&
+ iftype != NL80211_IFTYPE_STATION &&
+ iftype != NL80211_IFTYPE_P2P_CLIENT)
+ return false;
+
if (regulatory_indoor_allowed() &&
(chan->flags & IEEE80211_CHAN_INDOOR_ONLY))
return true;
- if (!(chan->flags & IEEE80211_CHAN_GO_CONCURRENT))
+ if (!(chan->flags & IEEE80211_CHAN_IR_CONCURRENT))
return false;
/*
* Generally, it is possible to rely on another device/driver to allow
- * the GO concurrent relaxation, however, since the device can further
+ * the IR concurrent relaxation, however, since the device can further
* enforce the relaxation (by doing a similar verifications as this),
* and thus fail the GO instantiation, consider only the interfaces of
* the current registered device.
*/
- list_for_each_entry(wdev_iter, &rdev->wdev_list, list) {
+ list_for_each_entry(wdev, &rdev->wdev_list, list) {
struct ieee80211_channel *other_chan = NULL;
int r1, r2;
- if (wdev_iter->iftype != NL80211_IFTYPE_STATION ||
- !netif_running(wdev_iter->netdev))
- continue;
-
- wdev_lock(wdev_iter);
- if (wdev_iter->current_bss)
- other_chan = wdev_iter->current_bss->pub.channel;
- wdev_unlock(wdev_iter);
+ wdev_lock(wdev);
+ if (wdev->iftype == NL80211_IFTYPE_STATION &&
+ wdev->current_bss)
+ other_chan = wdev->current_bss->pub.channel;
+
+ /*
+ * If a GO already operates on the same GO_CONCURRENT channel,
+ * this one (maybe the same one) can beacon as well. We allow
+ * the operation even if the station we relied on with
+ * GO_CONCURRENT is disconnected now. But then we must make sure
+ * we're not outdoor on an indoor-only channel.
+ */
+ if (iftype == NL80211_IFTYPE_P2P_GO &&
+ wdev->iftype == NL80211_IFTYPE_P2P_GO &&
+ wdev->beacon_interval &&
+ !(chan->flags & IEEE80211_CHAN_INDOOR_ONLY))
+ other_chan = wdev->chandef.chan;
+ wdev_unlock(wdev);
if (!other_chan)
continue;
@@ -780,25 +797,18 @@ static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev,
return false;
}
-bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
- struct cfg80211_chan_def *chandef,
- enum nl80211_iftype iftype)
+static bool _cfg80211_reg_can_beacon(struct wiphy *wiphy,
+ struct cfg80211_chan_def *chandef,
+ enum nl80211_iftype iftype,
+ bool check_no_ir)
{
- struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
bool res;
u32 prohibited_flags = IEEE80211_CHAN_DISABLED |
IEEE80211_CHAN_RADAR;
- trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype);
+ trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype, check_no_ir);
- /*
- * Under certain conditions suggested by the some regulatory bodies
- * a GO can operate on channels marked with IEEE80211_NO_IR
- * so set this flag only if such relaxations are not enabled and
- * the conditions are not met.
- */
- if (iftype != NL80211_IFTYPE_P2P_GO ||
- !cfg80211_go_permissive_chan(rdev, chandef->chan))
+ if (check_no_ir)
prohibited_flags |= IEEE80211_CHAN_NO_IR;
if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 &&
@@ -812,8 +822,36 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
trace_cfg80211_return_bool(res);
return res;
}
+
+bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
+ struct cfg80211_chan_def *chandef,
+ enum nl80211_iftype iftype)
+{
+ return _cfg80211_reg_can_beacon(wiphy, chandef, iftype, true);
+}
EXPORT_SYMBOL(cfg80211_reg_can_beacon);
+bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy,
+ struct cfg80211_chan_def *chandef,
+ enum nl80211_iftype iftype)
+{
+ bool check_no_ir;
+
+ ASSERT_RTNL();
+
+ /*
+ * Under certain conditions suggested by some regulatory bodies a
+ * GO/STA can IR on channels marked with IEEE80211_NO_IR. Set this flag
+ * only if such relaxations are not enabled and the conditions are not
+ * met.
+ */
+ check_no_ir = !cfg80211_ir_permissive_chan(wiphy, iftype,
+ chandef->chan);
+
+ return _cfg80211_reg_can_beacon(wiphy, chandef, iftype, check_no_ir);
+}
+EXPORT_SYMBOL(cfg80211_reg_can_beacon_relax);
+
int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev,
struct cfg80211_chan_def *chandef)
{
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 801cd49c5..311eef26b 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -222,6 +222,7 @@ struct cfg80211_event {
const u8 *ie;
size_t ie_len;
u16 reason;
+ bool locally_generated;
} dc;
struct {
u8 bssid[ETH_ALEN];
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index dd78445c7..76b41578a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -639,8 +639,8 @@ static int nl80211_msg_put_channel(struct sk_buff *msg,
if ((chan->flags & IEEE80211_CHAN_INDOOR_ONLY) &&
nla_put_flag(msg, NL80211_FREQUENCY_ATTR_INDOOR_ONLY))
goto nla_put_failure;
- if ((chan->flags & IEEE80211_CHAN_GO_CONCURRENT) &&
- nla_put_flag(msg, NL80211_FREQUENCY_ATTR_GO_CONCURRENT))
+ if ((chan->flags & IEEE80211_CHAN_IR_CONCURRENT) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_IR_CONCURRENT))
goto nla_put_failure;
if ((chan->flags & IEEE80211_CHAN_NO_20MHZ) &&
nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_20MHZ))
@@ -2003,7 +2003,8 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
switch (iftype) {
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
- if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, iftype)) {
+ if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &chandef,
+ iftype)) {
result = -EINVAL;
break;
}
@@ -3403,8 +3404,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
} else if (!nl80211_get_ap_channel(rdev, &params))
return -EINVAL;
- if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef,
- wdev->iftype))
+ if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &params.chandef,
+ wdev->iftype))
return -EINVAL;
if (info->attrs[NL80211_ATTR_ACL_POLICY]) {
@@ -4061,7 +4062,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
return -EINVAL;
break;
case CFG80211_STA_MESH_PEER_USER:
- if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION)
+ if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION &&
+ params->plink_action != NL80211_PLINK_ACTION_BLOCK)
return -EINVAL;
break;
}
@@ -6491,8 +6493,8 @@ skip_beacons:
if (err)
return err;
- if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef,
- wdev->iftype))
+ if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &params.chandef,
+ wdev->iftype))
return -EINVAL;
err = cfg80211_chandef_dfs_required(wdev->wiphy,
@@ -10169,7 +10171,8 @@ static int nl80211_tdls_channel_switch(struct sk_buff *skb,
return -EINVAL;
/* we will be active on the TDLS link */
- if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, wdev->iftype))
+ if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &chandef,
+ wdev->iftype))
return -EINVAL;
/* don't allow switching to DFS channels */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 0e347f888..aa2d75482 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -544,15 +544,15 @@ static int call_crda(const char *alpha2)
reg_regdb_query(alpha2);
if (reg_crda_timeouts > REG_MAX_CRDA_TIMEOUTS) {
- pr_info("Exceeded CRDA call max attempts. Not calling CRDA\n");
+ pr_debug("Exceeded CRDA call max attempts. Not calling CRDA\n");
return -EINVAL;
}
if (!is_world_regdom((char *) alpha2))
- pr_info("Calling CRDA for country: %c%c\n",
+ pr_debug("Calling CRDA for country: %c%c\n",
alpha2[0], alpha2[1]);
else
- pr_info("Calling CRDA to update world regulatory domain\n");
+ pr_debug("Calling CRDA to update world regulatory domain\n");
return kobject_uevent_env(&reg_pdev->dev.kobj, KOBJ_CHANGE, env);
}
@@ -989,8 +989,8 @@ static u32 map_regdom_flags(u32 rd_flags)
channel_flags |= IEEE80211_CHAN_NO_OFDM;
if (rd_flags & NL80211_RRF_NO_OUTDOOR)
channel_flags |= IEEE80211_CHAN_INDOOR_ONLY;
- if (rd_flags & NL80211_RRF_GO_CONCURRENT)
- channel_flags |= IEEE80211_CHAN_GO_CONCURRENT;
+ if (rd_flags & NL80211_RRF_IR_CONCURRENT)
+ channel_flags |= IEEE80211_CHAN_IR_CONCURRENT;
if (rd_flags & NL80211_RRF_NO_HT40MINUS)
channel_flags |= IEEE80211_CHAN_NO_HT40MINUS;
if (rd_flags & NL80211_RRF_NO_HT40PLUS)
@@ -1589,7 +1589,7 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev)
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
case NL80211_IFTYPE_ADHOC:
- return cfg80211_reg_can_beacon(wiphy, &chandef, iftype);
+ return cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype);
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_P2P_CLIENT:
return cfg80211_chandef_usable(wiphy, &chandef,
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index d11454f87..8020b5b09 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -938,7 +938,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
}
void cfg80211_disconnected(struct net_device *dev, u16 reason,
- const u8 *ie, size_t ie_len, gfp_t gfp)
+ const u8 *ie, size_t ie_len,
+ bool locally_generated, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
@@ -954,6 +955,7 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason,
ev->dc.ie_len = ie_len;
memcpy((void *)ev->dc.ie, ie, ie_len);
ev->dc.reason = reason;
+ ev->dc.locally_generated = locally_generated;
spin_lock_irqsave(&wdev->event_lock, flags);
list_add_tail(&ev->list, &wdev->event_list);
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 9ee6bc1a7..9cee02206 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -86,7 +86,7 @@ static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env)
return 0;
}
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
static void cfg80211_leave_all(struct cfg80211_registered_device *rdev)
{
struct wireless_dev *wdev;
@@ -95,7 +95,7 @@ static void cfg80211_leave_all(struct cfg80211_registered_device *rdev)
cfg80211_leave(rdev, wdev);
}
-static int wiphy_suspend(struct device *dev, pm_message_t state)
+static int wiphy_suspend(struct device *dev)
{
struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
int ret = 0;
@@ -136,6 +136,11 @@ static int wiphy_resume(struct device *dev)
return ret;
}
+
+static SIMPLE_DEV_PM_OPS(wiphy_pm_ops, wiphy_suspend, wiphy_resume);
+#define WIPHY_PM_OPS (&wiphy_pm_ops)
+#else
+#define WIPHY_PM_OPS NULL
#endif
static const void *wiphy_namespace(struct device *d)
@@ -151,10 +156,7 @@ struct class ieee80211_class = {
.dev_release = wiphy_dev_release,
.dev_groups = ieee80211_groups,
.dev_uevent = wiphy_uevent,
-#ifdef CONFIG_PM
- .suspend = wiphy_suspend,
- .resume = wiphy_resume,
-#endif
+ .pm = WIPHY_PM_OPS,
.ns_type = &net_ns_type_operations,
.namespace = wiphy_namespace,
};
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index af3617c98..a808279a4 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2358,20 +2358,23 @@ TRACE_EVENT(cfg80211_cqm_rssi_notify,
TRACE_EVENT(cfg80211_reg_can_beacon,
TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef,
- enum nl80211_iftype iftype),
- TP_ARGS(wiphy, chandef, iftype),
+ enum nl80211_iftype iftype, bool check_no_ir),
+ TP_ARGS(wiphy, chandef, iftype, check_no_ir),
TP_STRUCT__entry(
WIPHY_ENTRY
CHAN_DEF_ENTRY
__field(enum nl80211_iftype, iftype)
+ __field(bool, check_no_ir)
),
TP_fast_assign(
WIPHY_ASSIGN;
CHAN_DEF_ASSIGN(chandef);
__entry->iftype = iftype;
+ __entry->check_no_ir = check_no_ir;
),
- TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d",
- WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype)
+ TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d check_no_ir=%s",
+ WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype,
+ BOOL_TO_STR(__entry->check_no_ir))
);
TRACE_EVENT(cfg80211_chandef_dfs_required,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 7e4e3fffe..baf7218ce 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -887,7 +887,8 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev)
case EVENT_DISCONNECTED:
__cfg80211_disconnected(wdev->netdev,
ev->dc.ie, ev->dc.ie_len,
- ev->dc.reason, true);
+ ev->dc.reason,
+ !ev->dc.locally_generated);
break;
case EVENT_IBSS_JOINED:
__cfg80211_ibss_joined(wdev->netdev, ev->ij.bssid,
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index c3ab230e4..a750f330b 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -515,10 +515,10 @@ static struct proto x25_proto = {
.obj_size = sizeof(struct x25_sock),
};
-static struct sock *x25_alloc_socket(struct net *net)
+static struct sock *x25_alloc_socket(struct net *net, int kern)
{
struct x25_sock *x25;
- struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto);
+ struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto, kern);
if (!sk)
goto out;
@@ -553,7 +553,7 @@ static int x25_create(struct net *net, struct socket *sock, int protocol,
goto out;
rc = -ENOBUFS;
- if ((sk = x25_alloc_socket(net)) == NULL)
+ if ((sk = x25_alloc_socket(net, kern)) == NULL)
goto out;
x25 = x25_sk(sk);
@@ -602,7 +602,7 @@ static struct sock *x25_make_new(struct sock *osk)
if (osk->sk_type != SOCK_SEQPACKET)
goto out;
- if ((sk = x25_alloc_socket(sock_net(osk))) == NULL)
+ if ((sk = x25_alloc_socket(sock_net(osk), 0)) == NULL)
goto out;
x25 = x25_sk(sk);
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 12e82a5e4..42f7c76cf 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -31,6 +31,7 @@ static struct xfrm_algo_desc aead_list[] = {
.uinfo = {
.aead = {
+ .geniv = "seqniv",
.icv_truncbits = 64,
}
},
@@ -49,6 +50,7 @@ static struct xfrm_algo_desc aead_list[] = {
.uinfo = {
.aead = {
+ .geniv = "seqniv",
.icv_truncbits = 96,
}
},
@@ -67,6 +69,7 @@ static struct xfrm_algo_desc aead_list[] = {
.uinfo = {
.aead = {
+ .geniv = "seqniv",
.icv_truncbits = 128,
}
},
@@ -85,6 +88,7 @@ static struct xfrm_algo_desc aead_list[] = {
.uinfo = {
.aead = {
+ .geniv = "seqniv",
.icv_truncbits = 64,
}
},
@@ -103,6 +107,7 @@ static struct xfrm_algo_desc aead_list[] = {
.uinfo = {
.aead = {
+ .geniv = "seqniv",
.icv_truncbits = 96,
}
},
@@ -121,6 +126,7 @@ static struct xfrm_algo_desc aead_list[] = {
.uinfo = {
.aead = {
+ .geniv = "seqniv",
.icv_truncbits = 128,
}
},
@@ -139,6 +145,7 @@ static struct xfrm_algo_desc aead_list[] = {
.uinfo = {
.aead = {
+ .geniv = "seqiv",
.icv_truncbits = 128,
}
},
@@ -152,6 +159,18 @@ static struct xfrm_algo_desc aead_list[] = {
.sadb_alg_maxbits = 256
}
},
+{
+ .name = "rfc7539esp(chacha20,poly1305)",
+
+ .uinfo = {
+ .aead = {
+ .geniv = "seqniv",
+ .icv_truncbits = 128,
+ }
+ },
+
+ .pfkey_supported = 0,
+},
};
static struct xfrm_algo_desc aalg_list[] = {
@@ -353,6 +372,7 @@ static struct xfrm_algo_desc ealg_list[] = {
.uinfo = {
.encr = {
+ .geniv = "echainiv",
.blockbits = 64,
.defkeybits = 64,
}
@@ -373,6 +393,7 @@ static struct xfrm_algo_desc ealg_list[] = {
.uinfo = {
.encr = {
+ .geniv = "echainiv",
.blockbits = 64,
.defkeybits = 192,
}
@@ -393,6 +414,7 @@ static struct xfrm_algo_desc ealg_list[] = {
.uinfo = {
.encr = {
+ .geniv = "echainiv",
.blockbits = 64,
.defkeybits = 128,
}
@@ -413,6 +435,7 @@ static struct xfrm_algo_desc ealg_list[] = {
.uinfo = {
.encr = {
+ .geniv = "echainiv",
.blockbits = 64,
.defkeybits = 128,
}
@@ -433,6 +456,7 @@ static struct xfrm_algo_desc ealg_list[] = {
.uinfo = {
.encr = {
+ .geniv = "echainiv",
.blockbits = 128,
.defkeybits = 128,
}
@@ -453,6 +477,7 @@ static struct xfrm_algo_desc ealg_list[] = {
.uinfo = {
.encr = {
+ .geniv = "echainiv",
.blockbits = 128,
.defkeybits = 128,
}
@@ -473,6 +498,7 @@ static struct xfrm_algo_desc ealg_list[] = {
.uinfo = {
.encr = {
+ .geniv = "echainiv",
.blockbits = 128,
.defkeybits = 128,
}
@@ -493,6 +519,7 @@ static struct xfrm_algo_desc ealg_list[] = {
.uinfo = {
.encr = {
+ .geniv = "echainiv",
.blockbits = 128,
.defkeybits = 128,
}
@@ -512,6 +539,7 @@ static struct xfrm_algo_desc ealg_list[] = {
.uinfo = {
.encr = {
+ .geniv = "seqiv",
.blockbits = 128,
.defkeybits = 160, /* 128-bit key + 32-bit nonce */
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index b58286ecd..60ce7014e 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -31,7 +31,7 @@ int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo)
return -EAFNOSUPPORT;
spin_lock_bh(&xfrm_input_afinfo_lock);
if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL))
- err = -ENOBUFS;
+ err = -EEXIST;
else
rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo);
spin_unlock_bh(&xfrm_input_afinfo_lock);
@@ -254,13 +254,13 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
skb->sp->xvec[skb->sp->len++] = x;
spin_lock(&x->lock);
- if (unlikely(x->km.state == XFRM_STATE_ACQ)) {
- XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
- goto drop_unlock;
- }
if (unlikely(x->km.state != XFRM_STATE_VALID)) {
- XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID);
+ if (x->km.state == XFRM_STATE_ACQ)
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
+ else
+ XFRM_INC_STATS(net,
+ LINUX_MIB_XFRMINSTATEINVALID);
goto drop_unlock;
}
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index fbcedbe33..68ada2ca4 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -38,6 +38,18 @@ static int xfrm_skb_check_space(struct sk_buff *skb)
return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC);
}
+/* Children define the path of the packet through the
+ * Linux networking. Thus, destinations are stackable.
+ */
+
+static struct dst_entry *skb_dst_pop(struct sk_buff *skb)
+{
+ struct dst_entry *child = dst_clone(skb_dst(skb)->child);
+
+ skb_dst_drop(skb);
+ return child;
+}
+
static int xfrm_output_one(struct sk_buff *skb, int err)
{
struct dst_entry *dst = skb_dst(skb);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 638af0655..18cead764 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -315,14 +315,6 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
}
EXPORT_SYMBOL(xfrm_policy_destroy);
-static void xfrm_queue_purge(struct sk_buff_head *list)
-{
- struct sk_buff *skb;
-
- while ((skb = skb_dequeue(list)) != NULL)
- kfree_skb(skb);
-}
-
/* Rule must be locked. Release descentant resources, announce
* entry dead. The rule must be unlinked from lists to the moment.
*/
@@ -335,7 +327,7 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
if (del_timer(&policy->polq.hold_timer))
xfrm_pol_put(policy);
- xfrm_queue_purge(&policy->polq.hold_queue);
+ skb_queue_purge(&policy->polq.hold_queue);
if (del_timer(&policy->timer))
xfrm_pol_put(policy);
@@ -708,6 +700,9 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
struct xfrm_policy_queue *pq = &old->polq;
struct sk_buff_head list;
+ if (skb_queue_empty(&pq->hold_queue))
+ return;
+
__skb_queue_head_init(&list);
spin_lock_bh(&pq->hold_queue.lock);
@@ -716,9 +711,6 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
xfrm_pol_put(old);
spin_unlock_bh(&pq->hold_queue.lock);
- if (skb_queue_empty(&list))
- return;
-
pq = &new->polq;
spin_lock_bh(&pq->hold_queue.lock);
@@ -1012,7 +1004,9 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
if (list_empty(&walk->walk.all))
x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
else
- x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
+ x = list_first_entry(&walk->walk.all,
+ struct xfrm_policy_walk_entry, all);
+
list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
if (x->dead)
continue;
@@ -1120,6 +1114,9 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
}
chain = &net->xfrm.policy_inexact[dir];
hlist_for_each_entry(pol, chain, bydst) {
+ if ((pol->priority >= priority) && ret)
+ break;
+
err = xfrm_policy_match(pol, fl, type, family, dir);
if (err) {
if (err == -ESRCH)
@@ -1128,13 +1125,13 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
ret = ERR_PTR(err);
goto fail;
}
- } else if (pol->priority < priority) {
+ } else {
ret = pol;
break;
}
}
- if (ret)
- xfrm_pol_hold(ret);
+
+ xfrm_pol_hold(ret);
fail:
read_unlock_bh(&net->xfrm.xfrm_policy_lock);
@@ -1955,7 +1952,7 @@ out:
purge_queue:
pq->timeout = 0;
- xfrm_queue_purge(&pq->hold_queue);
+ skb_queue_purge(&pq->hold_queue);
xfrm_pol_put(pol);
}
@@ -2814,7 +2811,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
return -EAFNOSUPPORT;
spin_lock(&xfrm_policy_afinfo_lock);
if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
- err = -ENOBUFS;
+ err = -EEXIST;
else {
struct dst_ops *dst_ops = afinfo->dst_ops;
if (likely(dst_ops->kmem_cachep == NULL))
@@ -3209,16 +3206,17 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
}
chain = &net->xfrm.policy_inexact[dir];
hlist_for_each_entry(pol, chain, bydst) {
+ if ((pol->priority >= priority) && ret)
+ break;
+
if (xfrm_migrate_selector_match(sel, &pol->selector) &&
- pol->type == type &&
- pol->priority < priority) {
+ pol->type == type) {
ret = pol;
break;
}
}
- if (ret)
- xfrm_pol_hold(ret);
+ xfrm_pol_hold(ret);
read_unlock_bh(&net->xfrm.xfrm_policy_lock);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 96688cd0f..9895a8c56 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1626,7 +1626,7 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
if (list_empty(&walk->all))
x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all);
else
- x = list_entry(&walk->all, struct xfrm_state_walk, all);
+ x = list_first_entry(&walk->all, struct xfrm_state_walk, all);
list_for_each_entry_from(x, &net->xfrm.state_all, all) {
if (x->state == XFRM_STATE_DEAD)
continue;
@@ -1908,7 +1908,7 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
return -EAFNOSUPPORT;
spin_lock_bh(&xfrm_state_afinfo_lock);
if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
- err = -ENOBUFS;
+ err = -EEXIST;
else
rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo);
spin_unlock_bh(&xfrm_state_afinfo_lock);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 209166429..bd16c6c7e 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -289,6 +289,31 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
return 0;
}
+static int attach_crypt(struct xfrm_state *x, struct nlattr *rta)
+{
+ struct xfrm_algo *p, *ualg;
+ struct xfrm_algo_desc *algo;
+
+ if (!rta)
+ return 0;
+
+ ualg = nla_data(rta);
+
+ algo = xfrm_ealg_get_byname(ualg->alg_name, 1);
+ if (!algo)
+ return -ENOSYS;
+ x->props.ealgo = algo->desc.sadb_alg_id;
+
+ p = kmemdup(ualg, xfrm_alg_len(ualg), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ strcpy(p->alg_name, algo->name);
+ x->ealg = p;
+ x->geniv = algo->uinfo.encr.geniv;
+ return 0;
+}
+
static int attach_auth(struct xfrm_algo_auth **algpp, u8 *props,
struct nlattr *rta)
{
@@ -349,8 +374,7 @@ static int attach_auth_trunc(struct xfrm_algo_auth **algpp, u8 *props,
return 0;
}
-static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props,
- struct nlattr *rta)
+static int attach_aead(struct xfrm_state *x, struct nlattr *rta)
{
struct xfrm_algo_aead *p, *ualg;
struct xfrm_algo_desc *algo;
@@ -363,14 +387,15 @@ static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props,
algo = xfrm_aead_get_byname(ualg->alg_name, ualg->alg_icv_len, 1);
if (!algo)
return -ENOSYS;
- *props = algo->desc.sadb_alg_id;
+ x->props.ealgo = algo->desc.sadb_alg_id;
p = kmemdup(ualg, aead_len(ualg), GFP_KERNEL);
if (!p)
return -ENOMEM;
strcpy(p->alg_name, algo->name);
- *algpp = p;
+ x->aead = p;
+ x->geniv = algo->uinfo.aead.geniv;
return 0;
}
@@ -515,8 +540,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
if (attrs[XFRMA_SA_EXTRA_FLAGS])
x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
- if ((err = attach_aead(&x->aead, &x->props.ealgo,
- attrs[XFRMA_ALG_AEAD])))
+ if ((err = attach_aead(x, attrs[XFRMA_ALG_AEAD])))
goto error;
if ((err = attach_auth_trunc(&x->aalg, &x->props.aalgo,
attrs[XFRMA_ALG_AUTH_TRUNC])))
@@ -526,9 +550,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
attrs[XFRMA_ALG_AUTH])))
goto error;
}
- if ((err = attach_one_algo(&x->ealg, &x->props.ealgo,
- xfrm_ealg_get_byname,
- attrs[XFRMA_ALG_CRYPT])))
+ if ((err = attach_crypt(x, attrs[XFRMA_ALG_CRYPT])))
goto error;
if ((err = attach_one_algo(&x->calg, &x->props.calgo,
xfrm_calg_get_byname,