diff options
author | David S. Miller <davem@davemloft.net> | 2015-07-31 17:07:12 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-07-31 17:07:12 -0700 |
commit | 17f901e8915cb922c2ca710835ef34f166f53ee9 (patch) | |
tree | 127409645c4ae5f127756751b593c6fb14428625 /net | |
parent | db316d57b63c07f2ebea596d7e21aa56549f54ab (diff) | |
parent | b56774163f994efce3f5603f35aa4e677c3e725a (diff) |
Merge branch 'ipv6-auto-flow-labels'
Tom Herbert says:
====================
ipv6: Turn on auto IPv6 flow labels by default
BSD (MacOS) has already turned on flow labels by default and this does
not seem to be causing any problems in the Internet. Let's go ahead
and turn them on by default. We'll continue to monitor for any devices
start choking on them.
Flow labels are important since they are the desired solution for
network devices to perform ECMP and RSS (RFC6437 and RFC6438).
Traditionally, devices perform a 5-tuple hash on packets that
includes port numbers. For the most part, these devices can only
compute 5-tuple hashes for TCP and UDP. This severely limits our ability
to get good network load balancing for other protocols (IPIP, GRE,ESP,
etc.), and hence we are limited in using other protocols. Unfortunately,
this method is accepted as the de facto standard to the extent that
there are several proposals to encapsulate protocols in UDP _just_ for
the purposes for getting ECMP to work. With hosts generating flow labels
and devices taking them as input into ECMP (several already do), we can
start to fix this fundamental problem.
This patch set:
- Changes IPV6_FLOWINFO sockopt to be opt-out of flow labels for
connections rather than opt-in
- Disable flow label state ranges sysctl by default
- Enable auto flow labels sysctl by default
v2:
- Added functions to create an skb->hash based on flowi4 and flowi6.
These are called in output path when creating a packet
- Call skb_get_hash_flowi6 in ip6_make_flowlabel
- Implement the auto_flowlabels sysctl as a mode for auto flowlabels.
There are four modes which correspond to flow labels being enabled
and whether socket option can be used to opt in or opt out of
using them
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/core/flow_dissector.c | 58 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 5 | ||||
-rw-r--r-- | net/ipv6/ip6_gre.c | 5 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 4 | ||||
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 2 | ||||
-rw-r--r-- | net/ipv6/sysctl_net_ipv6.c | 7 |
6 files changed, 69 insertions, 12 deletions
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2a834c6179b9..11e6540fa386 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -590,6 +590,15 @@ void make_flow_keys_digest(struct flow_keys_digest *digest, } EXPORT_SYMBOL(make_flow_keys_digest); +static inline void __skb_set_sw_hash(struct sk_buff *skb, u32 hash, + struct flow_keys *keys) +{ + if (keys->ports.ports) + skb->l4_hash = 1; + skb->sw_hash = 1; + skb->hash = hash; +} + /** * __skb_get_hash: calculate a flow hash * @skb: sk_buff to calculate flow hash from @@ -609,10 +618,8 @@ void __skb_get_hash(struct sk_buff *skb) hash = ___skb_get_hash(skb, &keys, hashrnd); if (!hash) return; - if (keys.ports.ports) - skb->l4_hash = 1; - skb->sw_hash = 1; - skb->hash = hash; + + __skb_set_sw_hash(skb, hash, &keys); } EXPORT_SYMBOL(__skb_get_hash); @@ -624,6 +631,49 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) } EXPORT_SYMBOL(skb_get_hash_perturb); +__u32 __skb_get_hash_flowi6(struct sk_buff *skb, struct flowi6 *fl6) +{ + struct flow_keys keys; + + memset(&keys, 0, sizeof(keys)); + + memcpy(&keys.addrs.v6addrs.src, &fl6->saddr, + sizeof(keys.addrs.v6addrs.src)); + memcpy(&keys.addrs.v6addrs.dst, &fl6->daddr, + sizeof(keys.addrs.v6addrs.dst)); + keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + keys.ports.src = fl6->fl6_sport; + keys.ports.dst = fl6->fl6_dport; + keys.keyid.keyid = fl6->fl6_gre_key; + keys.tags.flow_label = (__force u32)fl6->flowlabel; + keys.basic.ip_proto = fl6->flowi6_proto; + + __skb_set_sw_hash(skb, flow_hash_from_keys(&keys), &keys); + + return skb->hash; +} +EXPORT_SYMBOL(__skb_get_hash_flowi6); + +__u32 __skb_get_hash_flowi4(struct sk_buff *skb, struct flowi4 *fl4) +{ + struct flow_keys keys; + + memset(&keys, 0, sizeof(keys)); + + keys.addrs.v4addrs.src = fl4->saddr; + keys.addrs.v4addrs.dst = fl4->daddr; + keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + keys.ports.src = fl4->fl4_sport; + keys.ports.dst = fl4->fl4_dport; + keys.keyid.keyid = fl4->fl4_gre_key; + keys.basic.ip_proto = fl4->flowi4_proto; + + __skb_set_sw_hash(skb, flow_hash_from_keys(&keys), &keys); + + return skb->hash; +} +EXPORT_SYMBOL(__skb_get_hash_flowi4); + u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen) { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7bc92ea4ae8f..44bb66bde0e2 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -197,6 +197,7 @@ lookup_protocol: np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; + np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk)); sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; /* Init the ipv4 part of the socket since we can have sockets @@ -767,10 +768,10 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.flowlabel_consistency = 1; - net->ipv6.sysctl.auto_flowlabels = 0; + net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS; net->ipv6.sysctl.idgen_retries = 3; net->ipv6.sysctl.idgen_delay = 1 * HZ; - net->ipv6.sysctl.flowlabel_state_ranges = 1; + net->ipv6.sysctl.flowlabel_state_ranges = 0; atomic_set(&net->ipv6.fib6_sernum, 1); err = ipv6_init_mibs(net); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index a38d3ac0f18f..34f121812a14 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -728,7 +728,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, */ ipv6h = ipv6_hdr(skb); ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), - ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); + ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); ipv6h->hop_limit = tunnel->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; @@ -1182,7 +1182,8 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb, - t->fl.u.ip6.flowlabel, false)); + t->fl.u.ip6.flowlabel, true, + &t->fl.u.ip6)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->saddr = t->parms.laddr; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 92b7cf0dc1f9..26ea47930740 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -207,7 +207,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, hlimit = ip6_dst_hoplimit(dst); ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - np->autoflowlabel)); + np->autoflowlabel, fl6)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -1649,7 +1649,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, ip6_flow_hdr(hdr, v6_cork->tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - np->autoflowlabel)); + np->autoflowlabel, fl6)); hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 2e67b660118b..b0ab420612bc 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1095,7 +1095,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), - ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); + ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index db48aebd9c47..45243bbe5253 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -17,6 +17,9 @@ #include <net/inet_frag.h> static int one = 1; +static int auto_flowlabels_min; +static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; + static struct ctl_table ipv6_table_template[] = { { @@ -45,7 +48,9 @@ static struct ctl_table ipv6_table_template[] = { .data = &init_net.ipv6.sysctl.auto_flowlabels, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &auto_flowlabels_min, + .extra2 = &auto_flowlabels_max }, { .procname = "fwmark_reflect", |