author     Greg Kroah-Hartman <gregkh@google.com>    2019-02-08 11:47:29 +0100
committer  Greg Kroah-Hartman <gregkh@google.com>    2019-02-08 11:47:29 +0100
commit     62872f952d6b6d44c30616195a1eba5fe1766815 (patch)
tree       a0e057c916d16d1ce6c4d01c4c4d6f523ff6b26f /include
parent     64b564428faa872686215f9a410558ebb1b7ccf4 (diff)
parent     dc5e8c99975bb1a1561de884a83b3c19e4ac7ada (diff)
Merge 4.4.174 into android-4.4
Changes in 4.4.174
    inet: frags: change inet_frags_init_net() return value
    inet: frags: add a pointer to struct netns_frags
    inet: frags: refactor ipfrag_init()
    inet: frags: refactor ipv6_frag_init()
    inet: frags: refactor lowpan_net_frag_init()
    rhashtable: add rhashtable_lookup_get_insert_key()
    rhashtable: Add rhashtable_lookup()
    rhashtable: add schedule points
    inet: frags: use rhashtables for reassembly units
    net: ieee802154: 6lowpan: fix frag reassembly
    ipfrag: really prevent allocation on netns exit
    inet: frags: remove some helpers
    inet: frags: get rif of inet_frag_evicting()
    inet: frags: remove inet_frag_maybe_warn_overflow()
    inet: frags: break the 2GB limit for frags storage
    inet: frags: do not clone skb in ip_expire()
    ipv6: frags: rewrite ip6_expire_frag_queue()
    rhashtable: reorganize struct rhashtable layout
    inet: frags: reorganize struct netns_frags
    inet: frags: get rid of ipfrag_skb_cb/FRAG_CB
    inet: frags: fix ip6frag_low_thresh boundary
    ip: discard IPv4 datagrams with overlapping segments.
    net: modify skb_rbtree_purge to return the truesize of all purged skbs.
    ipv6: defrag: drop non-last frags smaller than min mtu
    net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends
    ip: use rb trees for IP frag queue.
    ip: add helpers to process in-order fragments faster.
    ip: process in-order fragments efficiently
    ip: frags: fix crash in ip_do_fragment()
    ipv4: frags: precedence bug in ip_expire()
    inet: frags: better deal with smp races
    net: fix pskb_trim_rcsum_slow() with odd trim offset
    net: ipv4: do not handle duplicate fragments as overlapping
    rcu: Force boolean subscript for expedited stall warnings
    Linux 4.4.174

Change-Id: I47eace4f47ffe0bf16b29615d09ed903c40a272b
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
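The series above replaces the per-protocol fragment hash buckets with a per-namespace rhashtable and, for IPv4, an rb-tree of received fragments. A minimal, hedged sketch of the queue lifecycle under the reworked API exposed by the include/ changes below (example_queue_frag is a hypothetical caller; the key would be one of the frag_v4_compare_key/frag_v6_compare_key structs added in include/net/inet_frag.h):

#include <net/inet_frag.h>

/* Illustration only -- not part of the merged patch. */
static int example_queue_frag(struct netns_frags *nf, void *key,
			      struct sk_buff *skb)
{
	struct inet_frag_queue *q;

	/* Looks the queue up in nf->rhashtable, creating it if needed. */
	q = inet_frag_find(nf, key);
	if (!q)
		return -ENOMEM;

	spin_lock(&q->lock);
	/* ... link skb into q->rb_fragments / q->fragments here ... */
	if (q->flags & INET_FRAG_COMPLETE)
		inet_frag_kill(q);	/* reassembly done: unhash the queue */
	spin_unlock(&q->lock);

	inet_frag_put(q);	/* drop the reference taken by the lookup */
	return 0;
}

Note how inet_frag_find() replaces the old hashfn/match callbacks: a queue's identity now lives entirely in the key embedded in struct inet_frag_queue.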
Diffstat (limited to 'include')
-rw-r--r--   include/linux/rhashtable.h   | 143
-rw-r--r--   include/linux/skbuff.h       |  16
-rw-r--r--   include/net/inet_frag.h      | 133
-rw-r--r--   include/net/ip.h             |   1
-rw-r--r--   include/net/ipv6.h           |  26
-rw-r--r--   include/uapi/linux/snmp.h    |   1
6 files changed, 186 insertions, 134 deletions
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index e50b31d18462..e97cdfd6cba9 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -133,23 +133,23 @@ struct rhashtable_params {
/**
* struct rhashtable - Hash table handle
* @tbl: Bucket table
- * @nelems: Number of elements in table
* @key_len: Key length for hashfn
* @elasticity: Maximum chain length before rehash
* @p: Configuration parameters
* @run_work: Deferred worker to expand/shrink asynchronously
* @mutex: Mutex to protect current/future table swapping
* @lock: Spin lock to protect walker list
+ * @nelems: Number of elements in table
*/
struct rhashtable {
struct bucket_table __rcu *tbl;
- atomic_t nelems;
unsigned int key_len;
unsigned int elasticity;
struct rhashtable_params p;
struct work_struct run_work;
struct mutex mutex;
spinlock_t lock;
+ atomic_t nelems;
};
/**
@@ -343,7 +343,8 @@ int rhashtable_init(struct rhashtable *ht,
struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
const void *key,
struct rhash_head *obj,
- struct bucket_table *old_tbl);
+ struct bucket_table *old_tbl,
+ void **data);
int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl);
int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter);
@@ -514,18 +515,8 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len);
}
-/**
- * rhashtable_lookup_fast - search hash table, inlined version
- * @ht: hash table
- * @key: the pointer to the key
- * @params: hash table parameters
- *
- * Computes the hash value for the key and traverses the bucket chain looking
- * for a entry with an identical key. The first matching entry is returned.
- *
- * Returns the first entry on which the compare function returned true.
- */
-static inline void *rhashtable_lookup_fast(
+/* Internal function, do not use. */
+static inline struct rhash_head *__rhashtable_lookup(
struct rhashtable *ht, const void *key,
const struct rhashtable_params params)
{
@@ -537,8 +528,6 @@ static inline void *rhashtable_lookup_fast(
struct rhash_head *he;
unsigned int hash;
- rcu_read_lock();
-
tbl = rht_dereference_rcu(ht->tbl, ht);
restart:
hash = rht_key_hashfn(ht, tbl, key, params);
@@ -547,8 +536,7 @@ restart:
params.obj_cmpfn(&arg, rht_obj(ht, he)) :
rhashtable_compare(&arg, rht_obj(ht, he)))
continue;
- rcu_read_unlock();
- return rht_obj(ht, he);
+ return he;
}
/* Ensure we see any new tables. */
@@ -557,13 +545,64 @@ restart:
tbl = rht_dereference_rcu(tbl->future_tbl, ht);
if (unlikely(tbl))
goto restart;
- rcu_read_unlock();
return NULL;
}
-/* Internal function, please use rhashtable_insert_fast() instead */
-static inline int __rhashtable_insert_fast(
+/**
+ * rhashtable_lookup - search hash table
+ * @ht: hash table
+ * @key: the pointer to the key
+ * @params: hash table parameters
+ *
+ * Computes the hash value for the key and traverses the bucket chain looking
+ * for a entry with an identical key. The first matching entry is returned.
+ *
+ * This must only be called under the RCU read lock.
+ *
+ * Returns the first entry on which the compare function returned true.
+ */
+static inline void *rhashtable_lookup(
+ struct rhashtable *ht, const void *key,
+ const struct rhashtable_params params)
+{
+ struct rhash_head *he = __rhashtable_lookup(ht, key, params);
+
+ return he ? rht_obj(ht, he) : NULL;
+}
+
+/**
+ * rhashtable_lookup_fast - search hash table, without RCU read lock
+ * @ht: hash table
+ * @key: the pointer to the key
+ * @params: hash table parameters
+ *
+ * Computes the hash value for the key and traverses the bucket chain looking
+ * for a entry with an identical key. The first matching entry is returned.
+ *
+ * Only use this function when you have other mechanisms guaranteeing
+ * that the object won't go away after the RCU read lock is released.
+ *
+ * Returns the first entry on which the compare function returned true.
+ */
+static inline void *rhashtable_lookup_fast(
+ struct rhashtable *ht, const void *key,
+ const struct rhashtable_params params)
+{
+ void *obj;
+
+ rcu_read_lock();
+ obj = rhashtable_lookup(ht, key, params);
+ rcu_read_unlock();
+
+ return obj;
+}
+
+/* Internal function, please use rhashtable_insert_fast() instead. This
+ * function returns the existing element already in hashes in there is a clash,
+ * otherwise it returns an error via ERR_PTR().
+ */
+static inline void *__rhashtable_insert_fast(
struct rhashtable *ht, const void *key, struct rhash_head *obj,
const struct rhashtable_params params)
{
@@ -576,6 +615,7 @@ static inline int __rhashtable_insert_fast(
spinlock_t *lock;
unsigned int elasticity;
unsigned int hash;
+ void *data = NULL;
int err;
restart:
@@ -600,11 +640,14 @@ restart:
new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
if (unlikely(new_tbl)) {
- tbl = rhashtable_insert_slow(ht, key, obj, new_tbl);
+ tbl = rhashtable_insert_slow(ht, key, obj, new_tbl, &data);
if (!IS_ERR_OR_NULL(tbl))
goto slow_path;
err = PTR_ERR(tbl);
+ if (err == -EEXIST)
+ err = 0;
+
goto out;
}
@@ -618,25 +661,25 @@ slow_path:
err = rhashtable_insert_rehash(ht, tbl);
rcu_read_unlock();
if (err)
- return err;
+ return ERR_PTR(err);
goto restart;
}
- err = -EEXIST;
+ err = 0;
elasticity = ht->elasticity;
rht_for_each(head, tbl, hash) {
if (key &&
unlikely(!(params.obj_cmpfn ?
params.obj_cmpfn(&arg, rht_obj(ht, head)) :
- rhashtable_compare(&arg, rht_obj(ht, head)))))
+ rhashtable_compare(&arg, rht_obj(ht, head))))) {
+ data = rht_obj(ht, head);
goto out;
+ }
if (!--elasticity)
goto slow_path;
}
- err = 0;
-
head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
RCU_INIT_POINTER(obj->next, head);
@@ -651,7 +694,7 @@ out:
spin_unlock_bh(lock);
rcu_read_unlock();
- return err;
+ return err ? ERR_PTR(err) : data;
}
/**
@@ -674,7 +717,13 @@ static inline int rhashtable_insert_fast(
struct rhashtable *ht, struct rhash_head *obj,
const struct rhashtable_params params)
{
- return __rhashtable_insert_fast(ht, NULL, obj, params);
+ void *ret;
+
+ ret = __rhashtable_insert_fast(ht, NULL, obj, params);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
+
+ return ret == NULL ? 0 : -EEXIST;
}
/**
@@ -703,11 +752,15 @@ static inline int rhashtable_lookup_insert_fast(
const struct rhashtable_params params)
{
const char *key = rht_obj(ht, obj);
+ void *ret;
BUG_ON(ht->p.obj_hashfn);
- return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj,
- params);
+ ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
+
+ return ret == NULL ? 0 : -EEXIST;
}
/**
@@ -736,6 +789,32 @@ static inline int rhashtable_lookup_insert_key(
struct rhashtable *ht, const void *key, struct rhash_head *obj,
const struct rhashtable_params params)
{
+ void *ret;
+
+ BUG_ON(!ht->p.obj_hashfn || !key);
+
+ ret = __rhashtable_insert_fast(ht, key, obj, params);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
+
+ return ret == NULL ? 0 : -EEXIST;
+}
+
+/**
+ * rhashtable_lookup_get_insert_key - lookup and insert object into hash table
+ * @ht: hash table
+ * @obj: pointer to hash head inside object
+ * @params: hash table parameters
+ * @data: pointer to element data already in hashes
+ *
+ * Just like rhashtable_lookup_insert_key(), but this function returns the
+ * object if it exists, NULL if it does not and the insertion was successful,
+ * and an ERR_PTR otherwise.
+ */
+static inline void *rhashtable_lookup_get_insert_key(
+ struct rhashtable *ht, const void *key, struct rhash_head *obj,
+ const struct rhashtable_params params)
+{
BUG_ON(!ht->p.obj_hashfn || !key);
return __rhashtable_insert_fast(ht, key, obj, params);
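Taken together, the rhashtable.h hunks split lookup into rhashtable_lookup(), which must be called under the RCU read lock, and rhashtable_lookup_fast(), which takes and releases that lock itself, and they change __rhashtable_insert_fast() to hand back the clashing element so that the new rhashtable_lookup_get_insert_key() can return it. A hedged caller-side sketch of the new return-value convention (struct my_obj and insert_or_get are made-up names; the table is assumed to be configured with an obj_hashfn, which the key-based insert requires):

#include <linux/rhashtable.h>

struct my_obj {
	struct rhash_head node;
	u32 key;
};

/* Illustration only: insert 'cand', or report the element that already
 * owns the same key.
 */
static struct my_obj *insert_or_get(struct rhashtable *ht, struct my_obj *cand,
				    const struct rhashtable_params params)
{
	void *old;

	old = rhashtable_lookup_get_insert_key(ht, &cand->key, &cand->node,
					       params);
	if (IS_ERR(old))
		return old;	/* genuine failure, e.g. ERR_PTR(-ENOMEM) */
	if (old)
		return old;	/* an element with this key already existed */
	return cand;		/* NULL means 'cand' was inserted */
}

rhashtable_insert_fast() and the other int-returning wrappers keep their old 0/-EEXIST contract by translating this pointer-or-error result, as the hunks above show.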
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6d39d81d3c38..502787c29ce9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -556,9 +556,14 @@ struct sk_buff {
struct skb_mstamp skb_mstamp;
};
};
- struct rb_node rbnode; /* used in netem & tcp stack */
+ struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */
};
- struct sock *sk;
+
+ union {
+ struct sock *sk;
+ int ip_defrag_offset;
+ };
+
struct net_device *dev;
/*
@@ -2273,7 +2278,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
kfree_skb(skb);
}
-void skb_rbtree_purge(struct rb_root *root);
+unsigned int skb_rbtree_purge(struct rb_root *root);
void *netdev_alloc_frag(unsigned int fragsz);
@@ -2791,6 +2796,7 @@ static inline unsigned char *skb_push_rcsum(struct sk_buff *skb,
return skb->data;
}
+int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len);
/**
* pskb_trim_rcsum - trim received skb and update checksum
* @skb: buffer to trim
@@ -2805,9 +2811,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
{
if (likely(len >= skb->len))
return 0;
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->ip_summed = CHECKSUM_NONE;
- return __pskb_trim(skb, len);
+ return pskb_trim_rcsum_slow(skb, len);
}
#define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode)
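skb_rbtree_purge() now reports the combined truesize of everything it freed, which lets rb-tree based reassembly give that memory back to the per-namespace accounting in one step. A hedged sketch (drop_frag_tree is a hypothetical helper; sub_frag_mem_limit() comes from the include/net/inet_frag.h hunk below):

#include <linux/skbuff.h>
#include <net/inet_frag.h>

/* Illustration only: free every fragment queued in an rb-tree and
 * return the memory to the namespace's fragment accounting.
 */
static void drop_frag_tree(struct netns_frags *nf, struct rb_root *root)
{
	unsigned int sum_truesize;

	sum_truesize = skb_rbtree_purge(root);	/* frees the queued skbs */
	sub_frag_mem_limit(nf, sum_truesize);	/* mem is an atomic_long_t now */
}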
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index c26a6e4dc306..6260ec146142 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -1,13 +1,19 @@
#ifndef __NET_FRAG_H__
#define __NET_FRAG_H__
+#include <linux/rhashtable.h>
+
struct netns_frags {
- /* Keep atomic mem on separate cachelines in structs that include it */
- atomic_t mem ____cacheline_aligned_in_smp;
/* sysctls */
+ long high_thresh;
+ long low_thresh;
int timeout;
- int high_thresh;
- int low_thresh;
+ struct inet_frags *f;
+
+ struct rhashtable rhashtable ____cacheline_aligned_in_smp;
+
+ /* Keep atomic mem on separate cachelines in structs that include it */
+ atomic_long_t mem ____cacheline_aligned_in_smp;
};
/**
@@ -23,74 +29,68 @@ enum {
INET_FRAG_COMPLETE = BIT(2),
};
+struct frag_v4_compare_key {
+ __be32 saddr;
+ __be32 daddr;
+ u32 user;
+ u32 vif;
+ __be16 id;
+ u16 protocol;
+};
+
+struct frag_v6_compare_key {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ u32 user;
+ __be32 id;
+ u32 iif;
+};
+
/**
* struct inet_frag_queue - fragment queue
*
- * @lock: spinlock protecting the queue
+ * @node: rhash node
+ * @key: keys identifying this frag.
* @timer: queue expiration timer
- * @list: hash bucket list
+ * @lock: spinlock protecting this frag
* @refcnt: reference count of the queue
* @fragments: received fragments head
+ * @rb_fragments: received fragments rb-tree root
* @fragments_tail: received fragments tail
+ * @last_run_head: the head of the last "run". see ip_fragment.c
* @stamp: timestamp of the last received fragment
* @len: total length of the original datagram
* @meat: length of received fragments so far
* @flags: fragment queue flags
* @max_size: maximum received fragment size
* @net: namespace that this frag belongs to
- * @list_evictor: list of queues to forcefully evict (e.g. due to low memory)
+ * @rcu: rcu head for freeing deferall
*/
struct inet_frag_queue {
- spinlock_t lock;
+ struct rhash_head node;
+ union {
+ struct frag_v4_compare_key v4;
+ struct frag_v6_compare_key v6;
+ } key;
struct timer_list timer;
- struct hlist_node list;
+ spinlock_t lock;
atomic_t refcnt;
- struct sk_buff *fragments;
+ struct sk_buff *fragments; /* Used in IPv6. */
+ struct rb_root rb_fragments; /* Used in IPv4. */
struct sk_buff *fragments_tail;
+ struct sk_buff *last_run_head;
ktime_t stamp;
int len;
int meat;
__u8 flags;
u16 max_size;
- struct netns_frags *net;
- struct hlist_node list_evictor;
-};
-
-#define INETFRAGS_HASHSZ 1024
-
-/* averaged:
- * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ /
- * rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or
- * struct frag_queue))
- */
-#define INETFRAGS_MAXDEPTH 128
-
-struct inet_frag_bucket {
- struct hlist_head chain;
- spinlock_t chain_lock;
+ struct netns_frags *net;
+ struct rcu_head rcu;
};
struct inet_frags {
- struct inet_frag_bucket hash[INETFRAGS_HASHSZ];
-
- struct work_struct frags_work;
- unsigned int next_bucket;
- unsigned long last_rebuild_jiffies;
- bool rebuild;
-
- /* The first call to hashfn is responsible to initialize
- * rnd. This is best done with net_get_random_once.
- *
- * rnd_seqlock is used to let hash insertion detect
- * when it needs to re-lookup the hash chain to use.
- */
- u32 rnd;
- seqlock_t rnd_seqlock;
int qsize;
- unsigned int (*hashfn)(const struct inet_frag_queue *);
- bool (*match)(const struct inet_frag_queue *q,
- const void *arg);
void (*constructor)(struct inet_frag_queue *q,
const void *arg);
void (*destructor)(struct inet_frag_queue *);
@@ -98,56 +98,47 @@ struct inet_frags {
void (*frag_expire)(unsigned long data);
struct kmem_cache *frags_cachep;
const char *frags_cache_name;
+ struct rhashtable_params rhash_params;
};
int inet_frags_init(struct inet_frags *);
void inet_frags_fini(struct inet_frags *);
-static inline void inet_frags_init_net(struct netns_frags *nf)
+static inline int inet_frags_init_net(struct netns_frags *nf)
{
- atomic_set(&nf->mem, 0);
+ atomic_long_set(&nf->mem, 0);
+ return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
}
-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
+void inet_frags_exit_net(struct netns_frags *nf);
-void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f);
-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
- struct inet_frags *f, void *key, unsigned int hash);
+void inet_frag_kill(struct inet_frag_queue *q);
+void inet_frag_destroy(struct inet_frag_queue *q);
+struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
-void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
- const char *prefix);
+/* Free all skbs in the queue; return the sum of their truesizes. */
+unsigned int inet_frag_rbtree_purge(struct rb_root *root);
-static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
+static inline void inet_frag_put(struct inet_frag_queue *q)
{
if (atomic_dec_and_test(&q->refcnt))
- inet_frag_destroy(q, f);
-}
-
-static inline bool inet_frag_evicting(struct inet_frag_queue *q)
-{
- return !hlist_unhashed(&q->list_evictor);
+ inet_frag_destroy(q);
}
/* Memory Tracking Functions. */
-static inline int frag_mem_limit(struct netns_frags *nf)
-{
- return atomic_read(&nf->mem);
-}
-
-static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
+static inline long frag_mem_limit(const struct netns_frags *nf)
{
- atomic_sub(i, &nf->mem);
+ return atomic_long_read(&nf->mem);
}
-static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
+static inline void sub_frag_mem_limit(struct netns_frags *nf, long val)
{
- atomic_add(i, &nf->mem);
+ atomic_long_sub(val, &nf->mem);
}
-static inline int sum_frag_mem_limit(struct netns_frags *nf)
+static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
{
- return atomic_read(&nf->mem);
+ atomic_long_add(val, &nf->mem);
}
/* RFC 3168 support :
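Because inet_frags_init_net() now initialises a per-namespace rhashtable, it can fail and its return value has to be checked; the namespace must also point nf->f at its protocol's struct inet_frags before calling it. A hedged sketch of a pernet init in that style, loosely mirroring what net/ipv4/ip_fragment.c does with its ip4_frags descriptor (example_frags_init_net and the threshold values are illustrative):

#include <net/inet_frag.h>
#include <net/ip.h>

static int example_frags_init_net(struct net *net)
{
	struct netns_frags *nf = &net->ipv4.frags;

	nf->high_thresh = 4 * 1024 * 1024;	/* thresholds are long now */
	nf->low_thresh  = 3 * 1024 * 1024;
	nf->timeout     = IP_FRAG_TIME;
	nf->f = &ip4_frags;	/* protocol descriptor must be set first */

	/* Allocates nf->rhashtable, so this can return -ENOMEM etc. */
	return inet_frags_init_net(nf);
}

On the error path the matching teardown is simply inet_frags_exit_net(nf), which no longer takes a struct inet_frags argument.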
diff --git a/include/net/ip.h b/include/net/ip.h
index 10664a684acf..47651d8b51de 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -525,7 +525,6 @@ static inline struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *s
return skb;
}
#endif
-int ip_frag_mem(struct net *net);
/*
* Functions provided by ip_forward.c
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 0e01d570fa22..c07cf9596b6f 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -320,13 +320,6 @@ static inline bool ipv6_accept_ra(struct inet6_dev *idev)
idev->cnf.accept_ra;
}
-#if IS_ENABLED(CONFIG_IPV6)
-static inline int ip6_frag_mem(struct net *net)
-{
- return sum_frag_mem_limit(&net->ipv6.frags);
-}
-#endif
-
#define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */
#define IPV6_FRAG_LOW_THRESH (3 * 1024*1024) /* 3145728 */
#define IPV6_FRAG_TIMEOUT (60 * HZ) /* 60 seconds */
@@ -505,17 +498,8 @@ enum ip6_defrag_users {
__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
};
-struct ip6_create_arg {
- __be32 id;
- u32 user;
- const struct in6_addr *src;
- const struct in6_addr *dst;
- int iif;
- u8 ecn;
-};
-
void ip6_frag_init(struct inet_frag_queue *q, const void *a);
-bool ip6_frag_match(const struct inet_frag_queue *q, const void *a);
+extern const struct rhashtable_params ip6_rhash_params;
/*
* Equivalent of ipv4 struct ip
@@ -523,19 +507,13 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a);
struct frag_queue {
struct inet_frag_queue q;
- __be32 id; /* fragment id */
- u32 user;
- struct in6_addr saddr;
- struct in6_addr daddr;
-
int iif;
unsigned int csum;
__u16 nhoffset;
u8 ecn;
};
-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
- struct inet_frags *frags);
+void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq);
static inline bool ipv6_addr_any(const struct in6_addr *a)
{
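With struct ip6_create_arg and the per-queue address/id fields gone, an IPv6 reassembly queue is identified solely by the frag_v6_compare_key embedded in struct inet_frag_queue and hashed via the exported ip6_rhash_params. A hedged sketch of building such a key for a lookup (find_v6_queue is a hypothetical helper; the field layout follows the inet_frag.h hunk above):

#include <net/inet_frag.h>
#include <net/ipv6.h>

static struct frag_queue *find_v6_queue(struct net *net,
					const struct ipv6hdr *hdr,
					__be32 frag_id, u32 user, int iif)
{
	struct frag_v6_compare_key key = {
		.saddr = hdr->saddr,
		.daddr = hdr->daddr,
		.user  = user,
		.id    = frag_id,
		.iif   = iif,
	};
	struct inet_frag_queue *q;

	q = inet_frag_find(&net->ipv6.frags, &key);
	if (!q)
		return NULL;

	return container_of(q, struct frag_queue, q);
}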
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 25a9ad8bcef1..9de808ebce05 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -55,6 +55,7 @@ enum
IPSTATS_MIB_ECT1PKTS, /* InECT1Pkts */
IPSTATS_MIB_ECT0PKTS, /* InECT0Pkts */
IPSTATS_MIB_CEPKTS, /* InCEPkts */
+ IPSTATS_MIB_REASM_OVERLAPS, /* ReasmOverlaps */
__IPSTATS_MIB_MAX
};