From fe6bea7f1f3a09fc06d835446d34d3b3b6a543fb Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 21 Jul 2015 16:31:53 -0700 Subject: sch_plug: purge buffered packets during reset Otherwise the skbuff related structures are not correctly refcount'ed. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_plug.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/sched') diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index 89f8fcf73f18..ade9445a55ab 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -216,6 +216,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = { .peek = qdisc_peek_head, .init = plug_init, .change = plug_change, + .reset = qdisc_reset_queue, .owner = THIS_MODULE, }; -- cgit v1.2.3 From 77e62da6e60c7772971f813f588d372a7f1a4167 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 21 Jul 2015 16:52:43 -0700 Subject: sch_choke: drop all packets in queue during reset Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_choke.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'net/sched') diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 93d5742dc7e0..6a783afe4960 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -385,6 +385,19 @@ static void choke_reset(struct Qdisc *sch) { struct choke_sched_data *q = qdisc_priv(sch); + while (q->head != q->tail) { + struct sk_buff *skb = q->tab[q->head]; + + q->head = (q->head + 1) & q->tab_mask; + if (!skb) + continue; + qdisc_qstats_backlog_dec(sch, skb); + --sch->q.qlen; + qdisc_drop(skb, sch); + } + + memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *)); + q->head = q->tail = 0; red_restart(&q->vars); } -- cgit v1.2.3 From f4eaed28c7834fc049c754f63e6988bbd73778d9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 29 Jul 2015 18:40:56 +0200 Subject: act_bpf: fix memory leaks when replacing bpf programs We currently trigger multiple memory leaks when replacing bpf actions, besides others: comm "tc", pid 1909, jiffies 4294851310 (age 1602.796s) hex dump (first 32 bytes): 01 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 ................ 18 b0 98 6d 00 88 ff ff 00 00 00 00 00 00 00 00 ...m............ backtrace: [] kmemleak_alloc+0x4e/0xb0 [] __vmalloc_node_range+0x1bd/0x2c0 [] __vmalloc+0x4a/0x50 [] bpf_prog_alloc+0x3a/0xa0 [] bpf_prog_create+0x44/0xa0 [] tcf_bpf_init+0x28b/0x3c0 [act_bpf] [] tcf_action_init_1+0x191/0x1b0 [] tcf_action_init+0x82/0xf0 [] tcf_exts_validate+0xb2/0xc0 [] cls_bpf_modify_existing+0x98/0x340 [cls_bpf] [] cls_bpf_change+0x1a6/0x274 [cls_bpf] [] tc_ctl_tfilter+0x335/0x910 [] rtnetlink_rcv_msg+0x95/0x240 [] netlink_rcv_skb+0xaf/0xc0 [] rtnetlink_rcv+0x2e/0x40 [] netlink_unicast+0xef/0x1b0 Issue is that the old content from tcf_bpf is allocated and needs to be released when we replace it. We seem to do that since the beginning of act_bpf on the filter and insns, later on the name as well. Example test case, after patch: # FOO="1,6 0 0 4294967295," # BAR="1,6 0 0 4294967294," # tc actions add action bpf bytecode "$FOO" index 2 # tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 2 ref 1 bind 0 # tc actions replace action bpf bytecode "$BAR" index 2 # tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe index 2 ref 1 bind 0 # tc actions replace action bpf bytecode "$FOO" index 2 # tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 2 ref 1 bind 0 # tc actions del action bpf index 2 [...] # echo "scan" > /sys/kernel/debug/kmemleak # cat /sys/kernel/debug/kmemleak | grep "comm \"tc\"" | wc -l 0 Fixes: d23b8ad8ab23 ("tc: add BPF based action") Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/sched/act_bpf.c | 53 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 18 deletions(-) (limited to 'net/sched') diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 1df78289e248..d0edeb7a1950 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -27,9 +27,10 @@ struct tcf_bpf_cfg { struct bpf_prog *filter; struct sock_filter *bpf_ops; - char *bpf_name; + const char *bpf_name; u32 bpf_fd; u16 bpf_num_ops; + bool is_ebpf; }; static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, @@ -207,6 +208,7 @@ static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg) cfg->bpf_ops = bpf_ops; cfg->bpf_num_ops = bpf_num_ops; cfg->filter = fp; + cfg->is_ebpf = false; return 0; } @@ -241,18 +243,40 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) cfg->bpf_fd = bpf_fd; cfg->bpf_name = name; cfg->filter = fp; + cfg->is_ebpf = true; return 0; } +static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg) +{ + if (cfg->is_ebpf) + bpf_prog_put(cfg->filter); + else + bpf_prog_destroy(cfg->filter); + + kfree(cfg->bpf_ops); + kfree(cfg->bpf_name); +} + +static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, + struct tcf_bpf_cfg *cfg) +{ + cfg->is_ebpf = tcf_bpf_is_ebpf(prog); + cfg->filter = prog->filter; + + cfg->bpf_ops = prog->bpf_ops; + cfg->bpf_name = prog->bpf_name; +} + static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action *act, int replace, int bind) { struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tcf_bpf_cfg cfg, old; struct tc_act_bpf *parm; struct tcf_bpf *prog; - struct tcf_bpf_cfg cfg; bool is_bpf, is_ebpf; int ret; @@ -301,6 +325,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, prog = to_bpf(act); spin_lock_bh(&prog->tcf_lock); + if (ret != ACT_P_CREATED) + tcf_bpf_prog_fill_cfg(prog, &old); + prog->bpf_ops = cfg.bpf_ops; prog->bpf_name = cfg.bpf_name; @@ -316,32 +343,22 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (ret == ACT_P_CREATED) tcf_hash_insert(act); + else + tcf_bpf_cfg_cleanup(&old); return ret; destroy_fp: - if (is_ebpf) - bpf_prog_put(cfg.filter); - else - bpf_prog_destroy(cfg.filter); - - kfree(cfg.bpf_ops); - kfree(cfg.bpf_name); - + tcf_bpf_cfg_cleanup(&cfg); return ret; } static void tcf_bpf_cleanup(struct tc_action *act, int bind) { - const struct tcf_bpf *prog = act->priv; - - if (tcf_bpf_is_ebpf(prog)) - bpf_prog_put(prog->filter); - else - bpf_prog_destroy(prog->filter); + struct tcf_bpf_cfg tmp; - kfree(prog->bpf_ops); - kfree(prog->bpf_name); + tcf_bpf_prog_fill_cfg(act->priv, &tmp); + tcf_bpf_cfg_cleanup(&tmp); } static struct tc_action_ops act_bpf_ops __read_mostly = { -- cgit v1.2.3 From 28e6b67f0b292f557468c139085303b15f1a678f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 29 Jul 2015 23:35:25 +0200 Subject: net: sched: fix refcount imbalance in actions Since commit 55334a5db5cd ("net_sched: act: refuse to remove bound action outside"), we end up with a wrong reference count for a tc action. Test case 1: FOO="1,6 0 0 4294967295," BAR="1,6 0 0 4294967294," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 \ action bpf bytecode "$FOO" tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 1 bind 1 tc actions replace action bpf bytecode "$BAR" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe index 1 ref 2 bind 1 tc actions replace action bpf bytecode "$FOO" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 3 bind 1 Test case 2: FOO="1,6 0 0 4294967295," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action ok tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 1 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 2 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 3 bind 1 What happens is that in tcf_hash_check(), we check tcf_common for a given index and increase tcfc_refcnt and conditionally tcfc_bindcnt when we've found an existing action. Now there are the following cases: 1) We do a late binding of an action. In that case, we leave the tcfc_refcnt/tcfc_bindcnt increased and are done with the ->init() handler. This is correctly handeled. 2) We replace the given action, or we try to add one without replacing and find out that the action at a specific index already exists (thus, we go out with error in that case). In case of 2), we have to undo the reference count increase from tcf_hash_check() in the tcf_hash_check() function. Currently, we fail to do so because of the 'tcfc_bindcnt > 0' check which bails out early with an -EPERM error. Now, while commit 55334a5db5cd prevents 'tc actions del action ...' on an already classifier-bound action to drop the reference count (which could then become negative, wrap around etc), this restriction only accounts for invocations outside a specific action's ->init() handler. One possible solution would be to add a flag thus we possibly trigger the -EPERM ony in situations where it is indeed relevant. After the patch, above test cases have correct reference count again. Fixes: 55334a5db5cd ("net_sched: act: refuse to remove bound action outside") Signed-off-by: Daniel Borkmann Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 8 +++++++- net/sched/act_api.c | 11 ++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'net/sched') diff --git a/include/net/act_api.h b/include/net/act_api.h index 3ee4c92afd1b..931738bc5bba 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -99,7 +99,6 @@ struct tc_action_ops { int tcf_hash_search(struct tc_action *a, u32 index); void tcf_hash_destroy(struct tc_action *a); -int tcf_hash_release(struct tc_action *a, int bind); u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo); int tcf_hash_check(u32 index, struct tc_action *a, int bind); int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, @@ -107,6 +106,13 @@ int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); void tcf_hash_insert(struct tc_action *a); +int __tcf_hash_release(struct tc_action *a, bool bind, bool strict); + +static inline int tcf_hash_release(struct tc_action *a, bool bind) +{ + return __tcf_hash_release(a, bind, false); +} + int tcf_register_action(struct tc_action_ops *a, unsigned int mask); int tcf_unregister_action(struct tc_action_ops *a); int tcf_action_destroy(struct list_head *actions, int bind); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index af427a3dbcba..43ec92680ae8 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -45,7 +45,7 @@ void tcf_hash_destroy(struct tc_action *a) } EXPORT_SYMBOL(tcf_hash_destroy); -int tcf_hash_release(struct tc_action *a, int bind) +int __tcf_hash_release(struct tc_action *a, bool bind, bool strict) { struct tcf_common *p = a->priv; int ret = 0; @@ -53,7 +53,7 @@ int tcf_hash_release(struct tc_action *a, int bind) if (p) { if (bind) p->tcfc_bindcnt--; - else if (p->tcfc_bindcnt > 0) + else if (strict && p->tcfc_bindcnt > 0) return -EPERM; p->tcfc_refcnt--; @@ -64,9 +64,10 @@ int tcf_hash_release(struct tc_action *a, int bind) ret = 1; } } + return ret; } -EXPORT_SYMBOL(tcf_hash_release); +EXPORT_SYMBOL(__tcf_hash_release); static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, struct tc_action *a) @@ -136,7 +137,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a) head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; hlist_for_each_entry_safe(p, n, head, tcfc_head) { a->priv = p; - ret = tcf_hash_release(a, 0); + ret = __tcf_hash_release(a, false, true); if (ret == ACT_P_DELETED) { module_put(a->ops->owner); n_i++; @@ -408,7 +409,7 @@ int tcf_action_destroy(struct list_head *actions, int bind) int ret = 0; list_for_each_entry_safe(a, tmp, actions, list) { - ret = tcf_hash_release(a, bind); + ret = __tcf_hash_release(a, bind, true); if (ret == ACT_P_DELETED) module_put(a->ops->owner); else if (ret < 0) -- cgit v1.2.3 From 5175f7106cc55a1bcf97bf7d5ba0900017ebcef8 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 30 Jul 2015 17:12:21 -0700 Subject: act_pedit: check binding before calling tcf_hash_release() When we share an action within a filter, the bind refcnt should increase, therefore we should not call tcf_hash_release(). Fixes: 1a29321ed045 ("net_sched: act: Dont increment refcnt on replace") Cc: Jamal Hadi Salim Cc: Daniel Borkmann Signed-off-by: Cong Wang Signed-off-by: Cong Wang Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/sched') diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 17e6d6669c7f..ff8b466a73f6 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -68,13 +68,12 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; } else { - p = to_pedit(a); - tcf_hash_release(a, bind); if (bind) return 0; + tcf_hash_release(a, bind); if (!ovr) return -EEXIST; - + p = to_pedit(a); if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) { keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) -- cgit v1.2.3