From 8afd351c772d1815347d5c46716b099fde00a579 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sun, 16 Dec 2007 13:29:36 -0800 Subject: [NETNS]: Add the netns_ipv4 struct The ipv4 will store its parameters inside this structure. This one is empty now, but it will be eventually filled. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 include/net/netns/ipv4.h (limited to 'include/net/netns/ipv4.h') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h new file mode 100644 index 000000000000..ce830d57c35c --- /dev/null +++ b/include/net/netns/ipv4.h @@ -0,0 +1,9 @@ +/* + * ipv4 in net namespaces + */ + +#ifndef __NETNS_IPV4_H__ +#define __NETNS_IPV4_H__ +struct netns_ipv4 { +}; +#endif -- cgit v1.2.3 From 752d14dc6aa9d0fc8f3b25e5052596fb549e5157 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sun, 16 Dec 2007 13:31:47 -0800 Subject: [IPV4]: Move the devinet pointers on the struct net This is the core. Add all and default pointers on the netns_ipv4 and register a new pernet subsys to initialize them. Also add the ctl_table_header to register the net.ipv4.ip_forward ctl. I don't allocate additional memory for init_net, but use global devinets. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- include/net/netns/ipv4.h | 6 +++ net/ipv4/devinet.c | 105 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 102 insertions(+), 9 deletions(-) (limited to 'include/net/netns/ipv4.h') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index ce830d57c35c..e06d7cfd6d64 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -4,6 +4,12 @@ #ifndef __NETNS_IPV4_H__ #define __NETNS_IPV4_H__ +struct ctl_table_header; +struct ipv4_devconf; + struct netns_ipv4 { + struct ctl_table_header *forw_hdr; + struct ipv4_devconf *devconf_all; + struct ipv4_devconf *devconf_dflt; }; #endif diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 8b2a44c4f82a..a2d48173828a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -62,6 +62,7 @@ #include #include #include +#include struct ipv4_devconf ipv4_devconf = { .data = { @@ -1497,7 +1498,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name; devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name; - t->sysctl_header = register_sysctl_paths(devinet_ctl_path, + t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path, t->devinet_vars); if (!t->sysctl_header) goto free_procname; @@ -1557,27 +1558,113 @@ static struct ctl_table ctl_forward_entry[] = { { }, }; -static __initdata struct ctl_path net_ipv4_path[] = { +static __net_initdata struct ctl_path net_ipv4_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "ipv4", .ctl_name = NET_IPV4, }, { }, }; +static __net_init int devinet_init_net(struct net *net) +{ + int err; + struct ctl_table *tbl; + struct ipv4_devconf *all, *dflt; + struct ctl_table_header *forw_hdr; + + err = -ENOMEM; + all = &ipv4_devconf; + dflt = &ipv4_devconf_dflt; + tbl = ctl_forward_entry; + + if (net != &init_net) { + all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL); + if (all == NULL) + goto err_alloc_all; + + dflt = kmemdup(dflt, 
sizeof(ipv4_devconf_dflt), GFP_KERNEL); + if (dflt == NULL) + goto err_alloc_dflt; + + tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL); + if (tbl == NULL) + goto err_alloc_ctl; + + tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1]; + tbl[0].extra1 = all; + tbl[0].extra2 = net; + } + +#ifdef CONFIG_SYSCTL + err = __devinet_sysctl_register(net, "all", + NET_PROTO_CONF_ALL, all); + if (err < 0) + goto err_reg_all; + + err = __devinet_sysctl_register(net, "default", + NET_PROTO_CONF_DEFAULT, dflt); + if (err < 0) + goto err_reg_dflt; + + err = -ENOMEM; + forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl); + if (forw_hdr == NULL) + goto err_reg_ctl; +#endif + + net->ipv4.forw_hdr = forw_hdr; + net->ipv4.devconf_all = all; + net->ipv4.devconf_dflt = dflt; + return 0; + +#ifdef CONFIG_SYSCTL +err_reg_ctl: + __devinet_sysctl_unregister(dflt); +err_reg_dflt: + __devinet_sysctl_unregister(all); +err_reg_all: + if (tbl != ctl_forward_entry) + kfree(tbl); +#endif +err_alloc_ctl: + if (dflt != &ipv4_devconf_dflt) + kfree(dflt); +err_alloc_dflt: + if (all != &ipv4_devconf) + kfree(all); +err_alloc_all: + return err; +} + +static __net_exit void devinet_exit_net(struct net *net) +{ + struct ctl_table *tbl; + + tbl = net->ipv4.forw_hdr->ctl_table_arg; +#ifdef CONFIG_SYSCTL + unregister_net_sysctl_table(net->ipv4.forw_hdr); + __devinet_sysctl_unregister(net->ipv4.devconf_dflt); + __devinet_sysctl_unregister(net->ipv4.devconf_all); +#endif + kfree(tbl); + kfree(net->ipv4.devconf_dflt); + kfree(net->ipv4.devconf_all); +} + +static __net_initdata struct pernet_operations devinet_ops = { + .init = devinet_init_net, + .exit = devinet_exit_net, +}; + void __init devinet_init(void) { + register_pernet_subsys(&devinet_ops); + register_gifconf(PF_INET, inet_gifconf); register_netdevice_notifier(&ip_netdev_notifier); rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL); rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); rtnl_register(PF_INET, 
RTM_GETADDR, NULL, inet_dump_ifaddr); -#ifdef CONFIG_SYSCTL - __devinet_sysctl_register(&init_net, "all", NET_PROTO_CONF_ALL, - &ipv4_devconf); - __devinet_sysctl_register(&init_net, "default", NET_PROTO_CONF_DEFAULT, - &ipv4_devconf_dflt); - register_sysctl_paths(net_ipv4_path, ctl_forward_entry); -#endif } EXPORT_SYMBOL(in_dev_finish_destroy); -- cgit v1.2.3 From 2a75de0c1de2dde9ef41aeb45a21048681421b8a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 5 Jan 2008 23:08:49 -0800 Subject: [NETNS]: Should build with CONFIG_SYSCTL=n Previous NETNS patches broke CONFIG_SYSCTL=n case Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 ++ net/ipv4/devinet.c | 17 ++++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include/net/netns/ipv4.h') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e06d7cfd6d64..61a28ff9440a 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -8,7 +8,9 @@ struct ctl_table_header; struct ipv4_devconf; struct netns_ipv4 { +#ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; +#endif struct ipv4_devconf *devconf_all; struct ipv4_devconf *devconf_dflt; }; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 44cb252d2f61..03db15b10309 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1542,7 +1542,6 @@ static void devinet_sysctl_unregister(struct in_device *idev) __devinet_sysctl_unregister(&idev->cnf); neigh_sysctl_unregister(idev->arp_parms); } -#endif static struct ctl_table ctl_forward_entry[] = { { @@ -1565,18 +1564,20 @@ static __net_initdata struct ctl_path net_ipv4_path[] = { { .procname = "ipv4", .ctl_name = NET_IPV4, }, { }, }; +#endif static __net_init int devinet_init_net(struct net *net) { int err; - struct ctl_table *tbl; struct ipv4_devconf *all, *dflt; +#ifdef CONFIG_SYSCTL + struct ctl_table *tbl = ctl_forward_entry; struct ctl_table_header *forw_hdr; +#endif err = -ENOMEM; all = &ipv4_devconf; dflt = 
&ipv4_devconf_dflt; - tbl = ctl_forward_entry; if (net != &init_net) { all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL); @@ -1587,6 +1588,7 @@ static __net_init int devinet_init_net(struct net *net) if (dflt == NULL) goto err_alloc_dflt; +#ifdef CONFIG_SYSCTL tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL); if (tbl == NULL) goto err_alloc_ctl; @@ -1594,6 +1596,7 @@ static __net_init int devinet_init_net(struct net *net) tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1]; tbl[0].extra1 = all; tbl[0].extra2 = net; +#endif } #ifdef CONFIG_SYSCTL @@ -1611,9 +1614,9 @@ static __net_init int devinet_init_net(struct net *net) forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl); if (forw_hdr == NULL) goto err_reg_ctl; + net->ipv4.forw_hdr = forw_hdr; #endif - net->ipv4.forw_hdr = forw_hdr; net->ipv4.devconf_all = all; net->ipv4.devconf_dflt = dflt; return 0; @@ -1626,8 +1629,8 @@ err_reg_dflt: err_reg_all: if (tbl != ctl_forward_entry) kfree(tbl); -#endif err_alloc_ctl: +#endif if (dflt != &ipv4_devconf_dflt) kfree(dflt); err_alloc_dflt: @@ -1639,15 +1642,15 @@ err_alloc_all: static __net_exit void devinet_exit_net(struct net *net) { +#ifdef CONFIG_SYSCTL struct ctl_table *tbl; tbl = net->ipv4.forw_hdr->ctl_table_arg; -#ifdef CONFIG_SYSCTL unregister_net_sysctl_table(net->ipv4.forw_hdr); __devinet_sysctl_unregister(net->ipv4.devconf_dflt); __devinet_sysctl_unregister(net->ipv4.devconf_all); -#endif kfree(tbl); +#endif kfree(net->ipv4.devconf_dflt); kfree(net->ipv4.devconf_all); } -- cgit v1.2.3 From e4e4971c5f8b70daccdd401132a81b723dc8337e Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 10 Jan 2008 03:27:51 -0800 Subject: [NETNS]: Namespacing IPv4 fib rules. The final trick for rules: place fib4_rules_ops into struct net and modify initialization path for this. Acked-by: Benjamin Thery Acked-by: Daniel Lezcano Signed-off-by: Denis V. Lunev Signed-off-by: David S. 
Miller --- include/net/netns/ipv4.h | 5 +++++ net/ipv4/fib_rules.c | 44 ++++++++++++++++++++++++-------------------- 2 files changed, 29 insertions(+), 20 deletions(-) (limited to 'include/net/netns/ipv4.h') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 61a28ff9440a..a7bd5d83e43e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -4,8 +4,10 @@ #ifndef __NETNS_IPV4_H__ #define __NETNS_IPV4_H__ + struct ctl_table_header; struct ipv4_devconf; +struct fib_rules_ops; struct netns_ipv4 { #ifdef CONFIG_SYSCTL @@ -13,5 +15,8 @@ struct netns_ipv4 { #endif struct ipv4_devconf *devconf_all; struct ipv4_devconf *devconf_dflt; +#ifdef CONFIG_IP_MULTIPLE_TABLES + struct fib_rules_ops *rules_ops; +#endif }; #endif diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 49819fe7e4c3..72232ab4ecb1 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -32,8 +32,6 @@ #include #include -static struct fib_rules_ops fib4_rules_ops; - struct fib4_rule { struct fib_rule common; @@ -63,7 +61,7 @@ int fib_lookup(struct flowi *flp, struct fib_result *res) }; int err; - err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg); + err = fib_rules_lookup(init_net.ipv4.rules_ops, flp, 0, &arg); res->r = arg.rule; return err; @@ -149,6 +147,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct nlmsghdr *nlh, struct fib_rule_hdr *frh, struct nlattr **tb) { + struct net *net = skb->sk->sk_net; int err = -EINVAL; struct fib4_rule *rule4 = (struct fib4_rule *) rule; @@ -159,7 +158,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, if (rule->action == FR_ACT_TO_TBL) { struct fib_table *table; - table = fib_empty_table(&init_net); + table = fib_empty_table(net); if (table == NULL) { err = -ENOBUFS; goto errout; @@ -250,9 +249,9 @@ static u32 fib4_rule_default_pref(struct fib_rules_ops *ops) struct list_head *pos; struct fib_rule *rule; - if (!list_empty(&fib4_rules_ops.rules_list)) { 
- pos = fib4_rules_ops.rules_list.next; - if (pos->next != &fib4_rules_ops.rules_list) { + if (!list_empty(&ops->rules_list)) { + pos = ops->rules_list.next; + if (pos->next != &ops->rules_list) { rule = list_entry(pos->next, struct fib_rule, list); if (rule->pref) return rule->pref - 1; @@ -274,7 +273,7 @@ static void fib4_rule_flush_cache(void) rt_cache_flush(-1); } -static struct fib_rules_ops fib4_rules_ops = { +static struct fib_rules_ops fib4_rules_ops_template = { .family = AF_INET, .rule_size = sizeof(struct fib4_rule), .addr_size = sizeof(u32), @@ -288,24 +287,20 @@ static struct fib_rules_ops fib4_rules_ops = { .flush_cache = fib4_rule_flush_cache, .nlgroup = RTNLGRP_IPV4_RULE, .policy = fib4_rule_policy, - .rules_list = LIST_HEAD_INIT(fib4_rules_ops.rules_list), .owner = THIS_MODULE, }; -static int __init fib_default_rules_init(void) +static int fib_default_rules_init(struct fib_rules_ops *ops) { int err; - err = fib_default_rule_add(&fib4_rules_ops, 0, - RT_TABLE_LOCAL, FIB_RULE_PERMANENT); + err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL, FIB_RULE_PERMANENT); if (err < 0) return err; - err = fib_default_rule_add(&fib4_rules_ops, 0x7FFE, - RT_TABLE_MAIN, 0); + err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN, 0); if (err < 0) return err; - err = fib_default_rule_add(&fib4_rules_ops, 0x7FFF, - RT_TABLE_DEFAULT, 0); + err = fib_default_rule_add(ops, 0x7FFF, RT_TABLE_DEFAULT, 0); if (err < 0) return err; return 0; @@ -314,20 +309,29 @@ static int __init fib_default_rules_init(void) int __net_init fib4_rules_init(struct net *net) { int err; + struct fib_rules_ops *ops; + + ops = kmemdup(&fib4_rules_ops_template, sizeof(*ops), GFP_KERNEL); + if (ops == NULL) + return -ENOMEM; + INIT_LIST_HEAD(&ops->rules_list); + fib_rules_register(net, ops); - fib_rules_register(net, &fib4_rules_ops); - err = fib_default_rules_init(); + err = fib_default_rules_init(ops); if (err < 0) goto fail; + net->ipv4.rules_ops = ops; return 0; fail: /* also cleans all rules 
already added */ - fib_rules_unregister(net, &fib4_rules_ops); + fib_rules_unregister(net, ops); + kfree(ops); return err; } void __net_exit fib4_rules_exit(struct net *net) { - fib_rules_unregister(net, &fib4_rules_ops); + fib_rules_unregister(net, net->ipv4.rules_ops); + kfree(net->ipv4.rules_ops); } -- cgit v1.2.3 From e4aef8aea31e6fc61b33a57120968a6e9824d138 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 10 Jan 2008 03:28:24 -0800 Subject: [NETNS]: Place fib tables into netns. The preparatory work has been done. All we need is to substitute fib_table_hash with net->ipv4.fib_table_hash. Netns context is available when required. Acked-by: Benjamin Thery Acked-by: Daniel Lezcano Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ip_fib.h | 6 ++---- include/net/netns/ipv4.h | 2 ++ net/ipv4/fib_frontend.c | 36 ++++++++++++++++++++++++------------ 3 files changed, 28 insertions(+), 16 deletions(-) (limited to 'include/net/netns/ipv4.h') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index dfb95d732aa5..025b225181e1 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -120,8 +120,6 @@ struct fib_result_nl { int err; }; -extern struct hlist_head fib_table_hash[]; - #ifdef CONFIG_IP_ROUTE_MULTIPATH #define FIB_RES_NH(res) ((res).fi->fib_nh[(res).nh_sel]) @@ -170,8 +168,8 @@ static inline struct fib_table *fib_get_table(struct net *net, u32 id) struct hlist_head *ptr; ptr = id == RT_TABLE_LOCAL ? 
- &fib_table_hash[TABLE_LOCAL_INDEX] : - &fib_table_hash[TABLE_MAIN_INDEX]; + &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX] : + &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]; return hlist_entry(ptr->first, struct fib_table, tb_hlist); } diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index a7bd5d83e43e..2dd31937d022 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -8,6 +8,7 @@ struct ctl_table_header; struct ipv4_devconf; struct fib_rules_ops; +struct hlist_head; struct netns_ipv4 { #ifdef CONFIG_SYSCTL @@ -18,5 +19,6 @@ struct netns_ipv4 { #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; #endif + struct hlist_head *fib_table_hash; }; #endif diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 90aa05f47f8e..7fe54a3c168b 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -50,7 +50,6 @@ #define FFprint(a...) printk(KERN_DEBUG a) static struct sock *fibnl; -struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -67,9 +66,9 @@ static int __net_init fib4_rules_init(struct net *net) goto fail; hlist_add_head_rcu(&local_table->tb_hlist, - &fib_table_hash[TABLE_LOCAL_INDEX]); + &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]); hlist_add_head_rcu(&main_table->tb_hlist, - &fib_table_hash[TABLE_MAIN_INDEX]); + &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]); return 0; fail: @@ -92,7 +91,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) if (!tb) return NULL; h = id & (FIB_TABLE_HASHSZ - 1); - hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]); + hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]); return tb; } @@ -100,13 +99,16 @@ struct fib_table *fib_get_table(struct net *net, u32 id) { struct fib_table *tb; struct hlist_node *node; + struct hlist_head *head; unsigned int h; if (id == 0) id = RT_TABLE_MAIN; h = id & (FIB_TABLE_HASHSZ - 1); + rcu_read_lock(); - hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) { + head 
= &net->ipv4.fib_table_hash[h]; + hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { if (tb->tb_id == id) { rcu_read_unlock(); return tb; @@ -117,15 +119,17 @@ struct fib_table *fib_get_table(struct net *net, u32 id) } #endif /* CONFIG_IP_MULTIPLE_TABLES */ -static void fib_flush(void) +static void fib_flush(struct net *net) { int flushed = 0; struct fib_table *tb; struct hlist_node *node; + struct hlist_head *head; unsigned int h; for (h = 0; h < FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) + head = &net->ipv4.fib_table_hash[h]; + hlist_for_each_entry(tb, node, head, tb_hlist) flushed += tb->tb_flush(tb); } @@ -620,6 +624,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) unsigned int e = 0, s_e; struct fib_table *tb; struct hlist_node *node; + struct hlist_head *head; int dumped = 0; if (net != &init_net) @@ -634,7 +639,8 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { e = 0; - hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) { + head = &net->ipv4.fib_table_hash[h]; + hlist_for_each_entry(tb, node, head, tb_hlist) { if (e < s_e) goto next; if (dumped) @@ -797,7 +803,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) for stray nexthop entries, then ignite fib_flush. 
*/ if (fib_sync_down(ifa->ifa_local, NULL, 0)) - fib_flush(); + fib_flush(&init_net); } } #undef LOCAL_OK @@ -882,7 +888,7 @@ static void nl_fib_lookup_exit(struct net *net) static void fib_disable_ip(struct net_device *dev, int force) { if (fib_sync_down(0, dev, force)) - fib_flush(); + fib_flush(&init_net); rt_cache_flush(0); arp_ifdown(dev); } @@ -963,8 +969,13 @@ static int __net_init ip_fib_net_init(struct net *net) { unsigned int i; + net->ipv4.fib_table_hash = kzalloc( + sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL); + if (net->ipv4.fib_table_hash == NULL) + return -ENOMEM; + for (i = 0; i < FIB_TABLE_HASHSZ; i++) - INIT_HLIST_HEAD(&fib_table_hash[i]); + INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]); return fib4_rules_init(net); } @@ -982,13 +993,14 @@ static void __net_exit ip_fib_net_exit(struct net *net) struct hlist_head *head; struct hlist_node *node, *tmp; - head = &fib_table_hash[i]; + head = &net->ipv4.fib_table_hash[i]; hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { hlist_del(node); tb->tb_flush(tb); kfree(tb); } } + kfree(net->ipv4.fib_table_hash); } static int __net_init fib_net_init(struct net *net) -- cgit v1.2.3 From 6bd48fcf73019219495f7599028296c65b749bb4 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 10 Jan 2008 03:28:55 -0800 Subject: [NETNS]: Provide correct namespace for fibnl netlink socket. This patch makes the netlink socket to be per namespace. That allows to have each namespace its own socket for routing queries. Acked-by: Benjamin Thery Acked-by: Daniel Lezcano Signed-off-by: Denis V. Lunev Signed-off-by: David S. 
Miller --- include/net/netns/ipv4.h | 2 ++ net/ipv4/fib_frontend.c | 24 ++++++++++++++++-------- 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include/net/netns/ipv4.h') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2dd31937d022..3872aa7e294b 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -9,6 +9,7 @@ struct ctl_table_header; struct ipv4_devconf; struct fib_rules_ops; struct hlist_head; +struct sock; struct netns_ipv4 { #ifdef CONFIG_SYSCTL @@ -20,5 +21,6 @@ struct netns_ipv4 { struct fib_rules_ops *rules_ops; #endif struct hlist_head *fib_table_hash; + struct sock *fibnl; }; #endif diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 7fe54a3c168b..a5e81677a2b5 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -49,8 +49,6 @@ #define FFprint(a...) printk(KERN_DEBUG a) -static struct sock *fibnl; - #ifndef CONFIG_IP_MULTIPLE_TABLES static int __net_init fib4_rules_init(struct net *net) @@ -845,11 +843,13 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) static void nl_fib_input(struct sk_buff *skb) { + struct net *net; struct fib_result_nl *frn; struct nlmsghdr *nlh; struct fib_table *tb; u32 pid; + net = skb->sk->sk_net; nlh = nlmsg_hdr(skb); if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) @@ -861,28 +861,36 @@ static void nl_fib_input(struct sk_buff *skb) nlh = nlmsg_hdr(skb); frn = (struct fib_result_nl *) NLMSG_DATA(nlh); - tb = fib_get_table(&init_net, frn->tb_id_in); + tb = fib_get_table(net, frn->tb_id_in); nl_fib_lookup(frn, tb); pid = NETLINK_CB(skb).pid; /* pid of sending process */ NETLINK_CB(skb).pid = 0; /* from kernel */ NETLINK_CB(skb).dst_group = 0; /* unicast */ - netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT); + netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); } static int nl_fib_lookup_init(struct net *net) { - fibnl = netlink_kernel_create(net, 
NETLINK_FIB_LOOKUP, 0, - nl_fib_input, NULL, THIS_MODULE); - if (fibnl == NULL) + struct sock *sk; + sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, + nl_fib_input, NULL, THIS_MODULE); + if (sk == NULL) return -EAFNOSUPPORT; + /* Don't hold an extra reference on the namespace */ + put_net(sk->sk_net); + net->ipv4.fibnl = sk; return 0; } static void nl_fib_lookup_exit(struct net *net) { - sock_put(fibnl); + /* At the last minute lie and say this is a socket for the + * initial network namespace. So the socket will be safe to free. + */ + net->ipv4.fibnl->sk_net = get_net(&init_net); + sock_put(net->ipv4.fibnl); } static void fib_disable_ip(struct net_device *dev, int force) -- cgit v1.2.3 From ac18e7509e7df327e30d6e073a787d922eaf211d Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 22 Jan 2008 06:02:14 -0800 Subject: [NETNS][FRAGS]: Make the inet_frag_queue lookup work in namespaces. Since fragment management code is consolidated, we cannot have the pointer from inet_frag_queue to struct net, since we must know what kind of fragment this is. So, I introduce the netns_frags structure. This one is currently empty, but will be eventually filled with per-namespace attributes. Each inet_frag_queue is tagged with this one. The conntrack_reasm is not "netns-izated", so it has one static netns_frags instance to keep working in init namespace. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- include/net/inet_frag.h | 8 ++++++-- include/net/netns/ipv4.h | 4 ++++ include/net/netns/ipv6.h | 1 + net/ipv4/inet_fragment.c | 27 +++++++++++++++------------ net/ipv4/ip_fragment.c | 8 +++++--- net/ipv6/netfilter/nf_conntrack_reasm.c | 3 ++- net/ipv6/reassembly.c | 8 +++++--- 7 files changed, 38 insertions(+), 21 deletions(-) (limited to 'include/net/netns/ipv4.h') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 954def408975..8ab6df64a12a 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -1,8 +1,12 @@ #ifndef __NET_FRAG_H__ #define __NET_FRAG_H__ +struct netns_frags { +}; + struct inet_frag_queue { struct hlist_node list; + struct netns_frags *net; struct list_head lru_list; /* lru list member */ spinlock_t lock; atomic_t refcnt; @@ -55,8 +59,8 @@ void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, int *work); int inet_frag_evictor(struct inet_frags *f); -struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key, - unsigned int hash); +struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, + struct inet_frags *f, void *key, unsigned int hash); static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) { diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 3872aa7e294b..80680e09443c 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -5,6 +5,8 @@ #ifndef __NETNS_IPV4_H__ #define __NETNS_IPV4_H__ +#include + struct ctl_table_header; struct ipv4_devconf; struct fib_rules_ops; @@ -22,5 +24,7 @@ struct netns_ipv4 { #endif struct hlist_head *fib_table_hash; struct sock *fibnl; + + struct netns_frags frags; }; #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 06b4dc034bbd..057c8e473a75 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -30,5 +30,6 @@ struct netns_ipv6 { struct netns_sysctl_ipv6 sysctl; 
struct ipv6_devconf *devconf_all; struct ipv6_devconf *devconf_dflt; + struct netns_frags frags; }; #endif diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 737910767ff1..158c5f60d023 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -174,8 +174,9 @@ int inet_frag_evictor(struct inet_frags *f) } EXPORT_SYMBOL(inet_frag_evictor); -static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, - struct inet_frags *f, unsigned int hash, void *arg) +static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, + struct inet_frag_queue *qp_in, struct inet_frags *f, + unsigned int hash, void *arg) { struct inet_frag_queue *qp; #ifdef CONFIG_SMP @@ -189,7 +190,7 @@ static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, * promoted read lock to write lock. */ hlist_for_each_entry(qp, n, &f->hash[hash], list) { - if (f->match(qp, arg)) { + if (qp->net == nf && f->match(qp, arg)) { atomic_inc(&qp->refcnt); write_unlock(&f->lock); qp_in->last_in |= COMPLETE; @@ -210,7 +211,8 @@ static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, return qp; } -static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg) +static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, + struct inet_frags *f, void *arg) { struct inet_frag_queue *q; @@ -223,31 +225,32 @@ static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg) setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); atomic_set(&q->refcnt, 1); + q->net = nf; return q; } -static struct inet_frag_queue *inet_frag_create(struct inet_frags *f, - void *arg, unsigned int hash) +static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, + struct inet_frags *f, void *arg, unsigned int hash) { struct inet_frag_queue *q; - q = inet_frag_alloc(f, arg); + q = inet_frag_alloc(nf, f, arg); if (q == NULL) return NULL; - return 
inet_frag_intern(q, f, hash, arg); + return inet_frag_intern(nf, q, f, hash, arg); } -struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key, - unsigned int hash) +struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, + struct inet_frags *f, void *key, unsigned int hash) { struct inet_frag_queue *q; struct hlist_node *n; read_lock(&f->lock); hlist_for_each_entry(q, n, &f->hash[hash], list) { - if (f->match(q, key)) { + if (q->net == nf && f->match(q, key)) { atomic_inc(&q->refcnt); read_unlock(&f->lock); return q; @@ -255,6 +258,6 @@ struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key, } read_unlock(&f->lock); - return inet_frag_create(f, key, hash); + return inet_frag_create(nf, f, key, hash); } EXPORT_SYMBOL(inet_frag_find); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a53463e594b9..56211ef46eed 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -236,7 +236,7 @@ out: /* Find the correct entry in the "incomplete datagrams" queue for * this IP datagram, and create new one, if nothing is found. */ -static inline struct ipq *ip_find(struct iphdr *iph, u32 user) +static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) { struct inet_frag_queue *q; struct ip4_create_arg arg; @@ -246,7 +246,7 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user) arg.user = user; hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); - q = inet_frag_find(&ip4_frags, &arg, hash); + q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); if (q == NULL) goto out_nomem; @@ -582,15 +582,17 @@ out_fail: int ip_defrag(struct sk_buff *skb, u32 user) { struct ipq *qp; + struct net *net; IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); + net = skb->dev->nd_net; /* Start by cleaning up the memory. 
*/ if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh) ip_evictor(); /* Lookup (or create) queue header */ - if ((qp = ip_find(ip_hdr(skb), user)) != NULL) { + if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) { int ret; spin_lock(&qp->q.lock); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index d631631189b6..18accd4eab06 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -78,6 +78,7 @@ static struct inet_frags_ctl nf_frags_ctl __read_mostly = { }; static struct inet_frags nf_frags; +static struct netns_frags nf_init_frags; #ifdef CONFIG_SYSCTL struct ctl_table nf_ct_ipv6_sysctl_table[] = { @@ -212,7 +213,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) arg.dst = dst; hash = ip6qhashfn(id, src, dst); - q = inet_frag_find(&nf_frags, &arg, hash); + q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); if (q == NULL) goto oom; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 1815ff0cf628..ab2d53b81b70 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -234,7 +234,7 @@ out: } static __inline__ struct frag_queue * -fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, +fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, struct inet6_dev *idev) { struct inet_frag_queue *q; @@ -246,7 +246,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, arg.dst = dst; hash = ip6qhashfn(id, src, dst); - q = inet_frag_find(&ip6_frags, &arg, hash); + q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); if (q == NULL) goto oom; @@ -568,6 +568,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr = ipv6_hdr(skb); + struct net *net; IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS); @@ -598,10 +599,11 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } + net = skb->dev->nd_net; if 
(atomic_read(&ip6_frags.mem) > init_net.ipv6.sysctl.frags.high_thresh) ip6_evictor(ip6_dst_idev(skb->dst)); - if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr, + if ((fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, ip6_dst_idev(skb->dst))) != NULL) { int ret; -- cgit v1.2.3 From e4a2d5c2bccd5bd29de5ae4f14ff4448fac9cfc8 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 22 Jan 2008 06:08:36 -0800 Subject: [NETNS][FRAGS]: Duplicate sysctl tables for new namespaces. Each namespace has to have own tables to tune their different parameters, so duplicate the tables and register them. All the tables in sub-namespaces are temporarily made read-only. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/netns/ipv6.h | 1 + net/ipv4/ip_fragment.c | 42 +++++++++++++++++++++++++++++++++++++++--- net/ipv6/reassembly.c | 41 ++++++++++++++++++++++++++++++++++++++--- 4 files changed, 79 insertions(+), 6 deletions(-) (limited to 'include/net/netns/ipv4.h') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 80680e09443c..15a0b052df22 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -16,6 +16,7 @@ struct sock; struct netns_ipv4 { #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; + struct ctl_table_header *frags_hdr; #endif struct ipv4_devconf *devconf_all; struct ipv4_devconf *devconf_dflt; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 057c8e473a75..87ab56ab93fc 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -12,6 +12,7 @@ struct ctl_table_header; struct netns_sysctl_ipv6 { #ifdef CONFIG_SYSCTL struct ctl_table_header *table; + struct ctl_table_header *frags_hdr; #endif struct inet_frags_ctl frags; int bindv6only; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 4f013343cef7..c51e1a11dc6b 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -661,17 +661,53 @@ 
static struct ctl_table ip4_frags_ctl_table[] = { static int ip4_frags_ctl_register(struct net *net) { + struct ctl_table *table; struct ctl_table_header *hdr; - hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, - ip4_frags_ctl_table); - return hdr == NULL ? -ENOMEM : 0; + table = ip4_frags_ctl_table; + if (net != &init_net) { + table = kmemdup(table, sizeof(ip4_frags_ctl_table), GFP_KERNEL); + if (table == NULL) + goto err_alloc; + + table[0].mode &= ~0222; + table[1].mode &= ~0222; + table[2].mode &= ~0222; + table[3].mode &= ~0222; + table[4].mode &= ~0222; + } + + hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table); + if (hdr == NULL) + goto err_reg; + + net->ipv4.frags_hdr = hdr; + return 0; + +err_reg: + if (net != &init_net) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void ip4_frags_ctl_unregister(struct net *net) +{ + struct ctl_table *table; + + table = net->ipv4.frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ipv4.frags_hdr); + kfree(table); } #else static inline int ip4_frags_ctl_register(struct net *net) { return 0; } + +static inline void ip4_frags_ctl_unregister(struct net *net) +{ +} #endif static int ipv4_frags_init_net(struct net *net) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 241b2cc49bf5..0300dcbf1a75 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -670,17 +670,52 @@ static struct ctl_table ip6_frags_ctl_table[] = { static int ip6_frags_sysctl_register(struct net *net) { + struct ctl_table *table; struct ctl_table_header *hdr; - hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, - ip6_frags_ctl_table); - return hdr == NULL ? 
-ENOMEM : 0; + table = ip6_frags_ctl_table; + if (net != &init_net) { + table = kmemdup(table, sizeof(ip6_frags_ctl_table), GFP_KERNEL); + if (table == NULL) + goto err_alloc; + + table[0].mode &= ~0222; + table[1].mode &= ~0222; + table[2].mode &= ~0222; + table[3].mode &= ~0222; + } + + hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table); + if (hdr == NULL) + goto err_reg; + + net->ipv6.sysctl.frags_hdr = hdr; + return 0; + +err_reg: + if (net != &init_net) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void ip6_frags_sysctl_unregister(struct net *net) +{ + struct ctl_table *table; + + table = net->ipv6.sysctl.frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr); + kfree(table); } #else static inline int ip6_frags_sysctl_register(struct net *net) { return 0; } + +static inline void ip6_frags_sysctl_unregister(struct net *net) +{ +} #endif static int ipv6_frags_init_net(struct net *net) -- cgit v1.2.3