diff options
Diffstat (limited to 'net/ipv4')
43 files changed, 684 insertions, 1185 deletions
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f70fac612596..7f9e337e3908 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1234,6 +1234,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) write_lock_bh(&in_dev->mc_list_lock); im->next=in_dev->mc_list; in_dev->mc_list=im; + in_dev->mc_count++; write_unlock_bh(&in_dev->mc_list_lock); #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); @@ -1282,6 +1283,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) if (--i->users == 0) { write_lock_bh(&in_dev->mc_list_lock); *ip = i->next; + in_dev->mc_count--; write_unlock_bh(&in_dev->mc_list_lock); igmp_group_dropped(i); @@ -1330,6 +1332,7 @@ void ip_mc_init_dev(struct in_device *in_dev) setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire, (unsigned long)in_dev); in_dev->mr_ifc_count = 0; + in_dev->mc_count = 0; setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, (unsigned long)in_dev); in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; @@ -1369,8 +1372,8 @@ void ip_mc_destroy_dev(struct in_device *in_dev) write_lock_bh(&in_dev->mc_list_lock); while ((i = in_dev->mc_list) != NULL) { in_dev->mc_list = i->next; + in_dev->mc_count--; write_unlock_bh(&in_dev->mc_list_lock); - igmp_group_dropped(i); ip_ma_put(i); @@ -2383,7 +2386,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) if (state->in_dev->mc_list == im) { seq_printf(seq, "%d\t%-10s: %5d %7s\n", - state->dev->ifindex, state->dev->name, state->dev->mc_count, querier); + state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); } seq_printf(seq, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 21fcc5a9045f..bd1278a2d828 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -30,20 +30,22 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); #endif /* - * This array holds the first and last local port number. + * This struct holds the first and last local port number. */ -int sysctl_local_port_range[2] = { 32768, 61000 }; -DEFINE_SEQLOCK(sysctl_port_range_lock); +struct local_ports sysctl_local_ports __read_mostly = { + .lock = SEQLOCK_UNLOCKED, + .range = { 32768, 61000 }, +}; void inet_get_local_port_range(int *low, int *high) { unsigned seq; do { - seq = read_seqbegin(&sysctl_port_range_lock); + seq = read_seqbegin(&sysctl_local_ports.lock); - *low = sysctl_local_port_range[0]; - *high = sysctl_local_port_range[1]; - } while (read_seqretry(&sysctl_port_range_lock, seq)); + *low = sysctl_local_ports.range[0]; + *high = sysctl_local_ports.range[1]; + } while (read_seqretry(&sysctl_local_ports.lock, seq)); } EXPORT_SYMBOL(inet_get_local_port_range); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 01671ad51ed3..6efdb70b3eb2 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -12,6 +12,7 @@ /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) { + struct net *net = dev_net(skb->dst->dev); const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi fl = {}; @@ -19,7 +20,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) unsigned int hh_len; unsigned int type; - type = inet_addr_type(&init_net, iph->saddr); + type = inet_addr_type(net, iph->saddr); if (skb->sk && inet_sk(skb->sk)->transparent) type = RTN_LOCAL; if (addr_type == RTN_UNSPEC) @@ -36,7 +37,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; fl.mark = skb->mark; fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; - if (ip_route_output_key(&init_net, &rt, &fl) != 0) + if (ip_route_output_key(net, &rt, &fl) != 0) return -1; /* Drop old route. */ @@ -46,7 +47,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. */ fl.nl_u.ip4_u.daddr = iph->saddr; - if (ip_route_output_key(&init_net, &rt, &fl) != 0) + if (ip_route_output_key(net, &rt, &fl) != 0) return -1; odst = skb->dst; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 90eb7cb47e77..3816e1dc9295 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -5,10 +5,15 @@ menu "IP: Netfilter Configuration" depends on INET && NETFILTER +config NF_DEFRAG_IPV4 + tristate + default n + config NF_CONNTRACK_IPV4 tristate "IPv4 connection tracking support (required for NAT)" depends on NF_CONNTRACK default m if NETFILTER_ADVANCED=n + select NF_DEFRAG_IPV4 ---help--- Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related @@ -56,23 +61,30 @@ config IP_NF_IPTABLES To compile it as a module, choose M here. If unsure, say N. +if IP_NF_IPTABLES + # The matches. -config IP_NF_MATCH_RECENT - tristate '"recent" match support' - depends on IP_NF_IPTABLES +config IP_NF_MATCH_ADDRTYPE + tristate '"addrtype" address type match support' depends on NETFILTER_ADVANCED help - This match is used for creating one or many lists of recently - used addresses and then matching against that/those list(s). + This option allows you to match what routing thinks of an address, + eg. UNICAST, LOCAL, BROADCAST, ... - Short options are available by using 'iptables -m recent -h' - Official Website: <http://snowman.net/projects/ipt_recent/> + If you want to compile it as a module, say M here and read + <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. + +config IP_NF_MATCH_AH + tristate '"ah" match support' + depends on NETFILTER_ADVANCED + help + This match extension allows you to match a range of SPIs + inside AH header of IPSec packets. To compile it as a module, choose M here. If unsure, say N. config IP_NF_MATCH_ECN tristate '"ecn" match support' - depends on IP_NF_IPTABLES depends on NETFILTER_ADVANCED help This option adds a `ECN' match, which allows you to match against @@ -80,19 +92,8 @@ config IP_NF_MATCH_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_MATCH_AH - tristate '"ah" match support' - depends on IP_NF_IPTABLES - depends on NETFILTER_ADVANCED - help - This match extension allows you to match a range of SPIs - inside AH header of IPSec packets. - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_MATCH_TTL tristate '"ttl" match support' - depends on IP_NF_IPTABLES depends on NETFILTER_ADVANCED help This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user @@ -100,21 +101,9 @@ config IP_NF_MATCH_TTL To compile it as a module, choose M here. If unsure, say N. -config IP_NF_MATCH_ADDRTYPE - tristate '"addrtype" address type match support' - depends on IP_NF_IPTABLES - depends on NETFILTER_ADVANCED - help - This option allows you to match what routing thinks of an address, - eg. UNICAST, LOCAL, BROADCAST, ... - - If you want to compile it as a module, say M here and read - <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. - # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" - depends on IP_NF_IPTABLES default m if NETFILTER_ADVANCED=n help Packet filtering defines a table `filter', which has a series of @@ -136,7 +125,6 @@ config IP_NF_TARGET_REJECT config IP_NF_TARGET_LOG tristate "LOG target support" - depends on IP_NF_IPTABLES default m if NETFILTER_ADVANCED=n help This option adds a `LOG' target, which allows you to create rules in @@ -146,7 +134,6 @@ config IP_NF_TARGET_LOG config IP_NF_TARGET_ULOG tristate "ULOG target support" - depends on IP_NF_IPTABLES default m if NETFILTER_ADVANCED=n ---help--- @@ -167,7 +154,7 @@ config IP_NF_TARGET_ULOG # NAT + specific targets: nf_conntrack config NF_NAT tristate "Full NAT" - depends on IP_NF_IPTABLES && NF_CONNTRACK_IPV4 + depends on NF_CONNTRACK_IPV4 default m if NETFILTER_ADVANCED=n help The Full NAT option allows masquerading, port forwarding and other @@ -194,26 +181,26 @@ config IP_NF_TARGET_MASQUERADE To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_REDIRECT - tristate "REDIRECT target support" +config IP_NF_TARGET_NETMAP + tristate "NETMAP target support" depends on NF_NAT depends on NETFILTER_ADVANCED help - REDIRECT is a special case of NAT: all incoming connections are - mapped onto the incoming interface's address, causing the packets to - come to the local machine instead of passing through. This is - useful for transparent proxies. + NETMAP is an implementation of static 1:1 NAT mapping of network + addresses. It maps the network address part, while keeping the host + address part intact. To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_NETMAP - tristate "NETMAP target support" +config IP_NF_TARGET_REDIRECT + tristate "REDIRECT target support" depends on NF_NAT depends on NETFILTER_ADVANCED help - NETMAP is an implementation of static 1:1 NAT mapping of network - addresses. It maps the network address part, while keeping the host - address part intact. + REDIRECT is a special case of NAT: all incoming connections are + mapped onto the incoming interface's address, causing the packets to + come to the local machine instead of passing through. This is + useful for transparent proxies. To compile it as a module, choose M here. If unsure, say N. @@ -262,44 +249,43 @@ config NF_NAT_PROTO_SCTP config NF_NAT_FTP tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_FTP config NF_NAT_IRC tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_IRC config NF_NAT_TFTP tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_TFTP config NF_NAT_AMANDA tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_AMANDA config NF_NAT_PPTP tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_PPTP select NF_NAT_PROTO_GRE config NF_NAT_H323 tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_H323 config NF_NAT_SIP tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_SIP # mangle + specific targets config IP_NF_MANGLE tristate "Packet mangling" - depends on IP_NF_IPTABLES default m if NETFILTER_ADVANCED=n help This option adds a `mangle' table to iptables: see the man page for @@ -308,6 +294,19 @@ config IP_NF_MANGLE To compile it as a module, choose M here. If unsure, say N. +config IP_NF_TARGET_CLUSTERIP + tristate "CLUSTERIP target support (EXPERIMENTAL)" + depends on IP_NF_MANGLE && EXPERIMENTAL + depends on NF_CONNTRACK_IPV4 + depends on NETFILTER_ADVANCED + select NF_CONNTRACK_MARK + help + The CLUSTERIP target allows you to build load-balancing clusters of + network servers without having a dedicated load-balancing + router/server/switch. + + To compile it as a module, choose M here. If unsure, say N. + config IP_NF_TARGET_ECN tristate "ECN target support" depends on IP_NF_MANGLE @@ -338,23 +337,9 @@ config IP_NF_TARGET_TTL To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_CLUSTERIP - tristate "CLUSTERIP target support (EXPERIMENTAL)" - depends on IP_NF_MANGLE && EXPERIMENTAL - depends on NF_CONNTRACK_IPV4 - depends on NETFILTER_ADVANCED - select NF_CONNTRACK_MARK - help - The CLUSTERIP target allows you to build load-balancing clusters of - network servers without having a dedicated load-balancing - router/server/switch. - - To compile it as a module, choose M here. If unsure, say N. - # raw + specific targets config IP_NF_RAW tristate 'raw table support (required for NOTRACK/TRACE)' - depends on IP_NF_IPTABLES depends on NETFILTER_ADVANCED help This option adds a `raw' table to iptables. This table is the very @@ -367,7 +352,6 @@ config IP_NF_RAW # security table for MAC policy config IP_NF_SECURITY tristate "Security table" - depends on IP_NF_IPTABLES depends on SECURITY depends on NETFILTER_ADVANCED help @@ -376,6 +360,8 @@ config IP_NF_SECURITY If unsure, say N. +endif # IP_NF_IPTABLES + # ARP tables config IP_NF_ARPTABLES tristate "ARP tables support" @@ -388,9 +374,10 @@ config IP_NF_ARPTABLES To compile it as a module, choose M here. If unsure, say N. +if IP_NF_ARPTABLES + config IP_NF_ARPFILTER tristate "ARP packet filtering" - depends on IP_NF_ARPTABLES help ARP packet filtering defines a table `filter', which has a series of rules for simple ARP packet filtering at local input and @@ -401,10 +388,11 @@ config IP_NF_ARPFILTER config IP_NF_ARP_MANGLE tristate "ARP payload mangling" - depends on IP_NF_ARPTABLES help Allows altering the ARP packet payload: source and destination hardware and network addresses. +endif # IP_NF_ARPTABLES + endmenu diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 3f31291f37ce..5f9b650d90fc 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -18,6 +18,9 @@ obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o obj-$(CONFIG_NF_NAT) += nf_nat.o +# defrag +obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o + # NAT helpers (nf_conntrack) obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o @@ -48,7 +51,6 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o -obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o # targets diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 03e83a65aec5..8d70d29f1ccf 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -200,15 +200,12 @@ static inline int arp_checkentry(const struct arpt_arp *arp) return 1; } -static unsigned int arpt_error(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +static unsigned int +arpt_error(struct sk_buff *skb, const struct xt_target_param *par) { if (net_ratelimit()) - printk("arp_tables: error: '%s'\n", (char *)targinfo); + printk("arp_tables: error: '%s'\n", + (const char *)par->targinfo); return NF_DROP; } @@ -232,6 +229,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, const char *indev, *outdev; void *table_base; const struct xt_table_info *private; + struct xt_target_param tgpar; if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) return NF_DROP; @@ -245,6 +243,11 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); + tgpar.in = in; + tgpar.out = out; + tgpar.hooknum = hook; + tgpar.family = NFPROTO_ARP; + arp = arp_hdr(skb); do { if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { @@ -290,11 +293,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, /* Targets which reenter must return * abs. verdicts */ + tgpar.target = t->u.kernel.target; + tgpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, - in, out, - hook, - t->u.kernel.target, - t->data); + &tgpar); /* Target might have changed stuff. */ arp = arp_hdr(skb); @@ -456,23 +458,24 @@ static inline int check_entry(struct arpt_entry *e, const char *name) static inline int check_target(struct arpt_entry *e, const char *name) { - struct arpt_entry_target *t; - struct xt_target *target; + struct arpt_entry_target *t = arpt_get_target(e); int ret; - - t = arpt_get_target(e); - target = t->u.kernel.target; - - ret = xt_check_target(target, NF_ARP, t->u.target_size - sizeof(*t), - name, e->comefrom, 0, 0); - if (!ret && t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, target, t->data, - e->comefrom)) { + struct xt_tgchk_param par = { + .table = name, + .entryinfo = e, + .target = t->u.kernel.target, + .targinfo = t->data, + .hook_mask = e->comefrom, + .family = NFPROTO_ARP, + }; + + ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); + if (ret < 0) { duprintf("arp_tables: check failed for `%s'.\n", t->u.kernel.target->name); - ret = -EINVAL; + return ret; } - return ret; + return 0; } static inline int @@ -488,7 +491,8 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, return ret; t = arpt_get_target(e); - target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name, + target = try_then_request_module(xt_find_target(NFPROTO_ARP, + t->u.user.name, t->u.user.revision), "arpt_%s", t->u.user.name); if (IS_ERR(target) || !target) { @@ -554,15 +558,19 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) { + struct xt_tgdtor_param par; struct arpt_entry_target *t; if (i && (*i)-- == 0) return 1; t = arpt_get_target(e); - if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data); - module_put(t->u.kernel.target->me); + par.target = t->u.kernel.target; + par.targinfo = t->data; + par.family = NFPROTO_ARP; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } @@ -788,7 +796,7 @@ static void compat_standard_from_user(void *dst, void *src) int v = *(compat_int_t *)src; if (v > 0) - v += xt_compat_calc_jump(NF_ARP, v); + v += xt_compat_calc_jump(NFPROTO_ARP, v); memcpy(dst, &v, sizeof(v)); } @@ -797,7 +805,7 @@ static int compat_standard_to_user(void __user *dst, void *src) compat_int_t cv = *(int *)src; if (cv > 0) - cv -= xt_compat_calc_jump(NF_ARP, cv); + cv -= xt_compat_calc_jump(NFPROTO_ARP, cv); return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; } @@ -815,7 +823,7 @@ static int compat_calc_entry(struct arpt_entry *e, t = arpt_get_target(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; - ret = xt_compat_add_offset(NF_ARP, entry_offset, off); + ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); if (ret) return ret; @@ -866,9 +874,9 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) name[ARPT_TABLE_MAXNAMELEN-1] = '\0'; #ifdef CONFIG_COMPAT if (compat) - xt_compat_lock(NF_ARP); + xt_compat_lock(NFPROTO_ARP); #endif - t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name), + t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), "arptable_%s", name); if (t && !IS_ERR(t)) { struct arpt_getinfo info; @@ -878,7 +886,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) if (compat) { struct xt_table_info tmp; ret = compat_table_info(private, &tmp); - xt_compat_flush_offsets(NF_ARP); + xt_compat_flush_offsets(NFPROTO_ARP); private = &tmp; } #endif @@ -901,7 +909,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) ret = t ? PTR_ERR(t) : -ENOENT; #ifdef CONFIG_COMPAT if (compat) - xt_compat_unlock(NF_ARP); + xt_compat_unlock(NFPROTO_ARP); #endif return ret; } @@ -925,7 +933,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, return -EINVAL; } - t = xt_find_table_lock(net, NF_ARP, get.name); + t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (t && !IS_ERR(t)) { const struct xt_table_info *private = t->private; @@ -967,7 +975,7 @@ static int __do_replace(struct net *net, const char *name, goto out; } - t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name), + t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), "arptable_%s", name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; @@ -1134,7 +1142,7 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - t = xt_find_table_lock(net, NF_ARP, name); + t = xt_find_table_lock(net, NFPROTO_ARP, name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1218,7 +1226,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, entry_offset = (void *)e - (void *)base; t = compat_arpt_get_target(e); - target = try_then_request_module(xt_find_target(NF_ARP, + target = try_then_request_module(xt_find_target(NFPROTO_ARP, t->u.user.name, t->u.user.revision), "arpt_%s", t->u.user.name); @@ -1232,7 +1240,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, off += xt_compat_target_offset(target); *size += off; - ret = xt_compat_add_offset(NF_ARP, entry_offset, off); + ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); if (ret) goto release_target; @@ -1333,7 +1341,7 @@ static int translate_compat_table(const char *name, duprintf("translate_compat_table: size %u\n", info->size); j = 0; - xt_compat_lock(NF_ARP); + xt_compat_lock(NFPROTO_ARP); /* Walk through entries, checking offsets. */ ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, check_compat_entry_size_and_hooks, @@ -1383,8 +1391,8 @@ static int translate_compat_table(const char *name, ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_copy_entry_from_user, &pos, &size, name, newinfo, entry1); - xt_compat_flush_offsets(NF_ARP); - xt_compat_unlock(NF_ARP); + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); if (ret) goto free_newinfo; @@ -1420,8 +1428,8 @@ out: COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); return ret; out_unlock: - xt_compat_flush_offsets(NF_ARP); - xt_compat_unlock(NF_ARP); + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); goto out; } @@ -1607,8 +1615,8 @@ static int compat_get_entries(struct net *net, return -EINVAL; } - xt_compat_lock(NF_ARP); - t = xt_find_table_lock(net, NF_ARP, get.name); + xt_compat_lock(NFPROTO_ARP); + t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (t && !IS_ERR(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; @@ -1623,13 +1631,13 @@ static int compat_get_entries(struct net *net, private->size, get.size); ret = -EAGAIN; } - xt_compat_flush_offsets(NF_ARP); + xt_compat_flush_offsets(NFPROTO_ARP); module_put(t->me); xt_table_unlock(t); } else ret = t ? PTR_ERR(t) : -ENOENT; - xt_compat_unlock(NF_ARP); + xt_compat_unlock(NFPROTO_ARP); return ret; } @@ -1709,7 +1717,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len break; } - try_then_request_module(xt_find_revision(NF_ARP, rev.name, + try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, rev.revision, 1, &ret), "arpt_%s", rev.name); break; @@ -1787,7 +1795,7 @@ void arpt_unregister_table(struct xt_table *table) static struct xt_target arpt_standard_target __read_mostly = { .name = ARPT_STANDARD_TARGET, .targetsize = sizeof(int), - .family = NF_ARP, + .family = NFPROTO_ARP, #ifdef CONFIG_COMPAT .compatsize = sizeof(compat_int_t), .compat_from_user = compat_standard_from_user, @@ -1799,7 +1807,7 @@ static struct xt_target arpt_error_target __read_mostly = { .name = ARPT_ERROR_TARGET, .target = arpt_error, .targetsize = ARPT_FUNCTION_MAXNAMELEN, - .family = NF_ARP, + .family = NFPROTO_ARP, }; static struct nf_sockopt_ops arpt_sockopts = { @@ -1821,12 +1829,12 @@ static struct nf_sockopt_ops arpt_sockopts = { static int __net_init arp_tables_net_init(struct net *net) { - return xt_proto_init(net, NF_ARP); + return xt_proto_init(net, NFPROTO_ARP); } static void __net_exit arp_tables_net_exit(struct net *net) { - xt_proto_fini(net, NF_ARP); + xt_proto_fini(net, NFPROTO_ARP); } static struct pernet_operations arp_tables_net_ops = { diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index a385959d2655..b0d5b1d0a769 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -9,12 +9,9 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); MODULE_DESCRIPTION("arptables arp payload mangle target"); static unsigned int -target(struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - unsigned int hooknum, const struct xt_target *target, - const void *targinfo) +target(struct sk_buff *skb, const struct xt_target_param *par) { - const struct arpt_mangle *mangle = targinfo; + const struct arpt_mangle *mangle = par->targinfo; const struct arphdr *arp; unsigned char *arpptr; int pln, hln; @@ -57,11 +54,9 @@ target(struct sk_buff *skb, return mangle->target; } -static bool -checkentry(const char *tablename, const void *e, const struct xt_target *target, - void *targinfo, unsigned int hook_mask) +static bool checkentry(const struct xt_tgchk_param *par) { - const struct arpt_mangle *mangle = targinfo; + const struct arpt_mangle *mangle = par->targinfo; if (mangle->flags & ~ARPT_MANGLE_MASK || !(mangle->flags & ARPT_MANGLE_MASK)) @@ -75,7 +70,7 @@ checkentry(const char *tablename, const void *e, const struct xt_target *target, static struct xt_target arpt_mangle_reg __read_mostly = { .name = "mangle", - .family = NF_ARP, + .family = NFPROTO_ARP, .target = target, .targetsize = sizeof(struct arpt_mangle), .checkentry = checkentry, diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 082f5dd3156c..bee3d117661a 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -51,7 +51,7 @@ static struct xt_table packet_filter = { .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), .private = NULL, .me = THIS_MODULE, - .af = NF_ARP, + .af = NFPROTO_ARP, }; /* The work comes in here from netfilter.c */ @@ -89,21 +89,21 @@ static struct nf_hook_ops arpt_ops[] __read_mostly = { { .hook = arpt_in_hook, .owner = THIS_MODULE, - .pf = NF_ARP, + .pf = NFPROTO_ARP, .hooknum = NF_ARP_IN, .priority = NF_IP_PRI_FILTER, }, { .hook = arpt_out_hook, .owner = THIS_MODULE, - .pf = NF_ARP, + .pf = NFPROTO_ARP, .hooknum = NF_ARP_OUT, .priority = NF_IP_PRI_FILTER, }, { .hook = arpt_forward_hook, .owner = THIS_MODULE, - .pf = NF_ARP, + .pf = NFPROTO_ARP, .hooknum = NF_ARP_FORWARD, .priority = NF_IP_PRI_FILTER, }, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 4e7c719445c2..213fb27debc1 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -171,31 +171,25 @@ ip_checkentry(const struct ipt_ip *ip) } static unsigned int -ipt_error(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +ipt_error(struct sk_buff *skb, const struct xt_target_param *par) { if (net_ratelimit()) - printk("ip_tables: error: `%s'\n", (char *)targinfo); + printk("ip_tables: error: `%s'\n", + (const char *)par->targinfo); return NF_DROP; } /* Performance critical - called for every packet */ static inline bool -do_match(struct ipt_entry_match *m, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int offset, - bool *hotdrop) +do_match(struct ipt_entry_match *m, const struct sk_buff *skb, + struct xt_match_param *par) { + par->match = m->u.kernel.match; + par->matchinfo = m->data; + /* Stop iteration if it doesn't match */ - if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data, - offset, ip_hdrlen(skb), hotdrop)) + if (!m->u.kernel.match->match(skb, par)) return true; else return false; @@ -326,7 +320,6 @@ ipt_do_table(struct sk_buff *skb, struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); - u_int16_t offset; const struct iphdr *ip; u_int16_t datalen; bool hotdrop = false; @@ -336,6 +329,8 @@ ipt_do_table(struct sk_buff *skb, void *table_base; struct ipt_entry *e, *back; struct xt_table_info *private; + struct xt_match_param mtpar; + struct xt_target_param tgpar; /* Initialization */ ip = ip_hdr(skb); @@ -348,7 +343,13 @@ ipt_do_table(struct sk_buff *skb, * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. */ - offset = ntohs(ip->frag_off) & IP_OFFSET; + mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; + mtpar.thoff = ip_hdrlen(skb); + mtpar.hotdrop = &hotdrop; + mtpar.in = tgpar.in = in; + mtpar.out = tgpar.out = out; + mtpar.family = tgpar.family = NFPROTO_IPV4; + tgpar.hooknum = hook; read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); @@ -362,12 +363,11 @@ ipt_do_table(struct sk_buff *skb, do { IP_NF_ASSERT(e); IP_NF_ASSERT(back); - if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) { + if (ip_packet_match(ip, indev, outdev, + &e->ip, mtpar.fragoff)) { struct ipt_entry_target *t; - if (IPT_MATCH_ITERATE(e, do_match, - skb, in, out, - offset, &hotdrop) != 0) + if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) goto no_match; ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); @@ -413,16 +413,14 @@ ipt_do_table(struct sk_buff *skb, } else { /* Targets which reenter must return abs. verdicts */ + tgpar.target = t->u.kernel.target; + tgpar.targinfo = t->data; #ifdef CONFIG_NETFILTER_DEBUG ((struct ipt_entry *)table_base)->comefrom = 0xeeeeeeec; #endif verdict = t->u.kernel.target->target(skb, - in, out, - hook, - t->u.kernel.target, - t->data); - + &tgpar); #ifdef CONFIG_NETFILTER_DEBUG if (((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec @@ -575,12 +573,17 @@ mark_source_chains(struct xt_table_info *newinfo, static int cleanup_match(struct ipt_entry_match *m, unsigned int *i) { + struct xt_mtdtor_param par; + if (i && (*i)-- == 0) return 1; - if (m->u.kernel.match->destroy) - m->u.kernel.match->destroy(m->u.kernel.match, m->data); - module_put(m->u.kernel.match->me); + par.match = m->u.kernel.match; + par.matchinfo = m->data; + par.family = NFPROTO_IPV4; + if (par.match->destroy != NULL) + par.match->destroy(&par); + module_put(par.match->me); return 0; } @@ -606,34 +609,28 @@ check_entry(struct ipt_entry *e, const char *name) } static int -check_match(struct ipt_entry_match *m, const char *name, - const struct ipt_ip *ip, - unsigned int hookmask, unsigned int *i) +check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, + unsigned int *i) { - struct xt_match *match; + const struct ipt_ip *ip = par->entryinfo; int ret; - match = m->u.kernel.match; - ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m), - name, hookmask, ip->proto, - ip->invflags & IPT_INV_PROTO); - if (!ret && m->u.kernel.match->checkentry - && !m->u.kernel.match->checkentry(name, ip, match, m->data, - hookmask)) { + par->match = m->u.kernel.match; + par->matchinfo = m->data; + + ret = xt_check_match(par, m->u.match_size - sizeof(*m), + ip->proto, ip->invflags & IPT_INV_PROTO); + if (ret < 0) { duprintf("ip_tables: check failed for `%s'.\n", - m->u.kernel.match->name); - ret = -EINVAL; + par.match->name); + return ret; } - if (!ret) - (*i)++; - return ret; + ++*i; + return 0; } static int -find_check_match(struct ipt_entry_match *m, - const char *name, - const struct ipt_ip *ip, - unsigned int hookmask, +find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, unsigned int *i) { struct xt_match *match; @@ -648,7 +645,7 @@ find_check_match(struct ipt_entry_match *m, } m->u.kernel.match = match; - ret = check_match(m, name, ip, hookmask, i); + ret = check_match(m, par, i); if (ret) goto err; @@ -660,23 +657,25 @@ err: static int check_target(struct ipt_entry *e, const char *name) { - struct ipt_entry_target *t; - struct xt_target *target; + struct ipt_entry_target *t = ipt_get_target(e); + struct xt_tgchk_param par = { + .table = name, + .entryinfo = e, + .target = t->u.kernel.target, + .targinfo = t->data, + .hook_mask = e->comefrom, + .family = NFPROTO_IPV4, + }; int ret; - t = ipt_get_target(e); - target = t->u.kernel.target; - ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), - name, e->comefrom, e->ip.proto, - e->ip.invflags & IPT_INV_PROTO); - if (!ret && t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, target, t->data, - e->comefrom)) { + ret = xt_check_target(&par, t->u.target_size - sizeof(*t), + e->ip.proto, e->ip.invflags & IPT_INV_PROTO); + if (ret < 0) { duprintf("ip_tables: check failed for `%s'.\n", t->u.kernel.target->name); - ret = -EINVAL; + return ret; } - return ret; + return 0; } static int @@ -687,14 +686,18 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, struct xt_target *target; int ret; unsigned int j; + struct xt_mtchk_param mtpar; ret = check_entry(e, name); if (ret) return ret; j = 0; - ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip, - e->comefrom, &j); + mtpar.table = name; + mtpar.entryinfo = &e->ip; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV4; + ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j); if (ret != 0) goto cleanup_matches; @@ -769,6 +772,7 @@ check_entry_size_and_hooks(struct ipt_entry *e, static int cleanup_entry(struct ipt_entry *e, unsigned int *i) { + struct xt_tgdtor_param par; struct ipt_entry_target *t; if (i && (*i)-- == 0) @@ -777,9 +781,13 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i) /* Cleanup all matches */ IPT_MATCH_ITERATE(e, cleanup_match, NULL); t = ipt_get_target(e); - if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data); - module_put(t->u.kernel.target->me); + + par.target = t->u.kernel.target; + par.targinfo = t->data; + par.family = NFPROTO_IPV4; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } @@ -1648,12 +1656,16 @@ static int compat_check_entry(struct ipt_entry *e, const char *name, unsigned int *i) { + struct xt_mtchk_param mtpar; unsigned int j; int ret; j = 0; - ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, - e->comefrom, &j); + mtpar.table = name; + mtpar.entryinfo = &e->ip; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV4; + ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j); if (ret) goto cleanup_matches; @@ -2121,30 +2133,23 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, } static bool -icmp_match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +icmp_match(const struct sk_buff *skb, const struct xt_match_param *par) { const struct icmphdr *ic; struct icmphdr _icmph; - const struct ipt_icmp *icmpinfo = matchinfo; + const struct ipt_icmp *icmpinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) + if (par->fragoff != 0) return false; - ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph); + ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); if (ic == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ duprintf("Dropping evil ICMP tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -2155,15 +2160,9 @@ icmp_match(const struct sk_buff *skb, !!(icmpinfo->invflags&IPT_ICMP_INV)); } -/* Called when user tries to insert an entry of this type. */ -static bool -icmp_checkentry(const char *tablename, - const void *entry, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool icmp_checkentry(const struct xt_mtchk_param *par) { - const struct ipt_icmp *icmpinfo = matchinfo; + const struct ipt_icmp *icmpinfo = par->matchinfo; /* Must specify no unknown invflags */ return !(icmpinfo->invflags & ~IPT_ICMP_INV); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index fafe8ebb4c55..7ac1677419a9 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -281,11 +281,9 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) ***********************************************************************/ static unsigned int -clusterip_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_clusterip_tgt_info *cipinfo = targinfo; + const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; struct nf_conn *ct; enum ip_conntrack_info ctinfo; u_int32_t hash; @@ -349,13 +347,10 @@ clusterip_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -clusterip_tg_check(const char *tablename, const void *e_void, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool clusterip_tg_check(const struct xt_tgchk_param *par) { - struct ipt_clusterip_tgt_info *cipinfo = targinfo; - const struct ipt_entry *e = e_void; + struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; struct clusterip_config *config; @@ -406,9 +401,9 @@ clusterip_tg_check(const char *tablename, const void *e_void, } cipinfo->config = config; - if (nf_ct_l3proto_try_module_get(target->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->target->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", target->family); + "proto=%u\n", par->target->family); return false; } @@ -416,9 +411,9 @@ clusterip_tg_check(const char *tablename, const void *e_void, } /* drop reference count of cluster config when rule is deleted */ -static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo) +static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) { - const struct ipt_clusterip_tgt_info *cipinfo = targinfo; + const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; /* if no more entries are referencing the config, remove it * from the list and destroy the proc entry */ @@ -426,7 +421,7 @@ static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo) clusterip_config_put(cipinfo->config); - nf_ct_l3proto_module_put(target->family); + nf_ct_l3proto_module_put(par->target->family); } #ifdef CONFIG_COMPAT @@ -445,7 +440,7 @@ struct compat_ipt_clusterip_tgt_info static struct xt_target clusterip_tg_reg __read_mostly = { .name = "CLUSTERIP", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = clusterip_tg, .checkentry = clusterip_tg_check, .destroy = clusterip_tg_destroy, @@ -546,7 +541,7 @@ arp_mangle(unsigned int hook, static struct nf_hook_ops cip_arp_ops __read_mostly = { .hook = arp_mangle, - .pf = NF_ARP, + .pf = NFPROTO_ARP, .hooknum = NF_ARP_OUT, .priority = -1 }; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index d60139c134ca..f7e2fa0974dc 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -77,11 +77,9 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) } static unsigned int -ecn_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +ecn_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_ECN_info *einfo = targinfo; + const struct ipt_ECN_info *einfo = par->targinfo; if (einfo->operation & IPT_ECN_OP_SET_IP) if (!set_ect_ip(skb, einfo)) @@ -95,13 +93,10 @@ ecn_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -ecn_tg_check(const char *tablename, const void *e_void, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool ecn_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_ECN_info *einfo = targinfo; - const struct ipt_entry *e = e_void; + const struct ipt_ECN_info *einfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; if (einfo->operation & IPT_ECN_OP_MASK) { printk(KERN_WARNING "ECN: unsupported ECN operation %x\n", @@ -124,7 +119,7 @@ ecn_tg_check(const char *tablename, const void *e_void, static struct xt_target ecn_tg_reg __read_mostly = { .name = "ECN", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = ecn_tg, .targetsize = sizeof(struct ipt_ECN_info), .table = "mangle", diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 0af14137137b..fc6ce04a3e35 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -375,7 +375,7 @@ static struct nf_loginfo default_loginfo = { }; static void -ipt_log_packet(unsigned int pf, +ipt_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -426,28 +426,23 @@ ipt_log_packet(unsigned int pf, } static unsigned int -log_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +log_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_log_info *loginfo = targinfo; + const struct ipt_log_info *loginfo = par->targinfo; struct nf_loginfo li; li.type = NF_LOG_TYPE_LOG; li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - ipt_log_packet(PF_INET, hooknum, skb, in, out, &li, + ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in, par->out, &li, loginfo->prefix); return XT_CONTINUE; } -static bool -log_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool log_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_log_info *loginfo = targinfo; + const struct ipt_log_info *loginfo = par->targinfo; if (loginfo->level >= 8) { pr_debug("LOG: level %u >= 8\n", loginfo->level); @@ -463,7 +458,7 @@ log_tg_check(const char *tablename, const void *e, static struct xt_target log_tg_reg __read_mostly = { .name = "LOG", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = log_tg, .targetsize = sizeof(struct ipt_log_info), .checkentry = log_tg_check, @@ -483,7 +478,7 @@ static int __init log_tg_init(void) ret = xt_register_target(&log_tg_reg); if (ret < 0) return ret; - nf_log_register(PF_INET, &ipt_log_logger); + nf_log_register(NFPROTO_IPV4, &ipt_log_logger); return 0; } diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 0841aefaa503..f389f60cb105 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -31,12 +31,9 @@ MODULE_DESCRIPTION("Xtables: automatic-address SNAT"); static DEFINE_RWLOCK(masq_lock); /* FIXME: Multiple targets. --RR */ -static bool -masquerade_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool masquerade_tg_check(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { pr_debug("masquerade_check: bad MAP_IPS.\n"); @@ -50,9 +47,7 @@ masquerade_tg_check(const char *tablename, const void *e, } static unsigned int -masquerade_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; struct nf_conn_nat *nat; @@ -62,7 +57,7 @@ masquerade_tg(struct sk_buff *skb, const struct net_device *in, const struct rtable *rt; __be32 newsrc; - NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING); + NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); ct = nf_ct_get(skb, &ctinfo); nat = nfct_nat(ct); @@ -76,16 +71,16 @@ masquerade_tg(struct sk_buff *skb, const struct net_device *in, if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0) return NF_ACCEPT; - mr = targinfo; + mr = par->targinfo; rt = skb->rtable; - newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE); + newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE); if (!newsrc) { - printk("MASQUERADE: %s ate my IP address\n", out->name); + printk("MASQUERADE: %s ate my IP address\n", par->out->name); return NF_DROP; } write_lock_bh(&masq_lock); - nat->masq_index = out->ifindex; + nat->masq_index = par->out->ifindex; write_unlock_bh(&masq_lock); /* Transfer from original range. */ @@ -119,9 +114,7 @@ static int masq_device_event(struct notifier_block *this, void *ptr) { const struct net_device *dev = ptr; - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; + struct net *net = dev_net(dev); if (event == NETDEV_DOWN) { /* Device was downed. Search entire table for @@ -129,7 +122,8 @@ static int masq_device_event(struct notifier_block *this, and forget them. */ NF_CT_ASSERT(dev->ifindex != 0); - nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex); + nf_ct_iterate_cleanup(net, device_cmp, + (void *)(long)dev->ifindex); } return NOTIFY_DONE; @@ -153,7 +147,7 @@ static struct notifier_block masq_inet_notifier = { static struct xt_target masquerade_tg_reg __read_mostly = { .name = "MASQUERADE", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = masquerade_tg, .targetsize = sizeof(struct nf_nat_multi_range_compat), .table = "nat", diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 6739abfd1521..7c29582d4ec8 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -22,12 +22,9 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>"); MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets"); -static bool -netmap_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool netmap_tg_check(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { pr_debug("NETMAP:check: bad MAP_IPS.\n"); @@ -41,24 +38,23 @@ netmap_tg_check(const char *tablename, const void *e, } static unsigned int -netmap_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +netmap_tg(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; __be32 new_ip, netmask; - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; struct nf_nat_range newrange; - NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING - || hooknum == NF_INET_POST_ROUTING - || hooknum == NF_INET_LOCAL_OUT); + NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_POST_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT); ct = nf_ct_get(skb, &ctinfo); netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); - if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_OUT) + if (par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT) new_ip = ip_hdr(skb)->daddr & ~netmask; else new_ip = ip_hdr(skb)->saddr & ~netmask; @@ -70,12 +66,12 @@ netmap_tg(struct sk_buff *skb, const struct net_device *in, mr->range[0].min, mr->range[0].max }); /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(hooknum)); + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); } static struct xt_target netmap_tg_reg __read_mostly = { .name = "NETMAP", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = netmap_tg, .targetsize = sizeof(struct nf_nat_multi_range_compat), .table = "nat", diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 5c6292449d13..698e5e78685b 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -26,12 +26,9 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); /* FIXME: Take multiple ranges --RR */ -static bool -redirect_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool redirect_tg_check(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { pr_debug("redirect_check: bad MAP_IPS.\n"); @@ -45,24 +42,22 @@ redirect_tg_check(const char *tablename, const void *e, } static unsigned int -redirect_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +redirect_tg(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; __be32 newdst; - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; struct nf_nat_range newrange; - NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING - || hooknum == NF_INET_LOCAL_OUT); + NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT); ct = nf_ct_get(skb, &ctinfo); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); /* Local packets: make them go to loopback */ - if (hooknum == NF_INET_LOCAL_OUT) + if (par->hooknum == NF_INET_LOCAL_OUT) newdst = htonl(0x7F000001); else { struct in_device *indev; @@ -92,7 +87,7 @@ redirect_tg(struct sk_buff *skb, const struct net_device *in, static struct xt_target redirect_tg_reg __read_mostly = { .name = "REDIRECT", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = redirect_tg, .targetsize = sizeof(struct nf_nat_multi_range_compat), .table = "nat", diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 2639872849da..0b4b6e0ff2b9 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -136,11 +136,9 @@ static inline void send_unreach(struct sk_buff *skb_in, int code) } static unsigned int -reject_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +reject_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_reject_info *reject = targinfo; + const struct ipt_reject_info *reject = par->targinfo; /* WARNING: This code causes reentry within iptables. This means that the iptables jump stack is now crap. We @@ -168,7 +166,7 @@ reject_tg(struct sk_buff *skb, const struct net_device *in, send_unreach(skb, ICMP_PKT_FILTERED); break; case IPT_TCP_RESET: - send_reset(skb, hooknum); + send_reset(skb, par->hooknum); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; @@ -177,13 +175,10 @@ reject_tg(struct sk_buff *skb, const struct net_device *in, return NF_DROP; } -static bool -reject_tg_check(const char *tablename, const void *e_void, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool reject_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_reject_info *rejinfo = targinfo; - const struct ipt_entry *e = e_void; + const struct ipt_reject_info *rejinfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; if (rejinfo->with == IPT_ICMP_ECHOREPLY) { printk("ipt_REJECT: ECHOREPLY no longer supported.\n"); @@ -201,7 +196,7 @@ reject_tg_check(const char *tablename, const void *e_void, static struct xt_target reject_tg_reg __read_mostly = { .name = "REJECT", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = reject_tg, .targetsize = sizeof(struct ipt_reject_info), .table = "filter", diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index 30eed65e7338..6d76aae90cc0 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -20,12 +20,10 @@ MODULE_DESCRIPTION("Xtables: IPv4 TTL field modification target"); MODULE_LICENSE("GPL"); static unsigned int -ttl_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +ttl_tg(struct sk_buff *skb, const struct xt_target_param *par) { struct iphdr *iph; - const struct ipt_TTL_info *info = targinfo; + const struct ipt_TTL_info *info = par->targinfo; int new_ttl; if (!skb_make_writable(skb, skb->len)) @@ -61,12 +59,9 @@ ttl_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -ttl_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool ttl_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_TTL_info *info = targinfo; + const struct ipt_TTL_info *info = par->targinfo; if (info->mode > IPT_TTL_MAXMODE) { printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", @@ -80,7 +75,7 @@ ttl_tg_check(const char *tablename, const void *e, static struct xt_target ttl_tg_reg __read_mostly = { .name = "TTL", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = ttl_tg, .targetsize = sizeof(struct ipt_TTL_info), .table = "mangle", diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index b192756c6d0d..18a2826b57c6 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -281,18 +281,14 @@ alloc_failure: } static unsigned int -ulog_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +ulog_tg(struct sk_buff *skb, const struct xt_target_param *par) { - struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; - - ipt_ulog_packet(hooknum, skb, in, out, loginfo, NULL); - + ipt_ulog_packet(par->hooknum, skb, par->in, par->out, + par->targinfo, NULL); return XT_CONTINUE; } -static void ipt_logfn(unsigned int pf, +static void ipt_logfn(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -317,12 +313,9 @@ static void ipt_logfn(unsigned int pf, ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); } -static bool -ulog_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hookmask) +static bool ulog_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_ulog_info *loginfo = targinfo; + const struct ipt_ulog_info *loginfo = par->targinfo; if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { pr_debug("ipt_ULOG: prefix term %i\n", @@ -374,7 +367,7 @@ static int ulog_tg_compat_to_user(void __user *dst, void *src) static struct xt_target ulog_tg_reg __read_mostly = { .name = "ULOG", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = ulog_tg, .targetsize = sizeof(struct ipt_ulog_info), .checkentry = ulog_tg_check, @@ -419,7 +412,7 @@ static int __init ulog_tg_init(void) return ret; } if (nflog) - nf_log_register(PF_INET, &ipt_ulog_logger); + nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); return 0; } diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 462a22c97877..88762f02779d 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -30,12 +30,9 @@ static inline bool match_type(const struct net_device *dev, __be32 addr, } static bool -addrtype_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_addrtype_info *info = matchinfo; + const struct ipt_addrtype_info *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); bool ret = true; @@ -50,20 +47,17 @@ addrtype_mt_v0(const struct sk_buff *skb, const struct net_device *in, } static bool -addrtype_mt_v1(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_addrtype_info_v1 *info = matchinfo; + const struct ipt_addrtype_info_v1 *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); const struct net_device *dev = NULL; bool ret = true; if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) - dev = in; + dev = par->in; else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) - dev = out; + dev = par->out; if (info->source) ret &= match_type(dev, iph->saddr, info->source) ^ @@ -74,12 +68,9 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct net_device *in, return ret; } -static bool -addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) { - struct ipt_addrtype_info_v1 *info = matchinfo; + struct ipt_addrtype_info_v1 *info = par->matchinfo; if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN && info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { @@ -88,14 +79,16 @@ addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void, return false; } - if (hook_mask & (1 << NF_INET_PRE_ROUTING | 1 << NF_INET_LOCAL_IN) && + if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN)) && info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { printk(KERN_ERR "ipt_addrtype: output interface limitation " "not valid in PRE_ROUTING and INPUT\n"); return false; } - if (hook_mask & (1 << NF_INET_POST_ROUTING | 1 << NF_INET_LOCAL_OUT) && + if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT)) && info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) { printk(KERN_ERR "ipt_addrtype: input interface limitation " "not valid in POST_ROUTING and OUTPUT\n"); @@ -108,14 +101,14 @@ addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void, static struct xt_match addrtype_mt_reg[] __read_mostly = { { .name = "addrtype", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = addrtype_mt_v0, .matchsize = sizeof(struct ipt_addrtype_info), .me = THIS_MODULE }, { .name = "addrtype", - .family = AF_INET, + .family = NFPROTO_IPV4, .revision = 1, .match = addrtype_mt_v1, .checkentry = addrtype_mt_checkentry_v1, diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index e977989629c7..0104c0b399de 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -36,27 +36,23 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) return r; } -static bool -ah_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par) { struct ip_auth_hdr _ahdr; const struct ip_auth_hdr *ah; - const struct ipt_ah *ahinfo = matchinfo; + const struct ipt_ah *ahinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) + if (par->fragoff != 0) return false; - ah = skb_header_pointer(skb, protoff, - sizeof(_ahdr), &_ahdr); + ah = skb_header_pointer(skb, par->thoff, sizeof(_ahdr), &_ahdr); if (ah == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ duprintf("Dropping evil AH tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return 0; } @@ -65,13 +61,9 @@ ah_mt(const struct sk_buff *skb, const struct net_device *in, !!(ahinfo->invflags & IPT_AH_INV_SPI)); } -/* Called when user tries to insert an entry of this type. */ -static bool -ah_mt_check(const char *tablename, const void *ip_void, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool ah_mt_check(const struct xt_mtchk_param *par) { - const struct ipt_ah *ahinfo = matchinfo; + const struct ipt_ah *ahinfo = par->matchinfo; /* Must specify no unknown invflags */ if (ahinfo->invflags & ~IPT_AH_INV_MASK) { @@ -83,7 +75,7 @@ ah_mt_check(const char *tablename, const void *ip_void, static struct xt_match ah_mt_reg __read_mostly = { .name = "ah", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = ah_mt, .matchsize = sizeof(struct ipt_ah), .proto = IPPROTO_AH, diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 749de8284ce5..6289b64144c6 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -67,12 +67,9 @@ static inline bool match_tcp(const struct sk_buff *skb, return true; } -static bool -ecn_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_ecn_info *info = matchinfo; + const struct ipt_ecn_info *info = par->matchinfo; if (info->operation & IPT_ECN_OP_MATCH_IP) if (!match_ip(skb, info)) @@ -81,20 +78,17 @@ ecn_mt(const struct sk_buff *skb, const struct net_device *in, if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { if (ip_hdr(skb)->protocol != IPPROTO_TCP) return false; - if (!match_tcp(skb, info, hotdrop)) + if (!match_tcp(skb, info, par->hotdrop)) return false; } return true; } -static bool -ecn_mt_check(const char *tablename, const void *ip_void, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool ecn_mt_check(const struct xt_mtchk_param *par) { - const struct ipt_ecn_info *info = matchinfo; - const struct ipt_ip *ip = ip_void; + const struct ipt_ecn_info *info = par->matchinfo; + const struct ipt_ip *ip = par->entryinfo; if (info->operation & IPT_ECN_OP_MATCH_MASK) return false; @@ -114,7 +108,7 @@ ecn_mt_check(const char *tablename, const void *ip_void, static struct xt_match ecn_mt_reg __read_mostly = { .name = "ecn", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = ecn_mt, .matchsize = sizeof(struct ipt_ecn_info), .checkentry = ecn_mt_check, diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c deleted file mode 100644 index 3974d7cae5c0..000000000000 --- a/net/ipv4/netfilter/ipt_recent.c +++ /dev/null @@ -1,501 +0,0 @@ -/* - * Copyright (c) 2006 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This is a replacement of the old ipt_recent module, which carried the - * following copyright notice: - * - * Author: Stephen Frost <sfrost@snowman.net> - * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org - */ -#include <linux/init.h> -#include <linux/ip.h> -#include <linux/moduleparam.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/string.h> -#include <linux/ctype.h> -#include <linux/list.h> -#include <linux/random.h> -#include <linux/jhash.h> -#include <linux/bitops.h> -#include <linux/skbuff.h> -#include <linux/inet.h> -#include <net/net_namespace.h> - -#include <linux/netfilter/x_tables.h> -#include <linux/netfilter_ipv4/ipt_recent.h> - -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching for IPv4"); -MODULE_LICENSE("GPL"); - -static unsigned int ip_list_tot = 100; -static unsigned int ip_pkt_list_tot = 20; -static unsigned int ip_list_hash_size = 0; -static unsigned int ip_list_perms = 0644; -static unsigned int ip_list_uid = 0; -static unsigned int ip_list_gid = 0; -module_param(ip_list_tot, uint, 0400); -module_param(ip_pkt_list_tot, uint, 0400); -module_param(ip_list_hash_size, uint, 0400); -module_param(ip_list_perms, uint, 0400); -module_param(ip_list_uid, uint, 0400); -module_param(ip_list_gid, uint, 0400); -MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); -MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)"); -MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); -MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files"); -MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files"); -MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files"); - -struct recent_entry { - struct list_head list; - struct list_head lru_list; - __be32 addr; - u_int8_t ttl; - u_int8_t index; - u_int16_t nstamps; - unsigned long stamps[0]; -}; - -struct recent_table { - struct list_head list; - char name[IPT_RECENT_NAME_LEN]; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *proc; -#endif - unsigned int refcnt; - unsigned int entries; - struct list_head lru_list; - struct list_head iphash[0]; -}; - -static LIST_HEAD(tables); -static DEFINE_SPINLOCK(recent_lock); -static DEFINE_MUTEX(recent_mutex); - -#ifdef CONFIG_PROC_FS -static struct proc_dir_entry *proc_dir; -static const struct file_operations recent_fops; -#endif - -static u_int32_t hash_rnd; -static int hash_rnd_initted; - -static unsigned int recent_entry_hash(__be32 addr) -{ - if (!hash_rnd_initted) { - get_random_bytes(&hash_rnd, 4); - hash_rnd_initted = 1; - } - return jhash_1word((__force u32)addr, hash_rnd) & (ip_list_hash_size - 1); -} - -static struct recent_entry * -recent_entry_lookup(const struct recent_table *table, __be32 addr, u_int8_t ttl) -{ - struct recent_entry *e; - unsigned int h; - - h = recent_entry_hash(addr); - list_for_each_entry(e, &table->iphash[h], list) - if (e->addr == addr && (ttl == e->ttl || !ttl || !e->ttl)) - return e; - return NULL; -} - -static void recent_entry_remove(struct recent_table *t, struct recent_entry *e) -{ - list_del(&e->list); - list_del(&e->lru_list); - kfree(e); - t->entries--; -} - -static struct recent_entry * -recent_entry_init(struct recent_table *t, __be32 addr, u_int8_t ttl) -{ - struct recent_entry *e; - - if (t->entries >= ip_list_tot) { - e = list_entry(t->lru_list.next, struct recent_entry, lru_list); - recent_entry_remove(t, e); - } - e = kmalloc(sizeof(*e) + sizeof(e->stamps[0]) * ip_pkt_list_tot, - GFP_ATOMIC); - if (e == NULL) - return NULL; - e->addr = addr; - e->ttl = ttl; - e->stamps[0] = jiffies; - e->nstamps = 1; - e->index = 1; - list_add_tail(&e->list, &t->iphash[recent_entry_hash(addr)]); - list_add_tail(&e->lru_list, &t->lru_list); - t->entries++; - return e; -} - -static void recent_entry_update(struct recent_table *t, struct recent_entry *e) -{ - e->stamps[e->index++] = jiffies; - if (e->index > e->nstamps) - e->nstamps = e->index; - e->index %= ip_pkt_list_tot; - list_move_tail(&e->lru_list, &t->lru_list); -} - -static struct recent_table *recent_table_lookup(const char *name) -{ - struct recent_table *t; - - list_for_each_entry(t, &tables, list) - if (!strcmp(t->name, name)) - return t; - return NULL; -} - -static void recent_table_flush(struct recent_table *t) -{ - struct recent_entry *e, *next; - unsigned int i; - - for (i = 0; i < ip_list_hash_size; i++) - list_for_each_entry_safe(e, next, &t->iphash[i], list) - recent_entry_remove(t, e); -} - -static bool -recent_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) -{ - const struct ipt_recent_info *info = matchinfo; - struct recent_table *t; - struct recent_entry *e; - __be32 addr; - u_int8_t ttl; - bool ret = info->invert; - - if (info->side == IPT_RECENT_DEST) - addr = ip_hdr(skb)->daddr; - else - addr = ip_hdr(skb)->saddr; - - ttl = ip_hdr(skb)->ttl; - /* use TTL as seen before forwarding */ - if (out && !skb->sk) - ttl++; - - spin_lock_bh(&recent_lock); - t = recent_table_lookup(info->name); - e = recent_entry_lookup(t, addr, - info->check_set & IPT_RECENT_TTL ? ttl : 0); - if (e == NULL) { - if (!(info->check_set & IPT_RECENT_SET)) - goto out; - e = recent_entry_init(t, addr, ttl); - if (e == NULL) - *hotdrop = true; - ret = !ret; - goto out; - } - - if (info->check_set & IPT_RECENT_SET) - ret = !ret; - else if (info->check_set & IPT_RECENT_REMOVE) { - recent_entry_remove(t, e); - ret = !ret; - } else if (info->check_set & (IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) { - unsigned long time = jiffies - info->seconds * HZ; - unsigned int i, hits = 0; - - for (i = 0; i < e->nstamps; i++) { - if (info->seconds && time_after(time, e->stamps[i])) - continue; - if (++hits >= info->hit_count) { - ret = !ret; - break; - } - } - } - - if (info->check_set & IPT_RECENT_SET || - (info->check_set & IPT_RECENT_UPDATE && ret)) { - recent_entry_update(t, e); - e->ttl = ttl; - } -out: - spin_unlock_bh(&recent_lock); - return ret; -} - -static bool -recent_mt_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) -{ - const struct ipt_recent_info *info = matchinfo; - struct recent_table *t; - unsigned i; - bool ret = false; - - if (hweight8(info->check_set & - (IPT_RECENT_SET | IPT_RECENT_REMOVE | - IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) != 1) - return false; - if ((info->check_set & (IPT_RECENT_SET | IPT_RECENT_REMOVE)) && - (info->seconds || info->hit_count)) - return false; - if (info->hit_count > ip_pkt_list_tot) - return false; - if (info->name[0] == '\0' || - strnlen(info->name, IPT_RECENT_NAME_LEN) == IPT_RECENT_NAME_LEN) - return false; - - mutex_lock(&recent_mutex); - t = recent_table_lookup(info->name); - if (t != NULL) { - t->refcnt++; - ret = true; - goto out; - } - - t = kzalloc(sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size, - GFP_KERNEL); - if (t == NULL) - goto out; - t->refcnt = 1; - strcpy(t->name, info->name); - INIT_LIST_HEAD(&t->lru_list); - for (i = 0; i < ip_list_hash_size; i++) - INIT_LIST_HEAD(&t->iphash[i]); -#ifdef CONFIG_PROC_FS - t->proc = proc_create(t->name, ip_list_perms, proc_dir, &recent_fops); - if (t->proc == NULL) { - kfree(t); - goto out; - } - t->proc->uid = ip_list_uid; - t->proc->gid = ip_list_gid; - t->proc->data = t; -#endif - spin_lock_bh(&recent_lock); - list_add_tail(&t->list, &tables); - spin_unlock_bh(&recent_lock); - ret = true; -out: - mutex_unlock(&recent_mutex); - return ret; -} - -static void recent_mt_destroy(const struct xt_match *match, void *matchinfo) -{ - const struct ipt_recent_info *info = matchinfo; - struct recent_table *t; - - mutex_lock(&recent_mutex); - t = recent_table_lookup(info->name); - if (--t->refcnt == 0) { - spin_lock_bh(&recent_lock); - list_del(&t->list); - spin_unlock_bh(&recent_lock); -#ifdef CONFIG_PROC_FS - remove_proc_entry(t->name, proc_dir); -#endif - recent_table_flush(t); - kfree(t); - } - mutex_unlock(&recent_mutex); -} - -#ifdef CONFIG_PROC_FS -struct recent_iter_state { - struct recent_table *table; - unsigned int bucket; -}; - -static void *recent_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(recent_lock) -{ - struct recent_iter_state *st = seq->private; - const struct recent_table *t = st->table; - struct recent_entry *e; - loff_t p = *pos; - - spin_lock_bh(&recent_lock); - - for (st->bucket = 0; st->bucket < ip_list_hash_size; st->bucket++) - list_for_each_entry(e, &t->iphash[st->bucket], list) - if (p-- == 0) - return e; - return NULL; -} - -static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct recent_iter_state *st = seq->private; - const struct recent_table *t = st->table; - struct recent_entry *e = v; - struct list_head *head = e->list.next; - - while (head == &t->iphash[st->bucket]) { - if (++st->bucket >= ip_list_hash_size) - return NULL; - head = t->iphash[st->bucket].next; - } - (*pos)++; - return list_entry(head, struct recent_entry, list); -} - -static void recent_seq_stop(struct seq_file *s, void *v) - __releases(recent_lock) -{ - spin_unlock_bh(&recent_lock); -} - -static int recent_seq_show(struct seq_file *seq, void *v) -{ - const struct recent_entry *e = v; - unsigned int i; - - i = (e->index - 1) % ip_pkt_list_tot; - seq_printf(seq, "src=%u.%u.%u.%u ttl: %u last_seen: %lu oldest_pkt: %u", - NIPQUAD(e->addr), e->ttl, e->stamps[i], e->index); - for (i = 0; i < e->nstamps; i++) - seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]); - seq_printf(seq, "\n"); - return 0; -} - -static const struct seq_operations recent_seq_ops = { - .start = recent_seq_start, - .next = recent_seq_next, - .stop = recent_seq_stop, - .show = recent_seq_show, -}; - -static int recent_seq_open(struct inode *inode, struct file *file) -{ - struct proc_dir_entry *pde = PDE(inode); - struct recent_iter_state *st; - - st = __seq_open_private(file, &recent_seq_ops, sizeof(*st)); - if (st == NULL) - return -ENOMEM; - - st->table = pde->data; - return 0; -} - -static ssize_t recent_proc_write(struct file *file, const char __user *input, - size_t size, loff_t *loff) -{ - const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); - struct recent_table *t = pde->data; - struct recent_entry *e; - char buf[sizeof("+255.255.255.255")], *c = buf; - __be32 addr; - int add; - - if (size > sizeof(buf)) - size = sizeof(buf); - if (copy_from_user(buf, input, size)) - return -EFAULT; - while (isspace(*c)) - c++; - - if (size - (c - buf) < 5) - return c - buf; - if (!strncmp(c, "clear", 5)) { - c += 5; - spin_lock_bh(&recent_lock); - recent_table_flush(t); - spin_unlock_bh(&recent_lock); - return c - buf; - } - - switch (*c) { - case '-': - add = 0; - c++; - break; - case '+': - c++; - default: - add = 1; - break; - } - addr = in_aton(c); - - spin_lock_bh(&recent_lock); - e = recent_entry_lookup(t, addr, 0); - if (e == NULL) { - if (add) - recent_entry_init(t, addr, 0); - } else { - if (add) - recent_entry_update(t, e); - else - recent_entry_remove(t, e); - } - spin_unlock_bh(&recent_lock); - return size; -} - -static const struct file_operations recent_fops = { - .open = recent_seq_open, - .read = seq_read, - .write = recent_proc_write, - .release = seq_release_private, - .owner = THIS_MODULE, -}; -#endif /* CONFIG_PROC_FS */ - -static struct xt_match recent_mt_reg __read_mostly = { - .name = "recent", - .family = AF_INET, - .match = recent_mt, - .matchsize = sizeof(struct ipt_recent_info), - .checkentry = recent_mt_check, - .destroy = recent_mt_destroy, - .me = THIS_MODULE, -}; - -static int __init recent_mt_init(void) -{ - int err; - - if (!ip_list_tot || !ip_pkt_list_tot || ip_pkt_list_tot > 255) - return -EINVAL; - ip_list_hash_size = 1 << fls(ip_list_tot); - - err = xt_register_match(&recent_mt_reg); -#ifdef CONFIG_PROC_FS - if (err) - return err; - proc_dir = proc_mkdir("ipt_recent", init_net.proc_net); - if (proc_dir == NULL) { - xt_unregister_match(&recent_mt_reg); - err = -ENOMEM; - } -#endif - return err; -} - -static void __exit recent_mt_exit(void) -{ - BUG_ON(!list_empty(&tables)); - xt_unregister_match(&recent_mt_reg); -#ifdef CONFIG_PROC_FS - remove_proc_entry("ipt_recent", init_net.proc_net); -#endif -} - -module_init(recent_mt_init); -module_exit(recent_mt_exit); diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c index e0b8caeb710c..297f1cbf4ff5 100644 --- a/net/ipv4/netfilter/ipt_ttl.c +++ b/net/ipv4/netfilter/ipt_ttl.c @@ -18,12 +18,9 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("Xtables: IPv4 TTL field match"); MODULE_LICENSE("GPL"); -static bool -ttl_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_ttl_info *info = matchinfo; + const struct ipt_ttl_info *info = par->matchinfo; const u8 ttl = ip_hdr(skb)->ttl; switch (info->mode) { @@ -46,7 +43,7 @@ ttl_mt(const struct sk_buff *skb, const struct net_device *in, static struct xt_match ttl_mt_reg __read_mostly = { .name = "ttl", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = ttl_mt, .matchsize = sizeof(struct ipt_ttl_info), .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 1ea677dcf845..c9224310ebae 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -70,7 +70,7 @@ ipt_local_in_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_local_in_net(in, out)->ipv4.iptable_filter); + dev_net(in)->ipv4.iptable_filter); } static unsigned int @@ -81,7 +81,7 @@ ipt_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_forward_net(in, out)->ipv4.iptable_filter); + dev_net(in)->ipv4.iptable_filter); } static unsigned int @@ -101,7 +101,7 @@ ipt_local_out_hook(unsigned int hook, } return ipt_do_table(skb, hook, in, out, - nf_local_out_net(in, out)->ipv4.iptable_filter); + dev_net(out)->ipv4.iptable_filter); } static struct nf_hook_ops ipt_ops[] __read_mostly = { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index da59182f2226..69f2c4287146 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -81,7 +81,7 @@ ipt_pre_routing_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_pre_routing_net(in, out)->ipv4.iptable_mangle); + dev_net(in)->ipv4.iptable_mangle); } static unsigned int @@ -92,7 +92,7 @@ ipt_post_routing_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_post_routing_net(in, out)->ipv4.iptable_mangle); + dev_net(out)->ipv4.iptable_mangle); } static unsigned int @@ -103,7 +103,7 @@ ipt_local_in_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_local_in_net(in, out)->ipv4.iptable_mangle); + dev_net(in)->ipv4.iptable_mangle); } static unsigned int @@ -114,7 +114,7 @@ ipt_forward_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_forward_net(in, out)->ipv4.iptable_mangle); + dev_net(in)->ipv4.iptable_mangle); } static unsigned int @@ -147,7 +147,7 @@ ipt_local_hook(unsigned int hook, tos = iph->tos; ret = ipt_do_table(skb, hook, in, out, - nf_local_out_net(in, out)->ipv4.iptable_mangle); + dev_net(out)->ipv4.iptable_mangle); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { iph = ip_hdr(skb); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index fddce7754b72..8faebfe638f1 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -53,7 +53,7 @@ ipt_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_pre_routing_net(in, out)->ipv4.iptable_raw); + dev_net(in)->ipv4.iptable_raw); } static unsigned int @@ -72,7 +72,7 @@ ipt_local_hook(unsigned int hook, return NF_ACCEPT; } return ipt_do_table(skb, hook, in, out, - nf_local_out_net(in, out)->ipv4.iptable_raw); + dev_net(out)->ipv4.iptable_raw); } /* 'raw' is the very first table. */ diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index db6d312128e1..36f3be3cc428 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -73,7 +73,7 @@ ipt_local_in_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_local_in_net(in, out)->ipv4.iptable_security); + dev_net(in)->ipv4.iptable_security); } static unsigned int @@ -84,7 +84,7 @@ ipt_forward_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_forward_net(in, out)->ipv4.iptable_security); + dev_net(in)->ipv4.iptable_security); } static unsigned int @@ -103,7 +103,7 @@ ipt_local_out_hook(unsigned int hook, return NF_ACCEPT; } return ipt_do_table(skb, hook, in, out, - nf_local_out_net(in, out)->ipv4.iptable_security); + dev_net(out)->ipv4.iptable_security); } static struct nf_hook_ops ipt_ops[] __read_mostly = { diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 5a955c440364..4a7c35275396 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -1,3 +1,4 @@ + /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> * @@ -24,6 +25,7 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> #include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/ipv4/nf_defrag_ipv4.h> int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, struct nf_conn *ct, @@ -63,23 +65,6 @@ static int ipv4_print_tuple(struct seq_file *s, NIPQUAD(tuple->dst.u3.ip)); } -/* Returns new sk_buff, or NULL */ -static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) -{ - int err; - - skb_orphan(skb); - - local_bh_disable(); - err = ip_defrag(skb, user); - local_bh_enable(); - - if (!err) - ip_send_check(ip_hdr(skb)); - - return err; -} - static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum) { @@ -144,35 +129,13 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - /* Previously seen (loopback)? Ignore. Do this before - fragment check. */ - if (skb->nfct) - return NF_ACCEPT; - - /* Gather fragments. */ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (nf_ct_ipv4_gather_frags(skb, - hooknum == NF_INET_PRE_ROUTING ? - IP_DEFRAG_CONNTRACK_IN : - IP_DEFRAG_CONNTRACK_OUT)) - return NF_STOLEN; - } - return NF_ACCEPT; -} - static unsigned int ipv4_conntrack_in(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return nf_conntrack_in(PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb); } static unsigned int ipv4_conntrack_local(unsigned int hooknum, @@ -188,20 +151,13 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum, printk("ipt_hook: happy cracking.\n"); return NF_ACCEPT; } - return nf_conntrack_in(PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb); } /* Connection tracking may drop packets, but never alters them, so make it the first hook. */ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { { - .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_DEFRAG, - }, - { .hook = ipv4_conntrack_in, .owner = THIS_MODULE, .pf = PF_INET, @@ -209,13 +165,6 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { .priority = NF_IP_PRI_CONNTRACK, }, { - .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_CONNTRACK_DEFRAG, - }, - { .hook = ipv4_conntrack_local, .owner = THIS_MODULE, .pf = PF_INET, @@ -254,7 +203,7 @@ static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, .procname = "ip_conntrack_count", - .data = &nf_conntrack_count, + .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, .proc_handler = &proc_dointvec, @@ -270,7 +219,7 @@ static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, .procname = "ip_conntrack_checksum", - .data = &nf_conntrack_checksum, + .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -278,7 +227,7 @@ static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, .procname = "ip_conntrack_log_invalid", - .data = &nf_ct_log_invalid, + .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, @@ -323,7 +272,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) return -EINVAL; } - h = nf_conntrack_find_get(&tuple); + h = nf_conntrack_find_get(sock_net(sk), &tuple); if (h) { struct sockaddr_in sin; struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); @@ -422,6 +371,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) int ret = 0; need_conntrack(); + nf_defrag_ipv4_enable(); ret = nf_register_sockopt(&so_getorigdst); if (ret < 0) { diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 3a020720e40b..313ebf00ee36 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -21,18 +21,20 @@ #include <net/netfilter/nf_conntrack_acct.h> struct ct_iter_state { + struct seq_net_private p; unsigned int bucket; }; static struct hlist_node *ct_get_first(struct seq_file *seq) { + struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { - n = rcu_dereference(nf_conntrack_hash[st->bucket].first); + n = rcu_dereference(net->ct.hash[st->bucket].first); if (n) return n; } @@ -42,13 +44,14 @@ static struct hlist_node *ct_get_first(struct seq_file *seq) static struct hlist_node *ct_get_next(struct seq_file *seq, struct hlist_node *head) { + struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_conntrack_htable_size) return NULL; - head = rcu_dereference(nf_conntrack_hash[st->bucket].first); + head = rcu_dereference(net->ct.hash[st->bucket].first); } return head; } @@ -158,8 +161,8 @@ static const struct seq_operations ct_seq_ops = { static int ct_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &ct_seq_ops, - sizeof(struct ct_iter_state)); + return seq_open_net(inode, file, &ct_seq_ops, + sizeof(struct ct_iter_state)); } static const struct file_operations ct_file_ops = { @@ -167,21 +170,23 @@ static const struct file_operations ct_file_ops = { .open = ct_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; /* expects */ struct ct_expect_iter_state { + struct seq_net_private p; unsigned int bucket; }; static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { + struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - n = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + n = rcu_dereference(net->ct.expect_hash[st->bucket].first); if (n) return n; } @@ -191,13 +196,14 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) static struct hlist_node *ct_expect_get_next(struct seq_file *seq, struct hlist_node *head) { + struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_ct_expect_hsize) return NULL; - head = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + head = rcu_dereference(net->ct.expect_hash[st->bucket].first); } return head; } @@ -265,8 +271,8 @@ static const struct seq_operations exp_seq_ops = { static int exp_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &exp_seq_ops, - sizeof(struct ct_expect_iter_state)); + return seq_open_net(inode, file, &exp_seq_ops, + sizeof(struct ct_expect_iter_state)); } static const struct file_operations ip_exp_file_ops = { @@ -274,11 +280,12 @@ static const struct file_operations ip_exp_file_ops = { .open = exp_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq_file_net(seq); int cpu; if (*pos == 0) @@ -288,7 +295,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) if (!cpu_possible(cpu)) continue; *pos = cpu+1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(net->ct.stat, cpu); } return NULL; @@ -296,13 +303,14 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net *net = seq_file_net(seq); int cpu; for (cpu = *pos; cpu < NR_CPUS; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(net->ct.stat, cpu); } return NULL; @@ -314,7 +322,8 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v) static int ct_cpu_seq_show(struct seq_file *seq, void *v) { - unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); + struct net *net = seq_file_net(seq); + unsigned int nr_conntracks = atomic_read(&net->ct.count); const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { @@ -354,7 +363,8 @@ static const struct seq_operations ct_cpu_seq_ops = { static int ct_cpu_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &ct_cpu_seq_ops); + return seq_open_net(inode, file, &ct_cpu_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations ct_cpu_seq_fops = { @@ -362,39 +372,54 @@ static const struct file_operations ct_cpu_seq_fops = { .open = ct_cpu_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; -int __init nf_conntrack_ipv4_compat_init(void) +static int __net_init ip_conntrack_net_init(struct net *net) { struct proc_dir_entry *proc, *proc_exp, *proc_stat; - proc = proc_net_fops_create(&init_net, "ip_conntrack", 0440, &ct_file_ops); + proc = proc_net_fops_create(net, "ip_conntrack", 0440, &ct_file_ops); if (!proc) goto err1; - proc_exp = proc_net_fops_create(&init_net, "ip_conntrack_expect", 0440, + proc_exp = proc_net_fops_create(net, "ip_conntrack_expect", 0440, &ip_exp_file_ops); if (!proc_exp) goto err2; proc_stat = proc_create("ip_conntrack", S_IRUGO, - init_net.proc_net_stat, &ct_cpu_seq_fops); + net->proc_net_stat, &ct_cpu_seq_fops); if (!proc_stat) goto err3; return 0; err3: - proc_net_remove(&init_net, "ip_conntrack_expect"); + proc_net_remove(net, "ip_conntrack_expect"); err2: - proc_net_remove(&init_net, "ip_conntrack"); + proc_net_remove(net, "ip_conntrack"); err1: return -ENOMEM; } +static void __net_exit ip_conntrack_net_exit(struct net *net) +{ + remove_proc_entry("ip_conntrack", net->proc_net_stat); + proc_net_remove(net, "ip_conntrack_expect"); + proc_net_remove(net, "ip_conntrack"); +} + +static struct pernet_operations ip_conntrack_net_ops = { + .init = ip_conntrack_net_init, + .exit = ip_conntrack_net_exit, +}; + +int __init nf_conntrack_ipv4_compat_init(void) +{ + return register_pernet_subsys(&ip_conntrack_net_ops); +} + void __exit nf_conntrack_ipv4_compat_fini(void) { - remove_proc_entry("ip_conntrack", init_net.proc_net_stat); - proc_net_remove(&init_net, "ip_conntrack_expect"); - proc_net_remove(&init_net, "ip_conntrack"); + unregister_pernet_subsys(&ip_conntrack_net_ops); } diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 97791048fa9b..4e8879220222 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -79,7 +79,7 @@ static int icmp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { /* Try to delete connection immediately after all replies: @@ -91,7 +91,7 @@ static int icmp_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); } else { atomic_inc(&ct->proto.icmp.count); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout); } @@ -123,7 +123,7 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ static int -icmp_error_message(struct sk_buff *skb, +icmp_error_message(struct net *net, struct sk_buff *skb, enum ip_conntrack_info *ctinfo, unsigned int hooknum) { @@ -155,7 +155,7 @@ icmp_error_message(struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&innertuple); + h = nf_conntrack_find_get(net, &innertuple); if (!h) { pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; @@ -172,8 +172,8 @@ icmp_error_message(struct sk_buff *skb, /* Small and modified version of icmp_rcv */ static int -icmp_error(struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) +icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmphdr *icmph; struct icmphdr _ih; @@ -181,16 +181,16 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, /* Not enough header? */ icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); if (icmph == NULL) { - if (LOG_INVALID(IPPROTO_ICMP)) + if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: short packet "); return -NF_ACCEPT; } /* See ip_conntrack_proto_tcp.c */ - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip_checksum(skb, hooknum, dataoff, 0)) { - if (LOG_INVALID(IPPROTO_ICMP)) + if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: bad HW ICMP checksum "); return -NF_ACCEPT; @@ -203,7 +203,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, * discarded. */ if (icmph->type > NR_ICMP_TYPES) { - if (LOG_INVALID(IPPROTO_ICMP)) + if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: invalid ICMP type "); return -NF_ACCEPT; @@ -217,7 +217,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, && icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(skb, ctinfo, hooknum); + return icmp_error_message(net, skb, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c new file mode 100644 index 000000000000..aa2c50a180f7 --- /dev/null +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -0,0 +1,96 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/ip.h> +#include <linux/netfilter.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/route.h> +#include <net/ip.h> + +#include <linux/netfilter_ipv4.h> +#include <net/netfilter/ipv4/nf_defrag_ipv4.h> + +/* Returns new sk_buff, or NULL */ +static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) +{ + int err; + + skb_orphan(skb); + + local_bh_disable(); + err = ip_defrag(skb, user); + local_bh_enable(); + + if (!err) + ip_send_check(ip_hdr(skb)); + + return err; +} + +static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + /* Previously seen (loopback)? Ignore. Do this before + fragment check. */ + if (skb->nfct) + return NF_ACCEPT; +#endif + + /* Gather fragments. */ + if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (nf_ct_ipv4_gather_frags(skb, + hooknum == NF_INET_PRE_ROUTING ? + IP_DEFRAG_CONNTRACK_IN : + IP_DEFRAG_CONNTRACK_OUT)) + return NF_STOLEN; + } + return NF_ACCEPT; +} + +static struct nf_hook_ops ipv4_defrag_ops[] = { + { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, + { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, +}; + +static int __init nf_defrag_init(void) +{ + return nf_register_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); +} + +static void __exit nf_defrag_fini(void) +{ + nf_unregister_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); +} + +void nf_defrag_ipv4_enable(void) +{ +} +EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable); + +module_init(nf_defrag_init); +module_exit(nf_defrag_fini); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 6c6a3cba8d50..2ac9eaf1a8c9 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -37,9 +37,6 @@ static struct nf_conntrack_l3proto *l3proto __read_mostly; /* Calculated at init based on memory size */ static unsigned int nf_nat_htable_size __read_mostly; -static int nf_nat_vmalloced; - -static struct hlist_head *bysource __read_mostly; #define MAX_IP_NAT_PROTO 256 static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] @@ -145,7 +142,8 @@ same_src(const struct nf_conn *ct, /* Only called for SRC manip */ static int -find_appropriate_src(const struct nf_conntrack_tuple *tuple, +find_appropriate_src(struct net *net, + const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { @@ -155,7 +153,7 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple, const struct hlist_node *n; rcu_read_lock(); - hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) { + hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { ct = nat->ct; if (same_src(ct, tuple)) { /* Copy source part from reply tuple. */ @@ -231,6 +229,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, struct nf_conn *ct, enum nf_nat_manip_type maniptype) { + struct net *net = nf_ct_net(ct); const struct nf_nat_protocol *proto; /* 1) If this srcip/proto/src-proto-part is currently mapped, @@ -242,7 +241,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, manips not an issue. */ if (maniptype == IP_NAT_MANIP_SRC && !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { - if (find_appropriate_src(orig_tuple, tuple, range)) { + if (find_appropriate_src(net, orig_tuple, tuple, range)) { pr_debug("get_unique_tuple: Found current src map\n"); if (!nf_nat_used_tuple(tuple, ct)) return; @@ -283,6 +282,7 @@ nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple curr_tuple, new_tuple; struct nf_conn_nat *nat; int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); @@ -334,7 +334,8 @@ nf_nat_setup_info(struct nf_conn *ct, /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); nat->ct = ct; - hlist_add_head_rcu(&nat->bysource, &bysource[srchash]); + hlist_add_head_rcu(&nat->bysource, + &net->ipv4.nat_bysource[srchash]); spin_unlock_bh(&nf_nat_lock); } @@ -583,6 +584,40 @@ static struct nf_ct_ext_type nat_extend __read_mostly = { .flags = NF_CT_EXT_F_PREALLOC, }; +static int __net_init nf_nat_net_init(struct net *net) +{ + net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, + &net->ipv4.nat_vmalloced); + if (!net->ipv4.nat_bysource) + return -ENOMEM; + return 0; +} + +/* Clear NAT section of all conntracks, in case we're loaded again. */ +static int clean_nat(struct nf_conn *i, void *data) +{ + struct nf_conn_nat *nat = nfct_nat(i); + + if (!nat) + return 0; + memset(nat, 0, sizeof(*nat)); + i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); + return 0; +} + +static void __net_exit nf_nat_net_exit(struct net *net) +{ + nf_ct_iterate_cleanup(net, &clean_nat, NULL); + synchronize_rcu(); + nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, + nf_nat_htable_size); +} + +static struct pernet_operations nf_nat_net_ops = { + .init = nf_nat_net_init, + .exit = nf_nat_net_exit, +}; + static int __init nf_nat_init(void) { size_t i; @@ -599,12 +634,9 @@ static int __init nf_nat_init(void) /* Leave them the same for the moment. */ nf_nat_htable_size = nf_conntrack_htable_size; - bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, - &nf_nat_vmalloced); - if (!bysource) { - ret = -ENOMEM; + ret = register_pernet_subsys(&nf_nat_net_ops); + if (ret < 0) goto cleanup_extend; - } /* Sew in builtin protocols. */ spin_lock_bh(&nf_nat_lock); @@ -629,23 +661,9 @@ static int __init nf_nat_init(void) return ret; } -/* Clear NAT section of all conntracks, in case we're loaded again. */ -static int clean_nat(struct nf_conn *i, void *data) -{ - struct nf_conn_nat *nat = nfct_nat(i); - - if (!nat) - return 0; - memset(nat, 0, sizeof(*nat)); - i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); - return 0; -} - static void __exit nf_nat_cleanup(void) { - nf_ct_iterate_cleanup(&clean_nat, NULL); - synchronize_rcu(); - nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size); + unregister_pernet_subsys(&nf_nat_net_ops); nf_ct_l3proto_put(l3proto); nf_ct_extend_unregister(&nat_extend); rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 112dcfa12900..cf7a42bf9820 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -193,7 +193,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, CTINFO2DIR(ctinfo)); - nf_conntrack_event_cache(IPCT_NATSEQADJ, skb); + nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); } return 1; } diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index da3d91a5ef5c..9eb171056c63 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -40,6 +40,7 @@ MODULE_ALIAS("ip_nat_pptp"); static void pptp_nat_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) { + struct net *net = nf_ct_net(ct); const struct nf_conn *master = ct->master; struct nf_conntrack_expect *other_exp; struct nf_conntrack_tuple t; @@ -73,7 +74,7 @@ static void pptp_nat_expected(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple_ip(&t); - other_exp = nf_ct_expect_find_get(&t); + other_exp = nf_ct_expect_find_get(net, &t); if (other_exp) { nf_ct_unexpect_related(other_exp); nf_ct_expect_put(other_exp); diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index e8b4d0d4439e..bea54a685109 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -33,7 +33,7 @@ static struct struct ipt_replace repl; struct ipt_standard entries[3]; struct ipt_error term; -} nat_initial_table __initdata = { +} nat_initial_table __net_initdata = { .repl = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, @@ -58,47 +58,42 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table __nat_table = { +static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .lock = __RW_LOCK_UNLOCKED(__nat_table.lock), .me = THIS_MODULE, .af = AF_INET, }; -static struct xt_table *nat_table; /* Source NAT */ -static unsigned int ipt_snat_target(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +static unsigned int +ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; - NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING); + NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); ct = nf_ct_get(skb, &ctinfo); /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); - NF_CT_ASSERT(out); + NF_CT_ASSERT(par->out != NULL); return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); } /* Before 2.6.11 we did implicit source NAT if required. Warn about change. */ -static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) +static void warn_if_extra_mangle(struct net *net, __be32 dstip, __be32 srcip) { static int warned = 0; struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; struct rtable *rt; - if (ip_route_output_key(&init_net, &rt, &fl) != 0) + if (ip_route_output_key(net, &rt, &fl) != 0) return; if (rt->rt_src != srcip && !warned) { @@ -110,40 +105,32 @@ static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) ip_rt_put(rt); } -static unsigned int ipt_dnat_target(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +static unsigned int +ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; - NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING || - hooknum == NF_INET_LOCAL_OUT); + NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT); ct = nf_ct_get(skb, &ctinfo); /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - if (hooknum == NF_INET_LOCAL_OUT && + if (par->hooknum == NF_INET_LOCAL_OUT && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) - warn_if_extra_mangle(ip_hdr(skb)->daddr, + warn_if_extra_mangle(dev_net(par->out), ip_hdr(skb)->daddr, mr->range[0].min_ip); return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST); } -static bool ipt_snat_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool ipt_snat_checkentry(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; /* Must be a valid range */ if (mr->rangesize != 1) { @@ -153,13 +140,9 @@ static bool ipt_snat_checkentry(const char *tablename, return true; } -static bool ipt_dnat_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool ipt_dnat_checkentry(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; /* Must be a valid range */ if (mr->rangesize != 1) { @@ -194,9 +177,10 @@ int nf_nat_rule_find(struct sk_buff *skb, const struct net_device *out, struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); int ret; - ret = ipt_do_table(skb, hooknum, in, out, nat_table); + ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); if (ret == NF_ACCEPT) { if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) @@ -226,14 +210,32 @@ static struct xt_target ipt_dnat_reg __read_mostly = { .family = AF_INET, }; +static int __net_init nf_nat_rule_net_init(struct net *net) +{ + net->ipv4.nat_table = ipt_register_table(net, &nat_table, + &nat_initial_table.repl); + if (IS_ERR(net->ipv4.nat_table)) + return PTR_ERR(net->ipv4.nat_table); + return 0; +} + +static void __net_exit nf_nat_rule_net_exit(struct net *net) +{ + ipt_unregister_table(net->ipv4.nat_table); +} + +static struct pernet_operations nf_nat_rule_net_ops = { + .init = nf_nat_rule_net_init, + .exit = nf_nat_rule_net_exit, +}; + int __init nf_nat_rule_init(void) { int ret; - nat_table = ipt_register_table(&init_net, &__nat_table, - &nat_initial_table.repl); - if (IS_ERR(nat_table)) - return PTR_ERR(nat_table); + ret = register_pernet_subsys(&nf_nat_rule_net_ops); + if (ret != 0) + goto out; ret = xt_register_target(&ipt_snat_reg); if (ret != 0) goto unregister_table; @@ -247,8 +249,8 @@ int __init nf_nat_rule_init(void) unregister_snat: xt_unregister_target(&ipt_snat_reg); unregister_table: - ipt_unregister_table(nat_table); - + unregister_pernet_subsys(&nf_nat_rule_net_ops); + out: return ret; } @@ -256,5 +258,5 @@ void nf_nat_rule_cleanup(void) { xt_unregister_target(&ipt_dnat_reg); xt_unregister_target(&ipt_snat_reg); - ipt_unregister_table(nat_table); + unregister_pernet_subsys(&nf_nat_rule_net_ops); } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e0689fd7b798..276d047fb85a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -26,16 +26,13 @@ static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; -extern seqlock_t sysctl_port_range_lock; -extern int sysctl_local_port_range[2]; - /* Update system visible IP port range */ static void set_local_port_range(int range[2]) { - write_seqlock(&sysctl_port_range_lock); - sysctl_local_port_range[0] = range[0]; - sysctl_local_port_range[1] = range[1]; - write_sequnlock(&sysctl_port_range_lock); + write_seqlock(&sysctl_local_ports.lock); + sysctl_local_ports.range[0] = range[0]; + sysctl_local_ports.range[1] = range[1]; + write_sequnlock(&sysctl_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -44,8 +41,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, size_t *lenp, loff_t *ppos) { int ret; - int range[2] = { sysctl_local_port_range[0], - sysctl_local_port_range[1] }; + int range[2]; ctl_table tmp = { .data = &range, .maxlen = sizeof(range), @@ -54,6 +50,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, .extra2 = &ip_local_port_range_max, }; + inet_get_local_port_range(range, range + 1); ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); if (write && ret == 0) { @@ -73,8 +70,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, void __user *newval, size_t newlen) { int ret; - int range[2] = { sysctl_local_port_range[0], - sysctl_local_port_range[1] }; + int range[2]; ctl_table tmp = { .data = &range, .maxlen = sizeof(range), @@ -83,6 +79,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, .extra2 = &ip_local_port_range_max, }; + inet_get_local_port_range(range, range + 1); ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen); if (ret == 0 && newval && newlen) { if (range[1] < range[0]) @@ -396,8 +393,8 @@ static struct ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, .procname = "ip_local_port_range", - .data = &sysctl_local_port_range, - .maxlen = sizeof(sysctl_local_port_range), + .data = &sysctl_local_ports.range, + .maxlen = sizeof(sysctl_local_ports.range), .mode = 0644, .proc_handler = &ipv4_local_port_range, .strategy = &ipv4_sysctl_local_port_range, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7d81a1ee5507..eccb7165a80c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -497,10 +497,8 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, struct sk_buff *skb) { - if (flags & MSG_OOB) { - tp->urg_mode = 1; + if (flags & MSG_OOB) tp->snd_up = tp->write_seq; - } } static inline void tcp_push(struct sock *sk, int flags, int mss_now, @@ -1161,7 +1159,7 @@ static void tcp_prequeue_process(struct sock *sk) * necessary */ local_bh_disable(); while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk->sk_backlog_rcv(sk, skb); + sk_backlog_rcv(sk, skb); local_bh_enable(); /* Clear memory counter. */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3b76bce769dd..63da39372d40 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2836,7 +2836,8 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) * is before the ack sequence we can discard it as it's confirmed to have * arrived at the other end. */ -static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) +static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, + u32 prior_snd_una) { struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -2903,9 +2904,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) if (sacked & TCPCB_LOST) tp->lost_out -= acked_pcount; - if (unlikely(tp->urg_mode && !before(end_seq, tp->snd_up))) - tp->urg_mode = 0; - tp->packets_out -= acked_pcount; pkts_acked += acked_pcount; @@ -2935,6 +2933,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) tp->lost_skb_hint = NULL; } + if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una))) + tp->snd_up = tp->snd_una; + if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) flag |= FLAG_SACK_RENEGING; @@ -3311,7 +3312,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) goto no_queue; /* See if we can take anything off of the retransmit queue. */ - flag |= tcp_clean_rtx_queue(sk, prior_fackets); + flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); @@ -4460,8 +4461,8 @@ static void tcp_new_space(struct sock *sk) if (tcp_should_expand_sndbuf(sk)) { int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + - MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), - demanded = max_t(unsigned int, tp->snd_cwnd, + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); + int demanded = max_t(unsigned int, tp->snd_cwnd, tp->reordering + 1); sndmem *= 2 * demanded; if (sndmem > sk->sk_sndbuf) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8b24bd833cb4..ba46769c6e97 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -589,7 +589,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) #endif arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, /* XXX */ - sizeof(struct tcphdr), IPPROTO_TCP, 0); + arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; @@ -1577,8 +1577,7 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = iph->tos; TCP_SKB_CB(skb)->sacked = 0; - sk = __inet_lookup(net, &tcp_hashinfo, iph->saddr, - th->source, iph->daddr, th->dest, inet_iif(skb)); + sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); if (!sk) goto no_tcp_socket; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f976fc57892c..779f2e9d0689 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -395,6 +395,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->pred_flags = 0; newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1; + newtp->snd_up = treq->snt_isn + 1; tcp_prequeue_init(newtp); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 493553c71d32..990a58493235 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -345,6 +345,11 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) TCP_SKB_CB(skb)->end_seq = seq; } +static inline int tcp_urg_mode(const struct tcp_sock *tp) +{ + return tp->snd_una != tp->snd_up; +} + #define OPTION_SACK_ADVERTISE (1 << 0) #define OPTION_TS (1 << 1) #define OPTION_MD5 (1 << 2) @@ -646,7 +651,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->check = 0; th->urg_ptr = 0; - if (unlikely(tp->urg_mode && + /* The urg_mode check is necessary during a below snd_una win probe */ + if (unlikely(tcp_urg_mode(tp) && between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) { th->urg_ptr = htons(tp->snd_up - tcb->seq); th->urg = 1; @@ -1012,7 +1018,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) /* Compute the current effective MSS, taking SACKs and IP options, * and even PMTU discovery events into account. * - * LARGESEND note: !urg_mode is overkill, only frames up to snd_up + * LARGESEND note: !tcp_urg_mode is overkill, only frames up to snd_up * cannot be large. However, taking into account rare use of URG, this * is not a big flaw. */ @@ -1029,7 +1035,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) mss_now = tp->mss_cache; - if (large_allowed && sk_can_gso(sk) && !tp->urg_mode) + if (large_allowed && sk_can_gso(sk) && !tcp_urg_mode(tp)) doing_tso = 1; if (dst) { @@ -1193,7 +1199,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, /* Don't use the nagle rule for urgent data (or for the final FIN). * Nagle can be ignored during F-RTO too (see RFC4138). */ - if (tp->urg_mode || (tp->frto_counter == 2) || + if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) return 1; @@ -2358,6 +2364,7 @@ static void tcp_connect_init(struct sock *sk) tcp_init_wl(tp, tp->write_seq, 0); tp->snd_una = tp->write_seq; tp->snd_sml = tp->write_seq; + tp->snd_up = tp->write_seq; tp->rcv_nxt = 0; tp->rcv_wup = 0; tp->copied_seq = 0; @@ -2567,8 +2574,7 @@ int tcp_write_wakeup(struct sock *sk) tcp_event_new_data_sent(sk, skb); return err; } else { - if (tp->urg_mode && - between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF)) + if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF)) tcp_xmit_probe_skb(sk, 1); return tcp_xmit_probe_skb(sk, 0); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 5ab6ba19c3ce..6b6dff1164b9 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -201,7 +201,7 @@ static void tcp_delack_timer(unsigned long data) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED); while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk->sk_backlog_rcv(sk, skb); + sk_backlog_rcv(sk, skb); tp->ucopy.memory = 0; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c83d0ef469c9..67d8430b4a2a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -108,9 +108,6 @@ * Snmp MIB for the UDP layer */ -DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; -EXPORT_SYMBOL(udp_stats_in6); - struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); @@ -158,55 +155,23 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, write_lock_bh(&udp_hash_lock); if (!snum) { - int i, low, high, remaining; - unsigned rover, best, best_size_so_far; + int low, high, remaining; + unsigned rand; + unsigned short first; inet_get_local_port_range(&low, &high); remaining = (high - low) + 1; - best_size_so_far = UINT_MAX; - best = rover = net_random() % remaining + low; - - /* 1st pass: look for empty (or shortest) hash chain */ - for (i = 0; i < UDP_HTABLE_SIZE; i++) { - int size = 0; - - head = &udptable[udp_hashfn(net, rover)]; - if (hlist_empty(head)) - goto gotit; - - sk_for_each(sk2, node, head) { - if (++size >= best_size_so_far) - goto next; - } - best_size_so_far = size; - best = rover; - next: - /* fold back if end of range */ - if (++rover > high) - rover = low + ((rover - low) - & (UDP_HTABLE_SIZE - 1)); - - - } - - /* 2nd pass: find hole in shortest hash chain */ - rover = best; - for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) { - if (! __udp_lib_lport_inuse(net, rover, udptable)) - goto gotit; - rover += UDP_HTABLE_SIZE; - if (rover > high) - rover = low + ((rover - low) - & (UDP_HTABLE_SIZE - 1)); + rand = net_random(); + snum = first = rand % remaining + low; + rand |= 1; + while (__udp_lib_lport_inuse(net, snum, udptable)) { + do { + snum = snum + rand; + } while (snum < low || snum > high); + if (snum == first) + goto fail; } - - - /* All ports in use! */ - goto fail; - -gotit: - snum = rover; } else { head = &udptable[udp_hashfn(net, snum)]; @@ -302,6 +267,21 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, return result; } +static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, + __be16 sport, __be16 dport, + struct hlist_head udptable[]) +{ + struct sock *sk; + const struct iphdr *iph = ip_hdr(skb); + + if (unlikely(sk = skb_steal_sock(skb))) + return sk; + else + return __udp4_lib_lookup(dev_net(skb->dst->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + udptable); +} + struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { @@ -1208,8 +1188,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], return __udp4_lib_mcast_deliver(net, skb, uh, saddr, daddr, udptable); - sk = __udp4_lib_lookup(net, saddr, uh->source, daddr, - uh->dest, inet_iif(skb), udptable); + sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk != NULL) { int ret = udp_queue_rcv_skb(sk, skb); |