From cca5cf91c789f3301cc2541a79c323c53be5a8e1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 15 Jul 2010 11:27:41 +0200 Subject: nfnetlink_log: do not expose NFULNL_COPY_DISABLED to user-space This patch moves the NFULNL_COPY_DISABLED definition from linux/netfilter/nfnetlink_log.h to net/netfilter/nfnetlink_log.h since this copy mode is only for internal use. I have also changed the value from 0x03 to 0xff. Thus, we avoid a gap from user-space that may confuse users if we add new copy modes in the future. This change was introduced in: http://www.spinics.net/lists/netfilter-devel/msg13535.html Since this change is not included in any stable Linux kernel, I think it's safe to make this change now. Anyway, this copy mode does not make any sense from user-space, so this patch should not break any existing setup. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink_log.h | 2 +- include/net/netfilter/nfnetlink_log.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h index 1d0b84aa1d42..ea9b8d380527 100644 --- a/include/linux/netfilter/nfnetlink_log.h +++ b/include/linux/netfilter/nfnetlink_log.h @@ -89,7 +89,7 @@ enum nfulnl_attr_config { #define NFULNL_COPY_NONE 0x00 #define NFULNL_COPY_META 0x01 #define NFULNL_COPY_PACKET 0x02 -#define NFULNL_COPY_DISABLED 0x03 +/* 0xff is reserved, don't use it for new copy modes. 
*/ #define NFULNL_CFG_F_SEQ 0x0001 #define NFULNL_CFG_F_SEQ_GLOBAL 0x0002 diff --git a/include/net/netfilter/nfnetlink_log.h b/include/net/netfilter/nfnetlink_log.h index b0569ff0775e..e2dec42c2db2 100644 --- a/include/net/netfilter/nfnetlink_log.h +++ b/include/net/netfilter/nfnetlink_log.h @@ -10,5 +10,7 @@ nfulnl_log_packet(u_int8_t pf, const struct nf_loginfo *li_user, const char *prefix); +#define NFULNL_COPY_DISABLED 0xff + #endif /* _KER_NFNETLINK_LOG_H */ -- cgit v1.2.3 From edf0e1fb0d0910880881523cfaaabcec06a2c0d5 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 15 Jul 2010 17:20:46 +0200 Subject: netfilter: add CHECKSUM target This adds a `CHECKSUM' target, which can be used in the iptables mangle table. You can use this target to compute and fill in the checksum in a packet that lacks a checksum. This is particularly useful, if you need to work around old applications such as dhcp clients, that do not work well with checksum offloads, but don't want to disable checksum offload in your device. The problem happens in the field with virtualized applications. For reference, see Red Hat bz 605555, as well as http://www.spinics.net/lists/kvm/msg37660.html Typical expected use (helps old dhclient binary running in a VM): iptables -A POSTROUTING -t mangle -p udp --dport bootpc \ -j CHECKSUM --checksum-fill Includes fixes by Jan Engelhardt Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_CHECKSUM.h | 18 +++++++++ net/netfilter/Kconfig | 16 ++++++++ net/netfilter/Makefile | 1 + net/netfilter/xt_CHECKSUM.c | 70 +++++++++++++++++++++++++++++++++++ 4 files changed, 105 insertions(+) create mode 100644 include/linux/netfilter/xt_CHECKSUM.h create mode 100644 net/netfilter/xt_CHECKSUM.c (limited to 'include') diff --git a/include/linux/netfilter/xt_CHECKSUM.h b/include/linux/netfilter/xt_CHECKSUM.h new file mode 100644 index 000000000000..3b4fb77acef6 --- /dev/null +++ b/include/linux/netfilter/xt_CHECKSUM.h @@ -0,0 +1,18 @@ +/* Header file for iptables ipt_CHECKSUM target + * + * (C) 2002 by Harald Welte + * (C) 2010 Red Hat Inc + * Author: Michael S. Tsirkin + * + * This software is distributed under GNU GPL v2, 1991 +*/ +#ifndef _IPT_CHECKSUM_TARGET_H +#define _IPT_CHECKSUM_TARGET_H + +#define XT_CHECKSUM_OP_FILL 0x01 /* fill in checksum in IP header */ + +struct xt_CHECKSUM_info { + __u8 operation; /* bitset of operations */ +}; + +#endif /* _IPT_CHECKSUM_TARGET_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index aa2f106347e4..5fb8efa84df3 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -326,6 +326,22 @@ config NETFILTER_XT_CONNMARK comment "Xtables targets" +config NETFILTER_XT_TARGET_CHECKSUM + tristate "CHECKSUM target support" + depends on IP_NF_MANGLE || IP6_NF_MANGLE + depends on NETFILTER_ADVANCED + ---help--- + This option adds a `CHECKSUM' target, which can be used in the iptables mangle + table. + + You can use this target to compute and fill in the checksum in + a packet that lacks a checksum. This is particularly useful, + if you need to work around old applications such as dhcp clients, + that do not work well with checksum offloads, but don't want to disable + checksum offload in your device. + + To compile it as a module, choose M here. If unsure, say N. 
+ config NETFILTER_XT_TARGET_CLASSIFY tristate '"CLASSIFY" target support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index e28420aac5ef..36ef8e63be1e 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o # targets +obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c new file mode 100644 index 000000000000..0f642ef8cd26 --- /dev/null +++ b/net/netfilter/xt_CHECKSUM.c @@ -0,0 +1,70 @@ +/* iptables module for the packet checksum mangling + * + * (C) 2002 by Harald Welte + * (C) 2010 Red Hat, Inc. + * + * Author: Michael S. Tsirkin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michael S. 
Tsirkin "); +MODULE_DESCRIPTION("Xtables: checksum modification"); +MODULE_ALIAS("ipt_CHECKSUM"); +MODULE_ALIAS("ip6t_CHECKSUM"); + +static unsigned int +checksum_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + if (skb->ip_summed == CHECKSUM_PARTIAL) + skb_checksum_help(skb); + + return XT_CONTINUE; +} + +static int checksum_tg_check(const struct xt_tgchk_param *par) +{ + const struct xt_CHECKSUM_info *einfo = par->targinfo; + + if (einfo->operation & ~XT_CHECKSUM_OP_FILL) { + pr_info("unsupported CHECKSUM operation %x\n", einfo->operation); + return -EINVAL; + } + if (!einfo->operation) { + pr_info("no CHECKSUM operation enabled\n"); + return -EINVAL; + } + return 0; +} + +static struct xt_target checksum_tg_reg __read_mostly = { + .name = "CHECKSUM", + .family = NFPROTO_UNSPEC, + .target = checksum_tg, + .targetsize = sizeof(struct xt_CHECKSUM_info), + .table = "mangle", + .checkentry = checksum_tg_check, + .me = THIS_MODULE, +}; + +static int __init checksum_tg_init(void) +{ + return xt_register_target(&checksum_tg_reg); +} + +static void __exit checksum_tg_exit(void) +{ + xt_unregister_target(&checksum_tg_reg); +} + +module_init(checksum_tg_init); +module_exit(checksum_tg_exit); -- cgit v1.2.3 From 22cb516696304a9b85892b18c483a27d97cfa51b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 16 Jul 2010 14:08:20 +0200 Subject: netfilter: correct CHECKSUM header and export it Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_CHECKSUM.h | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index bb103f43afa0..b93b64dc9fae 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -3,6 +3,7 @@ header-y += nf_conntrack_tuple_common.h header-y += nfnetlink_conntrack.h header-y += nfnetlink_log.h header-y += nfnetlink_queue.h +header-y += xt_CHECKSUM.h header-y += xt_CLASSIFY.h header-y += xt_CONNMARK.h header-y += xt_CONNSECMARK.h diff --git a/include/linux/netfilter/xt_CHECKSUM.h b/include/linux/netfilter/xt_CHECKSUM.h index 3b4fb77acef6..9a2e4661654e 100644 --- a/include/linux/netfilter/xt_CHECKSUM.h +++ b/include/linux/netfilter/xt_CHECKSUM.h @@ -6,8 +6,10 @@ * * This software is distributed under GNU GPL v2, 1991 */ -#ifndef _IPT_CHECKSUM_TARGET_H -#define _IPT_CHECKSUM_TARGET_H +#ifndef _XT_CHECKSUM_TARGET_H +#define _XT_CHECKSUM_TARGET_H + +#include #define XT_CHECKSUM_OP_FILL 0x01 /* fill in checksum in IP header */ @@ -15,4 +17,4 @@ struct xt_CHECKSUM_info { __u8 operation; /* bitset of operations */ }; -#endif /* _IPT_CHECKSUM_TARGET_H */ +#endif /* _XT_CHECKSUM_TARGET_H */ -- cgit v1.2.3 From 9c3e1c39679144c250dda95098333ecb5f1f407a Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Fri, 23 Jul 2010 12:42:58 +0200 Subject: netfilter: xt_ipvs (netfilter matcher for IPVS) This implements the kernel-space side of the netfilter matcher xt_ipvs. 
[ minor fixes by Simon Horman ] Signed-off-by: Hannes Eder Signed-off-by: Simon Horman [ Patrick: added xt_ipvs.h to Kbuild ] Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_ipvs.h | 27 ++++++ net/netfilter/Kconfig | 10 ++ net/netfilter/Makefile | 1 + net/netfilter/ipvs/ip_vs_proto.c | 1 + net/netfilter/xt_ipvs.c | 189 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 229 insertions(+) create mode 100644 include/linux/netfilter/xt_ipvs.h create mode 100644 net/netfilter/xt_ipvs.c (limited to 'include') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index b93b64dc9fae..0cb62c857187 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -31,6 +31,7 @@ header-y += xt_dscp.h header-y += xt_esp.h header-y += xt_hashlimit.h header-y += xt_iprange.h +header-y += xt_ipvs.h header-y += xt_helper.h header-y += xt_length.h header-y += xt_limit.h diff --git a/include/linux/netfilter/xt_ipvs.h b/include/linux/netfilter/xt_ipvs.h new file mode 100644 index 000000000000..1167aeb7a347 --- /dev/null +++ b/include/linux/netfilter/xt_ipvs.h @@ -0,0 +1,27 @@ +#ifndef _XT_IPVS_H +#define _XT_IPVS_H + +enum { + XT_IPVS_IPVS_PROPERTY = 1 << 0, /* all other options imply this one */ + XT_IPVS_PROTO = 1 << 1, + XT_IPVS_VADDR = 1 << 2, + XT_IPVS_VPORT = 1 << 3, + XT_IPVS_DIR = 1 << 4, + XT_IPVS_METHOD = 1 << 5, + XT_IPVS_VPORTCTL = 1 << 6, + XT_IPVS_MASK = (1 << 7) - 1, + XT_IPVS_ONCE_MASK = XT_IPVS_MASK & ~XT_IPVS_IPVS_PROPERTY +}; + +struct xt_ipvs_mtinfo { + union nf_inet_addr vaddr, vmask; + __be16 vport; + __u8 l4proto; + __u8 fwd_method; + __be16 vportctl; + + __u8 invert; + __u8 bitmask; +}; + +#endif /* _XT_IPVS_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 5fb8efa84df3..551b58419df9 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -742,6 +742,16 @@ config NETFILTER_XT_MATCH_IPRANGE If unsure, say M. 
+config NETFILTER_XT_MATCH_IPVS + tristate '"ipvs" match support' + depends on IP_VS + depends on NETFILTER_ADVANCED + depends on NF_CONNTRACK + help + This option allows you to match against IPVS properties of a packet. + + If unsure, say N. + config NETFILTER_XT_MATCH_LENGTH tristate '"length" match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 36ef8e63be1e..4366c79a6683 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -77,6 +77,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o +obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 2d3d5e4b35f8..027f654799fe 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -98,6 +98,7 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto) return NULL; } +EXPORT_SYMBOL(ip_vs_proto_get); /* diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c new file mode 100644 index 000000000000..7a4d66db95ae --- /dev/null +++ b/net/netfilter/xt_ipvs.c @@ -0,0 +1,189 @@ +/* + * xt_ipvs - kernel module to match IPVS connection properties + * + * Author: Hannes Eder + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#ifdef CONFIG_IP_VS_IPV6 +#include +#endif +#include +#include +#include +#include +#include +#include + +#include + +MODULE_AUTHOR("Hannes Eder "); +MODULE_DESCRIPTION("Xtables: match IPVS connection properties"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_ipvs"); +MODULE_ALIAS("ip6t_ipvs"); + +/* borrowed from xt_conntrack */ +static bool 
ipvs_mt_addrcmp(const union nf_inet_addr *kaddr, + const union nf_inet_addr *uaddr, + const union nf_inet_addr *umask, + unsigned int l3proto) +{ + if (l3proto == NFPROTO_IPV4) + return ((kaddr->ip ^ uaddr->ip) & umask->ip) == 0; +#ifdef CONFIG_IP_VS_IPV6 + else if (l3proto == NFPROTO_IPV6) + return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6, + &uaddr->in6) == 0; +#endif + else + return false; +} + +static bool +ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_ipvs_mtinfo *data = par->matchinfo; + /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */ + const u_int8_t family = par->family; + struct ip_vs_iphdr iph; + struct ip_vs_protocol *pp; + struct ip_vs_conn *cp; + bool match = true; + + if (data->bitmask == XT_IPVS_IPVS_PROPERTY) { + match = skb->ipvs_property ^ + !!(data->invert & XT_IPVS_IPVS_PROPERTY); + goto out; + } + + /* other flags than XT_IPVS_IPVS_PROPERTY are set */ + if (!skb->ipvs_property) { + match = false; + goto out; + } + + ip_vs_fill_iphdr(family, skb_network_header(skb), &iph); + + if (data->bitmask & XT_IPVS_PROTO) + if ((iph.protocol == data->l4proto) ^ + !(data->invert & XT_IPVS_PROTO)) { + match = false; + goto out; + } + + pp = ip_vs_proto_get(iph.protocol); + if (unlikely(!pp)) { + match = false; + goto out; + } + + /* + * Check if the packet belongs to an existing entry + */ + cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */); + if (unlikely(cp == NULL)) { + match = false; + goto out; + } + + /* + * We found a connection, i.e. cp != NULL, make sure to call + * __ip_vs_conn_put before returning. In our case jump to out_put_cp. 
+ */ + + if (data->bitmask & XT_IPVS_VPORT) + if ((cp->vport == data->vport) ^ + !(data->invert & XT_IPVS_VPORT)) { + match = false; + goto out_put_cp; + } + + if (data->bitmask & XT_IPVS_VPORTCTL) + if ((cp->control != NULL && + cp->control->vport == data->vportctl) ^ + !(data->invert & XT_IPVS_VPORTCTL)) { + match = false; + goto out_put_cp; + } + + if (data->bitmask & XT_IPVS_DIR) { + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + + if (ct == NULL || nf_ct_is_untracked(ct)) { + match = false; + goto out_put_cp; + } + + if ((ctinfo >= IP_CT_IS_REPLY) ^ + !!(data->invert & XT_IPVS_DIR)) { + match = false; + goto out_put_cp; + } + } + + if (data->bitmask & XT_IPVS_METHOD) + if (((cp->flags & IP_VS_CONN_F_FWD_MASK) == data->fwd_method) ^ + !(data->invert & XT_IPVS_METHOD)) { + match = false; + goto out_put_cp; + } + + if (data->bitmask & XT_IPVS_VADDR) { + if (ipvs_mt_addrcmp(&cp->vaddr, &data->vaddr, + &data->vmask, family) ^ + !(data->invert & XT_IPVS_VADDR)) { + match = false; + goto out_put_cp; + } + } + +out_put_cp: + __ip_vs_conn_put(cp); +out: + pr_debug("match=%d\n", match); + return match; +} + +static int ipvs_mt_check(const struct xt_mtchk_param *par) +{ + if (par->family != NFPROTO_IPV4 +#ifdef CONFIG_IP_VS_IPV6 + && par->family != NFPROTO_IPV6 +#endif + ) { + pr_info("protocol family %u not supported\n", par->family); + return -EINVAL; + } + + return 0; +} + +static struct xt_match xt_ipvs_mt_reg __read_mostly = { + .name = "ipvs", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = ipvs_mt, + .checkentry = ipvs_mt_check, + .matchsize = XT_ALIGN(sizeof(struct xt_ipvs_mtinfo)), + .me = THIS_MODULE, +}; + +static int __init ipvs_mt_init(void) +{ + return xt_register_match(&xt_ipvs_mt_reg); +} + +static void __exit ipvs_mt_exit(void) +{ + xt_unregister_match(&xt_ipvs_mt_reg); +} + +module_init(ipvs_mt_init); +module_exit(ipvs_mt_exit); -- cgit v1.2.3 From 7f1c407579519e71a0dcadc05614fd98acec585e Mon Sep 17 00:00:00 
2001 From: Hannes Eder Date: Fri, 23 Jul 2010 12:48:52 +0200 Subject: IPVS: make FTP work with full NAT support Use nf_conntrack/nf_nat code to do the packet mangling and the TCP sequence adjusting. The function 'ip_vs_skb_replace' is now dead code, so it is removed. To SNAT FTP, use something like: % iptables -t nat -A POSTROUTING -m ipvs --vaddr 192.168.100.30/32 \ --vport 21 -j SNAT --to-source 192.168.10.10 and for the data connections in passive mode: % iptables -t nat -A POSTROUTING -m ipvs --vaddr 192.168.100.30/32 \ --vportctl 21 -j SNAT --to-source 192.168.10.10 using '-m state --state RELATED' would also work. Make sure the kernel modules ip_vs_ftp, nf_conntrack_ftp, and nf_nat_ftp are loaded. [ up-port and minor fixes by Simon Horman ] Signed-off-by: Hannes Eder Signed-off-by: Simon Horman Signed-off-by: Patrick McHardy --- include/net/ip_vs.h | 2 - net/netfilter/ipvs/Kconfig | 2 +- net/netfilter/ipvs/ip_vs_app.c | 43 ---------- net/netfilter/ipvs/ip_vs_core.c | 1 - net/netfilter/ipvs/ip_vs_ftp.c | 176 +++++++++++++++++++++++++++++++++++++--- 5 files changed, 165 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index fe82b1e10a29..1f9e51180bdb 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -736,8 +736,6 @@ extern void ip_vs_app_inc_put(struct ip_vs_app *inc); extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb); extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); -extern int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, - char *o_buf, int o_len, char *n_buf, int n_len); extern int ip_vs_app_init(void); extern void ip_vs_app_cleanup(void); diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 366244492ac7..be10f6526042 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -235,7 +235,7 @@ comment 'IPVS application helper' config IP_VS_FTP tristate "FTP protocol helper" - depends on IP_VS_PROTO_TCP 
+ depends on IP_VS_PROTO_TCP && NF_NAT ---help--- FTP is a protocol that transfers IP address and/or port number in the payload. In the virtual server via Network Address Translation, diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 1cb0e834f8ff..e76f87f4aca8 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -569,49 +569,6 @@ static const struct file_operations ip_vs_app_fops = { }; #endif - -/* - * Replace a segment of data with a new segment - */ -int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, - char *o_buf, int o_len, char *n_buf, int n_len) -{ - int diff; - int o_offset; - int o_left; - - EnterFunction(9); - - diff = n_len - o_len; - o_offset = o_buf - (char *)skb->data; - /* The length of left data after o_buf+o_len in the skb data */ - o_left = skb->len - (o_offset + o_len); - - if (diff <= 0) { - memmove(o_buf + n_len, o_buf + o_len, o_left); - memcpy(o_buf, n_buf, n_len); - skb_trim(skb, skb->len + diff); - } else if (diff <= skb_tailroom(skb)) { - skb_put(skb, diff); - memmove(o_buf + n_len, o_buf + o_len, o_left); - memcpy(o_buf, n_buf, n_len); - } else { - if (pskb_expand_head(skb, skb_headroom(skb), diff, pri)) - return -ENOMEM; - skb_put(skb, diff); - memmove(skb->data + o_offset + n_len, - skb->data + o_offset + o_len, o_left); - skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len); - } - - /* must update the iph total length here */ - ip_hdr(skb)->tot_len = htons(skb->len); - - LeaveFunction(9); - return 0; -} - - int __init ip_vs_app_init(void) { /* we will replace it with proc_net_ipvs_create() soon */ diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 58f82dfc950a..4f8ddba48011 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -54,7 +54,6 @@ EXPORT_SYMBOL(register_ip_vs_scheduler); EXPORT_SYMBOL(unregister_ip_vs_scheduler); -EXPORT_SYMBOL(ip_vs_skb_replace); EXPORT_SYMBOL(ip_vs_proto_name); 
EXPORT_SYMBOL(ip_vs_conn_new); EXPORT_SYMBOL(ip_vs_conn_in_get); diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 2ae747a376a5..f228a17ec649 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -20,6 +20,17 @@ * * Author: Wouter Gadeyne * + * + * Code for ip_vs_expect_related and ip_vs_expect_callback is taken from + * http://www.ssi.bg/~ja/nfct/: + * + * ip_vs_nfct.c: Netfilter connection tracking support for IPVS + * + * Portions Copyright (C) 2001-2002 + * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. + * + * Portions Copyright (C) 2003-2008 + * Julian Anastasov */ #define KMSG_COMPONENT "IPVS" @@ -32,6 +43,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -43,6 +57,16 @@ #define SERVER_STRING "227 Entering Passive Mode (" #define CLIENT_STRING "PORT " +#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u" +#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \ + &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \ + (T)->dst.protonum + +#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u" +#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \ + &((C)->vaddr.ip), ntohs((C)->vport), \ + &((C)->daddr.ip), ntohs((C)->dport), \ + (C)->protocol, (C)->state /* * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper @@ -123,6 +147,119 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, return 1; } +/* + * Called from init_conntrack() as expectfn handler. + */ +static void +ip_vs_expect_callback(struct nf_conn *ct, + struct nf_conntrack_expect *exp) +{ + struct nf_conntrack_tuple *orig, new_reply; + struct ip_vs_conn *cp; + + if (exp->tuple.src.l3num != PF_INET) + return; + + /* + * We assume that no NF locks are held before this callback. + * ip_vs_conn_out_get and ip_vs_conn_in_get should match their + * expectations even if they use wildcard values, now we provide the + * actual values from the newly created original conntrack direction. 
+ * The conntrack is confirmed when packet reaches IPVS hooks. + */ + + /* RS->CLIENT */ + orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum, + &orig->src.u3, orig->src.u.tcp.port, + &orig->dst.u3, orig->dst.u.tcp.port); + if (cp) { + /* Change reply CLIENT->RS to CLIENT->VS */ + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " + FMT_TUPLE ", found inout cp=" FMT_CONN "\n", + __func__, ct, ct->status, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + new_reply.dst.u3 = cp->vaddr; + new_reply.dst.u.tcp.port = cp->vport; + IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE + ", inout cp=" FMT_CONN "\n", + __func__, ct, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + goto alter; + } + + /* CLIENT->VS */ + cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum, + &orig->src.u3, orig->src.u.tcp.port, + &orig->dst.u3, orig->dst.u.tcp.port); + if (cp) { + /* Change reply VS->CLIENT to RS->CLIENT */ + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " + FMT_TUPLE ", found outin cp=" FMT_CONN "\n", + __func__, ct, ct->status, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + new_reply.src.u3 = cp->daddr; + new_reply.src.u.tcp.port = cp->dport; + IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " + FMT_TUPLE ", outin cp=" FMT_CONN "\n", + __func__, ct, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + goto alter; + } + + IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuple=" FMT_TUPLE + " - unknown expect\n", + __func__, ct, ct->status, ARG_TUPLE(orig)); + return; + +alter: + /* Never alter conntrack for non-NAT conns */ + if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) + nf_conntrack_alter_reply(ct, &new_reply); + ip_vs_conn_put(cp); + return; +} + +/* + * Create NF conntrack expectation with wildcard (optional) 
source port. + * Then the default callback function will alter the reply and will confirm + * the conntrack entry when the first packet comes. + */ +static void +ip_vs_expect_related(struct sk_buff *skb, struct nf_conn *ct, + struct ip_vs_conn *cp, u_int8_t proto, + const __be16 *port, int from_rs) +{ + struct nf_conntrack_expect *exp; + + BUG_ON(!ct || ct == &nf_conntrack_untracked); + + exp = nf_ct_expect_alloc(ct); + if (!exp) + return; + + if (from_rs) + nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, + nf_ct_l3num(ct), &cp->daddr, &cp->caddr, + proto, port, &cp->cport); + else + nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, + nf_ct_l3num(ct), &cp->caddr, &cp->vaddr, + proto, port, &cp->vport); + + exp->expectfn = ip_vs_expect_callback; + + IP_VS_DBG(7, "%s(): ct=%p, expect tuple=" FMT_TUPLE "\n", + __func__, ct, ARG_TUPLE(&exp->tuple)); + nf_ct_expect_related(exp); + nf_ct_expect_put(exp); +} /* * Look at outgoing ftp packets to catch the response to a PASV command @@ -149,7 +286,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, struct ip_vs_conn *n_cp; char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ unsigned buf_len; - int ret; + int ret = 0; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -219,19 +358,26 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, buf_len = strlen(buf); + ct = nf_ct_get(skb, &ctinfo); + if (ct && !nf_ct_is_untracked(ct)) { + /* If mangling fails this function will return 0 + * which will cause the packet to be dropped. + * Mangling can only fail under memory pressure, + * hopefully it will succeed on the retransmitted + * packet. 
+ */ + ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + start-data, end-start, + buf, buf_len); + if (ret) + ip_vs_expect_related(skb, ct, n_cp, + IPPROTO_TCP, NULL, 0); + } + /* - * Calculate required delta-offset to keep TCP happy + * Not setting 'diff' is intentional, otherwise the sequence + * would be adjusted twice. */ - *diff = buf_len - (end-start); - - if (*diff == 0) { - /* simply replace it with new passive address */ - memcpy(start, buf, buf_len); - ret = 1; - } else { - ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start, - end-start, buf, buf_len); - } cp->app_data = NULL; ip_vs_tcp_conn_listen(n_cp); @@ -263,6 +409,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; + struct nf_conn *ct; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -349,6 +496,11 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, ip_vs_control_add(n_cp, cp); } + ct = (struct nf_conn *)skb->nfct; + if (ct && ct != &nf_conntrack_untracked) + ip_vs_expect_related(skb, ct, n_cp, + IPPROTO_TCP, &n_cp->dport, 1); + /* * Move tunnel to listen state */ -- cgit v1.2.3 From e8648a1fdb54da1f683784b36a17aa65ea56e931 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 23 Jul 2010 12:59:36 +0200 Subject: netfilter: add xt_cpu match In some situations a CPU match permits a better spreading of connections, or select targets only for a given cpu. With Receive Packet Steering or multiqueue NIC and appropriate IRQ affinities, we can distribute traffic on available cpus, per session. (all RX packets for a given flow are handled by a given cpu) Some legacy applications being not SMP friendly, one way to scale a server is to run multiple copies of them. Instead of randomly choosing an instance, we can use the cpu number as a key so that softirq handler for a whole instance is running on a single cpu, maximizing cache effects in TCP/UDP stacks. 
Using NAT for example, a four-way machine might run four copies of a server application, using a separate listening port for each instance, but still presenting a unique external port: iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 0 \ -j REDIRECT --to-port 8080 iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 1 \ -j REDIRECT --to-port 8081 iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 2 \ -j REDIRECT --to-port 8082 iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 3 \ -j REDIRECT --to-port 8083 Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 3 +- include/linux/netfilter/xt_cpu.h | 11 +++++++ net/netfilter/Kconfig | 9 ++++++ net/netfilter/Makefile | 1 + net/netfilter/xt_cpu.c | 63 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 include/linux/netfilter/xt_cpu.h create mode 100644 net/netfilter/xt_cpu.c (limited to 'include') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 0cb62c857187..edeeabdc1500 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -20,12 +20,13 @@ header-y += xt_TCPMSS.h header-y += xt_TCPOPTSTRIP.h header-y += xt_TEE.h header-y += xt_TPROXY.h +header-y += xt_cluster.h header-y += xt_comment.h header-y += xt_connbytes.h header-y += xt_connlimit.h header-y += xt_connmark.h header-y += xt_conntrack.h -header-y += xt_cluster.h +header-y += xt_cpu.h header-y += xt_dccp.h header-y += xt_dscp.h header-y += xt_esp.h diff --git a/include/linux/netfilter/xt_cpu.h b/include/linux/netfilter/xt_cpu.h new file mode 100644 index 000000000000..93c7f11d8f42 --- /dev/null +++ b/include/linux/netfilter/xt_cpu.h @@ -0,0 +1,11 @@ +#ifndef _XT_CPU_H +#define _XT_CPU_H + +#include + +struct xt_cpu_info { + __u32 cpu; + __u32 invert; +}; + +#endif /*_XT_CPU_H*/ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 
551b58419df9..43288259f4a1 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -663,6 +663,15 @@ config NETFILTER_XT_MATCH_CONNTRACK To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_CPU + tristate '"cpu" match support' + depends on NETFILTER_ADVANCED + help + CPU matching allows you to match packets based on the CPU + currently handling the packet. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_DCCP tristate '"dccp" protocol match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 4366c79a6683..441050f31111 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c new file mode 100644 index 000000000000..b39db8a5cbae --- /dev/null +++ b/net/netfilter/xt_cpu.c @@ -0,0 +1,63 @@ +/* Kernel module to match running CPU */ + +/* + * Might be used to distribute connections on several daemons, if + * RPS (Receive Packet Steering) is enabled or NIC is multiqueue capable, + * each RX queue IRQ affined to one CPU (1:1 mapping) + * + */ + +/* (C) 2010 Eric Dumazet + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Eric Dumazet "); +MODULE_DESCRIPTION("Xtables: CPU match"); + +static int cpu_mt_check(const struct xt_mtchk_param *par) +{ + const struct xt_cpu_info *info = par->matchinfo; + + if (info->invert & ~1) + return -EINVAL; + return 0; +} + +static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_cpu_info *info = par->matchinfo; + + return (info->cpu == smp_processor_id()) ^ info->invert; +} + +static struct xt_match cpu_mt_reg __read_mostly = { + .name = "cpu", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = cpu_mt_check, + .match = cpu_mt, + .matchsize = sizeof(struct xt_cpu_info), + .me = THIS_MODULE, +}; + +static int __init cpu_mt_init(void) +{ + return xt_register_match(&cpu_mt_reg); +} + +static void __exit cpu_mt_exit(void) +{ + xt_unregister_match(&cpu_mt_reg); +} + +module_init(cpu_mt_init); +module_exit(cpu_mt_exit); -- cgit v1.2.3 From 49daf6a22622d4e1619aeaad5f9f0472bf89daff Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Fri, 23 Jul 2010 14:07:47 +0200 Subject: xt_quota: report initial quota value instead of current value to userspace We should copy the initial value to userspace for iptables-save and to allow removal of specific quota rules. 
Signed-off-by: Changli Gao Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_quota.h | 2 +- net/netfilter/xt_quota.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/xt_quota.h b/include/linux/netfilter/xt_quota.h index 8dc89dfc1361..b0d28c659ab7 100644 --- a/include/linux/netfilter/xt_quota.h +++ b/include/linux/netfilter/xt_quota.h @@ -11,9 +11,9 @@ struct xt_quota_priv; struct xt_quota_info { u_int32_t flags; u_int32_t pad; + aligned_u64 quota; /* Used internally by the kernel */ - aligned_u64 quota; struct xt_quota_priv *master; }; diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 304b1fda1a0d..70eb2b4984dd 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -36,8 +36,6 @@ quota_mt(const struct sk_buff *skb, struct xt_action_param *par) /* we do not allow even small packets from now on */ priv->quota = 0; } - /* Copy quota back to matchinfo so that iptables can display it */ - q->quota = priv->quota; spin_unlock_bh(&priv->lock); return ret; -- cgit v1.2.3 From ee92d37861a90b8f14fa621ae5abcfb29a89aaa9 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Mon, 2 Aug 2010 17:06:19 +0200 Subject: netfilter: nf_conntrack_extend: introduce __nf_ct_ext_exist() Some users of nf_ct_ext_exist() know ct->ext isn't NULL. For these users, the check for ct->ext isn't necessary; the function __nf_ct_ext_exist() can be used instead. The type of the return value of nf_ct_ext_exist() is changed to bool.
Signed-off-by: Changli Gao Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_extend.h | 9 +++++++-- net/netfilter/nf_conntrack_extend.c | 22 ++++++++++++---------- 2 files changed, 19 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 32d15bd6efa3..0772d296dfdb 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -28,9 +28,14 @@ struct nf_ct_ext { char data[0]; }; -static inline int nf_ct_ext_exist(const struct nf_conn *ct, u8 id) +static inline bool __nf_ct_ext_exist(const struct nf_ct_ext *ext, u8 id) { - return (ct->ext && ct->ext->offset[id]); + return !!ext->offset[id]; +} + +static inline bool nf_ct_ext_exist(const struct nf_conn *ct, u8 id) +{ + return (ct->ext && __nf_ct_ext_exist(ct->ext, id)); } static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id) diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index fdc8fb4ae10f..7dcf7a404190 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -23,9 +23,10 @@ void __nf_ct_ext_destroy(struct nf_conn *ct) { unsigned int i; struct nf_ct_ext_type *t; + struct nf_ct_ext *ext = ct->ext; for (i = 0; i < NF_CT_EXT_NUM; i++) { - if (!nf_ct_ext_exist(ct, i)) + if (!__nf_ct_ext_exist(ext, i)) continue; rcu_read_lock(); @@ -73,44 +74,45 @@ static void __nf_ct_ext_free_rcu(struct rcu_head *head) void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) { - struct nf_ct_ext *new; + struct nf_ct_ext *old, *new; int i, newlen, newoff; struct nf_ct_ext_type *t; /* Conntrack must not be confirmed to avoid races on reallocation. 
*/ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); - if (!ct->ext) + old = ct->ext; + if (!old) return nf_ct_ext_create(&ct->ext, id, gfp); - if (nf_ct_ext_exist(ct, id)) + if (__nf_ct_ext_exist(old, id)) return NULL; rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); BUG_ON(t == NULL); - newoff = ALIGN(ct->ext->len, t->align); + newoff = ALIGN(old->len, t->align); newlen = newoff + t->len; rcu_read_unlock(); - new = __krealloc(ct->ext, newlen, gfp); + new = __krealloc(old, newlen, gfp); if (!new) return NULL; - if (new != ct->ext) { + if (new != old) { for (i = 0; i < NF_CT_EXT_NUM; i++) { - if (!nf_ct_ext_exist(ct, i)) + if (!__nf_ct_ext_exist(old, i)) continue; rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[i]); if (t && t->move) t->move((void *)new + new->offset[i], - (void *)ct->ext + ct->ext->offset[i]); + (void *)old + old->offset[i]); rcu_read_unlock(); } - call_rcu(&ct->ext->rcu, __nf_ct_ext_free_rcu); + call_rcu(&old->rcu, __nf_ct_ext_free_rcu); ct->ext = new; } -- cgit v1.2.3 From 5c0d2374a16fcb52096df914ee57720987677be5 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 2 Aug 2010 17:12:44 +0200 Subject: ipvs: provide default ip_vs_conn_{in,out}_get_proto This removes duplicate code by providing a default implementation which is used by 3 of the 4 modules that provide these calls.
Signed-off-by: Simon Horman Signed-off-by: Patrick McHardy --- include/net/ip_vs.h | 12 ++++++++ net/netfilter/ipvs/ip_vs_conn.c | 45 ++++++++++++++++++++++++++++ net/netfilter/ipvs/ip_vs_proto_sctp.c | 53 ++------------------------------- net/netfilter/ipvs/ip_vs_proto_tcp.c | 50 ++----------------------------- net/netfilter/ipvs/ip_vs_proto_udp.c | 56 ++--------------------------------- 5 files changed, 63 insertions(+), 153 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 1f9e51180bdb..a4747a0f7303 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -632,10 +632,22 @@ extern struct ip_vs_conn *ip_vs_ct_in_get (int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, const union nf_inet_addr *d_addr, __be16 d_port); +struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, + int inverse); + extern struct ip_vs_conn *ip_vs_conn_out_get (int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, const union nf_inet_addr *d_addr, __be16 d_port); +struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, + int inverse); + /* put back the conn without restarting its timer */ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) { diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 654544e72264..b71c69a2db13 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -271,6 +271,29 @@ struct ip_vs_conn *ip_vs_conn_in_get return cp; } +struct ip_vs_conn * +ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, int inverse) +{ + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), 
_ports); + if (pptr == NULL) + return NULL; + + if (likely(!inverse)) + return ip_vs_conn_in_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); + else + return ip_vs_conn_in_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); +} +EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto); + /* Get reference to connection template */ struct ip_vs_conn *ip_vs_ct_in_get (int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, @@ -356,6 +379,28 @@ struct ip_vs_conn *ip_vs_conn_out_get return ret; } +struct ip_vs_conn * +ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, int inverse) +{ + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) + return NULL; + + if (likely(!inverse)) + return ip_vs_conn_out_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); + else + return ip_vs_conn_out_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); +} +EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); /* * Put back the conn and restart its timer with its timeout diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index db5575967c14..4c0855cb006e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -8,55 +8,6 @@ #include #include - -static struct ip_vs_conn * -sctp_conn_in_get(int af, - const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, - unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) - return ip_vs_conn_in_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - else - return ip_vs_conn_in_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); -} 
- -static struct ip_vs_conn * -sctp_conn_out_get(int af, - const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, - unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) - return ip_vs_conn_out_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - else - return ip_vs_conn_out_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); -} - static int sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) @@ -1169,8 +1120,8 @@ struct ip_vs_protocol ip_vs_protocol_sctp = { .register_app = sctp_register_app, .unregister_app = sctp_unregister_app, .conn_schedule = sctp_conn_schedule, - .conn_in_get = sctp_conn_in_get, - .conn_out_get = sctp_conn_out_get, + .conn_in_get = ip_vs_conn_in_get_proto, + .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = sctp_snat_handler, .dnat_handler = sctp_dnat_handler, .csum_check = sctp_csum_check, diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 91d28e073742..282d24de8592 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -27,52 +27,6 @@ #include - -static struct ip_vs_conn * -tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - return ip_vs_conn_in_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - return ip_vs_conn_in_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } -} - -static struct ip_vs_conn * -tcp_conn_out_get(int af, const struct sk_buff *skb, 
struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - return ip_vs_conn_out_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - return ip_vs_conn_out_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } -} - - static int tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) @@ -721,8 +675,8 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .register_app = tcp_register_app, .unregister_app = tcp_unregister_app, .conn_schedule = tcp_conn_schedule, - .conn_in_get = tcp_conn_in_get, - .conn_out_get = tcp_conn_out_get, + .conn_in_get = ip_vs_conn_in_get_proto, + .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = tcp_snat_handler, .dnat_handler = tcp_dnat_handler, .csum_check = tcp_csum_check, diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index e7a6885e0167..8553231b5d41 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -27,58 +27,6 @@ #include #include -static struct ip_vs_conn * -udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - struct ip_vs_conn *cp; - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - cp = ip_vs_conn_in_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - cp = ip_vs_conn_in_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } - - return cp; -} - - -static struct ip_vs_conn * -udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct 
ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - struct ip_vs_conn *cp; - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - cp = ip_vs_conn_out_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - cp = ip_vs_conn_out_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } - - return cp; -} - - static int udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) @@ -520,8 +468,8 @@ struct ip_vs_protocol ip_vs_protocol_udp = { .init = udp_init, .exit = udp_exit, .conn_schedule = udp_conn_schedule, - .conn_in_get = udp_conn_in_get, - .conn_out_get = udp_conn_out_get, + .conn_in_get = ip_vs_conn_in_get_proto, + .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = udp_snat_handler, .dnat_handler = udp_dnat_handler, .csum_check = udp_csum_check, -- cgit v1.2.3 From f43dc98b3be36551143e3bbaf1bb3067835c24f4 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Mon, 2 Aug 2010 17:20:54 +0200 Subject: netfilter: nf_nat: make unique_tuple return void The only user of unique_tuple(), get_unique_tuple(), doesn't care about the return value of unique_tuple(), so make unique_tuple() return void (nothing).
Signed-off-by: Changli Gao Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_nat_protocol.h | 8 ++++---- net/ipv4/netfilter/nf_nat_proto_common.c | 8 ++++---- net/ipv4/netfilter/nf_nat_proto_dccp.c | 6 +++--- net/ipv4/netfilter/nf_nat_proto_gre.c | 8 ++++---- net/ipv4/netfilter/nf_nat_proto_icmp.c | 6 +++--- net/ipv4/netfilter/nf_nat_proto_sctp.c | 6 +++--- net/ipv4/netfilter/nf_nat_proto_tcp.c | 5 ++--- net/ipv4/netfilter/nf_nat_proto_udp.c | 5 ++--- net/ipv4/netfilter/nf_nat_proto_udplite.c | 6 +++--- net/ipv4/netfilter/nf_nat_proto_unknown.c | 4 ++-- 10 files changed, 30 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_nat_protocol.h b/include/net/netfilter/nf_nat_protocol.h index c398017ccfa3..df17bac46bf5 100644 --- a/include/net/netfilter/nf_nat_protocol.h +++ b/include/net/netfilter/nf_nat_protocol.h @@ -27,9 +27,9 @@ struct nf_nat_protocol { /* Alter the per-proto part of the tuple (depending on maniptype), to give a unique tuple in the given range if - possible; return false if not. Per-protocol part of tuple - is initialized to the incoming packet. */ - bool (*unique_tuple)(struct nf_conntrack_tuple *tuple, + possible. Per-protocol part of tuple is initialized to the + incoming packet. 
*/ + void (*unique_tuple)(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct); @@ -63,7 +63,7 @@ extern bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, const union nf_conntrack_man_proto *min, const union nf_conntrack_man_proto *max); -extern bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, +extern void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct, diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 6c4f11f51446..2844a0383a11 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c @@ -34,7 +34,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, } EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); -bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, +void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct, @@ -53,7 +53,7 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { /* If it's dst rewrite, can't change port */ if (maniptype == IP_NAT_MANIP_DST) - return false; + return; if (ntohs(*portptr) < 1024) { /* Loose convention: >> 512 is credential passing */ @@ -87,9 +87,9 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, continue; if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) *rover = off; - return true; + return; } - return false; + return; } EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c index 22485ce306d4..570faf2667b2 100644 --- a/net/ipv4/netfilter/nf_nat_proto_dccp.c +++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c @@ -22,14 +22,14 @@ static u_int16_t 
dccp_port_rover; -static bool +static void dccp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &dccp_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, + &dccp_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index d7e89201351e..89933ab6f63e 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -37,7 +37,7 @@ MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); /* generate unique tuple ... */ -static bool +static void gre_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, @@ -50,7 +50,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, /* If there is no master conntrack we are not PPTP, do not change tuples */ if (!ct->master) - return false; + return; if (maniptype == IP_NAT_MANIP_SRC) keyptr = &tuple->src.u.gre.key; @@ -71,11 +71,11 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, for (i = 0; i < range_size; i++, key++) { *keyptr = htons(min + key % range_size); if (!nf_nat_used_tuple(tuple, ct)) - return true; + return; } pr_debug("%p: no NAT mapping\n", ct); - return false; + return; } /* manipulate a GRE packet according to maniptype */ diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index 19a8b0b07d8e..97003fe312e3 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -27,7 +27,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple, ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); } -static bool +static void icmp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, @@ -46,9 +46,9 @@ 
icmp_unique_tuple(struct nf_conntrack_tuple *tuple, tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + (id % range_size)); if (!nf_nat_used_tuple(tuple, ct)) - return true; + return; } - return false; + return; } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c index 3fc598eeeb1a..756331d42661 100644 --- a/net/ipv4/netfilter/nf_nat_proto_sctp.c +++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c @@ -16,14 +16,14 @@ static u_int16_t nf_sctp_port_rover; -static bool +static void sctp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &nf_sctp_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, + &nf_sctp_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index 399e2cfa263b..aa460a595d5d 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c @@ -20,14 +20,13 @@ static u_int16_t tcp_port_rover; -static bool +static void tcp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &tcp_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c index 9e61c79492e4..dfe65c7e2925 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c @@ -19,14 +19,13 @@ static u_int16_t udp_port_rover; -static bool +static void udp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - 
&udp_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c index 440a229bbd87..3cc8c8af39ef 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udplite.c +++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c @@ -18,14 +18,14 @@ static u_int16_t udplite_port_rover; -static bool +static void udplite_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &udplite_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, + &udplite_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c index 14381c62acea..a50f2bc1c732 100644 --- a/net/ipv4/netfilter/nf_nat_proto_unknown.c +++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c @@ -26,14 +26,14 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, return true; } -static bool unknown_unique_tuple(struct nf_conntrack_tuple *tuple, +static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { /* Sorry: we can't help you; if it's not unique, we can't frob anything. */ - return false; + return; } static bool -- cgit v1.2.3