From f1ddcaf3393b7a3871809b97fae90fac841a1f39 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 27 Jan 2007 10:05:15 +1100
Subject: [CRYPTO] api: Remove deprecated interface

This patch removes the old cipher interface and related code.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 127 -------------------------------------------------
 1 file changed, 127 deletions(-)

(limited to 'include')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 4aa9046601da..95936a5e7c12 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -51,15 +51,9 @@
 /*
  * Transform masks and values (for crt_flags).
  */
-#define CRYPTO_TFM_MODE_MASK		0x000000ff
 #define CRYPTO_TFM_REQ_MASK		0x000fff00
 #define CRYPTO_TFM_RES_MASK		0xfff00000
 
-#define CRYPTO_TFM_MODE_ECB		0x00000001
-#define CRYPTO_TFM_MODE_CBC		0x00000002
-#define CRYPTO_TFM_MODE_CFB		0x00000004
-#define CRYPTO_TFM_MODE_CTR		0x00000008
-
 #define CRYPTO_TFM_REQ_WEAK_KEY		0x00000100
 #define CRYPTO_TFM_REQ_MAY_SLEEP	0x00000200
 #define CRYPTO_TFM_RES_WEAK_KEY		0x00100000
@@ -71,12 +65,8 @@
 /*
  * Miscellaneous stuff.
  */
-#define CRYPTO_UNSPEC			0
 #define CRYPTO_MAX_ALG_NAME		64
 
-#define CRYPTO_DIR_ENCRYPT		1
-#define CRYPTO_DIR_DECRYPT		0
-
 /*
  * The macro CRYPTO_MINALIGN_ATTR (along with the void * type in the actual
  * declaration) is used to ensure that the crypto_tfm context structure is
@@ -148,19 +138,6 @@ struct cipher_alg {
 	                  unsigned int keylen);
 	void (*cia_encrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
 	void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-
-	unsigned int (*cia_encrypt_ecb)(const struct cipher_desc *desc,
-					u8 *dst, const u8 *src,
-					unsigned int nbytes) __deprecated;
-	unsigned int (*cia_decrypt_ecb)(const struct cipher_desc *desc,
-					u8 *dst, const u8 *src,
-					unsigned int nbytes) __deprecated;
-	unsigned int (*cia_encrypt_cbc)(const struct cipher_desc *desc,
-					u8 *dst, const u8 *src,
-					unsigned int nbytes) __deprecated;
-	unsigned int (*cia_decrypt_cbc)(const struct cipher_desc *desc,
-					u8 *dst, const u8 *src,
-					unsigned int nbytes) __deprecated;
 };
 
 struct digest_alg {
@@ -243,11 +220,6 @@ int crypto_unregister_alg(struct crypto_alg *alg);
 #ifdef CONFIG_CRYPTO
 int crypto_has_alg(const char *name, u32 type, u32 mask);
 #else
-static inline int crypto_alg_available(const char *name, u32 flags)
-{
-	return 0;
-}
-
 static inline int crypto_has_alg(const char *name, u32 type, u32 mask)
 {
 	return 0;
@@ -395,40 +367,11 @@ static inline u32 crypto_tfm_alg_type(struct crypto_tfm *tfm)
 	return tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK;
 }
 
-static unsigned int crypto_tfm_alg_min_keysize(struct crypto_tfm *tfm)
-	__deprecated;
-static inline unsigned int crypto_tfm_alg_min_keysize(struct crypto_tfm *tfm)
-{
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
-	return tfm->__crt_alg->cra_cipher.cia_min_keysize;
-}
-
-static unsigned int crypto_tfm_alg_max_keysize(struct crypto_tfm *tfm)
-	__deprecated;
-static inline unsigned int crypto_tfm_alg_max_keysize(struct crypto_tfm *tfm)
-{
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
-	return tfm->__crt_alg->cra_cipher.cia_max_keysize;
-}
-
-static unsigned int crypto_tfm_alg_ivsize(struct crypto_tfm *tfm) __deprecated;
-static inline unsigned int crypto_tfm_alg_ivsize(struct crypto_tfm *tfm)
-{
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
-	return tfm->crt_cipher.cit_ivsize;
-}
-
 static inline unsigned int crypto_tfm_alg_blocksize(struct crypto_tfm *tfm)
 {
 	return tfm->__crt_alg->cra_blocksize;
 }
 
-static inline unsigned int crypto_tfm_alg_digestsize(struct crypto_tfm *tfm)
-{
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST);
-	return tfm->__crt_alg->cra_digest.dia_digestsize;
-}
-
 static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm)
 {
 	return tfm->__crt_alg->cra_alignmask;
@@ -809,76 +752,6 @@ static inline int crypto_hash_setkey(struct crypto_hash *hash,
 	return crypto_hash_crt(hash)->setkey(hash, key, keylen);
 }
 
-static int crypto_cipher_encrypt(struct crypto_tfm *tfm,
-				 struct scatterlist *dst,
-				 struct scatterlist *src,
-				 unsigned int nbytes) __deprecated;
-static inline int crypto_cipher_encrypt(struct crypto_tfm *tfm,
-                                        struct scatterlist *dst,
-                                        struct scatterlist *src,
-                                        unsigned int nbytes)
-{
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
-	return tfm->crt_cipher.cit_encrypt(tfm, dst, src, nbytes);
-}                                        
-
-static int crypto_cipher_encrypt_iv(struct crypto_tfm *tfm,
-				    struct scatterlist *dst,
-				    struct scatterlist *src,
-				    unsigned int nbytes, u8 *iv) __deprecated;
-static inline int crypto_cipher_encrypt_iv(struct crypto_tfm *tfm,
-                                           struct scatterlist *dst,
-                                           struct scatterlist *src,
-                                           unsigned int nbytes, u8 *iv)
-{
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
-	return tfm->crt_cipher.cit_encrypt_iv(tfm, dst, src, nbytes, iv);
-}                                        
-
-static int crypto_cipher_decrypt(struct crypto_tfm *tfm,
-				 struct scatterlist *dst,
-				 struct scatterlist *src,
-				 unsigned int nbytes) __deprecated;
-static inline int crypto_cipher_decrypt(struct crypto_tfm *tfm,
-                                        struct scatterlist *dst,
-                                        struct scatterlist *src,
-                                        unsigned int nbytes)
-{
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
-	return tfm->crt_cipher.cit_decrypt(tfm, dst, src, nbytes);
-}
-
-static int crypto_cipher_decrypt_iv(struct crypto_tfm *tfm,
-				    struct scatterlist *dst,
-				    struct scatterlist *src,
-				    unsigned int nbytes, u8 *iv) __deprecated;
-static inline int crypto_cipher_decrypt_iv(struct crypto_tfm *tfm,
-                                           struct scatterlist *dst,
-                                           struct scatterlist *src,
-                                           unsigned int nbytes, u8 *iv)
-{
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
-	return tfm->crt_cipher.cit_decrypt_iv(tfm, dst, src, nbytes, iv);
-}
-
-static void crypto_cipher_set_iv(struct crypto_tfm *tfm,
-				 const u8 *src, unsigned int len) __deprecated;
-static inline void crypto_cipher_set_iv(struct crypto_tfm *tfm,
-                                        const u8 *src, unsigned int len)
-{
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
-	memcpy(tfm->crt_cipher.cit_iv, src, len);
-}
-
-static void crypto_cipher_get_iv(struct crypto_tfm *tfm,
-				 u8 *dst, unsigned int len) __deprecated;
-static inline void crypto_cipher_get_iv(struct crypto_tfm *tfm,
-                                        u8 *dst, unsigned int len)
-{
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
-	memcpy(dst, tfm->crt_cipher.cit_iv, len);
-}
-
 static inline struct crypto_comp *__crypto_comp_cast(struct crypto_tfm *tfm)
 {
 	return (struct crypto_comp *)tfm;
-- 
cgit v1.2.3


From 2e306ee016fd4750289e65c3b1856db569f1f3f2 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 17 Dec 2006 10:05:58 +1100
Subject: [CRYPTO] api: Add type-safe spawns

This patch allows spawns of specific types (e.g., cipher) to be allocated.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algapi.c         | 13 +++++++++++--
 crypto/cbc.c            |  9 +++++----
 crypto/ecb.c            |  9 +++++----
 crypto/hmac.c           |  9 +++++----
 crypto/lrw.c            | 11 ++++++-----
 crypto/pcbc.c           |  9 +++++----
 crypto/xcbc.c           |  9 +++++----
 include/crypto/algapi.h | 20 +++++++++++++++++++-
 8 files changed, 61 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/crypto/algapi.c b/crypto/algapi.c
index 69eb504721a4..0f1abca1b98c 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -377,7 +377,8 @@ void crypto_drop_spawn(struct crypto_spawn *spawn)
 }
 EXPORT_SYMBOL_GPL(crypto_drop_spawn);
 
-struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn)
+struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type,
+				    u32 mask)
 {
 	struct crypto_alg *alg;
 	struct crypto_alg *alg2;
@@ -396,10 +397,18 @@ struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn)
 		return ERR_PTR(-EAGAIN);
 	}
 
+	tfm = ERR_PTR(-EINVAL);
+	if (unlikely((alg->cra_flags ^ type) & mask))
+		goto out_put_alg;
+
 	tfm = __crypto_alloc_tfm(alg);
 	if (IS_ERR(tfm))
-		crypto_mod_put(alg);
+		goto out_put_alg;
+
+	return tfm;
 
+out_put_alg:
+	crypto_mod_put(alg);
 	return tfm;
 }
 EXPORT_SYMBOL_GPL(crypto_spawn_tfm);
diff --git a/crypto/cbc.c b/crypto/cbc.c
index f5542b4db387..136fea7e7000 100644
--- a/crypto/cbc.c
+++ b/crypto/cbc.c
@@ -243,6 +243,7 @@ static int crypto_cbc_init_tfm(struct crypto_tfm *tfm)
 	struct crypto_instance *inst = (void *)tfm->__crt_alg;
 	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
 	struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_cipher *cipher;
 
 	switch (crypto_tfm_alg_blocksize(tfm)) {
 	case 8:
@@ -260,11 +261,11 @@ static int crypto_cbc_init_tfm(struct crypto_tfm *tfm)
 			ctx->xor = xor_quad;
 	}
 
-	tfm = crypto_spawn_tfm(spawn);
-	if (IS_ERR(tfm))
-		return PTR_ERR(tfm);
+	cipher = crypto_spawn_cipher(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
 
-	ctx->child = crypto_cipher_cast(tfm);
+	ctx->child = cipher;
 	return 0;
 }
 
diff --git a/crypto/ecb.c b/crypto/ecb.c
index f239aa9c4017..839a0aed8c22 100644
--- a/crypto/ecb.c
+++ b/crypto/ecb.c
@@ -99,12 +99,13 @@ static int crypto_ecb_init_tfm(struct crypto_tfm *tfm)
 	struct crypto_instance *inst = (void *)tfm->__crt_alg;
 	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
 	struct crypto_ecb_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_cipher *cipher;
 
-	tfm = crypto_spawn_tfm(spawn);
-	if (IS_ERR(tfm))
-		return PTR_ERR(tfm);
+	cipher = crypto_spawn_cipher(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
 
-	ctx->child = crypto_cipher_cast(tfm);
+	ctx->child = cipher;
 	return 0;
 }
 
diff --git a/crypto/hmac.c b/crypto/hmac.c
index b521bcd2b2c6..44187c5ee593 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -172,15 +172,16 @@ static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg,
 
 static int hmac_init_tfm(struct crypto_tfm *tfm)
 {
+	struct crypto_hash *hash;
 	struct crypto_instance *inst = (void *)tfm->__crt_alg;
 	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
 	struct hmac_ctx *ctx = hmac_ctx(__crypto_hash_cast(tfm));
 
-	tfm = crypto_spawn_tfm(spawn);
-	if (IS_ERR(tfm))
-		return PTR_ERR(tfm);
+	hash = crypto_spawn_hash(spawn);
+	if (IS_ERR(hash))
+		return PTR_ERR(hash);
 
-	ctx->child = crypto_hash_cast(tfm);
+	ctx->child = hash;
 	return 0;
 }
 
diff --git a/crypto/lrw.c b/crypto/lrw.c
index 56642586d84f..b4105080ac7a 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -201,21 +201,22 @@ static int decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 
 static int init_tfm(struct crypto_tfm *tfm)
 {
+	struct crypto_cipher *cipher;
 	struct crypto_instance *inst = (void *)tfm->__crt_alg;
 	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
 	struct priv *ctx = crypto_tfm_ctx(tfm);
 	u32 *flags = &tfm->crt_flags;
 
-	tfm = crypto_spawn_tfm(spawn);
-	if (IS_ERR(tfm))
-		return PTR_ERR(tfm);
+	cipher = crypto_spawn_cipher(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
 
-	if (crypto_tfm_alg_blocksize(tfm) != 16) {
+	if (crypto_cipher_blocksize(cipher) != 16) {
 		*flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN;
 		return -EINVAL;
 	}
 
-	ctx->child = crypto_cipher_cast(tfm);
+	ctx->child = cipher;
 	return 0;
 }
 
diff --git a/crypto/pcbc.c b/crypto/pcbc.c
index 0ffb46eb259f..5174d7fdad6e 100644
--- a/crypto/pcbc.c
+++ b/crypto/pcbc.c
@@ -247,6 +247,7 @@ static int crypto_pcbc_init_tfm(struct crypto_tfm *tfm)
 	struct crypto_instance *inst = (void *)tfm->__crt_alg;
 	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
 	struct crypto_pcbc_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_cipher *cipher;
 
 	switch (crypto_tfm_alg_blocksize(tfm)) {
 	case 8:
@@ -264,11 +265,11 @@ static int crypto_pcbc_init_tfm(struct crypto_tfm *tfm)
 			ctx->xor = xor_quad;
 	}
 
-	tfm = crypto_spawn_tfm(spawn);
-	if (IS_ERR(tfm))
-		return PTR_ERR(tfm);
+	cipher = crypto_spawn_cipher(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
 
-	ctx->child = crypto_cipher_cast(tfm);
+	ctx->child = cipher;
 	return 0;
 }
 
diff --git a/crypto/xcbc.c b/crypto/xcbc.c
index 317e9f08fc04..d7b4be0a057d 100644
--- a/crypto/xcbc.c
+++ b/crypto/xcbc.c
@@ -254,14 +254,15 @@ static int crypto_xcbc_digest(struct hash_desc *pdesc,
 
 static int xcbc_init_tfm(struct crypto_tfm *tfm)
 {
+	struct crypto_cipher *cipher;
 	struct crypto_instance *inst = (void *)tfm->__crt_alg;
 	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
 	struct crypto_xcbc_ctx *ctx = crypto_hash_ctx_aligned(__crypto_hash_cast(tfm));
 	int bs = crypto_hash_blocksize(__crypto_hash_cast(tfm));
 
-	tfm = crypto_spawn_tfm(spawn);
-	if (IS_ERR(tfm))
-		return PTR_ERR(tfm);
+	cipher = crypto_spawn_cipher(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
 
 	switch(bs) {
 	case 16:
@@ -271,7 +272,7 @@ static int xcbc_init_tfm(struct crypto_tfm *tfm)
 		return -EINVAL;
 	}
 
-	ctx->child = crypto_cipher_cast(tfm);
+	ctx->child = cipher;
 	ctx->odds = (u8*)(ctx+1);
 	ctx->prev = ctx->odds + bs;
 	ctx->key = ctx->prev + bs;
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 5748aecdb414..99c534d573d2 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -93,7 +93,8 @@ struct crypto_template *crypto_lookup_template(const char *name);
 int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg,
 		      struct crypto_instance *inst);
 void crypto_drop_spawn(struct crypto_spawn *spawn);
-struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn);
+struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type,
+				    u32 mask);
 
 struct crypto_alg *crypto_get_attr_alg(void *param, unsigned int len,
 				       u32 type, u32 mask);
@@ -132,11 +133,28 @@ static inline void *crypto_blkcipher_ctx_aligned(struct crypto_blkcipher *tfm)
 	return crypto_tfm_ctx_aligned(&tfm->base);
 }
 
+static inline struct crypto_cipher *crypto_spawn_cipher(
+	struct crypto_spawn *spawn)
+{
+	u32 type = CRYPTO_ALG_TYPE_CIPHER;
+	u32 mask = CRYPTO_ALG_TYPE_MASK;
+
+	return __crypto_cipher_cast(crypto_spawn_tfm(spawn, type, mask));
+}
+
 static inline struct cipher_alg *crypto_cipher_alg(struct crypto_cipher *tfm)
 {
 	return &crypto_cipher_tfm(tfm)->__crt_alg->cra_cipher;
 }
 
+static inline struct crypto_hash *crypto_spawn_hash(struct crypto_spawn *spawn)
+{
+	u32 type = CRYPTO_ALG_TYPE_HASH;
+	u32 mask = CRYPTO_ALG_TYPE_HASH_MASK;
+
+	return __crypto_hash_cast(crypto_spawn_tfm(spawn, type, mask));
+}
+
 static inline void *crypto_hash_ctx_aligned(struct crypto_hash *tfm)
 {
 	return crypto_tfm_ctx_aligned(&tfm->base);
-- 
cgit v1.2.3


From 27d2a3300755387d2fec231d37944907ff992ce8 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 24 Jan 2007 20:50:26 +1100
Subject: [CRYPTO] api: Allow multiple frontends per backend

This patch adds support for multiple frontend types for each backend
algorithm by passing the type and mask through to the backend type
init function.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algapi.c         |  2 +-
 crypto/api.c            | 25 +++++++++++++------------
 crypto/blkcipher.c      |  5 +++--
 crypto/hash.c           |  5 +++--
 crypto/internal.h       |  3 ++-
 include/crypto/algapi.h |  4 ++--
 6 files changed, 24 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/crypto/algapi.c b/crypto/algapi.c
index 0f1abca1b98c..f7d2185b2c8f 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -401,7 +401,7 @@ struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type,
 	if (unlikely((alg->cra_flags ^ type) & mask))
 		goto out_put_alg;
 
-	tfm = __crypto_alloc_tfm(alg);
+	tfm = __crypto_alloc_tfm(alg, type, mask);
 	if (IS_ERR(tfm))
 		goto out_put_alg;
 
diff --git a/crypto/api.c b/crypto/api.c
index 8b80baec853a..55af8bb0f050 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -212,12 +212,12 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
 }
 EXPORT_SYMBOL_GPL(crypto_alg_mod_lookup);
 
-static int crypto_init_ops(struct crypto_tfm *tfm)
+static int crypto_init_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
 {
-	const struct crypto_type *type = tfm->__crt_alg->cra_type;
+	const struct crypto_type *type_obj = tfm->__crt_alg->cra_type;
 
-	if (type)
-		return type->init(tfm);
+	if (type_obj)
+		return type_obj->init(tfm, type, mask);
 
 	switch (crypto_tfm_alg_type(tfm)) {
 	case CRYPTO_ALG_TYPE_CIPHER:
@@ -266,14 +266,14 @@ static void crypto_exit_ops(struct crypto_tfm *tfm)
 	}
 }
 
-static unsigned int crypto_ctxsize(struct crypto_alg *alg)
+static unsigned int crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask)
 {
-	const struct crypto_type *type = alg->cra_type;
+	const struct crypto_type *type_obj = alg->cra_type;
 	unsigned int len;
 
 	len = alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1);
-	if (type)
-		return len + type->ctxsize(alg);
+	if (type_obj)
+		return len + type_obj->ctxsize(alg, type, mask);
 
 	switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
 	default:
@@ -303,20 +303,21 @@ void crypto_shoot_alg(struct crypto_alg *alg)
 }
 EXPORT_SYMBOL_GPL(crypto_shoot_alg);
 
-struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg)
+struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type,
+				      u32 mask)
 {
 	struct crypto_tfm *tfm = NULL;
 	unsigned int tfm_size;
 	int err = -ENOMEM;
 
-	tfm_size = sizeof(*tfm) + crypto_ctxsize(alg);
+	tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, type, mask);
 	tfm = kzalloc(tfm_size, GFP_KERNEL);
 	if (tfm == NULL)
 		goto out_err;
 
 	tfm->__crt_alg = alg;
 
-	err = crypto_init_ops(tfm);
+	err = crypto_init_ops(tfm, type, mask);
 	if (err)
 		goto out_free_tfm;
 
@@ -372,7 +373,7 @@ struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask)
 			goto err;
 		}
 
-		tfm = __crypto_alloc_tfm(alg);
+		tfm = __crypto_alloc_tfm(alg, type, mask);
 		if (!IS_ERR(tfm))
 			return tfm;
 
diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
index cbb4c4e5c229..b5befe8c3a96 100644
--- a/crypto/blkcipher.c
+++ b/crypto/blkcipher.c
@@ -349,7 +349,8 @@ static int setkey(struct crypto_tfm *tfm, const u8 *key,
 	return cipher->setkey(tfm, key, keylen);
 }
 
-static unsigned int crypto_blkcipher_ctxsize(struct crypto_alg *alg)
+static unsigned int crypto_blkcipher_ctxsize(struct crypto_alg *alg, u32 type,
+					     u32 mask)
 {
 	struct blkcipher_alg *cipher = &alg->cra_blkcipher;
 	unsigned int len = alg->cra_ctxsize;
@@ -362,7 +363,7 @@ static unsigned int crypto_blkcipher_ctxsize(struct crypto_alg *alg)
 	return len;
 }
 
-static int crypto_init_blkcipher_ops(struct crypto_tfm *tfm)
+static int crypto_init_blkcipher_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
 {
 	struct blkcipher_tfm *crt = &tfm->crt_blkcipher;
 	struct blkcipher_alg *alg = &tfm->__crt_alg->cra_blkcipher;
diff --git a/crypto/hash.c b/crypto/hash.c
index cdec23d885fe..12c4514f3478 100644
--- a/crypto/hash.c
+++ b/crypto/hash.c
@@ -16,12 +16,13 @@
 
 #include "internal.h"
 
-static unsigned int crypto_hash_ctxsize(struct crypto_alg *alg)
+static unsigned int crypto_hash_ctxsize(struct crypto_alg *alg, u32 type,
+					u32 mask)
 {
 	return alg->cra_ctxsize;
 }
 
-static int crypto_init_hash_ops(struct crypto_tfm *tfm)
+static int crypto_init_hash_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
 {
 	struct hash_tfm *crt = &tfm->crt_hash;
 	struct hash_alg *alg = &tfm->__crt_alg->cra_hash;
diff --git a/crypto/internal.h b/crypto/internal.h
index 784a7745315f..60acad9788c5 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -120,7 +120,8 @@ void crypto_exit_compress_ops(struct crypto_tfm *tfm);
 void crypto_larval_error(const char *name, u32 type, u32 mask);
 
 void crypto_shoot_alg(struct crypto_alg *alg);
-struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg);
+struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type,
+				      u32 mask);
 
 int crypto_register_instance(struct crypto_template *tmpl,
 			     struct crypto_instance *inst);
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 99c534d573d2..4e05e93ff681 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -18,8 +18,8 @@ struct module;
 struct seq_file;
 
 struct crypto_type {
-	unsigned int (*ctxsize)(struct crypto_alg *alg);
-	int (*init)(struct crypto_tfm *tfm);
+	unsigned int (*ctxsize)(struct crypto_alg *alg, u32 type, u32 mask);
+	int (*init)(struct crypto_tfm *tfm, u32 type, u32 mask);
 	void (*exit)(struct crypto_tfm *tfm);
 	void (*show)(struct seq_file *m, struct crypto_alg *alg);
 };
-- 
cgit v1.2.3


From 78a1fe4f242cbe6b4578e072b75e171b92745afa Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 24 Dec 2006 10:02:00 +1100
Subject: [CRYPTO] api: Use structs for cipher/compression

Now that all cipher/compression users have switched over to the new
allocation scheme, we can get rid of the compatility defines and use
proper structs for them.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 95936a5e7c12..779aa78ee643 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -311,13 +311,18 @@ struct crypto_tfm {
 	void *__crt_ctx[] CRYPTO_MINALIGN_ATTR;
 };
 
-#define crypto_cipher crypto_tfm
-#define crypto_comp crypto_tfm
-
 struct crypto_blkcipher {
 	struct crypto_tfm base;
 };
 
+struct crypto_cipher {
+	struct crypto_tfm base;
+};
+
+struct crypto_comp {
+	struct crypto_tfm base;
+};
+
 struct crypto_hash {
 	struct crypto_tfm base;
 };
@@ -576,7 +581,7 @@ static inline struct crypto_cipher *crypto_alloc_cipher(const char *alg_name,
 
 static inline struct crypto_tfm *crypto_cipher_tfm(struct crypto_cipher *tfm)
 {
-	return tfm;
+	return &tfm->base;
 }
 
 static inline void crypto_free_cipher(struct crypto_cipher *tfm)
@@ -776,7 +781,7 @@ static inline struct crypto_comp *crypto_alloc_comp(const char *alg_name,
 
 static inline struct crypto_tfm *crypto_comp_tfm(struct crypto_comp *tfm)
 {
-	return tfm;
+	return &tfm->base;
 }
 
 static inline void crypto_free_comp(struct crypto_comp *tfm)
@@ -807,14 +812,16 @@ static inline int crypto_comp_compress(struct crypto_comp *tfm,
                                        const u8 *src, unsigned int slen,
                                        u8 *dst, unsigned int *dlen)
 {
-	return crypto_comp_crt(tfm)->cot_compress(tfm, src, slen, dst, dlen);
+	return crypto_comp_crt(tfm)->cot_compress(crypto_comp_tfm(tfm),
+						  src, slen, dst, dlen);
 }
 
 static inline int crypto_comp_decompress(struct crypto_comp *tfm,
                                          const u8 *src, unsigned int slen,
                                          u8 *dst, unsigned int *dlen)
 {
-	return crypto_comp_crt(tfm)->cot_decompress(tfm, src, slen, dst, dlen);
+	return crypto_comp_crt(tfm)->cot_decompress(crypto_comp_tfm(tfm),
+						    src, slen, dst, dlen);
 }
 
 #endif	/* _LINUX_CRYPTO_H */
-- 
cgit v1.2.3


From 390fbd1bfaa7b561af8e4f385067c55bdf4100ba Mon Sep 17 00:00:00 2001
From: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Date: Sun, 22 Oct 2006 15:02:48 +1000
Subject: [IPSEC]: added the definition of Camellia cipher

This patch adds the definitions used by pfkeyv2 interface for Camellia
cipher algorithm.

Signed-off-by: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/pfkeyv2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h
index 265bafab6494..52ed4a56f672 100644
--- a/include/linux/pfkeyv2.h
+++ b/include/linux/pfkeyv2.h
@@ -297,6 +297,7 @@ struct sadb_x_sec_ctx {
 #define SADB_X_EALG_BLOWFISHCBC		7
 #define SADB_EALG_NULL			11
 #define SADB_X_EALG_AESCBC		12
+#define SADB_X_EALG_CAMELLIACBC		22
 #define SADB_EALG_MAX                   253 /* last EALG */
 /* private allocations should use 249-255 (RFC2407) */
 #define SADB_X_EALG_SERPENTCBC  252     /* draft-ietf-ipsec-ciph-aes-cbc-00 */
-- 
cgit v1.2.3


From ba7808eac17360dda459f82222859b0e3879854b Mon Sep 17 00:00:00 2001
From: Frederik Deweerdt <frederik.deweerdt@gmail.com>
Date: Sun, 4 Feb 2007 20:15:27 -0800
Subject: [TCP]: remove tcp header from tcp_v4_check (take #2)

The tcphdr struct passed to tcp_v4_check is not used, the following
patch removes it from the parameter list.

This adds the netfilter modifications missing in the patch I sent
for rc3-mm1.

Signed-off-by: Frederik Deweerdt <frederik.deweerdt@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h                  |  5 ++---
 net/ipv4/netfilter/ip_nat_helper.c |  2 +-
 net/ipv4/netfilter/ipt_REJECT.c    |  2 +-
 net/ipv4/netfilter/nf_nat_helper.c |  2 +-
 net/ipv4/tcp_ipv4.c                | 12 ++++++------
 5 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index cd8fa0c858ae..5c472f255b77 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -802,9 +802,8 @@ static inline void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
 /*
  * Calculate(/check) TCP checksum
  */
-static inline __sum16 tcp_v4_check(struct tcphdr *th, int len,
-			       __be32 saddr, __be32 daddr,
-			       __wsum base)
+static inline __sum16 tcp_v4_check(int len, __be32 saddr,
+				   __be32 daddr, __wsum base)
 {
 	return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
 }
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index ee80feb4b2a9..2e5c4bc52a60 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -183,7 +183,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
 	datalen = (*pskb)->len - iph->ihl*4;
 	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
 		tcph->check = 0;
-		tcph->check = tcp_v4_check(tcph, datalen,
+		tcph->check = tcp_v4_check(datalen,
 					   iph->saddr, iph->daddr,
 					   csum_partial((char *)tcph,
 					   		datalen, 0));
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index f0319e5ee437..c9cad23844d7 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -116,7 +116,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 
 	/* Adjust TCP checksum */
 	tcph->check = 0;
-	tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr),
+	tcph->check = tcp_v4_check(sizeof(struct tcphdr),
 				   nskb->nh.iph->saddr,
 				   nskb->nh.iph->daddr,
 				   csum_partial((char *)tcph,
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 98fbfc84d183..dc6738bdfab7 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -176,7 +176,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
 	datalen = (*pskb)->len - iph->ihl*4;
 	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
 		tcph->check = 0;
-		tcph->check = tcp_v4_check(tcph, datalen,
+		tcph->check = tcp_v4_check(datalen,
 					   iph->saddr, iph->daddr,
 					   csum_partial((char *)tcph,
 					   		datalen, 0));
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 12de90a5047c..f061ec50d8bc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -502,11 +502,11 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 	struct tcphdr *th = skb->h.th;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		th->check = ~tcp_v4_check(th, len,
-					  inet->saddr, inet->daddr, 0);
+		th->check = ~tcp_v4_check(len, inet->saddr,
+					  inet->daddr, 0);
 		skb->csum_offset = offsetof(struct tcphdr, check);
 	} else {
-		th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
+		th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
 					 csum_partial((char *)th,
 						      th->doff << 2,
 						      skb->csum));
@@ -525,7 +525,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
 	th = skb->h.th;
 
 	th->check = 0;
-	th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
+	th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
 	skb->csum_offset = offsetof(struct tcphdr, check);
 	skb->ip_summed = CHECKSUM_PARTIAL;
 	return 0;
@@ -747,7 +747,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
 	if (skb) {
 		struct tcphdr *th = skb->h.th;
 
-		th->check = tcp_v4_check(th, skb->len,
+		th->check = tcp_v4_check(skb->len,
 					 ireq->loc_addr,
 					 ireq->rmt_addr,
 					 csum_partial((char *)th, skb->len,
@@ -1514,7 +1514,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
 {
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
+		if (!tcp_v4_check(skb->len, skb->nh.iph->saddr,
 				  skb->nh.iph->daddr, skb->csum)) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 			return 0;
-- 
cgit v1.2.3


From 8eb9086f21c73b38b5ca27558db4c91d62d0e70b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 8 Feb 2007 02:09:21 -0800
Subject: [IPV4/IPV6]: Always wait for IPSEC SA resolution in socket contexts.

Do this even for non-blocking sockets.  This avoids the silly -EAGAIN
that applications can see now, even for non-blocking sockets in some
cases (f.e. connect()).

With help from Venkat Tekkirala.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h | 5 +++--
 net/dccp/ipv4.c     | 2 +-
 net/dccp/ipv6.c     | 2 +-
 net/ipv4/af_inet.c  | 2 +-
 net/ipv4/datagram.c | 2 +-
 net/ipv4/raw.c      | 2 +-
 net/ipv4/tcp_ipv4.c | 2 +-
 net/ipv4/udp.c      | 2 +-
 net/ipv6/datagram.c | 2 +-
 net/ipv6/raw.c      | 2 +-
 net/ipv6/tcp_ipv6.c | 2 +-
 net/ipv6/udp.c      | 2 +-
 12 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/route.h b/include/net/route.h
index 486e37aff06c..1440bdb5a27d 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -146,7 +146,8 @@ static inline char rt_tos2priority(u8 tos)
 
 static inline int ip_route_connect(struct rtable **rp, __be32 dst,
 				   __be32 src, u32 tos, int oif, u8 protocol,
-				   __be16 sport, __be16 dport, struct sock *sk)
+				   __be16 sport, __be16 dport, struct sock *sk,
+				   int flags)
 {
 	struct flowi fl = { .oif = oif,
 			    .nl_u = { .ip4_u = { .daddr = dst,
@@ -168,7 +169,7 @@ static inline int ip_route_connect(struct rtable **rp, __be32 dst,
 		*rp = NULL;
 	}
 	security_sk_classify_flow(sk, &fl);
-	return ip_route_output_flow(rp, &fl, sk, 0);
+	return ip_route_output_flow(rp, &fl, sk, flags);
 }
 
 static inline int ip_route_newports(struct rtable **rp, u8 protocol,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 90c74b4adb73..fa2c982d4309 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -72,7 +72,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
 			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 			       IPPROTO_DCCP,
-			       inet->sport, usin->sin_port, sk);
+			       inet->sport, usin->sin_port, sk, 1);
 	if (tmp < 0)
 		return tmp;
 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6b91a9dd0411..79140b3e592e 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1041,7 +1041,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	err = xfrm_lookup(&dst, &fl, sk, 0);
+	err = xfrm_lookup(&dst, &fl, sk, 1);
 	if (err < 0)
 		goto failure;
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 864009643675..5750a2b2a0d6 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1007,7 +1007,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 			       RT_CONN_FLAGS(sk),
 			       sk->sk_bound_dev_if,
 			       sk->sk_protocol,
-			       inet->sport, inet->dport, sk);
+			       inet->sport, inet->dport, sk, 0);
 	if (err)
 		return err;
 
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 7b068a891953..0072d79f0c2a 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -49,7 +49,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr,
 			       RT_CONN_FLAGS(sk), oif,
 			       sk->sk_protocol,
-			       inet->sport, usin->sin_port, sk);
+			       inet->sport, usin->sin_port, sk, 1);
 	if (err)
 		return err;
 	if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) {
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index a6c63bbd9ddb..fed6a1e7af9e 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -489,7 +489,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		}
 
 		security_sk_classify_flow(sk, &fl);
-		err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
+		err = ip_route_output_flow(&rt, &fl, sk, 1);
 	}
 	if (err)
 		goto done;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f061ec50d8bc..383e4b52dbde 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -191,7 +191,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
 			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 			       IPPROTO_TCP,
-			       inet->sport, usin->sin_port, sk);
+			       inet->sport, usin->sin_port, sk, 1);
 	if (tmp < 0)
 		return tmp;
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index cfff930f2baf..8b54c68a0d12 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -629,7 +629,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 					       { .sport = inet->sport,
 						 .dport = dport } } };
 		security_sk_classify_flow(sk, &fl);
-		err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
+		err = ip_route_output_flow(&rt, &fl, sk, 1);
 		if (err)
 			goto out;
 
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 5c94fea90e97..ecde30140f4a 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -178,7 +178,7 @@ ipv4_connected:
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+	if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0)
 		goto out;
 
 	/* source address lookup done in ip6_dst_lookup */
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4ae1b19ada5d..f2e883c9368a 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -815,7 +815,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+	if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0)
 		goto out;
 
 	if (hlimit < 0) {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c25e930c2c69..dcb7b00a737d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -265,7 +265,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+	if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0)
 		goto failure;
 
 	if (saddr == NULL) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f52a5c3cc0a3..15e5195549cb 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -736,7 +736,7 @@ do_udp_sendmsg:
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+	if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0)
 		goto out;
 
 	if (hlimit < 0) {
-- 
cgit v1.2.3


From 8dc4194474159660d7f37c495e3fc3f10d0db8cc Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 4 Feb 2007 23:31:32 -0800
Subject: [PACKET]: Add optional checksum computation for recvmsg

This patch is needed to make ISC's DHCP server (and probably other
DHCP servers/clients using AF_PACKET) to be able to serve another
client on the same Xen host.

The problem is that packets between different domains on the same
Xen host only have partial checksums.  Unfortunately this piece of
information is not passed along in AF_PACKET unless you're using
the mmap interface.  Since dhcpd doesn't support packet-mmap, UDP
packets from the same host come out with apparently bogus checksums.

This patch adds a mechanism for AF_PACKET recvmsg(2) to return the
status along with the packet.  It does so by adding a new cmsg that
contains this information along with some other relevant data such
as the original packet length.

I didn't include the time stamp information since there is already
a cmsg for that.

This patch also changes the mmap code to set the CSUMNOTREADY flag
on all packets instead of just outoing packets on cooked sockets.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_packet.h | 10 +++++++++
 net/packet/af_packet.c    | 57 +++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 58 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index 99393ef3af39..f3de05c30678 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -41,6 +41,7 @@ struct sockaddr_ll
 #define PACKET_RX_RING			5
 #define PACKET_STATISTICS		6
 #define PACKET_COPY_THRESH		7
+#define PACKET_AUXDATA			8
 
 struct tpacket_stats
 {
@@ -48,6 +49,15 @@ struct tpacket_stats
 	unsigned int	tp_drops;
 };
 
+struct tpacket_auxdata
+{
+	__u32		tp_status;
+	__u32		tp_len;
+	__u32		tp_snaplen;
+	__u16		tp_mac;
+	__u16		tp_net;
+};
+
 struct tpacket_hdr
 {
 	unsigned long	tp_status;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 6dc01bdeb76b..8973ea78831e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -200,7 +200,8 @@ struct packet_sock {
 #endif
 	struct packet_type	prot_hook;
 	spinlock_t		bind_lock;
-	char			running;	/* prot_hook is attached*/
+	unsigned int		running:1,	/* prot_hook is attached*/
+				auxdata:1;
 	int			ifindex;	/* bound device		*/
 	__be16			num;
 #ifdef CONFIG_PACKET_MULTICAST
@@ -214,6 +215,8 @@ struct packet_sock {
 #endif
 };
 
+#define PACKET_SKB_CB(__skb)	((struct tpacket_auxdata *)((__skb)->cb))
+
 #ifdef CONFIG_PACKET_MMAP
 
 static inline char *packet_lookup_frame(struct packet_sock *po, unsigned int position)
@@ -462,6 +465,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 	u8 * skb_head = skb->data;
 	int skb_len = skb->len;
 	unsigned int snaplen, res;
+	struct tpacket_auxdata *aux;
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		goto drop;
@@ -523,6 +527,15 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 	if (dev->hard_header_parse)
 		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
 
+	aux = PACKET_SKB_CB(skb);
+	aux->tp_status = TP_STATUS_USER;
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		aux->tp_status |= TP_STATUS_CSUMNOTREADY;
+	aux->tp_len = skb->len;
+	aux->tp_snaplen = snaplen;
+	aux->tp_mac = 0;
+	aux->tp_net = skb->nh.raw - skb->data;
+
 	if (pskb_trim(skb, snaplen))
 		goto drop_n_acct;
 
@@ -582,11 +595,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 		else if (skb->pkt_type == PACKET_OUTGOING) {
 			/* Special case: outgoing packets have ll header at head */
 			skb_pull(skb, skb->nh.raw - skb->data);
-			if (skb->ip_summed == CHECKSUM_PARTIAL)
-				status |= TP_STATUS_CSUMNOTREADY;
 		}
 	}
 
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		status |= TP_STATUS_CSUMNOTREADY;
+
 	snaplen = skb->len;
 
 	res = run_filter(skb, sk, snaplen);
@@ -1119,6 +1133,11 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (msg->msg_name)
 		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
 
+	if (pkt_sk(sk)->auxdata) {
+		struct tpacket_auxdata *aux = PACKET_SKB_CB(skb);
+		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(*aux), aux);
+	}
+
 	/*
 	 *	Free or return the buffer as appropriate. Again this
 	 *	hides all the races and re-entrancy issues from us.
@@ -1317,6 +1336,7 @@ static int
 packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
 {
 	struct sock *sk = sock->sk;
+	struct packet_sock *po = pkt_sk(sk);
 	int ret;
 
 	if (level != SOL_PACKET)
@@ -1369,6 +1389,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 		return 0;
 	}
 #endif
+	case PACKET_AUXDATA:
+	{
+		int val;
+
+		if (optlen < sizeof(val))
+			return -EINVAL;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+
+		po->auxdata = !!val;
+		return 0;
+	}
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -1378,8 +1410,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 			     char __user *optval, int __user *optlen)
 {
 	int len;
+	int val;
 	struct sock *sk = sock->sk;
 	struct packet_sock *po = pkt_sk(sk);
+	void *data;
+	struct tpacket_stats st;
 
 	if (level != SOL_PACKET)
 		return -ENOPROTOOPT;
@@ -1392,9 +1427,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 		
 	switch(optname)	{
 	case PACKET_STATISTICS:
-	{
-		struct tpacket_stats st;
-
 		if (len > sizeof(struct tpacket_stats))
 			len = sizeof(struct tpacket_stats);
 		spin_lock_bh(&sk->sk_receive_queue.lock);
@@ -1403,16 +1435,23 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 		spin_unlock_bh(&sk->sk_receive_queue.lock);
 		st.tp_packets += st.tp_drops;
 
-		if (copy_to_user(optval, &st, len))
-			return -EFAULT;
+		data = &st;
+		break;
+	case PACKET_AUXDATA:
+		if (len > sizeof(int))
+			len = sizeof(int);
+		val = po->auxdata;
+
+		data = &val;
 		break;
-	}
 	default:
 		return -ENOPROTOOPT;
 	}
 
 	if (put_user(len, optlen))
 		return -EFAULT;
+	if (copy_to_user(optval, data, len))
+		return -EFAULT;
 	return 0;
 }
 
-- 
cgit v1.2.3


From 6f74651ae626ec672028587bc700538076dfbefb Mon Sep 17 00:00:00 2001
From: Baruch Even <baruch@ev-en.org>
Date: Sun, 4 Feb 2007 23:36:42 -0800
Subject: [TCP]: Seperate DSACK from SACK fast path

Move DSACK code outside the SACK fast-path checking code. If the DSACK
determined that the information was too old we stayed with a partial cache
copied. Most likely this matters very little since the next packet will not be
DSACK and we will find it in the cache. but it's still not good form and there
is little reason to couple the two checks.

Since the SACK receive cache doesn't need the data to be in host order we also
remove the ntohl in the checking loop.

Signed-off-by: Baruch Even <baruch@ev-en.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h  |  2 +-
 net/ipv4/tcp_input.c | 66 ++++++++++++++++++++++++----------------------------
 2 files changed, 32 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 3cc70d1a3504..29d3089038ab 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -316,7 +316,7 @@ struct tcp_sock {
 	struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
 	struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
 
-	struct tcp_sack_block recv_sack_cache[4];
+	struct tcp_sack_block_wire recv_sack_cache[4];
 
 	/* from STCP, retrans queue hinting */
 	struct sk_buff* lost_skb_hint;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7670ef968dce..870f53afd363 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -951,16 +951,43 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 		tp->fackets_out = 0;
 	prior_fackets = tp->fackets_out;
 
+	/* Check for D-SACK. */
+	if (before(ntohl(sp[0].start_seq), TCP_SKB_CB(ack_skb)->ack_seq)) {
+		dup_sack = 1;
+		tp->rx_opt.sack_ok |= 4;
+		NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV);
+	} else if (num_sacks > 1 &&
+			!after(ntohl(sp[0].end_seq), ntohl(sp[1].end_seq)) &&
+			!before(ntohl(sp[0].start_seq), ntohl(sp[1].start_seq))) {
+		dup_sack = 1;
+		tp->rx_opt.sack_ok |= 4;
+		NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV);
+	}
+
+	/* D-SACK for already forgotten data...
+	 * Do dumb counting. */
+	if (dup_sack &&
+			!after(ntohl(sp[0].end_seq), prior_snd_una) &&
+			after(ntohl(sp[0].end_seq), tp->undo_marker))
+		tp->undo_retrans--;
+
+	/* Eliminate too old ACKs, but take into
+	 * account more or less fresh ones, they can
+	 * contain valid SACK info.
+	 */
+	if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
+		return 0;
+
 	/* SACK fastpath:
 	 * if the only SACK change is the increase of the end_seq of
 	 * the first block then only apply that SACK block
 	 * and use retrans queue hinting otherwise slowpath */
 	flag = 1;
-	for (i = 0; i< num_sacks; i++) {
-		__u32 start_seq = ntohl(sp[i].start_seq);
-		__u32 end_seq =	 ntohl(sp[i].end_seq);
+	for (i = 0; i < num_sacks; i++) {
+		__be32 start_seq = sp[i].start_seq;
+		__be32 end_seq = sp[i].end_seq;
 
-		if (i == 0){
+		if (i == 0) {
 			if (tp->recv_sack_cache[i].start_seq != start_seq)
 				flag = 0;
 		} else {
@@ -970,37 +997,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 		}
 		tp->recv_sack_cache[i].start_seq = start_seq;
 		tp->recv_sack_cache[i].end_seq = end_seq;
-
-		/* Check for D-SACK. */
-		if (i == 0) {
-			u32 ack = TCP_SKB_CB(ack_skb)->ack_seq;
-
-			if (before(start_seq, ack)) {
-				dup_sack = 1;
-				tp->rx_opt.sack_ok |= 4;
-				NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV);
-			} else if (num_sacks > 1 &&
-				   !after(end_seq, ntohl(sp[1].end_seq)) &&
-				   !before(start_seq, ntohl(sp[1].start_seq))) {
-				dup_sack = 1;
-				tp->rx_opt.sack_ok |= 4;
-				NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV);
-			}
-
-			/* D-SACK for already forgotten data...
-			 * Do dumb counting. */
-			if (dup_sack &&
-			    !after(end_seq, prior_snd_una) &&
-			    after(end_seq, tp->undo_marker))
-				tp->undo_retrans--;
-
-			/* Eliminate too old ACKs, but take into
-			 * account more or less fresh ones, they can
-			 * contain valid SACK info.
-			 */
-			if (before(ack, prior_snd_una - tp->max_window))
-				return 0;
-		}
 	}
 
 	first_sack_index = 0;
-- 
cgit v1.2.3


From 97353cb4c05c2edf260e9d1b19a29d3cc0060a09 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 5 Feb 2007 18:07:27 -0800
Subject: [NET] net/wanrouter/wanmain.c: cleanups

This patch contains the following cleanups:
- make the following needlessly global functions static:
  - lock_adapter_irq()
  - unlock_adapter_irq()
- #if 0 the following unused global functions:
  - wanrouter_encapsulate()
  - wanrouter_type_trans()

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/wanrouter.h |  8 --------
 net/wanrouter/wanmain.c   | 17 ++++++++---------
 2 files changed, 8 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/wanrouter.h b/include/linux/wanrouter.h
index 2cd05013edfc..3add87465b1f 100644
--- a/include/linux/wanrouter.h
+++ b/include/linux/wanrouter.h
@@ -516,9 +516,6 @@ struct wan_device {
 /* Public functions available for device drivers */
 extern int register_wan_device(struct wan_device *wandev);
 extern int unregister_wan_device(char *name);
-__be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev);
-int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev,
-			  unsigned short type);
 
 /* Proc interface functions. These must not be called by the drivers! */
 extern int wanrouter_proc_init(void);
@@ -527,11 +524,6 @@ extern int wanrouter_proc_add(struct wan_device *wandev);
 extern int wanrouter_proc_delete(struct wan_device *wandev);
 extern int wanrouter_ioctl( struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg);
 
-extern void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags);
-extern void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags);
-
-
-
 /* Public Data */
 /* list of registered devices */
 extern struct wan_device *wanrouter_router_devlist;
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 769cdd62c1bb..4d90a179aeda 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -86,8 +86,8 @@ static int wanrouter_device_del_if(struct wan_device *wandev,
 
 static struct wan_device *wanrouter_find_device(char *name);
 static int wanrouter_delete_interface(struct wan_device *wandev, char *name);
-void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags);
-void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags);
+static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags);
+static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags);
 
 
@@ -104,8 +104,8 @@ struct wan_device* wanrouter_router_devlist; /* list of registered devices */
  *	Organize Unique Identifiers for encapsulation/decapsulation
  */
 
-static unsigned char wanrouter_oui_ether[] = { 0x00, 0x00, 0x00 };
 #if 0
+static unsigned char wanrouter_oui_ether[] = { 0x00, 0x00, 0x00 };
 static unsigned char wanrouter_oui_802_2[] = { 0x00, 0x80, 0xC2 };
 #endif
 
@@ -246,6 +246,8 @@ int unregister_wan_device(char *name)
 	return 0;
 }
 
+#if 0
+
 /*
  *	Encapsulate packet.
  *
@@ -341,6 +343,7 @@ __be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev)
 	return ethertype;
 }
 
+#endif  /*  0  */
 
 /*
  *	WAN device IOCTL.
@@ -799,23 +802,19 @@ static int wanrouter_delete_interface(struct wan_device *wandev, char *name)
 	return 0;
 }
 
-void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
+static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
 {
        	spin_lock_irqsave(lock, *smp_flags);
 }
 
 
-void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
+static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
 {
 	spin_unlock_irqrestore(lock, *smp_flags);
 }
 
 EXPORT_SYMBOL(register_wan_device);
 EXPORT_SYMBOL(unregister_wan_device);
-EXPORT_SYMBOL(wanrouter_encapsulate);
-EXPORT_SYMBOL(wanrouter_type_trans);
-EXPORT_SYMBOL(lock_adapter_irq);
-EXPORT_SYMBOL(unlock_adapter_irq);
 
 MODULE_LICENSE("GPL");
 
-- 
cgit v1.2.3


From cdca72652adf597f7fef821a27595fd0dd5eea19 Mon Sep 17 00:00:00 2001
From: Miika Komu <miika@iki.fi>
Date: Tue, 6 Feb 2007 14:24:56 -0800
Subject: [IPSEC]: exporting xfrm_state_afinfo

This patch exports xfrm_state_afinfo.

Signed-off-by: Miika Komu <miika@iki.fi>
Signed-off-by: Diego Beltrami <Diego.Beltrami@hiit.fi>
Signed-off-by: Kazunori Miyazawa <miyazawa@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     |  3 +++
 net/ipv4/xfrm4_state.c |  1 +
 net/ipv6/xfrm6_state.c |  1 +
 net/xfrm/xfrm_state.c  | 10 +++++-----
 4 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index e4765413cf80..bf91d632901d 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -252,10 +252,13 @@ struct xfrm_state_afinfo {
 						xfrm_address_t *daddr, xfrm_address_t *saddr);
 	int			(*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n);
 	int			(*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n);
+	int			(*output)(struct sk_buff *skb);
 };
 
 extern int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo);
 extern int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo);
+extern struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
+extern void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
 
 extern void xfrm_state_delete_tunnel(struct xfrm_state *x);
 
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 3cc3df0c6ece..93e2c061cdda 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -51,6 +51,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.family			= AF_INET,
 	.init_flags		= xfrm4_init_flags,
 	.init_tempsel		= __xfrm4_init_tempsel,
+	.output			= xfrm4_output,
 };
 
 void __init xfrm4_state_init(void)
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 9ddaa9d41539..60ad5f074e0a 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -171,6 +171,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.init_tempsel		= __xfrm6_init_tempsel,
 	.tmpl_sort		= __xfrm6_tmpl_sort,
 	.state_sort		= __xfrm6_state_sort,
+	.output			= xfrm6_output,
 };
 
 void __init xfrm6_state_init(void)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index fdb08d9f34aa..24f7bfd07af2 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -183,9 +183,6 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 
 int __xfrm_state_delete(struct xfrm_state *x);
 
-static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
-static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
-
 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
 
@@ -1458,7 +1455,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
 }
 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
 
-static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
+struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
 {
 	struct xfrm_state_afinfo *afinfo;
 	if (unlikely(family >= NPROTO))
@@ -1470,11 +1467,14 @@ static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
 	return afinfo;
 }
 
-static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
+void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
 {
 	read_unlock(&xfrm_state_afinfo_lock);
 }
 
+EXPORT_SYMBOL(xfrm_state_get_afinfo);
+EXPORT_SYMBOL(xfrm_state_put_afinfo);
+
 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
 void xfrm_state_delete_tunnel(struct xfrm_state *x)
 {
-- 
cgit v1.2.3


From 22f8cde5bc336fd19603bb8c4572b33d14f14f87 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Wed, 7 Feb 2007 00:09:58 -0800
Subject: [NET]: unregister_netdevice as void

There was no real useful information from the unregister_netdevice() return
code, the only error occurred in a situation that was a driver bug. So
change it to a void function.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 +-
 net/core/dev.c            | 13 +++++--------
 net/ipv4/ip_gre.c         |  3 ++-
 net/ipv4/ipip.c           |  3 ++-
 net/ipv6/ip6_tunnel.c     |  3 ++-
 net/ipv6/sit.c            |  3 ++-
 6 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2e37f5012788..1a528548cd1d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -589,7 +589,7 @@ extern int		dev_open(struct net_device *dev);
 extern int		dev_close(struct net_device *dev);
 extern int		dev_queue_xmit(struct sk_buff *skb);
 extern int		register_netdevice(struct net_device *dev);
-extern int		unregister_netdevice(struct net_device *dev);
+extern void		unregister_netdevice(struct net_device *dev);
 extern void		free_netdev(struct net_device *dev);
 extern void		synchronize_net(void);
 extern int 		register_netdevice_notifier(struct notifier_block *nb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 455d589683e8..1e94a1b9a0f4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3247,7 +3247,7 @@ void synchronize_net(void)
  *	unregister_netdev() instead of this.
  */
 
-int unregister_netdevice(struct net_device *dev)
+void unregister_netdevice(struct net_device *dev)
 {
 	struct net_device *d, **dp;
 
@@ -3258,7 +3258,9 @@ int unregister_netdevice(struct net_device *dev)
 	if (dev->reg_state == NETREG_UNINITIALIZED) {
 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
 				  "was registered\n", dev->name, dev);
-		return -ENODEV;
+
+		WARN_ON(1);
+		return;
 	}
 
 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
@@ -3280,11 +3282,7 @@ int unregister_netdevice(struct net_device *dev)
 			break;
 		}
 	}
-	if (!d) {
-		printk(KERN_ERR "unregister net_device: '%s' not found\n",
-		       dev->name);
-		return -ENODEV;
-	}
+	BUG_ON(!d);
 
 	dev->reg_state = NETREG_UNREGISTERING;
 
@@ -3316,7 +3314,6 @@ int unregister_netdevice(struct net_device *dev)
 	synchronize_net();
 
 	dev_put(dev);
-	return 0;
 }
 
 /**
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 476cb6084c75..51c83500790f 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1008,7 +1008,8 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 				goto done;
 			dev = t->dev;
 		}
-		err = unregister_netdevice(dev);
+		unregister_netdevice(dev);
+		err = 0;
 		break;
 
 	default:
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 9d719d664e5b..da8bbd20c7ed 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -754,7 +754,8 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 				goto done;
 			dev = t->dev;
 		}
-		err = unregister_netdevice(dev);
+		unregister_netdevice(dev);
+		err = 0;
 		break;
 
 	default:
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 8d918348f5bb..2b9e3bb7da65 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -999,7 +999,8 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 				break;
 			dev = t->dev;
 		}
-		err = unregister_netdevice(dev);
+		err = 0;
+		unregister_netdevice(dev);
 		break;
 	default:
 		err = -EINVAL;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 77b7b0911438..47cfeadac6dd 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -686,7 +686,8 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 				goto done;
 			dev = t->dev;
 		}
-		err = unregister_netdevice(dev);
+		unregister_netdevice(dev);
+		err = 0;
 		break;
 
 	default:
-- 
cgit v1.2.3


From 6fecd1985116fb08bdee3b9db6719e159fe5e43d Mon Sep 17 00:00:00 2001
From: Michal Schmidt <mschmidt@redhat.com>
Date: Wed, 7 Feb 2007 15:05:12 -0800
Subject: [NETFILTER]: Add SANE connection tracking helper

This is nf_conntrack_sane, a netfilter connection tracking helper module
for the SANE protocol used by the 'saned' daemon to make scanners available
via network. The SANE protocol uses separate control & data connections,
similar to passive FTP. The helper module is needed to recognize the data
connection as RELATED to the control one.

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nf_conntrack_sane.h |  21 +++
 include/net/netfilter/nf_conntrack.h        |   2 +
 net/netfilter/Kconfig                       |  13 ++
 net/netfilter/Makefile                      |   1 +
 net/netfilter/nf_conntrack_sane.c           | 242 ++++++++++++++++++++++++++++
 5 files changed, 279 insertions(+)
 create mode 100644 include/linux/netfilter/nf_conntrack_sane.h
 create mode 100644 net/netfilter/nf_conntrack_sane.c

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_sane.h b/include/linux/netfilter/nf_conntrack_sane.h
new file mode 100644
index 000000000000..4767d6e23e97
--- /dev/null
+++ b/include/linux/netfilter/nf_conntrack_sane.h
@@ -0,0 +1,21 @@
+#ifndef _NF_CONNTRACK_SANE_H
+#define _NF_CONNTRACK_SANE_H
+/* SANE tracking. */
+
+#ifdef __KERNEL__
+
+#define SANE_PORT	6566
+
+enum sane_state {
+	SANE_STATE_NORMAL,
+	SANE_STATE_START_REQUESTED,
+};
+
+/* This structure exists only once per master */
+struct nf_ct_sane_master {
+	enum sane_state state;
+};
+
+#endif /* __KERNEL__ */
+
+#endif /* _NF_CONNTRACK_SANE_H */
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index bd01b4633ee2..68ec27490c20 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -45,6 +45,7 @@ union nf_conntrack_expect_proto {
 #include <linux/netfilter/nf_conntrack_ftp.h>
 #include <linux/netfilter/nf_conntrack_pptp.h>
 #include <linux/netfilter/nf_conntrack_h323.h>
+#include <linux/netfilter/nf_conntrack_sane.h>
 
 /* per conntrack: application helper private data */
 union nf_conntrack_help {
@@ -52,6 +53,7 @@ union nf_conntrack_help {
 	struct nf_ct_ftp_master ct_ftp_info;
 	struct nf_ct_pptp_master ct_pptp_info;
 	struct nf_ct_h323_master ct_h323_info;
+	struct nf_ct_sane_master ct_sane_info;
 };
 
 #include <linux/types.h>
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 80107d4909c5..614c92c17835 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -235,6 +235,19 @@ config NF_CONNTRACK_PPTP
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_CONNTRACK_SANE
+	tristate "SANE protocol support (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && NF_CONNTRACK
+	help
+	  SANE is a protocol for remote access to scanners as implemented
+	  by the 'saned' daemon. Like FTP, it uses separate control and
+	  data connections.
+
+	  With this module you can support SANE on a connection tracking
+	  firewall.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NF_CONNTRACK_SIP
 	tristate "SIP protocol support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL && NF_CONNTRACK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 5dc5574f7e99..5054b0ff8096 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o
 obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o
 obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o
 obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o
+obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
 obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
 obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
 
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
new file mode 100644
index 000000000000..eb2d1dc46d45
--- /dev/null
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -0,0 +1,242 @@
+/* SANE connection tracking helper
+ * (SANE = Scanner Access Now Easy)
+ * For documentation about the SANE network protocol see
+ * http://www.sane-project.org/html/doc015.html
+ */
+
+/* Copyright (C) 2007 Red Hat, Inc.
+ * Author: Michal Schmidt <mschmidt@redhat.com>
+ * Based on the FTP conntrack helper (net/netfilter/nf_conntrack_ftp.c):
+ *  (C) 1999-2001 Paul `Rusty' Russell
+ *  (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *  (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ *  (C) 2003 Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netfilter.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_sane.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michal Schmidt <mschmidt@redhat.com>");
+MODULE_DESCRIPTION("SANE connection tracking helper");
+
+static char *sane_buffer;
+
+static DEFINE_SPINLOCK(nf_sane_lock);
+
+#define MAX_PORTS 8
+static u_int16_t ports[MAX_PORTS];
+static unsigned int ports_c;
+module_param_array(ports, ushort, &ports_c, 0400);
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+struct sane_request {
+	__be32 RPC_code;
+#define SANE_NET_START      7   /* RPC code */
+
+	__be32 handle;
+};
+
+struct sane_reply_net_start {
+	__be32 status;
+#define SANE_STATUS_SUCCESS 0
+
+	__be16 zero;
+	__be16 port;
+	/* other fields aren't interesting for conntrack */
+};
+
+static int help(struct sk_buff **pskb,
+		unsigned int protoff,
+		struct nf_conn *ct,
+		enum ip_conntrack_info ctinfo)
+{
+	unsigned int dataoff, datalen;
+	struct tcphdr _tcph, *th;
+	char *sb_ptr;
+	int ret = NF_ACCEPT;
+	int dir = CTINFO2DIR(ctinfo);
+	struct nf_ct_sane_master *ct_sane_info;
+	struct nf_conntrack_expect *exp;
+	struct nf_conntrack_tuple *tuple;
+	struct sane_request *req;
+	struct sane_reply_net_start *reply;
+	int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+
+	ct_sane_info = &nfct_help(ct)->help.ct_sane_info;
+	/* Until there's been traffic both ways, don't look in packets. */
+	if (ctinfo != IP_CT_ESTABLISHED &&
+	    ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY)
+		return NF_ACCEPT;
+
+	/* Not a full tcp header? */
+	th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+	if (th == NULL)
+		return NF_ACCEPT;
+
+	/* No data? */
+	dataoff = protoff + th->doff * 4;
+	if (dataoff >= (*pskb)->len)
+		return NF_ACCEPT;
+
+	datalen = (*pskb)->len - dataoff;
+
+	spin_lock_bh(&nf_sane_lock);
+	sb_ptr = skb_header_pointer(*pskb, dataoff, datalen, sane_buffer);
+	BUG_ON(sb_ptr == NULL);
+
+	if (dir == IP_CT_DIR_ORIGINAL) {
+		if (datalen != sizeof(struct sane_request))
+			goto out;
+
+		req = (struct sane_request *)sb_ptr;
+		if (req->RPC_code != htonl(SANE_NET_START)) {
+			/* Not an interesting command */
+			ct_sane_info->state = SANE_STATE_NORMAL;
+			goto out;
+		}
+
+		/* We're interested in the next reply */
+		ct_sane_info->state = SANE_STATE_START_REQUESTED;
+		goto out;
+	}
+
+	/* Is it a reply to an uninteresting command? */
+	if (ct_sane_info->state != SANE_STATE_START_REQUESTED)
+		goto out;
+
+	/* It's a reply to SANE_NET_START. */
+	ct_sane_info->state = SANE_STATE_NORMAL;
+
+	if (datalen < sizeof(struct sane_reply_net_start)) {
+		DEBUGP("nf_ct_sane: NET_START reply too short\n");
+		goto out;
+	}
+
+	reply = (struct sane_reply_net_start *)sb_ptr;
+	if (reply->status != htonl(SANE_STATUS_SUCCESS)) {
+		/* saned refused the command */
+		DEBUGP("nf_ct_sane: unsuccessful SANE_STATUS = %u\n",
+			ntohl(reply->status));
+		goto out;
+	}
+
+	/* Invalid saned reply? Ignore it. */
+	if (reply->zero != 0)
+		goto out;
+
+	exp = nf_conntrack_expect_alloc(ct);
+	if (exp == NULL) {
+		ret = NF_DROP;
+		goto out;
+	}
+
+	tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+	nf_conntrack_expect_init(exp, family,
+				 &tuple->src.u3, &tuple->dst.u3,
+				 IPPROTO_TCP,
+				 NULL, &reply->port);
+
+	DEBUGP("nf_ct_sane: expect: ");
+	NF_CT_DUMP_TUPLE(&exp->tuple);
+	NF_CT_DUMP_TUPLE(&exp->mask);
+
+	/* Can't expect this?  Best to drop packet now. */
+	if (nf_conntrack_expect_related(exp) != 0)
+		ret = NF_DROP;
+
+	nf_conntrack_expect_put(exp);
+
+out:
+	spin_unlock_bh(&nf_sane_lock);
+	return ret;
+}
+
+static struct nf_conntrack_helper sane[MAX_PORTS][2];
+static char sane_names[MAX_PORTS][2][sizeof("sane-65535")];
+
+/* don't make this __exit, since it's called from __init ! */
+static void nf_conntrack_sane_fini(void)
+{
+	int i, j;
+
+	for (i = 0; i < ports_c; i++) {
+		for (j = 0; j < 2; j++) {
+			DEBUGP("nf_ct_sane: unregistering helper for pf: %d "
+			       "port: %d\n",
+				sane[i][j].tuple.src.l3num, ports[i]);
+			nf_conntrack_helper_unregister(&sane[i][j]);
+		}
+	}
+
+	kfree(sane_buffer);
+}
+
+static int __init nf_conntrack_sane_init(void)
+{
+	int i, j = -1, ret = 0;
+	char *tmpname;
+
+	sane_buffer = kmalloc(65536, GFP_KERNEL);
+	if (!sane_buffer)
+		return -ENOMEM;
+
+	if (ports_c == 0)
+		ports[ports_c++] = SANE_PORT;
+
+	/* FIXME should be configurable whether IPv4 and IPv6 connections
+		 are tracked or not - YK */
+	for (i = 0; i < ports_c; i++) {
+		sane[i][0].tuple.src.l3num = PF_INET;
+		sane[i][1].tuple.src.l3num = PF_INET6;
+		for (j = 0; j < 2; j++) {
+			sane[i][j].tuple.src.u.tcp.port = htons(ports[i]);
+			sane[i][j].tuple.dst.protonum = IPPROTO_TCP;
+			sane[i][j].mask.src.u.tcp.port = 0xFFFF;
+			sane[i][j].mask.dst.protonum = 0xFF;
+			sane[i][j].max_expected = 1;
+			sane[i][j].timeout = 5 * 60;	/* 5 Minutes */
+			sane[i][j].me = THIS_MODULE;
+			sane[i][j].help = help;
+			tmpname = &sane_names[i][j][0];
+			if (ports[i] == SANE_PORT)
+				sprintf(tmpname, "sane");
+			else
+				sprintf(tmpname, "sane-%d", ports[i]);
+			sane[i][j].name = tmpname;
+
+			DEBUGP("nf_ct_sane: registering helper for pf: %d "
+			       "port: %d\n",
+				sane[i][j].tuple.src.l3num, ports[i]);
+			ret = nf_conntrack_helper_register(&sane[i][j]);
+			if (ret) {
+				printk(KERN_ERR "nf_ct_sane: failed to "
+				       "register helper for pf: %d port: %d\n",
+					sane[i][j].tuple.src.l3num, ports[i]);
+				nf_conntrack_sane_fini();
+				return ret;
+			}
+		}
+	}
+
+	return 0;
+}
+
+module_init(nf_conntrack_sane_init);
+module_exit(nf_conntrack_sane_fini);
-- 
cgit v1.2.3


From a09113c2c8ec59a5cc228efa5869aade2b8f13f7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 7 Feb 2007 15:05:33 -0800
Subject: [NETFILTER]: tcp conntrack: do liberal tracking for picked up
 connections

Do liberal tracking (only RSTs need to be in-window) for connections picked
up without seeing a SYN to deal with window scaling. Also change logging
of invalid packets not to log packets accepted by liberal tracking to avoid
spamming the logs.

Based on suggestion from James Ralston <ralston@pobox.com>

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nf_conntrack_tcp.h  |  4 ++-
 net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 40 +++++++++++------------------
 net/netfilter/nf_conntrack_proto_tcp.c      | 40 +++++++++++------------------
 3 files changed, 33 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h
index 2f4e98b90cc0..007af4c2770b 100644
--- a/include/linux/netfilter/nf_conntrack_tcp.h
+++ b/include/linux/netfilter/nf_conntrack_tcp.h
@@ -27,6 +27,9 @@ enum tcp_conntrack {
 /* This sender sent FIN first */
 #define IP_CT_TCP_FLAG_CLOSE_INIT		0x04
 
+/* Be liberal in window checking */
+#define IP_CT_TCP_FLAG_BE_LIBERAL		0x08
+
 #ifdef __KERNEL__
 
 struct ip_ct_tcp_state {
@@ -34,7 +37,6 @@ struct ip_ct_tcp_state {
 	u_int32_t	td_maxend;	/* max of ack + max(win, 1) */
 	u_int32_t	td_maxwin;	/* max(win) */
 	u_int8_t	td_scale;	/* window scale factor */
-	u_int8_t	loose;		/* used when connection picked up from the middle */
 	u_int8_t	flags;		/* per direction options */
 };
 
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 06e4e8a6dd9f..c34f48fe5478 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -50,12 +50,9 @@ static DEFINE_RWLOCK(tcp_lock);
     If it's non-zero, we mark only out of window RST segments as INVALID. */
 int ip_ct_tcp_be_liberal __read_mostly = 0;
 
-/* When connection is picked up from the middle, how many packets are required
-   to pass in each direction when we assume we are in sync - if any side uses
-   window scaling, we lost the game. 
-   If it is set to zero, we disable picking up already established 
+/* If it is set to zero, we disable picking up already established
    connections. */
-int ip_ct_tcp_loose __read_mostly = 3;
+int ip_ct_tcp_loose __read_mostly = 1;
 
 /* Max number of the retransmitted packets without receiving an (acceptable) 
    ACK from the destination. If this number is reached, a shorter timer 
@@ -694,11 +691,10 @@ static int tcp_in_window(struct ip_ct_tcp *state,
 	    	before(sack, receiver->td_end + 1),
 	    	after(ack, receiver->td_end - MAXACKWINDOW(sender)));
 	
-	if (sender->loose || receiver->loose ||
-	    (before(seq, sender->td_maxend + 1) &&
-	     after(end, sender->td_end - receiver->td_maxwin - 1) &&
-	     before(sack, receiver->td_end + 1) &&
-	     after(ack, receiver->td_end - MAXACKWINDOW(sender)))) {
+	if (before(seq, sender->td_maxend + 1) &&
+	    after(end, sender->td_end - receiver->td_maxwin - 1) &&
+	    before(sack, receiver->td_end + 1) &&
+	    after(ack, receiver->td_end - MAXACKWINDOW(sender))) {
 	    	/*
 		 * Take into account window scaling (RFC 1323).
 		 */
@@ -743,15 +739,13 @@ static int tcp_in_window(struct ip_ct_tcp *state,
 				state->retrans = 0;
 			}
 		}
-		/*
-		 * Close the window of disabled window tracking :-)
-		 */
-		if (sender->loose)
-			sender->loose--;
-		
 		res = 1;
 	} else {
-		if (LOG_INVALID(IPPROTO_TCP))
+		res = 0;
+		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
+		    ip_ct_tcp_be_liberal)
+			res = 1;
+		if (!res && LOG_INVALID(IPPROTO_TCP))
 			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 			"ip_ct_tcp: %s ",
 			before(seq, sender->td_maxend + 1) ?
@@ -762,8 +756,6 @@ static int tcp_in_window(struct ip_ct_tcp *state,
 			: "ACK is over the upper bound (ACKed data not seen yet)"
 			: "SEQ is under the lower bound (already ACKed data retransmitted)"
 			: "SEQ is over the upper bound (over the window of the receiver)");
-
-		res = ip_ct_tcp_be_liberal;
   	}
   
 	DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
@@ -1105,8 +1097,6 @@ static int tcp_new(struct ip_conntrack *conntrack,
 
 		tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]);
 		conntrack->proto.tcp.seen[1].flags = 0;
-		conntrack->proto.tcp.seen[0].loose = 
-		conntrack->proto.tcp.seen[1].loose = 0;
 	} else if (ip_ct_tcp_loose == 0) {
 		/* Don't try to pick up connections. */
 		return 0;
@@ -1127,11 +1117,11 @@ static int tcp_new(struct ip_conntrack *conntrack,
 			conntrack->proto.tcp.seen[0].td_maxwin;
 		conntrack->proto.tcp.seen[0].td_scale = 0;
 
-		/* We assume SACK. Should we assume window scaling too? */
+		/* We assume SACK and liberal window checking to handle
+		 * window scaling */
 		conntrack->proto.tcp.seen[0].flags =
-		conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM;
-		conntrack->proto.tcp.seen[0].loose = 
-		conntrack->proto.tcp.seen[1].loose = ip_ct_tcp_loose;
+		conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
+						     IP_CT_TCP_FLAG_BE_LIBERAL;
 	}
     
 	conntrack->proto.tcp.seen[1].td_end = 0;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 626b0011dd89..6fccdcf43e08 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -60,12 +60,9 @@ static DEFINE_RWLOCK(tcp_lock);
     If it's non-zero, we mark only out of window RST segments as INVALID. */
 int nf_ct_tcp_be_liberal __read_mostly = 0;
 
-/* When connection is picked up from the middle, how many packets are required
-   to pass in each direction when we assume we are in sync - if any side uses
-   window scaling, we lost the game. 
-   If it is set to zero, we disable picking up already established 
+/* If it is set to zero, we disable picking up already established
    connections. */
-int nf_ct_tcp_loose __read_mostly = 3;
+int nf_ct_tcp_loose __read_mostly = 1;
 
 /* Max number of the retransmitted packets without receiving an (acceptable) 
    ACK from the destination. If this number is reached, a shorter timer 
@@ -650,11 +647,10 @@ static int tcp_in_window(struct ip_ct_tcp *state,
 	    	before(sack, receiver->td_end + 1),
 	    	after(ack, receiver->td_end - MAXACKWINDOW(sender)));
 
-	if (sender->loose || receiver->loose ||
-	    (before(seq, sender->td_maxend + 1) &&
-	     after(end, sender->td_end - receiver->td_maxwin - 1) &&
-	     before(sack, receiver->td_end + 1) &&
-	     after(ack, receiver->td_end - MAXACKWINDOW(sender)))) {
+	if (before(seq, sender->td_maxend + 1) &&
+	    after(end, sender->td_end - receiver->td_maxwin - 1) &&
+	    before(sack, receiver->td_end + 1) &&
+	    after(ack, receiver->td_end - MAXACKWINDOW(sender))) {
 	    	/*
 		 * Take into account window scaling (RFC 1323).
 		 */
@@ -699,15 +695,13 @@ static int tcp_in_window(struct ip_ct_tcp *state,
 				state->retrans = 0;
 			}
 		}
-		/*
-		 * Close the window of disabled window tracking :-)
-		 */
-		if (sender->loose)
-			sender->loose--;
-
 		res = 1;
 	} else {
-		if (LOG_INVALID(IPPROTO_TCP))
+		res = 0;
+		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
+		    nf_ct_tcp_be_liberal)
+			res = 1;
+		if (!res && LOG_INVALID(IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 			"nf_ct_tcp: %s ",
 			before(seq, sender->td_maxend + 1) ?
@@ -718,8 +712,6 @@ static int tcp_in_window(struct ip_ct_tcp *state,
 			: "ACK is over the upper bound (ACKed data not seen yet)"
 			: "SEQ is under the lower bound (already ACKed data retransmitted)"
 			: "SEQ is over the upper bound (over the window of the receiver)");
-
-		res = nf_ct_tcp_be_liberal;
   	}
   
 	DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
@@ -1063,8 +1055,6 @@ static int tcp_new(struct nf_conn *conntrack,
 
 		tcp_options(skb, dataoff, th, &conntrack->proto.tcp.seen[0]);
 		conntrack->proto.tcp.seen[1].flags = 0;
-		conntrack->proto.tcp.seen[0].loose = 
-		conntrack->proto.tcp.seen[1].loose = 0;
 	} else if (nf_ct_tcp_loose == 0) {
 		/* Don't try to pick up connections. */
 		return 0;
@@ -1085,11 +1075,11 @@ static int tcp_new(struct nf_conn *conntrack,
 			conntrack->proto.tcp.seen[0].td_maxwin;
 		conntrack->proto.tcp.seen[0].td_scale = 0;
 
-		/* We assume SACK. Should we assume window scaling too? */
+		/* We assume SACK and liberal window checking to handle
+		 * window scaling */
 		conntrack->proto.tcp.seen[0].flags =
-		conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM;
-		conntrack->proto.tcp.seen[0].loose = 
-		conntrack->proto.tcp.seen[1].loose = nf_ct_tcp_loose;
+		conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
+						     IP_CT_TCP_FLAG_BE_LIBERAL;
 	}
     
 	conntrack->proto.tcp.seen[1].td_end = 0;
-- 
cgit v1.2.3


From cdd289a2f833b93e65b9a09a02c37f47a58140a8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 7 Feb 2007 15:09:46 -0800
Subject: [NETFILTER]: add IPv6-capable TCPMSS target

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/Kbuild            |   1 +
 include/linux/netfilter/xt_TCPMSS.h       |  10 +
 include/linux/netfilter_ipv4/ipt_TCPMSS.h |   7 +-
 net/ipv4/netfilter/Kconfig                |  26 ---
 net/ipv4/netfilter/Makefile               |   1 -
 net/ipv4/netfilter/ipt_TCPMSS.c           | 207 ---------------------
 net/netfilter/Kconfig                     |  26 +++
 net/netfilter/Makefile                    |   1 +
 net/netfilter/xt_TCPMSS.c                 | 296 ++++++++++++++++++++++++++++++
 9 files changed, 337 insertions(+), 238 deletions(-)
 create mode 100644 include/linux/netfilter/xt_TCPMSS.h
 delete mode 100644 net/ipv4/netfilter/ipt_TCPMSS.c
 create mode 100644 net/netfilter/xt_TCPMSS.c

(limited to 'include')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 6328175a1c3a..43397a414cd6 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -33,6 +33,7 @@ header-y += xt_tcpmss.h
 header-y += xt_tcpudp.h
 header-y += xt_SECMARK.h
 header-y += xt_CONNSECMARK.h
+header-y += xt_TCPMSS.h
 
 unifdef-y += nf_conntrack_common.h
 unifdef-y += nf_conntrack_ftp.h
diff --git a/include/linux/netfilter/xt_TCPMSS.h b/include/linux/netfilter/xt_TCPMSS.h
new file mode 100644
index 000000000000..53a292cd47f3
--- /dev/null
+++ b/include/linux/netfilter/xt_TCPMSS.h
@@ -0,0 +1,10 @@
+#ifndef _XT_TCPMSS_H
+#define _XT_TCPMSS_H
+
+struct xt_tcpmss_info {
+	u_int16_t mss;
+};
+
+#define XT_TCPMSS_CLAMP_PMTU 0xffff
+
+#endif /* _XT_TCPMSS_H */
diff --git a/include/linux/netfilter_ipv4/ipt_TCPMSS.h b/include/linux/netfilter_ipv4/ipt_TCPMSS.h
index aadb39580cd3..7a850f945824 100644
--- a/include/linux/netfilter_ipv4/ipt_TCPMSS.h
+++ b/include/linux/netfilter_ipv4/ipt_TCPMSS.h
@@ -1,10 +1,9 @@
 #ifndef _IPT_TCPMSS_H
 #define _IPT_TCPMSS_H
 
-struct ipt_tcpmss_info {
-	u_int16_t mss;
-};
+#include <linux/netfilter/xt_TCPMSS.h>
 
-#define IPT_TCPMSS_CLAMP_PMTU 0xffff
+#define ipt_tcpmss_info		xt_tcpmss_info
+#define IPT_TCPMSS_CLAMP_PMTU	XT_TCPMSS_CLAMP_PMTU
 
 #endif /*_IPT_TCPMSS_H*/
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 47bd3ad18b71..9b08e7ad71bc 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -361,32 +361,6 @@ config IP_NF_TARGET_ULOG
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_TARGET_TCPMSS
-	tristate "TCPMSS target support"
-	depends on IP_NF_IPTABLES
-	---help---
-	  This option adds a `TCPMSS' target, which allows you to alter the
-	  MSS value of TCP SYN packets, to control the maximum size for that
-	  connection (usually limiting it to your outgoing interface's MTU
-	  minus 40).
-
-	  This is used to overcome criminally braindead ISPs or servers which
-	  block ICMP Fragmentation Needed packets.  The symptoms of this
-	  problem are that everything works fine from your Linux
-	  firewall/router, but machines behind it can never exchange large
-	  packets:
-	  	1) Web browsers connect, then hang with no data received.
-	  	2) Small mail works fine, but large emails hang.
-	  	3) ssh works fine, but scp hangs after initial handshaking.
-
-	  Workaround: activate this option and add a rule to your firewall
-	  configuration like:
-
-	  iptables -A FORWARD -p tcp --tcp-flags SYN,RST SYN \
-	  		 -j TCPMSS --clamp-mss-to-pmtu
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 # NAT + specific targets: ip_conntrack
 config IP_NF_NAT
 	tristate "Full NAT"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 16d177b71bf8..6625ec68180c 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -103,7 +103,6 @@ obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o
 obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o
 obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
 obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
-obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o
 obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
 obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
 
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
deleted file mode 100644
index 93eb5c3c1884..000000000000
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * This is a module which is used for setting the MSS option in TCP packets.
- *
- * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/ip.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_TCPMSS.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("iptables TCP MSS modification module");
-
-static inline unsigned int
-optlen(const u_int8_t *opt, unsigned int offset)
-{
-	/* Beware zero-length options: make finite progress */
-	if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
-		return 1;
-	else
-		return opt[offset+1];
-}
-
-static unsigned int
-ipt_tcpmss_target(struct sk_buff **pskb,
-		  const struct net_device *in,
-		  const struct net_device *out,
-		  unsigned int hooknum,
-		  const struct xt_target *target,
-		  const void *targinfo)
-{
-	const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
-	struct tcphdr *tcph;
-	struct iphdr *iph;
-	u_int16_t tcplen, newmss;
-	__be16 newtotlen, oldval;
-	unsigned int i;
-	u_int8_t *opt;
-
-	if (!skb_make_writable(pskb, (*pskb)->len))
-		return NF_DROP;
-
-	iph = (*pskb)->nh.iph;
-	tcplen = (*pskb)->len - iph->ihl*4;
-	tcph = (void *)iph + iph->ihl*4;
-
-	/* Since it passed flags test in tcp match, we know it is is
-	   not a fragment, and has data >= tcp header length.  SYN
-	   packets should not contain data: if they did, then we risk
-	   running over MTU, sending Frag Needed and breaking things
-	   badly. --RR */
-	if (tcplen != tcph->doff*4) {
-		if (net_ratelimit())
-			printk(KERN_ERR
-			       "ipt_tcpmss_target: bad length (%d bytes)\n",
-			       (*pskb)->len);
-		return NF_DROP;
-	}
-
-	if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) {
-		if (dst_mtu((*pskb)->dst) <= sizeof(struct iphdr) +
-					     sizeof(struct tcphdr)) {
-			if (net_ratelimit())
-				printk(KERN_ERR "ipt_tcpmss_target: "
-				       "unknown or invalid path-MTU (%d)\n",
-				       dst_mtu((*pskb)->dst));
-			return NF_DROP; /* or IPT_CONTINUE ?? */
-		}
-
-		newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) -
-						 sizeof(struct tcphdr);
-	} else
-		newmss = tcpmssinfo->mss;
-
- 	opt = (u_int8_t *)tcph;
-	for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) {
-		if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS &&
-		    opt[i+1] == TCPOLEN_MSS) {
-			u_int16_t oldmss;
-
-			oldmss = (opt[i+2] << 8) | opt[i+3];
-
-			if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU &&
-			    oldmss <= newmss)
-				return IPT_CONTINUE;
-
-			opt[i+2] = (newmss & 0xff00) >> 8;
-			opt[i+3] = (newmss & 0x00ff);
-
-			nf_proto_csum_replace2(&tcph->check, *pskb,
-						htons(oldmss), htons(newmss), 0);
-			return IPT_CONTINUE;
-		}
-	}
-
-	/*
-	 * MSS Option not found ?! add it..
-	 */
-	if (skb_tailroom((*pskb)) < TCPOLEN_MSS) {
-		struct sk_buff *newskb;
-
-		newskb = skb_copy_expand(*pskb, skb_headroom(*pskb),
-					 TCPOLEN_MSS, GFP_ATOMIC);
-		if (!newskb)
-			return NF_DROP;
-		kfree_skb(*pskb);
-		*pskb = newskb;
-		iph = (*pskb)->nh.iph;
-		tcph = (void *)iph + iph->ihl*4;
-	}
-
-	skb_put((*pskb), TCPOLEN_MSS);
-
- 	opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
-	memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
-
-	nf_proto_csum_replace2(&tcph->check, *pskb,
-				htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
-	opt[0] = TCPOPT_MSS;
-	opt[1] = TCPOLEN_MSS;
-	opt[2] = (newmss & 0xff00) >> 8;
-	opt[3] = (newmss & 0x00ff);
-
-	nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0);
-
-	oldval = ((__be16 *)tcph)[6];
-	tcph->doff += TCPOLEN_MSS/4;
-	nf_proto_csum_replace2(&tcph->check, *pskb,
-				oldval, ((__be16 *)tcph)[6], 0);
-
-	newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS);
-	nf_csum_replace2(&iph->check, iph->tot_len, newtotlen);
-	iph->tot_len = newtotlen;
-	return IPT_CONTINUE;
-}
-
-#define TH_SYN 0x02
-
-static inline int find_syn_match(const struct ipt_entry_match *m)
-{
-	const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data;
-
-	if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
-	    tcpinfo->flg_cmp & TH_SYN &&
-	    !(tcpinfo->invflags & IPT_TCP_INV_FLAGS))
-		return 1;
-
-	return 0;
-}
-
-/* Must specify -p tcp --syn/--tcp-flags SYN */
-static int
-ipt_tcpmss_checkentry(const char *tablename,
-		      const void *e_void,
-		      const struct xt_target *target,
-		      void *targinfo,
-		      unsigned int hook_mask)
-{
-	const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
-	const struct ipt_entry *e = e_void;
-
-	if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU &&
-	    (hook_mask & ~((1 << NF_IP_FORWARD) |
-			   (1 << NF_IP_LOCAL_OUT) |
-			   (1 << NF_IP_POST_ROUTING))) != 0) {
-		printk("TCPMSS: path-MTU clamping only supported in "
-		       "FORWARD, OUTPUT and POSTROUTING hooks\n");
-		return 0;
-	}
-
-	if (IPT_MATCH_ITERATE(e, find_syn_match))
-		return 1;
-	printk("TCPMSS: Only works on TCP SYN packets\n");
-	return 0;
-}
-
-static struct ipt_target ipt_tcpmss_reg = {
-	.name		= "TCPMSS",
-	.target		= ipt_tcpmss_target,
-	.targetsize	= sizeof(struct ipt_tcpmss_info),
-	.proto		= IPPROTO_TCP,
-	.checkentry	= ipt_tcpmss_checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init ipt_tcpmss_init(void)
-{
-	return ipt_register_target(&ipt_tcpmss_reg);
-}
-
-static void __exit ipt_tcpmss_fini(void)
-{
-	ipt_unregister_target(&ipt_tcpmss_reg);
-}
-
-module_init(ipt_tcpmss_init);
-module_exit(ipt_tcpmss_fini);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 614c92c17835..748f7f00909a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -395,6 +395,32 @@ config NETFILTER_XT_TARGET_CONNSECMARK
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_TARGET_TCPMSS
+	tristate '"TCPMSS" target support'
+	depends on NETFILTER_XTABLES && (IPV6 || IPV6=n)
+	---help---
+	  This option adds a `TCPMSS' target, which allows you to alter the
+	  MSS value of TCP SYN packets, to control the maximum size for that
+	  connection (usually limiting it to your outgoing interface's MTU
+	  minus 40).
+
+	  This is used to overcome criminally braindead ISPs or servers which
+	  block ICMP Fragmentation Needed packets.  The symptoms of this
+	  problem are that everything works fine from your Linux
+	  firewall/router, but machines behind it can never exchange large
+	  packets:
+	        1) Web browsers connect, then hang with no data received.
+	        2) Small mail works fine, but large emails hang.
+	        3) ssh works fine, but scp hangs after initial handshaking.
+
+	  Workaround: activate this option and add a rule to your firewall
+	  configuration like:
+
+	  iptables -A FORWARD -p tcp --tcp-flags SYN,RST SYN \
+	                 -j TCPMSS --clamp-mss-to-pmtu
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_MATCH_COMMENT
 	tristate  '"comment" match support'
 	depends on NETFILTER_XTABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 5054b0ff8096..b2b5c7566b26 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
 
 # matches
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
new file mode 100644
index 000000000000..db7e38c08de2
--- /dev/null
+++ b/net/netfilter/xt_TCPMSS.c
@@ -0,0 +1,296 @@
+/*
+ * This is a module which is used for setting the MSS option in TCP packets.
+ *
+ * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_tcpudp.h>
+#include <linux/netfilter/xt_TCPMSS.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_DESCRIPTION("x_tables TCP MSS modification module");
+MODULE_ALIAS("ipt_TCPMSS");
+MODULE_ALIAS("ip6t_TCPMSS");
+
+static inline unsigned int
+optlen(const u_int8_t *opt, unsigned int offset)
+{
+	/* Beware zero-length options: make finite progress */
+	if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
+		return 1;
+	else
+		return opt[offset+1];
+}
+
+static int
+tcpmss_mangle_packet(struct sk_buff **pskb,
+		     const struct xt_tcpmss_info *info,
+		     unsigned int tcphoff,
+		     unsigned int minlen)
+{
+	struct tcphdr *tcph;
+	unsigned int tcplen, i;
+	__be16 oldval;
+	u16 newmss;
+	u8 *opt;
+
+	if (!skb_make_writable(pskb, (*pskb)->len))
+		return -1;
+
+	tcplen = (*pskb)->len - tcphoff;
+	tcph = (struct tcphdr *)((*pskb)->nh.raw + tcphoff);
+
+	/* Since it passed flags test in tcp match, we know it is is
+	   not a fragment, and has data >= tcp header length.  SYN
+	   packets should not contain data: if they did, then we risk
+	   running over MTU, sending Frag Needed and breaking things
+	   badly. --RR */
+	if (tcplen != tcph->doff*4) {
+		if (net_ratelimit())
+			printk(KERN_ERR "xt_TCPMSS: bad length (%u bytes)\n",
+			       (*pskb)->len);
+		return -1;
+	}
+
+	if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
+		if (dst_mtu((*pskb)->dst) <= minlen) {
+			if (net_ratelimit())
+				printk(KERN_ERR "xt_TCPMSS: "
+				       "unknown or invalid path-MTU (%u)\n",
+				       dst_mtu((*pskb)->dst));
+			return -1;
+		}
+		newmss = dst_mtu((*pskb)->dst) - minlen;
+	} else
+		newmss = info->mss;
+
+	opt = (u_int8_t *)tcph;
+	for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) {
+		if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS &&
+		    opt[i+1] == TCPOLEN_MSS) {
+			u_int16_t oldmss;
+
+			oldmss = (opt[i+2] << 8) | opt[i+3];
+
+			if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
+			    oldmss <= newmss)
+				return 0;
+
+			opt[i+2] = (newmss & 0xff00) >> 8;
+			opt[i+3] = (newmss & 0x00ff);
+
+			nf_proto_csum_replace2(&tcph->check, *pskb,
+					       htons(oldmss), htons(newmss), 0);
+			return 0;
+		}
+	}
+
+	/*
+	 * MSS Option not found ?! add it..
+	 */
+	if (skb_tailroom((*pskb)) < TCPOLEN_MSS) {
+		struct sk_buff *newskb;
+
+		newskb = skb_copy_expand(*pskb, skb_headroom(*pskb),
+					 TCPOLEN_MSS, GFP_ATOMIC);
+		if (!newskb)
+			return -1;
+		kfree_skb(*pskb);
+		*pskb = newskb;
+		tcph = (struct tcphdr *)((*pskb)->nh.raw + tcphoff);
+	}
+
+	skb_put((*pskb), TCPOLEN_MSS);
+
+	opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
+	memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
+
+	nf_proto_csum_replace2(&tcph->check, *pskb,
+			       htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
+	opt[0] = TCPOPT_MSS;
+	opt[1] = TCPOLEN_MSS;
+	opt[2] = (newmss & 0xff00) >> 8;
+	opt[3] = (newmss & 0x00ff);
+
+	nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0);
+
+	oldval = ((__be16 *)tcph)[6];
+	tcph->doff += TCPOLEN_MSS/4;
+	nf_proto_csum_replace2(&tcph->check, *pskb,
+				oldval, ((__be16 *)tcph)[6], 0);
+	return TCPOLEN_MSS;
+}
+
+static unsigned int
+xt_tcpmss_target4(struct sk_buff **pskb,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  unsigned int hooknum,
+		  const struct xt_target *target,
+		  const void *targinfo)
+{
+	struct iphdr *iph = (*pskb)->nh.iph;
+	__be16 newlen;
+	int ret;
+
+	ret = tcpmss_mangle_packet(pskb, targinfo, iph->ihl * 4,
+				   sizeof(*iph) + sizeof(struct tcphdr));
+	if (ret < 0)
+		return NF_DROP;
+	if (ret > 0) {
+		iph = (*pskb)->nh.iph;
+		newlen = htons(ntohs(iph->tot_len) + ret);
+		nf_csum_replace2(&iph->check, iph->tot_len, newlen);
+		iph->tot_len = newlen;
+	}
+	return XT_CONTINUE;
+}
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+static unsigned int
+xt_tcpmss_target6(struct sk_buff **pskb,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  unsigned int hooknum,
+		  const struct xt_target *target,
+		  const void *targinfo)
+{
+	struct ipv6hdr *ipv6h = (*pskb)->nh.ipv6h;
+	u8 nexthdr;
+	int tcphoff;
+	int ret;
+
+	nexthdr = ipv6h->nexthdr;
+	tcphoff = ipv6_skip_exthdr(*pskb, sizeof(*ipv6h), &nexthdr);
+	if (tcphoff < 0) {
+		WARN_ON(1);
+		return NF_DROP;
+	}
+	ret = tcpmss_mangle_packet(pskb, targinfo, tcphoff,
+				   sizeof(*ipv6h) + sizeof(struct tcphdr));
+	if (ret < 0)
+		return NF_DROP;
+	if (ret > 0) {
+		ipv6h = (*pskb)->nh.ipv6h;
+		ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
+	}
+	return XT_CONTINUE;
+}
+#endif
+
+#define TH_SYN 0x02
+
+/* Must specify -p tcp --syn */
+static inline int find_syn_match(const struct xt_entry_match *m)
+{
+	const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data;
+
+	if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
+	    tcpinfo->flg_cmp & TH_SYN &&
+	    !(tcpinfo->invflags & XT_TCP_INV_FLAGS))
+		return 1;
+
+	return 0;
+}
+
+static int
+xt_tcpmss_checkentry4(const char *tablename,
+		      const void *entry,
+		      const struct xt_target *target,
+		      void *targinfo,
+		      unsigned int hook_mask)
+{
+	const struct xt_tcpmss_info *info = targinfo;
+	const struct ipt_entry *e = entry;
+
+	if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
+	    (hook_mask & ~((1 << NF_IP_FORWARD) |
+			   (1 << NF_IP_LOCAL_OUT) |
+			   (1 << NF_IP_POST_ROUTING))) != 0) {
+		printk("xt_TCPMSS: path-MTU clamping only supported in "
+		       "FORWARD, OUTPUT and POSTROUTING hooks\n");
+		return 0;
+	}
+	if (IPT_MATCH_ITERATE(e, find_syn_match))
+		return 1;
+	printk("xt_TCPMSS: Only works on TCP SYN packets\n");
+	return 0;
+}
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+static int
+xt_tcpmss_checkentry6(const char *tablename,
+		      const void *entry,
+		      const struct xt_target *target,
+		      void *targinfo,
+		      unsigned int hook_mask)
+{
+	const struct xt_tcpmss_info *info = targinfo;
+	const struct ip6t_entry *e = entry;
+
+	if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
+	    (hook_mask & ~((1 << NF_IP6_FORWARD) |
+			   (1 << NF_IP6_LOCAL_OUT) |
+			   (1 << NF_IP6_POST_ROUTING))) != 0) {
+		printk("xt_TCPMSS: path-MTU clamping only supported in "
+		       "FORWARD, OUTPUT and POSTROUTING hooks\n");
+		return 0;
+	}
+	if (IP6T_MATCH_ITERATE(e, find_syn_match))
+		return 1;
+	printk("xt_TCPMSS: Only works on TCP SYN packets\n");
+	return 0;
+}
+#endif
+
+static struct xt_target xt_tcpmss_reg[] = {
+	{
+		.family		= AF_INET,
+		.name		= "TCPMSS",
+		.checkentry	= xt_tcpmss_checkentry4,
+		.target		= xt_tcpmss_target4,
+		.targetsize	= sizeof(struct xt_tcpmss_info),
+		.proto		= IPPROTO_TCP,
+		.me		= THIS_MODULE,
+	},
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+	{
+		.family		= AF_INET6,
+		.name		= "TCPMSS",
+		.checkentry	= xt_tcpmss_checkentry6,
+		.target		= xt_tcpmss_target6,
+		.targetsize	= sizeof(struct xt_tcpmss_info),
+		.proto		= IPPROTO_TCP,
+		.me		= THIS_MODULE,
+	},
+#endif
+};
+
+static int __init xt_tcpmss_init(void)
+{
+	return xt_register_targets(xt_tcpmss_reg, ARRAY_SIZE(xt_tcpmss_reg));
+}
+
+static void __exit xt_tcpmss_fini(void)
+{
+	xt_unregister_targets(xt_tcpmss_reg, ARRAY_SIZE(xt_tcpmss_reg));
+}
+
+module_init(xt_tcpmss_init);
+module_exit(xt_tcpmss_fini);
-- 
cgit v1.2.3


From 41f4689a7c8cd76b77864461b3c58fde8f322b2c Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@inl.fr>
Date: Wed, 7 Feb 2007 15:10:09 -0800
Subject: [NETFILTER]: NAT: optional source port randomization support

This patch adds support to NAT to randomize source ports.

Signed-off-by: Eric Leblond <eric@inl.fr>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ip_nat.h |  1 +
 include/net/netfilter/nf_nat.h        |  1 +
 net/ipv4/netfilter/ip_nat_core.c      | 12 ++++++++++--
 net/ipv4/netfilter/ip_nat_proto_tcp.c |  5 +++++
 net/ipv4/netfilter/ip_nat_proto_udp.c |  5 +++++
 net/ipv4/netfilter/ip_nat_rule.c      |  4 ++++
 net/ipv4/netfilter/nf_nat_core.c      | 12 ++++++++++--
 net/ipv4/netfilter/nf_nat_proto_tcp.c |  4 ++++
 net/ipv4/netfilter/nf_nat_proto_udp.c |  4 ++++
 net/ipv4/netfilter/nf_nat_rule.c      |  4 ++++
 10 files changed, 48 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h
index bdf553620ca1..bbca89aab813 100644
--- a/include/linux/netfilter_ipv4/ip_nat.h
+++ b/include/linux/netfilter_ipv4/ip_nat.h
@@ -16,6 +16,7 @@ enum ip_nat_manip_type
 
 #define IP_NAT_RANGE_MAP_IPS 1
 #define IP_NAT_RANGE_PROTO_SPECIFIED 2
+#define IP_NAT_RANGE_PROTO_RANDOM 4 /* add randomness to "port" selection */
 
 /* NAT sequence number modifications */
 struct ip_nat_seq {
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 61c62068ca6b..bc57dd7b9b5c 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -16,6 +16,7 @@ enum nf_nat_manip_type
 
 #define IP_NAT_RANGE_MAP_IPS 1
 #define IP_NAT_RANGE_PROTO_SPECIFIED 2
+#define IP_NAT_RANGE_PROTO_RANDOM 4
 
 /* NAT sequence number modifications */
 struct nf_nat_seq {
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 9d1a5175dcd4..5e08c2bf887d 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -246,8 +246,9 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple,
 	if (maniptype == IP_NAT_MANIP_SRC) {
 		if (find_appropriate_src(orig_tuple, tuple, range)) {
 			DEBUGP("get_unique_tuple: Found current src map\n");
-			if (!ip_nat_used_tuple(tuple, conntrack))
-				return;
+			if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
+				if (!ip_nat_used_tuple(tuple, conntrack))
+					return;
 		}
 	}
 
@@ -261,6 +262,13 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple,
 
 	proto = ip_nat_proto_find_get(orig_tuple->dst.protonum);
 
+	/* Change protocol info to have some randomization */
+	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
+		proto->unique_tuple(tuple, range, maniptype, conntrack);
+		ip_nat_proto_put(proto);
+		return;
+	}
+
 	/* Only bother mapping if it's not already in range and unique */
 	if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
 	     || proto->in_range(tuple, maniptype, &range->min, &range->max))
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
index b586d18b3fb3..14ff24f53a7a 100644
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -8,6 +8,7 @@
 
 #include <linux/types.h>
 #include <linux/init.h>
+#include <linux/random.h>
 #include <linux/netfilter.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
@@ -75,6 +76,10 @@ tcp_unique_tuple(struct ip_conntrack_tuple *tuple,
 		range_size = ntohs(range->max.tcp.port) - min + 1;
 	}
 
+	/* Start from random port to avoid prediction */
+	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
+		port =  net_random();
+
 	for (i = 0; i < range_size; i++, port++) {
 		*portptr = htons(min + port % range_size);
 		if (!ip_nat_used_tuple(tuple, conntrack)) {
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
index 5ced0877b32f..dfd521672891 100644
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -8,6 +8,7 @@
 
 #include <linux/types.h>
 #include <linux/init.h>
+#include <linux/random.h>
 #include <linux/netfilter.h>
 #include <linux/ip.h>
 #include <linux/udp.h>
@@ -74,6 +75,10 @@ udp_unique_tuple(struct ip_conntrack_tuple *tuple,
 		range_size = ntohs(range->max.udp.port) - min + 1;
 	}
 
+	/* Start from random port to avoid prediction */
+	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
+		port = net_random();
+
 	for (i = 0; i < range_size; i++, port++) {
 		*portptr = htons(min + port % range_size);
 		if (!ip_nat_used_tuple(tuple, conntrack))
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index a176aa3031e0..6ebaad36c06d 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -193,6 +193,10 @@ static int ipt_dnat_checkentry(const char *tablename,
 		printk("DNAT: multiple ranges no longer supported\n");
 		return 0;
 	}
+	if (mr->range[0].flags & IP_NAT_RANGE_PROTO_RANDOM) {
+		printk("DNAT: port randomization not supported\n");
+		return 0;
+	}
 	return 1;
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 86a92272b053..998b2557692c 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -254,8 +254,9 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	if (maniptype == IP_NAT_MANIP_SRC) {
 		if (find_appropriate_src(orig_tuple, tuple, range)) {
 			DEBUGP("get_unique_tuple: Found current src map\n");
-			if (!nf_nat_used_tuple(tuple, ct))
-				return;
+			if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
+				if (!nf_nat_used_tuple(tuple, ct))
+					return;
 		}
 	}
 
@@ -269,6 +270,13 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 
 	proto = nf_nat_proto_find_get(orig_tuple->dst.protonum);
 
+	/* Change protocol info to have some randomization */
+	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
+		proto->unique_tuple(tuple, range, maniptype, ct);
+		nf_nat_proto_put(proto);
+		return;
+	}
+
 	/* Only bother mapping if it's not already in range and unique */
 	if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
 	     proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index 7e26a7e9bee1..439164c7a626 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -8,6 +8,7 @@
 
 #include <linux/types.h>
 #include <linux/init.h>
+#include <linux/random.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
 
@@ -75,6 +76,9 @@ tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
 		range_size = ntohs(range->max.tcp.port) - min + 1;
 	}
 
+	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
+		port =  net_random();
+
 	for (i = 0; i < range_size; i++, port++) {
 		*portptr = htons(min + port % range_size);
 		if (!nf_nat_used_tuple(tuple, ct))
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index ab0ce4c8699f..8cae6e063bb6 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -8,6 +8,7 @@
 
 #include <linux/types.h>
 #include <linux/init.h>
+#include <linux/random.h>
 #include <linux/ip.h>
 #include <linux/udp.h>
 
@@ -73,6 +74,9 @@ udp_unique_tuple(struct nf_conntrack_tuple *tuple,
 		range_size = ntohs(range->max.udp.port) - min + 1;
 	}
 
+	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
+		port = net_random();
+
 	for (i = 0; i < range_size; i++, port++) {
 		*portptr = htons(min + port % range_size);
 		if (!nf_nat_used_tuple(tuple, ct))
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index b868ee0195d4..3745efe70302 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -226,6 +226,10 @@ static int ipt_dnat_checkentry(const char *tablename,
 		printk("DNAT: multiple ranges no longer supported\n");
 		return 0;
 	}
+	if (mr->range[0].flags & IP_NAT_RANGE_PROTO_RANDOM) {
+		printk("DNAT: port randomization not supported\n");
+		return 0;
+	}
 	return 1;
 }
 
-- 
cgit v1.2.3


From 6709dbbb1978abe039ea4b76c364bf003bf40de5 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@gmx.de>
Date: Wed, 7 Feb 2007 15:11:19 -0800
Subject: [NETFILTER]: {ip,ip6}_tables: remove x_tables wrapper functions

Use the x_tables functions directly to make it better visible which
parts are shared between ip_tables and ip6_tables.

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ip_tables.h  | 12 +-----------
 include/linux/netfilter_ipv6/ip6_tables.h | 10 ----------
 net/ipv4/netfilter/ip_nat_rule.c          | 26 ++++++++++++++------------
 net/ipv4/netfilter/ip_tables.c            | 20 ++++++++++----------
 net/ipv4/netfilter/ipt_CLUSTERIP.c        | 14 ++++++++------
 net/ipv4/netfilter/ipt_ECN.c              | 13 ++++++++-----
 net/ipv4/netfilter/ipt_LOG.c              | 11 ++++++-----
 net/ipv4/netfilter/ipt_MASQUERADE.c       |  9 +++++----
 net/ipv4/netfilter/ipt_NETMAP.c           |  8 +++++---
 net/ipv4/netfilter/ipt_REDIRECT.c         |  8 +++++---
 net/ipv4/netfilter/ipt_REJECT.c           | 10 ++++++----
 net/ipv4/netfilter/ipt_SAME.c             |  8 +++++---
 net/ipv4/netfilter/ipt_TOS.c              | 11 ++++++-----
 net/ipv4/netfilter/ipt_TTL.c              | 11 ++++++-----
 net/ipv4/netfilter/ipt_ULOG.c             | 13 ++++++-------
 net/ipv4/netfilter/ipt_addrtype.c         |  9 +++++----
 net/ipv4/netfilter/ipt_ah.c               | 10 ++++++----
 net/ipv4/netfilter/ipt_ecn.c              | 10 +++++++---
 net/ipv4/netfilter/ipt_iprange.c          | 10 +++++-----
 net/ipv4/netfilter/ipt_owner.c            |  9 +++++----
 net/ipv4/netfilter/ipt_recent.c           | 12 +++++++-----
 net/ipv4/netfilter/ipt_tos.c              | 10 ++++++----
 net/ipv4/netfilter/ipt_ttl.c              | 11 ++++++-----
 net/ipv4/netfilter/nf_nat_rule.c          |  2 +-
 net/ipv6/netfilter/ip6_tables.c           | 12 ++++++------
 net/ipv6/netfilter/ip6t_HL.c              | 14 ++++++++------
 net/ipv6/netfilter/ip6t_LOG.c             | 10 ++++++----
 net/ipv6/netfilter/ip6t_REJECT.c          | 10 ++++++----
 net/ipv6/netfilter/ip6t_ah.c              |  8 +++++---
 net/ipv6/netfilter/ip6t_eui64.c           |  8 +++++---
 net/ipv6/netfilter/ip6t_frag.c            |  8 +++++---
 net/ipv6/netfilter/ip6t_hbh.c             |  1 +
 net/ipv6/netfilter/ip6t_hl.c              | 11 ++++++-----
 net/ipv6/netfilter/ip6t_ipv6header.c      |  8 +++++---
 net/ipv6/netfilter/ip6t_owner.c           |  8 +++++---
 net/ipv6/netfilter/ip6t_rt.c              |  8 +++++---
 36 files changed, 202 insertions(+), 171 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index 98d566c5e32a..c59bc6ff2280 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -272,16 +272,6 @@ ipt_get_target(struct ipt_entry *e)
 #include <linux/init.h>
 extern void ipt_init(void) __init;
 
-#define ipt_register_target(tgt) 	\
-({	(tgt)->family = AF_INET;	\
- 	xt_register_target(tgt); })
-#define ipt_unregister_target(tgt) xt_unregister_target(tgt)
-
-#define ipt_register_match(mtch) 	\
-({	(mtch)->family = AF_INET;	\
-	xt_register_match(mtch); })
-#define ipt_unregister_match(mtch) xt_unregister_match(mtch)
-
 //#define ipt_register_table(tbl, repl) xt_register_table(AF_INET, tbl, repl)
 //#define ipt_unregister_table(tbl) xt_unregister_table(AF_INET, tbl)
 
@@ -290,7 +280,7 @@ extern int ipt_register_table(struct ipt_table *table,
 extern void ipt_unregister_table(struct ipt_table *table);
 
 /* net/sched/ipt.c: Gimme access to your targets!  Gets target->me. */
-extern struct ipt_target *ipt_find_target(const char *name, u8 revision);
+extern struct xt_target *ipt_find_target(const char *name, u8 revision);
 
 /* Standard entry. */
 struct ipt_standard
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 4aed340401db..2fbabab30d21 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -286,16 +286,6 @@ ip6t_get_target(struct ip6t_entry *e)
 #include <linux/init.h>
 extern void ip6t_init(void) __init;
 
-#define ip6t_register_target(tgt) 		\
-({	(tgt)->family = AF_INET6;		\
- 	xt_register_target(tgt); })
-#define ip6t_unregister_target(tgt) xt_unregister_target(tgt)
-
-#define ip6t_register_match(match)		\
-({	(match)->family = AF_INET6;		\
-	xt_register_match(match); })
-#define ip6t_unregister_match(match) xt_unregister_match(match)
-
 extern int ip6t_register_table(struct ip6t_table *table,
 			       const struct ip6t_replace *repl);
 extern void ip6t_unregister_table(struct ip6t_table *table);
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index 6ebaad36c06d..7a8e7bb577e2 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -99,7 +99,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb,
 				    const struct net_device *in,
 				    const struct net_device *out,
 				    unsigned int hooknum,
-				    const struct ipt_target *target,
+				    const struct xt_target *target,
 				    const void *targinfo)
 {
 	struct ip_conntrack *ct;
@@ -141,7 +141,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
 				    const struct net_device *in,
 				    const struct net_device *out,
 				    unsigned int hooknum,
-				    const struct ipt_target *target,
+				    const struct xt_target *target,
 				    const void *targinfo)
 {
 	struct ip_conntrack *ct;
@@ -166,7 +166,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
 
 static int ipt_snat_checkentry(const char *tablename,
 			       const void *entry,
-			       const struct ipt_target *target,
+			       const struct xt_target *target,
 			       void *targinfo,
 			       unsigned int hook_mask)
 {
@@ -182,7 +182,7 @@ static int ipt_snat_checkentry(const char *tablename,
 
 static int ipt_dnat_checkentry(const char *tablename,
 			       const void *entry,
-			       const struct ipt_target *target,
+			       const struct xt_target *target,
 			       void *targinfo,
 			       unsigned int hook_mask)
 {
@@ -261,8 +261,9 @@ int ip_nat_rule_find(struct sk_buff **pskb,
 	return ret;
 }
 
-static struct ipt_target ipt_snat_reg = {
+static struct xt_target ipt_snat_reg = {
 	.name		= "SNAT",
+	.family		= AF_INET,
 	.target		= ipt_snat_target,
 	.targetsize	= sizeof(struct ip_nat_multi_range_compat),
 	.table		= "nat",
@@ -270,8 +271,9 @@ static struct ipt_target ipt_snat_reg = {
 	.checkentry	= ipt_snat_checkentry,
 };
 
-static struct ipt_target ipt_dnat_reg = {
+static struct xt_target ipt_dnat_reg = {
 	.name		= "DNAT",
+	.family		= AF_INET,
 	.target		= ipt_dnat_target,
 	.targetsize	= sizeof(struct ip_nat_multi_range_compat),
 	.table		= "nat",
@@ -286,27 +288,27 @@ int __init ip_nat_rule_init(void)
 	ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
 	if (ret != 0)
 		return ret;
-	ret = ipt_register_target(&ipt_snat_reg);
+	ret = xt_register_target(&ipt_snat_reg);
 	if (ret != 0)
 		goto unregister_table;
 
-	ret = ipt_register_target(&ipt_dnat_reg);
+	ret = xt_register_target(&ipt_dnat_reg);
 	if (ret != 0)
 		goto unregister_snat;
 
 	return ret;
 
  unregister_snat:
-	ipt_unregister_target(&ipt_snat_reg);
+	xt_unregister_target(&ipt_snat_reg);
  unregister_table:
-	ipt_unregister_table(&nat_table);
+	xt_unregister_table(&nat_table);
 
 	return ret;
 }
 
 void ip_nat_rule_cleanup(void)
 {
-	ipt_unregister_target(&ipt_dnat_reg);
-	ipt_unregister_target(&ipt_snat_reg);
+	xt_unregister_target(&ipt_dnat_reg);
+	xt_unregister_target(&ipt_snat_reg);
 	ipt_unregister_table(&nat_table);
 }
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index fc1f153c86ba..0043e908b130 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -507,7 +507,7 @@ check_entry(struct ipt_entry *e, const char *name)
 static inline int check_match(struct ipt_entry_match *m, const char *name,
 				const struct ipt_ip *ip, unsigned int hookmask)
 {
-	struct ipt_match *match;
+	struct xt_match *match;
 	int ret;
 
 	match = m->u.kernel.match;
@@ -531,7 +531,7 @@ find_check_match(struct ipt_entry_match *m,
 	    unsigned int hookmask,
 	    unsigned int *i)
 {
-	struct ipt_match *match;
+	struct xt_match *match;
 	int ret;
 
 	match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
@@ -557,7 +557,7 @@ err:
 static inline int check_target(struct ipt_entry *e, const char *name)
 {
  	struct ipt_entry_target *t;
- 	struct ipt_target *target;
+	struct xt_target *target;
  	int ret;
 
 	t = ipt_get_target(e);
@@ -580,7 +580,7 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
 	    unsigned int *i)
 {
 	struct ipt_entry_target *t;
-	struct ipt_target *target;
+	struct xt_target *target;
 	int ret;
 	unsigned int j;
 
@@ -1437,7 +1437,7 @@ compat_check_calc_match(struct ipt_entry_match *m,
 	    unsigned int hookmask,
 	    int *size, int *i)
 {
-	struct ipt_match *match;
+	struct xt_match *match;
 
 	match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
 						   m->u.user.revision),
@@ -1466,7 +1466,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
 			   const char *name)
 {
 	struct ipt_entry_target *t;
-	struct ipt_target *target;
+	struct xt_target *target;
 	unsigned int entry_offset;
 	int ret, off, h, j;
 
@@ -1550,7 +1550,7 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
 	struct xt_table_info *newinfo, unsigned char *base)
 {
 	struct ipt_entry_target *t;
-	struct ipt_target *target;
+	struct xt_target *target;
 	struct ipt_entry *de;
 	unsigned int origsize;
 	int ret, h;
@@ -2124,7 +2124,7 @@ icmp_checkentry(const char *tablename,
 }
 
 /* The built-in targets: standard (NULL) and error. */
-static struct ipt_target ipt_standard_target = {
+static struct xt_target ipt_standard_target = {
 	.name		= IPT_STANDARD_TARGET,
 	.targetsize	= sizeof(int),
 	.family		= AF_INET,
@@ -2135,7 +2135,7 @@ static struct ipt_target ipt_standard_target = {
 #endif
 };
 
-static struct ipt_target ipt_error_target = {
+static struct xt_target ipt_error_target = {
 	.name		= IPT_ERROR_TARGET,
 	.target		= ipt_error,
 	.targetsize	= IPT_FUNCTION_MAXNAMELEN,
@@ -2158,7 +2158,7 @@ static struct nf_sockopt_ops ipt_sockopts = {
 #endif
 };
 
-static struct ipt_match icmp_matchstruct = {
+static struct xt_match icmp_matchstruct = {
 	.name		= "icmp",
 	.match		= icmp_match,
 	.matchsize	= sizeof(struct ipt_icmp),
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 018fea3fcb5f..343c2abdc1a0 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -26,6 +26,7 @@
 
 #include <linux/netfilter_arp.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
 #include <net/netfilter/nf_conntrack_compat.h>
@@ -330,7 +331,7 @@ target(struct sk_buff **pskb,
 	if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
 	    && (ctinfo == IP_CT_RELATED 
 		|| ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY))
-		return IPT_CONTINUE;
+		return XT_CONTINUE;
 
 	/* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, 
 	 * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
@@ -368,7 +369,7 @@ target(struct sk_buff **pskb,
 	 * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */
 	(*pskb)->pkt_type = PACKET_HOST;
 
-	return IPT_CONTINUE;
+	return XT_CONTINUE;
 }
 
 static int
@@ -471,8 +472,9 @@ static void destroy(const struct xt_target *target, void *targinfo)
 	nf_ct_l3proto_module_put(target->family);
 }
 
-static struct ipt_target clusterip_tgt = {
+static struct xt_target clusterip_tgt = {
 	.name		= "CLUSTERIP",
+	.family		= AF_INET,
 	.target		= target,
 	.targetsize	= sizeof(struct ipt_clusterip_tgt_info),
 	.checkentry	= checkentry,
@@ -728,7 +730,7 @@ static int __init ipt_clusterip_init(void)
 {
 	int ret;
 
-	ret = ipt_register_target(&clusterip_tgt);
+	ret = xt_register_target(&clusterip_tgt);
 	if (ret < 0)
 		return ret;
 
@@ -754,7 +756,7 @@ cleanup_hook:
 	nf_unregister_hook(&cip_arp_ops);
 #endif /* CONFIG_PROC_FS */
 cleanup_target:
-	ipt_unregister_target(&clusterip_tgt);
+	xt_unregister_target(&clusterip_tgt);
 	return ret;
 }
 
@@ -766,7 +768,7 @@ static void __exit ipt_clusterip_fini(void)
 	remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent);
 #endif
 	nf_unregister_hook(&cip_arp_ops);
-	ipt_unregister_target(&clusterip_tgt);
+	xt_unregister_target(&clusterip_tgt);
 }
 
 module_init(ipt_clusterip_init);
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index b55d670a24df..b5ca5938d1fe 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -9,12 +9,14 @@
  * ipt_ECN.c,v 1.5 2002/08/18 19:36:51 laforge Exp
 */
 
+#include <linux/in.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
 #include <net/checksum.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_ECN.h>
 
@@ -95,7 +97,7 @@ target(struct sk_buff **pskb,
 		if (!set_ect_tcp(pskb, einfo))
 			return NF_DROP;
 
-	return IPT_CONTINUE;
+	return XT_CONTINUE;
 }
 
 static int
@@ -119,7 +121,7 @@ checkentry(const char *tablename,
 		return 0;
 	}
 	if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR))
-	    && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & IPT_INV_PROTO))) {
+	    && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) {
 		printk(KERN_WARNING "ECN: cannot use TCP operations on a "
 		       "non-tcp rule\n");
 		return 0;
@@ -127,8 +129,9 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ipt_target ipt_ecn_reg = {
+static struct xt_target ipt_ecn_reg = {
 	.name		= "ECN",
+	.family		= AF_INET,
 	.target		= target,
 	.targetsize	= sizeof(struct ipt_ECN_info),
 	.table		= "mangle",
@@ -138,12 +141,12 @@ static struct ipt_target ipt_ecn_reg = {
 
 static int __init ipt_ecn_init(void)
 {
-	return ipt_register_target(&ipt_ecn_reg);
+	return xt_register_target(&ipt_ecn_reg);
 }
 
 static void __exit ipt_ecn_fini(void)
 {
-	ipt_unregister_target(&ipt_ecn_reg);
+	xt_unregister_target(&ipt_ecn_reg);
 }
 
 module_init(ipt_ecn_init);
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 37778c72aeaa..f68370ffb43f 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -20,7 +20,7 @@
 #include <net/route.h>
 
 #include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ipt_LOG.h>
 
 MODULE_LICENSE("GPL");
@@ -432,7 +432,7 @@ ipt_log_target(struct sk_buff **pskb,
 
 	ipt_log_packet(PF_INET, hooknum, *pskb, in, out, &li,
 	               loginfo->prefix);
-	return IPT_CONTINUE;
+	return XT_CONTINUE;
 }
 
 static int ipt_log_checkentry(const char *tablename,
@@ -455,8 +455,9 @@ static int ipt_log_checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ipt_target ipt_log_reg = {
+static struct xt_target ipt_log_reg = {
 	.name		= "LOG",
+	.family		= AF_INET,
 	.target		= ipt_log_target,
 	.targetsize	= sizeof(struct ipt_log_info),
 	.checkentry	= ipt_log_checkentry,
@@ -473,7 +474,7 @@ static int __init ipt_log_init(void)
 {
 	int ret;
 
-	ret = ipt_register_target(&ipt_log_reg);
+	ret = xt_register_target(&ipt_log_reg);
 	if (ret < 0)
 		return ret;
 	if (nf_log_register(PF_INET, &ipt_log_logger) < 0) {
@@ -489,7 +490,7 @@ static int __init ipt_log_init(void)
 static void __exit ipt_log_fini(void)
 {
 	nf_log_unregister_logger(&ipt_log_logger);
-	ipt_unregister_target(&ipt_log_reg);
+	xt_unregister_target(&ipt_log_reg);
 }
 
 module_init(ipt_log_init);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index d669685afd04..91c42efcd533 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -25,7 +25,7 @@
 #else
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
 #endif
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -190,8 +190,9 @@ static struct notifier_block masq_inet_notifier = {
 	.notifier_call	= masq_inet_event,
 };
 
-static struct ipt_target masquerade = {
+static struct xt_target masquerade = {
 	.name		= "MASQUERADE",
+	.family		= AF_INET,
 	.target		= masquerade_target,
 	.targetsize	= sizeof(struct ip_nat_multi_range_compat),
 	.table		= "nat",
@@ -204,7 +205,7 @@ static int __init ipt_masquerade_init(void)
 {
 	int ret;
 
-	ret = ipt_register_target(&masquerade);
+	ret = xt_register_target(&masquerade);
 
 	if (ret == 0) {
 		/* Register for device down reports */
@@ -218,7 +219,7 @@ static int __init ipt_masquerade_init(void)
 
 static void __exit ipt_masquerade_fini(void)
 {
-	ipt_unregister_target(&masquerade);
+	xt_unregister_target(&masquerade);
 	unregister_netdevice_notifier(&masq_dev_notifier);
 	unregister_inetaddr_notifier(&masq_inet_notifier);	
 }
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 9390e90f2b25..b4acc241d898 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -15,6 +15,7 @@
 #include <linux/netdevice.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/x_tables.h>
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_rule.h>
 #else
@@ -88,8 +89,9 @@ target(struct sk_buff **pskb,
 	return ip_nat_setup_info(ct, &newrange, hooknum);
 }
 
-static struct ipt_target target_module = { 
+static struct xt_target target_module = {
 	.name 		= MODULENAME,
+	.family		= AF_INET,
 	.target 	= target, 
 	.targetsize	= sizeof(struct ip_nat_multi_range_compat),
 	.table		= "nat",
@@ -101,12 +103,12 @@ static struct ipt_target target_module = {
 
 static int __init ipt_netmap_init(void)
 {
-	return ipt_register_target(&target_module);
+	return xt_register_target(&target_module);
 }
 
 static void __exit ipt_netmap_fini(void)
 {
-	ipt_unregister_target(&target_module);
+	xt_unregister_target(&target_module);
 }
 
 module_init(ipt_netmap_init);
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 462eceb3a1b1..54cd021aa5a8 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -18,6 +18,7 @@
 #include <net/protocol.h>
 #include <net/checksum.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/x_tables.h>
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_rule.h>
 #else
@@ -104,8 +105,9 @@ redirect_target(struct sk_buff **pskb,
 	return ip_nat_setup_info(ct, &newrange, hooknum);
 }
 
-static struct ipt_target redirect_reg = {
+static struct xt_target redirect_reg = {
 	.name		= "REDIRECT",
+	.family		= AF_INET,
 	.target		= redirect_target,
 	.targetsize	= sizeof(struct ip_nat_multi_range_compat),
 	.table		= "nat",
@@ -116,12 +118,12 @@ static struct ipt_target redirect_reg = {
 
 static int __init ipt_redirect_init(void)
 {
-	return ipt_register_target(&redirect_reg);
+	return xt_register_target(&redirect_reg);
 }
 
 static void __exit ipt_redirect_fini(void)
 {
-	ipt_unregister_target(&redirect_reg);
+	xt_unregister_target(&redirect_reg);
 }
 
 module_init(ipt_redirect_init);
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index c9cad23844d7..e4a1ddb386a7 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -22,6 +22,7 @@
 #include <net/tcp.h>
 #include <net/route.h>
 #include <net/dst.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_REJECT.h>
 #ifdef CONFIG_BRIDGE_NETFILTER
@@ -230,7 +231,7 @@ static int check(const char *tablename,
 	} else if (rejinfo->with == IPT_TCP_RESET) {
 		/* Must specify that it's a TCP packet */
 		if (e->ip.proto != IPPROTO_TCP
-		    || (e->ip.invflags & IPT_INV_PROTO)) {
+		    || (e->ip.invflags & XT_INV_PROTO)) {
 			DEBUGP("REJECT: TCP_RESET invalid for non-tcp\n");
 			return 0;
 		}
@@ -238,8 +239,9 @@ static int check(const char *tablename,
 	return 1;
 }
 
-static struct ipt_target ipt_reject_reg = {
+static struct xt_target ipt_reject_reg = {
 	.name		= "REJECT",
+	.family		= AF_INET,
 	.target		= reject,
 	.targetsize	= sizeof(struct ipt_reject_info),
 	.table		= "filter",
@@ -251,12 +253,12 @@ static struct ipt_target ipt_reject_reg = {
 
 static int __init ipt_reject_init(void)
 {
-	return ipt_register_target(&ipt_reject_reg);
+	return xt_register_target(&ipt_reject_reg);
 }
 
 static void __exit ipt_reject_fini(void)
 {
-	ipt_unregister_target(&ipt_reject_reg);
+	xt_unregister_target(&ipt_reject_reg);
 }
 
 module_init(ipt_reject_init);
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 3dcf29411337..a1cdd1262de2 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -34,6 +34,7 @@
 #include <net/protocol.h>
 #include <net/checksum.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/x_tables.h>
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_rule.h>
 #else
@@ -186,8 +187,9 @@ same_target(struct sk_buff **pskb,
 	return ip_nat_setup_info(ct, &newrange, hooknum);
 }
 
-static struct ipt_target same_reg = { 
+static struct xt_target same_reg = {
 	.name		= "SAME",
+	.family		= AF_INET,
 	.target		= same_target,
 	.targetsize	= sizeof(struct ipt_same_info),
 	.table		= "nat",
@@ -199,12 +201,12 @@ static struct ipt_target same_reg = {
 
 static int __init ipt_same_init(void)
 {
-	return ipt_register_target(&same_reg);
+	return xt_register_target(&same_reg);
 }
 
 static void __exit ipt_same_fini(void)
 {
-	ipt_unregister_target(&same_reg);
+	xt_unregister_target(&same_reg);
 }
 
 module_init(ipt_same_init);
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 18e74ac4d425..29b05a6bd108 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -13,7 +13,7 @@
 #include <linux/ip.h>
 #include <net/checksum.h>
 
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ipt_TOS.h>
 
 MODULE_LICENSE("GPL");
@@ -40,7 +40,7 @@ target(struct sk_buff **pskb,
 		iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
 		nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
 	}
-	return IPT_CONTINUE;
+	return XT_CONTINUE;
 }
 
 static int
@@ -63,8 +63,9 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ipt_target ipt_tos_reg = {
+static struct xt_target ipt_tos_reg = {
 	.name		= "TOS",
+	.family		= AF_INET,
 	.target		= target,
 	.targetsize	= sizeof(struct ipt_tos_target_info),
 	.table		= "mangle",
@@ -74,12 +75,12 @@ static struct ipt_target ipt_tos_reg = {
 
 static int __init ipt_tos_init(void)
 {
-	return ipt_register_target(&ipt_tos_reg);
+	return xt_register_target(&ipt_tos_reg);
 }
 
 static void __exit ipt_tos_fini(void)
 {
-	ipt_unregister_target(&ipt_tos_reg);
+	xt_unregister_target(&ipt_tos_reg);
 }
 
 module_init(ipt_tos_init);
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index fffe5ca82e91..d2b6fa3f9dcd 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -12,7 +12,7 @@
 #include <linux/ip.h>
 #include <net/checksum.h>
 
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ipt_TTL.h>
 
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
@@ -59,7 +59,7 @@ ipt_ttl_target(struct sk_buff **pskb,
 		iph->ttl = new_ttl;
 	}
 
-	return IPT_CONTINUE;
+	return XT_CONTINUE;
 }
 
 static int ipt_ttl_checkentry(const char *tablename,
@@ -80,8 +80,9 @@ static int ipt_ttl_checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ipt_target ipt_TTL = { 
+static struct xt_target ipt_TTL = {
 	.name 		= "TTL",
+	.family		= AF_INET,
 	.target 	= ipt_ttl_target, 
 	.targetsize	= sizeof(struct ipt_TTL_info),
 	.table		= "mangle",
@@ -91,12 +92,12 @@ static struct ipt_target ipt_TTL = {
 
 static int __init ipt_ttl_init(void)
 {
-	return ipt_register_target(&ipt_TTL);
+	return xt_register_target(&ipt_TTL);
 }
 
 static void __exit ipt_ttl_fini(void)
 {
-	ipt_unregister_target(&ipt_TTL);
+	xt_unregister_target(&ipt_TTL);
 }
 
 module_init(ipt_ttl_init);
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index a47e279eaac2..7af57a3a1f36 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -57,7 +57,7 @@
 #include <linux/mm.h>
 #include <linux/moduleparam.h>
 #include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ipt_ULOG.h>
 #include <net/sock.h>
 #include <linux/bitops.h>
@@ -132,7 +132,6 @@ static void ulog_send(unsigned int nlgroupnum)
 	ub->qlen = 0;
 	ub->skb = NULL;
 	ub->lastnlh = NULL;
-
 }
 
 
@@ -314,7 +313,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb,
 
 	ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL);
  
- 	return IPT_CONTINUE;
+	return XT_CONTINUE;
 }
  
 static void ipt_logfn(unsigned int pf,
@@ -363,8 +362,9 @@ static int ipt_ulog_checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ipt_target ipt_ulog_reg = {
+static struct xt_target ipt_ulog_reg = {
 	.name		= "ULOG",
+	.family		= AF_INET,
 	.target		= ipt_ulog_target,
 	.targetsize	= sizeof(struct ipt_ulog_info),
 	.checkentry	= ipt_ulog_checkentry,
@@ -400,7 +400,7 @@ static int __init ipt_ulog_init(void)
 	if (!nflognl)
 		return -ENOMEM;
 
-	ret = ipt_register_target(&ipt_ulog_reg);
+	ret = xt_register_target(&ipt_ulog_reg);
 	if (ret < 0) {
 		sock_release(nflognl->sk_socket);
 		return ret;
@@ -420,7 +420,7 @@ static void __exit ipt_ulog_fini(void)
 
 	if (nflog)
 		nf_log_unregister_logger(&ipt_ulog_logger);
-	ipt_unregister_target(&ipt_ulog_reg);
+	xt_unregister_target(&ipt_ulog_reg);
 	sock_release(nflognl->sk_socket);
 
 	/* remove pending timers and free allocated skb's */
@@ -436,7 +436,6 @@ static void __exit ipt_ulog_fini(void)
 			ub->skb = NULL;
 		}
 	}
-
 }
 
 module_init(ipt_ulog_init);
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index 7b60eb74788b..648f555c4d16 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -16,7 +16,7 @@
 #include <net/route.h>
 
 #include <linux/netfilter_ipv4/ipt_addrtype.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
@@ -44,8 +44,9 @@ static int match(const struct sk_buff *skb,
 	return ret;
 }
 
-static struct ipt_match addrtype_match = {
+static struct xt_match addrtype_match = {
 	.name		= "addrtype",
+	.family		= AF_INET,
 	.match		= match,
 	.matchsize	= sizeof(struct ipt_addrtype_info),
 	.me		= THIS_MODULE
@@ -53,12 +54,12 @@ static struct ipt_match addrtype_match = {
 
 static int __init ipt_addrtype_init(void)
 {
-	return ipt_register_match(&addrtype_match);
+	return xt_register_match(&addrtype_match);
 }
 
 static void __exit ipt_addrtype_fini(void)
 {
-	ipt_unregister_match(&addrtype_match);
+	xt_unregister_match(&addrtype_match);
 }
 
 module_init(ipt_addrtype_init);
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 1798f86bc534..42f41224a43a 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -6,12 +6,13 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/in.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
 
 #include <linux/netfilter_ipv4/ipt_ah.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
@@ -86,8 +87,9 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ipt_match ah_match = {
+static struct xt_match ah_match = {
 	.name		= "ah",
+	.family		= AF_INET,
 	.match		= match,
 	.matchsize	= sizeof(struct ipt_ah),
 	.proto		= IPPROTO_AH,
@@ -97,12 +99,12 @@ static struct ipt_match ah_match = {
 
 static int __init ipt_ah_init(void)
 {
-	return ipt_register_match(&ah_match);
+	return xt_register_match(&ah_match);
 }
 
 static void __exit ipt_ah_fini(void)
 {
-	ipt_unregister_match(&ah_match);
+	xt_unregister_match(&ah_match);
 }
 
 module_init(ipt_ah_init);
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index dafbdec0efc0..37508b2cfea6 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -9,10 +9,13 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/in.h>
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/tcp.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_ecn.h>
 
@@ -109,8 +112,9 @@ static int checkentry(const char *tablename, const void *ip_void,
 	return 1;
 }
 
-static struct ipt_match ecn_match = {
+static struct xt_match ecn_match = {
 	.name		= "ecn",
+	.family		= AF_INET,
 	.match		= match,
 	.matchsize	= sizeof(struct ipt_ecn_info),
 	.checkentry	= checkentry,
@@ -119,12 +123,12 @@ static struct ipt_match ecn_match = {
 
 static int __init ipt_ecn_init(void)
 {
-	return ipt_register_match(&ecn_match);
+	return xt_register_match(&ecn_match);
 }
 
 static void __exit ipt_ecn_fini(void)
 {
-	ipt_unregister_match(&ecn_match);
+	xt_unregister_match(&ecn_match);
 }
 
 module_init(ipt_ecn_init);
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
index 5202edd8d333..05de593be94c 100644
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ b/net/ipv4/netfilter/ipt_iprange.c
@@ -10,7 +10,7 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ipt_iprange.h>
 
 MODULE_LICENSE("GPL");
@@ -63,22 +63,22 @@ match(const struct sk_buff *skb,
 	return 1;
 }
 
-static struct ipt_match iprange_match = {
+static struct xt_match iprange_match = {
 	.name		= "iprange",
+	.family		= AF_INET,
 	.match		= match,
 	.matchsize	= sizeof(struct ipt_iprange_info),
-	.destroy	= NULL,
 	.me		= THIS_MODULE
 };
 
 static int __init ipt_iprange_init(void)
 {
-	return ipt_register_match(&iprange_match);
+	return xt_register_match(&iprange_match);
 }
 
 static void __exit ipt_iprange_fini(void)
 {
-	ipt_unregister_match(&iprange_match);
+	xt_unregister_match(&iprange_match);
 }
 
 module_init(ipt_iprange_init);
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 78c336f12a9e..9f496ac834b5 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -15,7 +15,7 @@
 #include <net/sock.h>
 
 #include <linux/netfilter_ipv4/ipt_owner.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
@@ -68,8 +68,9 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ipt_match owner_match = {
+static struct xt_match owner_match = {
 	.name		= "owner",
+	.family		= AF_INET,
 	.match		= match,
 	.matchsize	= sizeof(struct ipt_owner_info),
 	.hooks		= (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING),
@@ -79,12 +80,12 @@ static struct ipt_match owner_match = {
 
 static int __init ipt_owner_init(void)
 {
-	return ipt_register_match(&owner_match);
+	return xt_register_match(&owner_match);
 }
 
 static void __exit ipt_owner_fini(void)
 {
-	ipt_unregister_match(&owner_match);
+	xt_unregister_match(&owner_match);
 }
 
 module_init(ipt_owner_init);
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 4db0e73c56f1..6b97b6796173 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -12,6 +12,7 @@
  * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org
  */
 #include <linux/init.h>
+#include <linux/ip.h>
 #include <linux/moduleparam.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -24,7 +25,7 @@
 #include <linux/skbuff.h>
 #include <linux/inet.h>
 
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ipt_recent.h>
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
@@ -462,8 +463,9 @@ static struct file_operations recent_fops = {
 };
 #endif /* CONFIG_PROC_FS */
 
-static struct ipt_match recent_match = {
+static struct xt_match recent_match = {
 	.name		= "recent",
+	.family		= AF_INET,
 	.match		= ipt_recent_match,
 	.matchsize	= sizeof(struct ipt_recent_info),
 	.checkentry	= ipt_recent_checkentry,
@@ -479,13 +481,13 @@ static int __init ipt_recent_init(void)
 		return -EINVAL;
 	ip_list_hash_size = 1 << fls(ip_list_tot);
 
-	err = ipt_register_match(&recent_match);
+	err = xt_register_match(&recent_match);
 #ifdef CONFIG_PROC_FS
 	if (err)
 		return err;
 	proc_dir = proc_mkdir("ipt_recent", proc_net);
 	if (proc_dir == NULL) {
-		ipt_unregister_match(&recent_match);
+		xt_unregister_match(&recent_match);
 		err = -ENOMEM;
 	}
 #endif
@@ -495,7 +497,7 @@ static int __init ipt_recent_init(void)
 static void __exit ipt_recent_exit(void)
 {
 	BUG_ON(!list_empty(&tables));
-	ipt_unregister_match(&recent_match);
+	xt_unregister_match(&recent_match);
 #ifdef CONFIG_PROC_FS
 	remove_proc_entry("ipt_recent", proc_net);
 #endif
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
index 5549c39c7851..5d33b51d49d8 100644
--- a/net/ipv4/netfilter/ipt_tos.c
+++ b/net/ipv4/netfilter/ipt_tos.c
@@ -8,11 +8,12 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 
 #include <linux/netfilter_ipv4/ipt_tos.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("iptables TOS match module");
@@ -32,8 +33,9 @@ match(const struct sk_buff *skb,
 	return (skb->nh.iph->tos == info->tos) ^ info->invert;
 }
 
-static struct ipt_match tos_match = {
+static struct xt_match tos_match = {
 	.name		= "tos",
+	.family		= AF_INET,
 	.match		= match,
 	.matchsize	= sizeof(struct ipt_tos_info),
 	.me		= THIS_MODULE,
@@ -41,12 +43,12 @@ static struct ipt_match tos_match = {
 
 static int __init ipt_multiport_init(void)
 {
-	return ipt_register_match(&tos_match);
+	return xt_register_match(&tos_match);
 }
 
 static void __exit ipt_multiport_fini(void)
 {
-	ipt_unregister_match(&tos_match);
+	xt_unregister_match(&tos_match);
 }
 
 module_init(ipt_multiport_init);
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index a5243bdb87d7..d5cd984e5ed2 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -9,11 +9,12 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 
 #include <linux/netfilter_ipv4/ipt_ttl.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
 
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("IP tables TTL matching module");
@@ -48,8 +49,9 @@ static int match(const struct sk_buff *skb,
 	return 0;
 }
 
-static struct ipt_match ttl_match = {
+static struct xt_match ttl_match = {
 	.name		= "ttl",
+	.family		= AF_INET,
 	.match		= match,
 	.matchsize	= sizeof(struct ipt_ttl_info),
 	.me		= THIS_MODULE,
@@ -57,13 +59,12 @@ static struct ipt_match ttl_match = {
 
 static int __init ipt_ttl_init(void)
 {
-	return ipt_register_match(&ttl_match);
+	return xt_register_match(&ttl_match);
 }
 
 static void __exit ipt_ttl_fini(void)
 {
-	ipt_unregister_match(&ttl_match);
-
+	xt_unregister_match(&ttl_match);
 }
 
 module_init(ipt_ttl_init);
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 3745efe70302..de25d63f543c 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -294,7 +294,7 @@ int nf_nat_rule_find(struct sk_buff **pskb,
 	return ret;
 }
 
-static struct ipt_target ipt_snat_reg = {
+static struct xt_target ipt_snat_reg = {
 	.name		= "SNAT",
 	.target		= ipt_snat_target,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 99502c5da4c4..7083e1cfb2f5 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -530,7 +530,7 @@ check_match(struct ip6t_entry_match *m,
 	    unsigned int hookmask,
 	    unsigned int *i)
 {
-	struct ip6t_match *match;
+	struct xt_match *match;
 	int ret;
 
 	match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name,
@@ -564,14 +564,14 @@ err:
 	return ret;
 }
 
-static struct ip6t_target ip6t_standard_target;
+static struct xt_target ip6t_standard_target;
 
 static inline int
 check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
 	    unsigned int *i)
 {
 	struct ip6t_entry_target *t;
-	struct ip6t_target *target;
+	struct xt_target *target;
 	int ret;
 	unsigned int j;
 
@@ -1348,13 +1348,13 @@ icmp6_checkentry(const char *tablename,
 }
 
 /* The built-in targets: standard (NULL) and error. */
-static struct ip6t_target ip6t_standard_target = {
+static struct xt_target ip6t_standard_target = {
 	.name		= IP6T_STANDARD_TARGET,
 	.targetsize	= sizeof(int),
 	.family		= AF_INET6,
 };
 
-static struct ip6t_target ip6t_error_target = {
+static struct xt_target ip6t_error_target = {
 	.name		= IP6T_ERROR_TARGET,
 	.target		= ip6t_error,
 	.targetsize	= IP6T_FUNCTION_MAXNAMELEN,
@@ -1371,7 +1371,7 @@ static struct nf_sockopt_ops ip6t_sockopts = {
 	.get		= do_ip6t_get_ctl,
 };
 
-static struct ip6t_match icmp6_matchstruct = {
+static struct xt_match icmp6_matchstruct = {
 	.name		= "icmp6",
 	.match		= &icmp6_match,
 	.matchsize	= sizeof(struct ip6t_icmp),
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index 7e5d51386f56..04e500172fb4 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -9,12 +9,13 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
+#include <linux/ipv6.h>
 
-#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6t_HL.h>
 
 MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
-MODULE_DESCRIPTION("IP tables Hop Limit modification module");
+MODULE_DESCRIPTION("IP6 tables Hop Limit modification module");
 MODULE_LICENSE("GPL");
 
 static unsigned int ip6t_hl_target(struct sk_buff **pskb, 
@@ -54,7 +55,7 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb,
 
 	ip6h->hop_limit = new_hl;
 
-	return IP6T_CONTINUE;
+	return XT_CONTINUE;
 }
 
 static int ip6t_hl_checkentry(const char *tablename,
@@ -78,8 +79,9 @@ static int ip6t_hl_checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ip6t_target ip6t_HL = { 
+static struct xt_target ip6t_HL = {
 	.name 		= "HL", 
+	.family		= AF_INET6,
 	.target		= ip6t_hl_target, 
 	.targetsize	= sizeof(struct ip6t_HL_info),
 	.table		= "mangle",
@@ -89,12 +91,12 @@ static struct ip6t_target ip6t_HL = {
 
 static int __init ip6t_hl_init(void)
 {
-	return ip6t_register_target(&ip6t_HL);
+	return xt_register_target(&ip6t_HL);
 }
 
 static void __exit ip6t_hl_fini(void)
 {
-	ip6t_unregister_target(&ip6t_HL);
+	xt_unregister_target(&ip6t_HL);
 }
 
 module_init(ip6t_hl_init);
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 9fe0816bb21d..5587a77b884c 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -21,6 +21,7 @@
 #include <net/tcp.h>
 #include <net/ipv6.h>
 #include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 
 MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>");
@@ -442,7 +443,7 @@ ip6t_log_target(struct sk_buff **pskb,
 
 	ip6t_log_packet(PF_INET6, hooknum, *pskb, in, out, &li,
 	                loginfo->prefix);
-	return IP6T_CONTINUE;
+	return XT_CONTINUE;
 }
 
 
@@ -466,8 +467,9 @@ static int ip6t_log_checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ip6t_target ip6t_log_reg = {
+static struct xt_target ip6t_log_reg = {
 	.name 		= "LOG",
+	.family		= AF_INET6,
 	.target 	= ip6t_log_target, 
 	.targetsize	= sizeof(struct ip6t_log_info),
 	.checkentry	= ip6t_log_checkentry, 
@@ -484,7 +486,7 @@ static int __init ip6t_log_init(void)
 {
 	int ret;
 
-	ret = ip6t_register_target(&ip6t_log_reg);
+	ret = xt_register_target(&ip6t_log_reg);
 	if (ret < 0)
 		return ret;
 	if (nf_log_register(PF_INET6, &ip6t_logger) < 0) {
@@ -500,7 +502,7 @@ static int __init ip6t_log_init(void)
 static void __exit ip6t_log_fini(void)
 {
 	nf_log_unregister_logger(&ip6t_logger);
-	ip6t_unregister_target(&ip6t_log_reg);
+	xt_unregister_target(&ip6t_log_reg);
 }
 
 module_init(ip6t_log_init);
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 311eae82feb3..278349c18793 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -26,6 +26,7 @@
 #include <net/ip6_fib.h>
 #include <net/ip6_route.h>
 #include <net/flow.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_REJECT.h>
 
@@ -234,7 +235,7 @@ static int check(const char *tablename,
 	} else if (rejinfo->with == IP6T_TCP_RESET) {
 		/* Must specify that it's a TCP packet */
 		if (e->ipv6.proto != IPPROTO_TCP
-		    || (e->ipv6.invflags & IP6T_INV_PROTO)) {
+		    || (e->ipv6.invflags & XT_INV_PROTO)) {
 			DEBUGP("ip6t_REJECT: TCP_RESET illegal for non-tcp\n");
 			return 0;
 		}
@@ -242,8 +243,9 @@ static int check(const char *tablename,
 	return 1;
 }
 
-static struct ip6t_target ip6t_reject_reg = {
+static struct xt_target ip6t_reject_reg = {
 	.name		= "REJECT",
+	.family		= AF_INET6,
 	.target		= reject6_target,
 	.targetsize	= sizeof(struct ip6t_reject_info),
 	.table		= "filter",
@@ -255,12 +257,12 @@ static struct ip6t_target ip6t_reject_reg = {
 
 static int __init ip6t_reject_init(void)
 {
-	return ip6t_register_target(&ip6t_reject_reg);
+	return xt_register_target(&ip6t_reject_reg);
 }
 
 static void __exit ip6t_reject_fini(void)
 {
-	ip6t_unregister_target(&ip6t_reject_reg);
+	xt_unregister_target(&ip6t_reject_reg);
 }
 
 module_init(ip6t_reject_init);
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 46486645eb75..456c76adcbf6 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -15,6 +15,7 @@
 #include <net/checksum.h>
 #include <net/ipv6.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_ah.h>
 
@@ -118,8 +119,9 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ip6t_match ah_match = {
+static struct xt_match ah_match = {
 	.name		= "ah",
+	.family		= AF_INET6,
 	.match		= match,
 	.matchsize	= sizeof(struct ip6t_ah),
 	.checkentry	= checkentry,
@@ -128,12 +130,12 @@ static struct ip6t_match ah_match = {
 
 static int __init ip6t_ah_init(void)
 {
-	return ip6t_register_match(&ah_match);
+	return xt_register_match(&ah_match);
 }
 
 static void __exit ip6t_ah_fini(void)
 {
-	ip6t_unregister_match(&ah_match);
+	xt_unregister_match(&ah_match);
 }
 
 module_init(ip6t_ah_init);
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index 4f6b84c8f4ab..967bed71d4a8 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -12,6 +12,7 @@
 #include <linux/ipv6.h>
 #include <linux/if_ether.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 
 MODULE_DESCRIPTION("IPv6 EUI64 address checking match");
@@ -61,8 +62,9 @@ match(const struct sk_buff *skb,
 	return 0;
 }
 
-static struct ip6t_match eui64_match = {
+static struct xt_match eui64_match = {
 	.name		= "eui64",
+	.family		= AF_INET6,
 	.match		= match,
 	.matchsize	= sizeof(int),
 	.hooks		= (1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN) |
@@ -72,12 +74,12 @@ static struct ip6t_match eui64_match = {
 
 static int __init ip6t_eui64_init(void)
 {
-	return ip6t_register_match(&eui64_match);
+	return xt_register_match(&eui64_match);
 }
 
 static void __exit ip6t_eui64_fini(void)
 {
-	ip6t_unregister_match(&eui64_match);
+	xt_unregister_match(&eui64_match);
 }
 
 module_init(ip6t_eui64_init);
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index cd22eaaccdca..5a5da71321b6 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -14,6 +14,7 @@
 #include <net/checksum.h>
 #include <net/ipv6.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_frag.h>
 
@@ -135,8 +136,9 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ip6t_match frag_match = {
+static struct xt_match frag_match = {
 	.name		= "frag",
+	.family		= AF_INET6,
 	.match		= match,
 	.matchsize	= sizeof(struct ip6t_frag),
 	.checkentry	= checkentry,
@@ -145,12 +147,12 @@ static struct ip6t_match frag_match = {
 
 static int __init ip6t_frag_init(void)
 {
-	return ip6t_register_match(&frag_match);
+	return xt_register_match(&frag_match);
 }
 
 static void __exit ip6t_frag_fini(void)
 {
-	ip6t_unregister_match(&frag_match);
+	xt_unregister_match(&frag_match);
 }
 
 module_init(ip6t_frag_init);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 3f25babe0440..d2373c7cd354 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -16,6 +16,7 @@
 
 #include <asm/byteorder.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_opts.h>
 
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
index 44a729e17c48..601cc1211c62 100644
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -8,11 +8,12 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/ipv6.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 
 #include <linux/netfilter_ipv6/ip6t_hl.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
 
 MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
 MODULE_DESCRIPTION("IP tables Hop Limit matching module");
@@ -48,8 +49,9 @@ static int match(const struct sk_buff *skb,
 	return 0;
 }
 
-static struct ip6t_match hl_match = {
+static struct xt_match hl_match = {
 	.name		= "hl",
+	.family		= AF_INET6,
 	.match		= match,
 	.matchsize	= sizeof(struct ip6t_hl_info),
 	.me		= THIS_MODULE,
@@ -57,13 +59,12 @@ static struct ip6t_match hl_match = {
 
 static int __init ip6t_hl_init(void)
 {
-	return ip6t_register_match(&hl_match);
+	return xt_register_match(&hl_match);
 }
 
 static void __exit ip6t_hl_fini(void)
 {
-	ip6t_unregister_match(&hl_match);
-
+	xt_unregister_match(&hl_match);
 }
 
 module_init(ip6t_hl_init);
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 3093c398002f..26ac084adefc 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -18,6 +18,7 @@
 #include <net/checksum.h>
 #include <net/ipv6.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_ipv6header.h>
 
@@ -140,8 +141,9 @@ ipv6header_checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ip6t_match ip6t_ipv6header_match = {
+static struct xt_match ip6t_ipv6header_match = {
 	.name		= "ipv6header",
+	.family		= AF_INET6,
 	.match		= &ipv6header_match,
 	.matchsize	= sizeof(struct ip6t_ipv6header_info),
 	.checkentry	= &ipv6header_checkentry,
@@ -151,12 +153,12 @@ static struct ip6t_match ip6t_ipv6header_match = {
 
 static int __init ipv6header_init(void)
 {
-	return ip6t_register_match(&ip6t_ipv6header_match);
+	return xt_register_match(&ip6t_ipv6header_match);
 }
 
 static void __exit ipv6header_exit(void)
 {
-	ip6t_unregister_match(&ip6t_ipv6header_match);
+	xt_unregister_match(&ip6t_ipv6header_match);
 }
 
 module_init(ipv6header_init);
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
index 4eb9bbc4ebc3..43738bba00b5 100644
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ b/net/ipv6/netfilter/ip6t_owner.c
@@ -16,6 +16,7 @@
 
 #include <linux/netfilter_ipv6/ip6t_owner.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
 
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
 MODULE_DESCRIPTION("IP6 tables owner matching module");
@@ -69,8 +70,9 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ip6t_match owner_match = {
+static struct xt_match owner_match = {
 	.name		= "owner",
+	.family		= AF_INET6,
 	.match		= match,
 	.matchsize	= sizeof(struct ip6t_owner_info),
 	.hooks		= (1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING),
@@ -80,12 +82,12 @@ static struct ip6t_match owner_match = {
 
 static int __init ip6t_owner_init(void)
 {
-	return ip6t_register_match(&owner_match);
+	return xt_register_match(&owner_match);
 }
 
 static void __exit ip6t_owner_fini(void)
 {
-	ip6t_unregister_match(&owner_match);
+	xt_unregister_match(&owner_match);
 }
 
 module_init(ip6t_owner_init);
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 54d7d14134fd..81ab00d8c182 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -16,6 +16,7 @@
 
 #include <asm/byteorder.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_rt.h>
 
@@ -221,8 +222,9 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ip6t_match rt_match = {
+static struct xt_match rt_match = {
 	.name		= "rt",
+	.family		= AF_INET6,
 	.match		= match,
 	.matchsize	= sizeof(struct ip6t_rt),
 	.checkentry	= checkentry,
@@ -231,12 +233,12 @@ static struct ip6t_match rt_match = {
 
 static int __init ip6t_rt_init(void)
 {
-	return ip6t_register_match(&rt_match);
+	return xt_register_match(&rt_match);
 }
 
 static void __exit ip6t_rt_fini(void)
 {
-	ip6t_unregister_match(&rt_match);
+	xt_unregister_match(&rt_match);
 }
 
 module_init(ip6t_rt_init);
-- 
cgit v1.2.3


From e60a13e030867078f3c9fef8dca6cd8a5b883478 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@gmx.de>
Date: Wed, 7 Feb 2007 15:12:33 -0800
Subject: [NETFILTER]: {ip,ip6}_tables: use struct xt_table instead of
 redefined structure names

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ip_tables.h  |  9 +++------
 include/linux/netfilter_ipv6/ip6_tables.h |  6 +++---
 net/ipv4/netfilter/ip_nat_rule.c          |  2 +-
 net/ipv4/netfilter/ip_tables.c            | 20 ++++++++++----------
 net/ipv4/netfilter/iptable_filter.c       |  2 +-
 net/ipv4/netfilter/iptable_mangle.c       |  2 +-
 net/ipv4/netfilter/iptable_raw.c          |  2 +-
 net/ipv4/netfilter/nf_nat_rule.c          |  2 +-
 net/ipv6/netfilter/ip6table_filter.c      |  2 +-
 net/ipv6/netfilter/ip6table_mangle.c      |  2 +-
 net/sched/act_ipt.c                       |  2 +-
 11 files changed, 24 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index c59bc6ff2280..fccf4b873232 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -272,12 +272,9 @@ ipt_get_target(struct ipt_entry *e)
 #include <linux/init.h>
 extern void ipt_init(void) __init;
 
-//#define ipt_register_table(tbl, repl) xt_register_table(AF_INET, tbl, repl)
-//#define ipt_unregister_table(tbl) xt_unregister_table(AF_INET, tbl)
-
-extern int ipt_register_table(struct ipt_table *table,
+extern int ipt_register_table(struct xt_table *table,
 			      const struct ipt_replace *repl);
-extern void ipt_unregister_table(struct ipt_table *table);
+extern void ipt_unregister_table(struct xt_table *table);
 
 /* net/sched/ipt.c: Gimme access to your targets!  Gets target->me. */
 extern struct xt_target *ipt_find_target(const char *name, u8 revision);
@@ -305,7 +302,7 @@ extern unsigned int ipt_do_table(struct sk_buff **pskb,
 				 unsigned int hook,
 				 const struct net_device *in,
 				 const struct net_device *out,
-				 struct ipt_table *table);
+				 struct xt_table *table);
 
 #define IPT_ALIGN(s) XT_ALIGN(s)
 
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 2fbabab30d21..e37698c337ad 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -286,14 +286,14 @@ ip6t_get_target(struct ip6t_entry *e)
 #include <linux/init.h>
 extern void ip6t_init(void) __init;
 
-extern int ip6t_register_table(struct ip6t_table *table,
+extern int ip6t_register_table(struct xt_table *table,
 			       const struct ip6t_replace *repl);
-extern void ip6t_unregister_table(struct ip6t_table *table);
+extern void ip6t_unregister_table(struct xt_table *table);
 extern unsigned int ip6t_do_table(struct sk_buff **pskb,
 				  unsigned int hook,
 				  const struct net_device *in,
 				  const struct net_device *out,
-				  struct ip6t_table *table);
+				  struct xt_table *table);
 
 /* Check for an extension */
 extern int ip6t_ext_hdr(u8 nexthdr);
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index 7a8e7bb577e2..e1c8a05f3dc6 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -86,7 +86,7 @@ static struct
     }
 };
 
-static struct ipt_table nat_table = {
+static struct xt_table nat_table = {
 	.name		= "nat",
 	.valid_hooks	= NAT_VALID_HOOKS,
 	.lock		= RW_LOCK_UNLOCKED,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 0043e908b130..5a7b3a341389 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -216,7 +216,7 @@ ipt_do_table(struct sk_buff **pskb,
 	     unsigned int hook,
 	     const struct net_device *in,
 	     const struct net_device *out,
-	     struct ipt_table *table)
+	     struct xt_table *table)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	u_int16_t offset;
@@ -818,7 +818,7 @@ get_counters(const struct xt_table_info *t,
 	}
 }
 
-static inline struct xt_counters * alloc_counters(struct ipt_table *table)
+static inline struct xt_counters * alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
@@ -843,7 +843,7 @@ static inline struct xt_counters * alloc_counters(struct ipt_table *table)
 
 static int
 copy_entries_to_user(unsigned int total_size,
-		     struct ipt_table *table,
+		     struct xt_table *table,
 		     void __user *userptr)
 {
 	unsigned int off, num;
@@ -1046,7 +1046,7 @@ static int compat_table_info(struct xt_table_info *info,
 static int get_info(void __user *user, int *len, int compat)
 {
 	char name[IPT_TABLE_MAXNAMELEN];
-	struct ipt_table *t;
+	struct xt_table *t;
 	int ret;
 
 	if (*len != sizeof(struct ipt_getinfo)) {
@@ -1107,7 +1107,7 @@ get_entries(struct ipt_get_entries __user *uptr, int *len)
 {
 	int ret;
 	struct ipt_get_entries get;
-	struct ipt_table *t;
+	struct xt_table *t;
 
 	if (*len < sizeof(get)) {
 		duprintf("get_entries: %u < %d\n", *len,
@@ -1151,7 +1151,7 @@ __do_replace(const char *name, unsigned int valid_hooks,
 		void __user *counters_ptr)
 {
 	int ret;
-	struct ipt_table *t;
+	struct xt_table *t;
 	struct xt_table_info *oldinfo;
 	struct xt_counters *counters;
 	void *loc_cpu_old_entry;
@@ -1302,7 +1302,7 @@ do_add_counters(void __user *user, unsigned int len, int compat)
 	char *name;
 	int size;
 	void *ptmp;
-	struct ipt_table *t;
+	struct xt_table *t;
 	struct xt_table_info *private;
 	int ret = 0;
 	void *loc_cpu_entry;
@@ -1795,7 +1795,7 @@ struct compat_ipt_get_entries
 };
 
 static int compat_copy_entries_to_user(unsigned int total_size,
-		     struct ipt_table *table, void __user *userptr)
+		     struct xt_table *table, void __user *userptr)
 {
 	unsigned int off, num;
 	struct compat_ipt_entry e;
@@ -1869,7 +1869,7 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
 {
 	int ret;
 	struct compat_ipt_get_entries get;
-	struct ipt_table *t;
+	struct xt_table *t;
 
 
 	if (*len < sizeof(get)) {
@@ -2052,7 +2052,7 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
 	return 0;
 }
 
-void ipt_unregister_table(struct ipt_table *table)
+void ipt_unregister_table(struct xt_table *table)
 {
 	struct xt_table_info *private;
 	void *loc_cpu_entry;
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index e2e7dd8d7903..51053cb42f43 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -74,7 +74,7 @@ static struct
     }
 };
 
-static struct ipt_table packet_filter = {
+static struct xt_table packet_filter = {
 	.name		= "filter",
 	.valid_hooks	= FILTER_VALID_HOOKS,
 	.lock		= RW_LOCK_UNLOCKED,
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index af2939889444..a532e4d84332 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -103,7 +103,7 @@ static struct
     }
 };
 
-static struct ipt_table packet_mangler = {
+static struct xt_table packet_mangler = {
 	.name		= "mangle",
 	.valid_hooks	= MANGLE_VALID_HOOKS,
 	.lock		= RW_LOCK_UNLOCKED,
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index bcbeb4aeacd9..5277550fa6b5 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -79,7 +79,7 @@ static struct
 	}
 };
 
-static struct ipt_table packet_raw = { 
+static struct xt_table packet_raw = {
 	.name = "raw", 
 	.valid_hooks =  RAW_VALID_HOOKS, 
 	.lock = RW_LOCK_UNLOCKED, 
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index de25d63f543c..7f95b4e2eb31 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -119,7 +119,7 @@ static struct
 	}
 };
 
-static struct ipt_table nat_table = {
+static struct xt_table nat_table = {
 	.name		= "nat",
 	.valid_hooks	= NAT_VALID_HOOKS,
 	.lock		= RW_LOCK_UNLOCKED,
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 2fc07c74decf..45b8342875a8 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -92,7 +92,7 @@ static struct
     }
 };
 
-static struct ip6t_table packet_filter = {
+static struct xt_table packet_filter = {
 	.name		= "filter",
 	.valid_hooks	= FILTER_VALID_HOOKS,
 	.lock		= RW_LOCK_UNLOCKED,
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 6250e86a6ddc..6f67ee190fa7 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -122,7 +122,7 @@ static struct
     }
 };
 
-static struct ip6t_table packet_mangler = {
+static struct xt_table packet_mangler = {
 	.name		= "mangle",
 	.valid_hooks	= MANGLE_VALID_HOOKS,
 	.lock		= RW_LOCK_UNLOCKED,
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 01e69138578d..4c68c718f5ec 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -52,7 +52,7 @@ static struct tcf_hashinfo ipt_hash_info = {
 
 static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
 {
-	struct ipt_target *target;
+	struct xt_target *target;
 	int ret = 0;
 
 	target = xt_request_find_target(AF_INET, t->u.user.name,
-- 
cgit v1.2.3


From a0ca215a730b2c4d5024143e64b0d80d50858667 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 7 Feb 2007 15:12:57 -0800
Subject: [NETFILTER]: ip6_tables: support MH match

This introduces match for Mobility Header (MH) described by Mobile IPv6
specification (RFC3775). User can specify the MH type or its range to be
matched.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: Yasuyuki Kozakai <kozakai@linux-ipv6.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv6/ip6t_mh.h |  15 +++++
 net/ipv6/netfilter/Kconfig             |   8 +++
 net/ipv6/netfilter/Makefile            |   1 +
 net/ipv6/netfilter/ip6t_mh.c           | 108 +++++++++++++++++++++++++++++++++
 4 files changed, 132 insertions(+)
 create mode 100644 include/linux/netfilter_ipv6/ip6t_mh.h
 create mode 100644 net/ipv6/netfilter/ip6t_mh.c

(limited to 'include')

diff --git a/include/linux/netfilter_ipv6/ip6t_mh.h b/include/linux/netfilter_ipv6/ip6t_mh.h
new file mode 100644
index 000000000000..b9ca9a5f74d0
--- /dev/null
+++ b/include/linux/netfilter_ipv6/ip6t_mh.h
@@ -0,0 +1,15 @@
+#ifndef _IP6T_MH_H
+#define _IP6T_MH_H
+
+/* MH matching stuff */
+struct ip6t_mh
+{
+	u_int8_t types[2];	/* MH type range */
+	u_int8_t invflags;	/* Inverse flags */
+};
+
+/* Values for "invflags" field in struct ip6t_mh. */
+#define IP6T_MH_INV_TYPE	0x01	/* Invert the sense of type. */
+#define IP6T_MH_INV_MASK	0x01	/* All possible flags. */
+
+#endif /*_IP6T_MH_H*/
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index adcd6131df2a..cd549aea84f0 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -114,6 +114,14 @@ config IP6_NF_MATCH_AH
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config IP6_NF_MATCH_MH
+	tristate "MH match support"
+	depends on IP6_NF_IPTABLES
+	help
+	  This module allows one to match MH packets.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config IP6_NF_MATCH_EUI64
 	tristate "EUI64 address check"
 	depends on IP6_NF_IPTABLES
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index ac1dfebde175..4513eab77397 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
 obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
 obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o
 obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
+obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o
 
 # objects for l3 independent conntrack
 nf_conntrack_ipv6-objs  :=  nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
new file mode 100644
index 000000000000..2c7efc6a506d
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C)2006 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Author:
+ *	Masahide NAKAMURA @USAGI <masahide.nakamura.cz@hitachi.com>
+ *
+ * Based on net/netfilter/xt_tcpudp.c
+ *
+ */
+#include <linux/types.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/mip6.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv6/ip6t_mh.h>
+
+MODULE_DESCRIPTION("ip6t_tables match for MH");
+MODULE_LICENSE("GPL");
+
+#ifdef DEBUG_IP_FIREWALL_USER
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+/* Returns 1 if the type is matched by the range, 0 otherwise */
+static inline int
+type_match(u_int8_t min, u_int8_t max, u_int8_t type, int invert)
+{
+	int ret;
+
+	ret = (type >= min && type <= max) ^ invert;
+	return ret;
+}
+
+static int
+match(const struct sk_buff *skb,
+	 const struct net_device *in,
+	 const struct net_device *out,
+	 const struct xt_match *match,
+	 const void *matchinfo,
+	 int offset,
+	 unsigned int protoff,
+	 int *hotdrop)
+{
+	struct ip6_mh _mh, *mh;
+	const struct ip6t_mh *mhinfo = matchinfo;
+
+	/* Must not be a fragment. */
+	if (offset)
+		return 0;
+
+	mh = skb_header_pointer(skb, protoff, sizeof(_mh), &_mh);
+	if (mh == NULL) {
+		/* We've been asked to examine this packet, and we
+		   can't.  Hence, no choice but to drop. */
+		duprintf("Dropping evil MH tinygram.\n");
+		*hotdrop = 1;
+		return 0;
+	}
+
+	return type_match(mhinfo->types[0], mhinfo->types[1], mh->ip6mh_type,
+			  !!(mhinfo->invflags & IP6T_MH_INV_TYPE));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+mh_checkentry(const char *tablename,
+	      const void *entry,
+	      const struct xt_match *match,
+	      void *matchinfo,
+	      unsigned int hook_mask)
+{
+	const struct ip6t_mh *mhinfo = matchinfo;
+
+	/* Must specify no unknown invflags */
+	return !(mhinfo->invflags & ~IP6T_MH_INV_MASK);
+}
+
+static struct xt_match mh_match = {
+	.name		= "mh",
+	.family		= AF_INET6,
+	.checkentry	= mh_checkentry,
+	.match		= match,
+	.matchsize	= sizeof(struct ip6t_mh),
+	.proto		= IPPROTO_MH,
+	.me		= THIS_MODULE,
+};
+
+static int __init ip6t_mh_init(void)
+{
+	return xt_register_match(&mh_match);
+}
+
+static void __exit ip6t_mh_fini(void)
+{
+	xt_unregister_match(&mh_match);
+}
+
+module_init(ip6t_mh_init);
+module_exit(ip6t_mh_fini);
-- 
cgit v1.2.3


From c3e79c05b45c3d6115d8c46e3012939c71573f13 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 7 Feb 2007 15:13:20 -0800
Subject: [NETFILTER]: ip_tables: remove declaration of non-existant
 ipt_find_target function

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ip_tables.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index fccf4b873232..9527296595cd 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -276,9 +276,6 @@ extern int ipt_register_table(struct xt_table *table,
 			      const struct ipt_replace *repl);
 extern void ipt_unregister_table(struct xt_table *table);
 
-/* net/sched/ipt.c: Gimme access to your targets!  Gets target->me. */
-extern struct xt_target *ipt_find_target(const char *name, u8 revision);
-
 /* Standard entry. */
 struct ipt_standard
 {
-- 
cgit v1.2.3


From 9934e81c8c4981342dab3e386aff5d4499bea0d2 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 7 Feb 2007 15:14:28 -0800
Subject: [NETFILTER]: ip6_tables: remove redundant structure definitions

Move ip6t_standard/ip6t_error_target/ip6t_error definitions to ip6_tables.h
instead of defining them in each table individually.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv6/ip6_tables.h | 19 +++++++++++++++++++
 net/ipv6/netfilter/ip6table_filter.c      | 19 -------------------
 net/ipv6/netfilter/ip6table_mangle.c      | 19 -------------------
 net/ipv6/netfilter/ip6table_raw.c         | 19 -------------------
 4 files changed, 19 insertions(+), 57 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index e37698c337ad..61aa10412fc8 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -104,6 +104,25 @@ struct ip6t_entry
 	unsigned char elems[0];
 };
 
+/* Standard entry */
+struct ip6t_standard
+{
+	struct ip6t_entry entry;
+	struct ip6t_standard_target target;
+};
+
+struct ip6t_error_target
+{
+	struct ip6t_entry_target target;
+	char errorname[IP6T_FUNCTION_MAXNAMELEN];
+};
+
+struct ip6t_error
+{
+	struct ip6t_entry entry;
+	struct ip6t_error_target target;
+};
+
 /*
  * New IP firewall options for [gs]etsockopt at the RAW IP level.
  * Unlike BSD Linux inherits IP options so you don't have to use
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 45b8342875a8..112a21d0c6da 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -19,25 +19,6 @@ MODULE_DESCRIPTION("ip6tables filter table");
 
 #define FILTER_VALID_HOOKS ((1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | (1 << NF_IP6_LOCAL_OUT))
 
-/* Standard entry. */
-struct ip6t_standard
-{
-	struct ip6t_entry entry;
-	struct ip6t_standard_target target;
-};
-
-struct ip6t_error_target
-{
-	struct ip6t_entry_target target;
-	char errorname[IP6T_FUNCTION_MAXNAMELEN];
-};
-
-struct ip6t_error
-{
-	struct ip6t_entry entry;
-	struct ip6t_error_target target;
-};
-
 static struct
 {
 	struct ip6t_replace repl;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 6f67ee190fa7..5f5aa0e51478 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -29,25 +29,6 @@ MODULE_DESCRIPTION("ip6tables mangle table");
 #define DEBUGP(x, args...)
 #endif
 
-/* Standard entry. */
-struct ip6t_standard
-{
-	struct ip6t_entry entry;
-	struct ip6t_standard_target target;
-};
-
-struct ip6t_error_target
-{
-	struct ip6t_entry_target target;
-	char errorname[IP6T_FUNCTION_MAXNAMELEN];
-};
-
-struct ip6t_error
-{
-	struct ip6t_entry entry;
-	struct ip6t_error_target target;
-};
-
 static struct
 {
 	struct ip6t_replace repl;
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index b4154da575c0..277bf34638b4 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -14,25 +14,6 @@
 #define DEBUGP(x, args...)
 #endif
 
-/* Standard entry. */
-struct ip6t_standard
-{
-	struct ip6t_entry entry;
-	struct ip6t_standard_target target;
-};
-
-struct ip6t_error_target
-{
-	struct ip6t_entry_target target;
-	char errorname[IP6T_FUNCTION_MAXNAMELEN];
-};
-
-struct ip6t_error
-{
-	struct ip6t_entry entry;
-	struct ip6t_error_target target;
-};
-
 static struct
 {
 	struct ip6t_replace repl;
-- 
cgit v1.2.3


From 80c9abaabf4283f7cf4a0b3597cd302506635b7f Mon Sep 17 00:00:00 2001
From: Shinta Sugimoto <shinta.sugimoto@ericsson.com>
Date: Thu, 8 Feb 2007 13:11:42 -0800
Subject: [XFRM]: Extension for dynamic update of endpoint address(es)

Extend the XFRM framework so that endpoint address(es) in the XFRM
databases could be dynamically updated according to a request (MIGRATE
message) from user application. Target XFRM policy is first identified
by the selector in the MIGRATE message. Next, the endpoint addresses
of the matching templates and XFRM states are updated according to
the MIGRATE message.

Signed-off-by: Shinta Sugimoto <shinta.sugimoto@ericsson.com>
Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h   |  19 ++++
 include/net/xfrm.h     |  44 ++++++++++
 net/xfrm/xfrm_policy.c | 230 +++++++++++++++++++++++++++++++++++++++++++++++++
 net/xfrm/xfrm_state.c  | 174 +++++++++++++++++++++++++++++++++++++
 4 files changed, 467 insertions(+)

(limited to 'include')

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 9529ea1ae392..15ca89e9961b 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -178,6 +178,9 @@ enum {
 	XFRM_MSG_REPORT,
 #define XFRM_MSG_REPORT XFRM_MSG_REPORT
 
+	XFRM_MSG_MIGRATE,
+#define XFRM_MSG_MIGRATE XFRM_MSG_MIGRATE
+
 	__XFRM_MSG_MAX
 };
 #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1)
@@ -256,6 +259,7 @@ enum xfrm_attr_type_t {
 	XFRMA_COADDR,		/* xfrm_address_t */
 	XFRMA_LASTUSED,
 	XFRMA_POLICY_TYPE,	/* struct xfrm_userpolicy_type */
+	XFRMA_MIGRATE,
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
@@ -351,6 +355,19 @@ struct xfrm_user_report {
 	struct xfrm_selector		sel;
 };
 
+struct xfrm_user_migrate {
+	xfrm_address_t			old_daddr;
+	xfrm_address_t			old_saddr;
+	xfrm_address_t			new_daddr;
+	xfrm_address_t			new_saddr;
+	__u8				proto;
+	__u8				mode;
+	__u16				reserved;
+	__u32				reqid;
+	__u16				old_family;
+	__u16				new_family;
+};
+
 #ifndef __KERNEL__
 /* backwards compatibility for userspace */
 #define XFRMGRP_ACQUIRE		1
@@ -375,6 +392,8 @@ enum xfrm_nlgroups {
 #define XFRMNLGRP_AEVENTS	XFRMNLGRP_AEVENTS
 	XFRMNLGRP_REPORT,
 #define XFRMNLGRP_REPORT	XFRMNLGRP_REPORT
+	XFRMNLGRP_MIGRATE,
+#define XFRMNLGRP_MIGRATE	XFRMNLGRP_MIGRATE
 	__XFRMNLGRP_MAX
 };
 #define XFRMNLGRP_MAX	(__XFRMNLGRP_MAX - 1)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index bf91d632901d..16924cb772c9 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -362,6 +362,19 @@ struct xfrm_policy
 	struct xfrm_tmpl       	xfrm_vec[XFRM_MAX_DEPTH];
 };
 
+struct xfrm_migrate {
+	xfrm_address_t		old_daddr;
+	xfrm_address_t		old_saddr;
+	xfrm_address_t		new_daddr;
+	xfrm_address_t		new_saddr;
+	u8			proto;
+	u8			mode;
+	u16			reserved;
+	u32			reqid;
+	u16			old_family;
+	u16			new_family;
+};
+
 #define XFRM_KM_TIMEOUT                30
 /* which seqno */
 #define XFRM_REPLAY_SEQ		1
@@ -388,6 +401,7 @@ struct xfrm_mgr
 	int			(*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport);
 	int			(*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c);
 	int			(*report)(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr);
+	int			(*migrate)(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles);
 };
 
 extern int xfrm_register_km(struct xfrm_mgr *km);
@@ -988,6 +1002,16 @@ extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst,
 			  struct flowi *fl, int family, int strict);
 extern void xfrm_init_pmtu(struct dst_entry *dst);
 
+#ifdef CONFIG_XFRM_MIGRATE
+extern int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
+		      struct xfrm_migrate *m, int num_bundles);
+extern struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m);
+extern struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,
+					      struct xfrm_migrate *m);
+extern int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
+			struct xfrm_migrate *m, int num_bundles);
+#endif
+
 extern wait_queue_head_t km_waitq;
 extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport);
 extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid);
@@ -1053,5 +1077,25 @@ static inline void xfrm_aevent_doreplay(struct xfrm_state *x)
 		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
 }
 
+#ifdef CONFIG_XFRM_MIGRATE
+static inline struct xfrm_algo *xfrm_algo_clone(struct xfrm_algo *orig)
+{
+	return (struct xfrm_algo *)kmemdup(orig, sizeof(*orig) + orig->alg_key_len, GFP_KERNEL);
+}
+
+static inline void xfrm_states_put(struct xfrm_state **states, int n)
+{
+	int i;
+	for (i = 0; i < n; i++)
+		xfrm_state_put(*(states + i));
+}
+
+static inline void xfrm_states_delete(struct xfrm_state **states, int n)
+{
+	int i;
+	for (i = 0; i < n; i++)
+		xfrm_state_delete(*(states + i));
+}
+#endif
 
 #endif	/* _NET_XFRM_H */
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b7e537fe2d75..825c60af7723 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2236,3 +2236,233 @@ void __init xfrm_init(void)
 	xfrm_input_init();
 }
 
+#ifdef CONFIG_XFRM_MIGRATE
+static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
+				       struct xfrm_selector *sel_tgt)
+{
+	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
+		if (sel_tgt->family == sel_cmp->family &&
+		    xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
+			          sel_cmp->family) == 0 &&
+		    xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
+				  sel_cmp->family) == 0 &&
+		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
+		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
+			return 1;
+		}
+	} else {
+		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel,
+						     u8 dir, u8 type)
+{
+	struct xfrm_policy *pol, *ret = NULL;
+	struct hlist_node *entry;
+	struct hlist_head *chain;
+	u32 priority = ~0U;
+
+	read_lock_bh(&xfrm_policy_lock);
+	chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir);
+	hlist_for_each_entry(pol, entry, chain, bydst) {
+		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
+		    pol->type == type) {
+			ret = pol;
+			priority = ret->priority;
+			break;
+		}
+	}
+	chain = &xfrm_policy_inexact[dir];
+	hlist_for_each_entry(pol, entry, chain, bydst) {
+		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
+		    pol->type == type &&
+		    pol->priority < priority) {
+			ret = pol;
+			break;
+		}
+	}
+
+	if (ret)
+		xfrm_pol_hold(ret);
+
+	read_unlock_bh(&xfrm_policy_lock);
+
+	return ret;
+}
+
+static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
+{
+	int match = 0;
+
+	if (t->mode == m->mode && t->id.proto == m->proto &&
+	    (m->reqid == 0 || t->reqid == m->reqid)) {
+		switch (t->mode) {
+		case XFRM_MODE_TUNNEL:
+		case XFRM_MODE_BEET:
+			if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
+					  m->old_family) == 0 &&
+			    xfrm_addr_cmp(&t->saddr, &m->old_saddr,
+					  m->old_family) == 0) {
+				match = 1;
+			}
+			break;
+		case XFRM_MODE_TRANSPORT:
+			/* in case of transport mode, template does not store
+			   any IP addresses, hence we just compare mode and
+			   protocol */
+			match = 1;
+			break;
+		default:
+			break;
+		}
+	}
+	return match;
+}
+
+/* update endpoint address(es) of template(s) */
+static int xfrm_policy_migrate(struct xfrm_policy *pol,
+			       struct xfrm_migrate *m, int num_migrate)
+{
+	struct xfrm_migrate *mp;
+	struct dst_entry *dst;
+	int i, j, n = 0;
+
+	write_lock_bh(&pol->lock);
+	if (unlikely(pol->dead)) {
+		/* target policy has been deleted */
+		write_unlock_bh(&pol->lock);
+		return -ENOENT;
+	}
+
+	for (i = 0; i < pol->xfrm_nr; i++) {
+		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
+			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
+				continue;
+			n++;
+			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL)
+				continue;
+			/* update endpoints */
+			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
+			       sizeof(pol->xfrm_vec[i].id.daddr));
+			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
+			       sizeof(pol->xfrm_vec[i].saddr));
+			pol->xfrm_vec[i].encap_family = mp->new_family;
+			/* flush bundles */
+			while ((dst = pol->bundles) != NULL) {
+				pol->bundles = dst->next;
+				dst_free(dst);
+			}
+		}
+	}
+
+	write_unlock_bh(&pol->lock);
+
+	if (!n)
+		return -ENODATA;
+
+	return 0;
+}
+
+static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
+{
+	int i, j;
+
+	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
+		return -EINVAL;
+
+	for (i = 0; i < num_migrate; i++) {
+		if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
+				   m[i].old_family) == 0) &&
+		    (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
+				   m[i].old_family) == 0))
+			return -EINVAL;
+		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
+		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
+			return -EINVAL;
+
+		/* check if there is any duplicated entry */
+		for (j = i + 1; j < num_migrate; j++) {
+			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
+				    sizeof(m[i].old_daddr)) &&
+			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
+				    sizeof(m[i].old_saddr)) &&
+			    m[i].proto == m[j].proto &&
+			    m[i].mode == m[j].mode &&
+			    m[i].reqid == m[j].reqid &&
+			    m[i].old_family == m[j].old_family)
+				return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
+		 struct xfrm_migrate *m, int num_migrate)
+{
+	int i, err, nx_cur = 0, nx_new = 0;
+	struct xfrm_policy *pol = NULL;
+	struct xfrm_state *x, *xc;
+	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
+	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
+	struct xfrm_migrate *mp;
+
+	if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
+		goto out;
+
+	/* Stage 1 - find policy */
+	if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	/* Stage 2 - find and update state(s) */
+	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
+		if ((x = xfrm_migrate_state_find(mp))) {
+			x_cur[nx_cur] = x;
+			nx_cur++;
+			if ((xc = xfrm_state_migrate(x, mp))) {
+				x_new[nx_new] = xc;
+				nx_new++;
+			} else {
+				err = -ENODATA;
+				goto restore_state;
+			}
+		}
+	}
+
+	/* Stage 3 - update policy */
+	if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
+		goto restore_state;
+
+	/* Stage 4 - delete old state(s) */
+	if (nx_cur) {
+		xfrm_states_put(x_cur, nx_cur);
+		xfrm_states_delete(x_cur, nx_cur);
+	}
+
+	/* Stage 5 - announce */
+	km_migrate(sel, dir, type, m, num_migrate);
+
+	xfrm_pol_put(pol);
+
+	return 0;
+out:
+	return err;
+
+restore_state:
+	if (pol)
+		xfrm_pol_put(pol);
+	if (nx_cur)
+		xfrm_states_put(x_cur, nx_cur);
+	if (nx_new)
+		xfrm_states_delete(x_new, nx_new);
+
+	return err;
+}
+#endif
+
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 24f7bfd07af2..91b02687db52 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -828,6 +828,160 @@ out:
 }
 EXPORT_SYMBOL(xfrm_state_add);
 
+#ifdef CONFIG_XFRM_MIGRATE
+struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
+{
+	int err = -ENOMEM;
+	struct xfrm_state *x = xfrm_state_alloc();
+	if (!x)
+		goto error;
+
+	memcpy(&x->id, &orig->id, sizeof(x->id));
+	memcpy(&x->sel, &orig->sel, sizeof(x->sel));
+	memcpy(&x->lft, &orig->lft, sizeof(x->lft));
+	x->props.mode = orig->props.mode;
+	x->props.replay_window = orig->props.replay_window;
+	x->props.reqid = orig->props.reqid;
+	x->props.family = orig->props.family;
+	x->props.saddr = orig->props.saddr;
+
+	if (orig->aalg) {
+		x->aalg = xfrm_algo_clone(orig->aalg);
+		if (!x->aalg)
+			goto error;
+	}
+	x->props.aalgo = orig->props.aalgo;
+
+	if (orig->ealg) {
+		x->ealg = xfrm_algo_clone(orig->ealg);
+		if (!x->ealg)
+			goto error;
+	}
+	x->props.ealgo = orig->props.ealgo;
+
+	if (orig->calg) {
+		x->calg = xfrm_algo_clone(orig->calg);
+		if (!x->calg)
+			goto error;
+	}
+	x->props.calgo = orig->props.calgo;
+
+        if (orig->encap) {
+		x->encap = kmemdup(orig->encap, sizeof(*x->encap), GFP_KERNEL);
+		if (!x->encap)
+			goto error;
+	}
+
+	if (orig->coaddr) {
+		x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
+				    GFP_KERNEL);
+		if (!x->coaddr)
+			goto error;
+	}
+
+	err = xfrm_init_state(x);
+	if (err)
+		goto error;
+
+	x->props.flags = orig->props.flags;
+
+	x->curlft.add_time = orig->curlft.add_time;
+	x->km.state = orig->km.state;
+	x->km.seq = orig->km.seq;
+
+	return x;
+
+ error:
+	if (errp)
+		*errp = err;
+	if (x) {
+		kfree(x->aalg);
+		kfree(x->ealg);
+		kfree(x->calg);
+		kfree(x->encap);
+		kfree(x->coaddr);
+	}
+	kfree(x);
+	return NULL;
+}
+EXPORT_SYMBOL(xfrm_state_clone);
+
+/* xfrm_state_lock is held */
+struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
+{
+	unsigned int h;
+	struct xfrm_state *x;
+	struct hlist_node *entry;
+
+	if (m->reqid) {
+		h = xfrm_dst_hash(&m->old_daddr, &m->old_saddr,
+				  m->reqid, m->old_family);
+		hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+			if (x->props.mode != m->mode ||
+			    x->id.proto != m->proto)
+				continue;
+			if (m->reqid && x->props.reqid != m->reqid)
+				continue;
+			if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
+					  m->old_family) ||
+			    xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
+					  m->old_family))
+				continue;
+			xfrm_state_hold(x);
+			return x;
+		}
+	} else {
+		h = xfrm_src_hash(&m->old_daddr, &m->old_saddr,
+				  m->old_family);
+		hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
+			if (x->props.mode != m->mode ||
+			    x->id.proto != m->proto)
+				continue;
+			if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
+					  m->old_family) ||
+			    xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
+					  m->old_family))
+				continue;
+			xfrm_state_hold(x);
+			return x;
+		}
+	}
+
+        return NULL;
+}
+EXPORT_SYMBOL(xfrm_migrate_state_find);
+
+struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,
+				       struct xfrm_migrate *m)
+{
+	struct xfrm_state *xc;
+	int err;
+
+	xc = xfrm_state_clone(x, &err);
+	if (!xc)
+		return NULL;
+
+	memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
+	memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
+
+	/* add state */
+	if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) {
+		/* a care is needed when the destination address of the
+		   state is to be updated as it is a part of triplet */
+		xfrm_state_insert(xc);
+	} else {
+		if ((err = xfrm_state_add(xc)) < 0)
+			goto error;
+	}
+
+	return xc;
+error:
+	kfree(xc);
+	return NULL;
+}
+EXPORT_SYMBOL(xfrm_state_migrate);
+#endif
+
 int xfrm_state_update(struct xfrm_state *x)
 {
 	struct xfrm_state *x1;
@@ -1342,6 +1496,26 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
 }
 EXPORT_SYMBOL(km_policy_expired);
 
+int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
+	       struct xfrm_migrate *m, int num_migrate)
+{
+	int err = -EINVAL;
+	int ret;
+	struct xfrm_mgr *km;
+
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list) {
+		if (km->migrate) {
+			ret = km->migrate(sel, dir, type, m, num_migrate);
+			if (!ret)
+				err = ret;
+		}
+	}
+	read_unlock(&xfrm_km_lock);
+	return err;
+}
+EXPORT_SYMBOL(km_migrate);
+
 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
 {
 	int err = -EINVAL;
-- 
cgit v1.2.3


From 08de61beab8a21c8e0b3906a97defda5f1f66ece Mon Sep 17 00:00:00 2001
From: Shinta Sugimoto <shinta.sugimoto@ericsson.com>
Date: Thu, 8 Feb 2007 13:14:33 -0800
Subject: [PFKEYV2]: Extension for dynamic update of endpoint address(es)

Extend PF_KEYv2 framework so that user application can take advantage
of MIGRATE feature via PF_KEYv2 interface. User application can either
send or receive an MIGRATE message to/from PF_KEY socket.

Detail information can be found in the internet-draft
<draft-sugimoto-mip6-pfkey-migrate>.

Signed-off-by: Shinta Sugimoto <shinta.sugimoto@ericsson.com>
Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pfkeyv2.h |   3 +-
 net/key/af_key.c        | 422 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 424 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h
index 265bafab6494..26a518b67c02 100644
--- a/include/linux/pfkeyv2.h
+++ b/include/linux/pfkeyv2.h
@@ -251,7 +251,8 @@ struct sadb_x_sec_ctx {
 #define SADB_X_SPDEXPIRE	21
 #define SADB_X_SPDDELETE2	22
 #define SADB_X_NAT_T_NEW_MAPPING	23
-#define SADB_MAX		23
+#define SADB_X_MIGRATE		24
+#define SADB_MAX		24
 
 /* Security Association flags */
 #define SADB_SAFLAGS_PFS	1
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 5dd5094659a1..b4e444063d1f 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2345,6 +2345,196 @@ out:
 	return err;
 }
 
+#ifdef CONFIG_NET_KEY_MIGRATE
+static int pfkey_sockaddr_pair_size(sa_family_t family)
+{
+	switch (family) {
+	case AF_INET:
+		return PFKEY_ALIGN8(sizeof(struct sockaddr_in) * 2);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		return PFKEY_ALIGN8(sizeof(struct sockaddr_in6) * 2);
+#endif
+	default:
+		return 0;
+	}
+	/* NOTREACHED */
+}
+
+static int parse_sockaddr_pair(struct sadb_x_ipsecrequest *rq,
+			       xfrm_address_t *saddr, xfrm_address_t *daddr,
+			       u16 *family)
+{
+	struct sockaddr *sa = (struct sockaddr *)(rq + 1);
+	if (rq->sadb_x_ipsecrequest_len <
+	    pfkey_sockaddr_pair_size(sa->sa_family))
+		return -EINVAL;
+
+	switch (sa->sa_family) {
+	case AF_INET:
+		{
+			struct sockaddr_in *sin;
+			sin = (struct sockaddr_in *)sa;
+			if ((sin+1)->sin_family != AF_INET)
+				return -EINVAL;
+			memcpy(&saddr->a4, &sin->sin_addr, sizeof(saddr->a4));
+			sin++;
+			memcpy(&daddr->a4, &sin->sin_addr, sizeof(daddr->a4));
+			*family = AF_INET;
+			break;
+		}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		{
+			struct sockaddr_in6 *sin6;
+			sin6 = (struct sockaddr_in6 *)sa;
+			if ((sin6+1)->sin6_family != AF_INET6)
+				return -EINVAL;
+			memcpy(&saddr->a6, &sin6->sin6_addr,
+			       sizeof(saddr->a6));
+			sin6++;
+			memcpy(&daddr->a6, &sin6->sin6_addr,
+			       sizeof(daddr->a6));
+			*family = AF_INET6;
+			break;
+		}
+#endif
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
+				    struct xfrm_migrate *m)
+{
+	int err;
+	struct sadb_x_ipsecrequest *rq2;
+
+	if (len <= sizeof(struct sadb_x_ipsecrequest) ||
+	    len < rq1->sadb_x_ipsecrequest_len)
+		return -EINVAL;
+
+	/* old endoints */
+	err = parse_sockaddr_pair(rq1, &m->old_saddr, &m->old_daddr,
+				  &m->old_family);
+	if (err)
+		return err;
+
+	rq2 = (struct sadb_x_ipsecrequest *)((u8 *)rq1 + rq1->sadb_x_ipsecrequest_len);
+	len -= rq1->sadb_x_ipsecrequest_len;
+
+	if (len <= sizeof(struct sadb_x_ipsecrequest) ||
+	    len < rq2->sadb_x_ipsecrequest_len)
+		return -EINVAL;
+
+	/* new endpoints */
+	err = parse_sockaddr_pair(rq2, &m->new_saddr, &m->new_daddr,
+				  &m->new_family);
+	if (err)
+		return err;
+
+	if (rq1->sadb_x_ipsecrequest_proto != rq2->sadb_x_ipsecrequest_proto ||
+	    rq1->sadb_x_ipsecrequest_mode != rq2->sadb_x_ipsecrequest_mode ||
+	    rq1->sadb_x_ipsecrequest_reqid != rq2->sadb_x_ipsecrequest_reqid)
+		return -EINVAL;
+
+	m->proto = rq1->sadb_x_ipsecrequest_proto;
+	m->mode = rq1->sadb_x_ipsecrequest_mode - 1;
+	m->reqid = rq1->sadb_x_ipsecrequest_reqid;
+
+	return ((int)(rq1->sadb_x_ipsecrequest_len +
+		      rq2->sadb_x_ipsecrequest_len));
+}
+
+static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
+			 struct sadb_msg *hdr, void **ext_hdrs)
+{
+	int i, len, ret, err = -EINVAL;
+	u8 dir;
+	struct sadb_address *sa;
+	struct sadb_x_policy *pol;
+	struct sadb_x_ipsecrequest *rq;
+	struct xfrm_selector sel;
+	struct xfrm_migrate m[XFRM_MAX_DEPTH];
+
+	if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC - 1],
+	    ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) ||
+	    !ext_hdrs[SADB_X_EXT_POLICY - 1]) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	pol = ext_hdrs[SADB_X_EXT_POLICY - 1];
+	if (!pol) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	dir = pol->sadb_x_policy_dir - 1;
+	memset(&sel, 0, sizeof(sel));
+
+	/* set source address info of selector */
+	sa = ext_hdrs[SADB_EXT_ADDRESS_SRC - 1];
+	sel.family = pfkey_sadb_addr2xfrm_addr(sa, &sel.saddr);
+	sel.prefixlen_s = sa->sadb_address_prefixlen;
+	sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
+	sel.sport = ((struct sockaddr_in *)(sa + 1))->sin_port;
+	if (sel.sport)
+		sel.sport_mask = ~0;
+
+	/* set destination address info of selector */
+	sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1],
+	pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr);
+	sel.prefixlen_d = sa->sadb_address_prefixlen;
+	sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
+	sel.dport = ((struct sockaddr_in *)(sa + 1))->sin_port;
+	if (sel.dport)
+		sel.dport_mask = ~0;
+
+	rq = (struct sadb_x_ipsecrequest *)(pol + 1);
+
+	/* extract ipsecrequests */
+	i = 0;
+	len = pol->sadb_x_policy_len * 8 - sizeof(struct sadb_x_policy);
+
+	while (len > 0 && i < XFRM_MAX_DEPTH) {
+		ret = ipsecrequests_to_migrate(rq, len, &m[i]);
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		} else {
+			rq = (struct sadb_x_ipsecrequest *)((u8 *)rq + ret);
+			len -= ret;
+			i++;
+		}
+	}
+
+	if (!i || len > 0) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i);
+
+ out:
+	return err;
+}
+#else
+static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
+			 struct sadb_msg *hdr, void **ext_hdrs)
+{
+	return -ENOPROTOOPT;
+}
+#endif
+
+
 static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
 {
 	unsigned int dir;
@@ -2473,6 +2663,7 @@ static pfkey_handler pfkey_funcs[SADB_MAX + 1] = {
 	[SADB_X_SPDFLUSH]	= pfkey_spdflush,
 	[SADB_X_SPDSETIDX]	= pfkey_spdadd,
 	[SADB_X_SPDDELETE2]	= pfkey_spdget,
+	[SADB_X_MIGRATE]	= pfkey_migrate,
 };
 
 static int pfkey_process(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr)
@@ -3118,6 +3309,236 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 	return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL);
 }
 
+#ifdef CONFIG_NET_KEY_MIGRATE
+static int set_sadb_address(struct sk_buff *skb, int sasize, int type,
+			    struct xfrm_selector *sel)
+{
+	struct sadb_address *addr;
+	struct sockaddr_in *sin;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct sockaddr_in6 *sin6;
+#endif
+	addr = (struct sadb_address *)skb_put(skb, sizeof(struct sadb_address) + sasize);
+	addr->sadb_address_len = (sizeof(struct sadb_address) + sasize)/8;
+	addr->sadb_address_exttype = type;
+	addr->sadb_address_proto = sel->proto;
+	addr->sadb_address_reserved = 0;
+
+	switch (type) {
+	case SADB_EXT_ADDRESS_SRC:
+		if (sel->family == AF_INET) {
+			addr->sadb_address_prefixlen = sel->prefixlen_s;
+			sin = (struct sockaddr_in *)(addr + 1);
+			sin->sin_family = AF_INET;
+			memcpy(&sin->sin_addr.s_addr, &sel->saddr,
+			       sizeof(sin->sin_addr.s_addr));
+			sin->sin_port = 0;
+			memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+		}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		else if (sel->family == AF_INET6) {
+			addr->sadb_address_prefixlen = sel->prefixlen_s;
+			sin6 = (struct sockaddr_in6 *)(addr + 1);
+			sin6->sin6_family = AF_INET6;
+			sin6->sin6_port = 0;
+			sin6->sin6_flowinfo = 0;
+			sin6->sin6_scope_id = 0;
+			memcpy(&sin6->sin6_addr.s6_addr, &sel->saddr,
+			       sizeof(sin6->sin6_addr.s6_addr));
+		}
+#endif
+		break;
+	case SADB_EXT_ADDRESS_DST:
+		if (sel->family == AF_INET) {
+			addr->sadb_address_prefixlen = sel->prefixlen_d;
+			sin = (struct sockaddr_in *)(addr + 1);
+			sin->sin_family = AF_INET;
+			memcpy(&sin->sin_addr.s_addr, &sel->daddr,
+			       sizeof(sin->sin_addr.s_addr));
+			sin->sin_port = 0;
+			memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+		}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		else if (sel->family == AF_INET6) {
+			addr->sadb_address_prefixlen = sel->prefixlen_d;
+			sin6 = (struct sockaddr_in6 *)(addr + 1);
+			sin6->sin6_family = AF_INET6;
+			sin6->sin6_port = 0;
+			sin6->sin6_flowinfo = 0;
+			sin6->sin6_scope_id = 0;
+			memcpy(&sin6->sin6_addr.s6_addr, &sel->daddr,
+			       sizeof(sin6->sin6_addr.s6_addr));
+		}
+#endif
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int set_ipsecrequest(struct sk_buff *skb,
+			    uint8_t proto, uint8_t mode, int level,
+			    uint32_t reqid, uint8_t family,
+			    xfrm_address_t *src, xfrm_address_t *dst)
+{
+	struct sadb_x_ipsecrequest *rq;
+	struct sockaddr_in *sin;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct sockaddr_in6 *sin6;
+#endif
+	int size_req;
+
+	size_req = sizeof(struct sadb_x_ipsecrequest) +
+		   pfkey_sockaddr_pair_size(family);
+
+	rq = (struct sadb_x_ipsecrequest *)skb_put(skb, size_req);
+	memset(rq, 0, size_req);
+	rq->sadb_x_ipsecrequest_len = size_req;
+	rq->sadb_x_ipsecrequest_proto = proto;
+	rq->sadb_x_ipsecrequest_mode = mode;
+	rq->sadb_x_ipsecrequest_level = level;
+	rq->sadb_x_ipsecrequest_reqid = reqid;
+
+	switch (family) {
+	case AF_INET:
+		sin = (struct sockaddr_in *)(rq + 1);
+		sin->sin_family = AF_INET;
+		memcpy(&sin->sin_addr.s_addr, src,
+		       sizeof(sin->sin_addr.s_addr));
+		sin++;
+		sin->sin_family = AF_INET;
+		memcpy(&sin->sin_addr.s_addr, dst,
+		       sizeof(sin->sin_addr.s_addr));
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		sin6 = (struct sockaddr_in6 *)(rq + 1);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = 0;
+		sin6->sin6_flowinfo = 0;
+		sin6->sin6_scope_id = 0;
+		memcpy(&sin6->sin6_addr.s6_addr, src,
+		       sizeof(sin6->sin6_addr.s6_addr));
+		sin6++;
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = 0;
+		sin6->sin6_flowinfo = 0;
+		sin6->sin6_scope_id = 0;
+		memcpy(&sin6->sin6_addr.s6_addr, dst,
+		       sizeof(sin6->sin6_addr.s6_addr));
+		break;
+#endif
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_NET_KEY_MIGRATE
+static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
+			      struct xfrm_migrate *m, int num_bundles)
+{
+	int i;
+	int sasize_sel;
+	int size = 0;
+	int size_pol = 0;
+	struct sk_buff *skb;
+	struct sadb_msg *hdr;
+	struct sadb_x_policy *pol;
+	struct xfrm_migrate *mp;
+
+	if (type != XFRM_POLICY_TYPE_MAIN)
+		return 0;
+
+	if (num_bundles <= 0 || num_bundles > XFRM_MAX_DEPTH)
+		return -EINVAL;
+
+	/* selector */
+	sasize_sel = pfkey_sockaddr_size(sel->family);
+	if (!sasize_sel)
+		return -EINVAL;
+	size += (sizeof(struct sadb_address) + sasize_sel) * 2;
+
+	/* policy info */
+	size_pol += sizeof(struct sadb_x_policy);
+
+	/* ipsecrequests */
+	for (i = 0, mp = m; i < num_bundles; i++, mp++) {
+		/* old locator pair */
+		size_pol += sizeof(struct sadb_x_ipsecrequest) +
+			    pfkey_sockaddr_pair_size(mp->old_family);
+		/* new locator pair */
+		size_pol += sizeof(struct sadb_x_ipsecrequest) +
+			    pfkey_sockaddr_pair_size(mp->new_family);
+	}
+
+	size += sizeof(struct sadb_msg) + size_pol;
+
+	/* alloc buffer */
+	skb = alloc_skb(size, GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	hdr = (struct sadb_msg *)skb_put(skb, sizeof(struct sadb_msg));
+	hdr->sadb_msg_version = PF_KEY_V2;
+	hdr->sadb_msg_type = SADB_X_MIGRATE;
+	hdr->sadb_msg_satype = pfkey_proto2satype(m->proto);
+	hdr->sadb_msg_len = size / 8;
+	hdr->sadb_msg_errno = 0;
+	hdr->sadb_msg_reserved = 0;
+	hdr->sadb_msg_seq = 0;
+	hdr->sadb_msg_pid = 0;
+
+	/* selector src */
+	set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_SRC, sel);
+
+	/* selector dst */
+	set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_DST, sel);
+
+	/* policy information */
+	pol = (struct sadb_x_policy *)skb_put(skb, sizeof(struct sadb_x_policy));
+	pol->sadb_x_policy_len = size_pol / 8;
+	pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
+	pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
+	pol->sadb_x_policy_dir = dir + 1;
+	pol->sadb_x_policy_id = 0;
+	pol->sadb_x_policy_priority = 0;
+
+	for (i = 0, mp = m; i < num_bundles; i++, mp++) {
+		/* old ipsecrequest */
+		if (set_ipsecrequest(skb, mp->proto, mp->mode + 1,
+				     (mp->reqid ?  IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE),
+				     mp->reqid, mp->old_family,
+				     &mp->old_saddr, &mp->old_daddr) < 0) {
+			return -EINVAL;
+		}
+
+		/* new ipsecrequest */
+		if (set_ipsecrequest(skb, mp->proto, mp->mode + 1,
+				     (mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE),
+				     mp->reqid, mp->new_family,
+				     &mp->new_saddr, &mp->new_daddr) < 0) {
+			return -EINVAL;
+		}
+	}
+
+	/* broadcast migrate message to sockets */
+	pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL);
+
+	return 0;
+}
+#else
+static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
+			      struct xfrm_migrate *m, int num_bundles)
+{
+	return -ENOPROTOOPT;
+}
+#endif
+
 static int pfkey_sendmsg(struct kiocb *kiocb,
 			 struct socket *sock, struct msghdr *msg, size_t len)
 {
@@ -3287,6 +3708,7 @@ static struct xfrm_mgr pfkeyv2_mgr =
 	.compile_policy	= pfkey_compile_policy,
 	.new_mapping	= pfkey_send_new_mapping,
 	.notify_policy	= pfkey_send_policy_notify,
+	.migrate	= pfkey_send_migrate,
 };
 
 static void __exit ipsec_pfkey_exit(void)
-- 
cgit v1.2.3


From 95a9dc4390c8215d922e0ca2ebb95279261fe795 Mon Sep 17 00:00:00 2001
From: Andrew Hendry <andrew.hendry@gmail.com>
Date: Thu, 8 Feb 2007 13:34:02 -0800
Subject: [X.25]: Add call forwarding

Adds call forwarding to X.25, allowing it to operate like an X.25 router.
Useful if one needs to manipulate X.25 traffic with tools like tc.
This is an update/cleanup based off a patch submitted by Daniel Ferenci a few years ago.

Thanks Alan for the feedback.
Added the null check to the clones.
Moved the skb_clone's into the forwarding functions.

Worked ok with Cisco XoT, linux X.25 back to back, and some old NTUs/PADs.

Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/x25.h     |  17 ++++++
 net/x25/Makefile      |   2 +-
 net/x25/af_x25.c      |  30 ++++++++--
 net/x25/x25_dev.c     |  13 +++-
 net/x25/x25_forward.c | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++
 net/x25/x25_route.c   |   3 +
 6 files changed, 220 insertions(+), 8 deletions(-)
 create mode 100644 net/x25/x25_forward.c

(limited to 'include')

diff --git a/include/net/x25.h b/include/net/x25.h
index e47fe440d9d7..3b1190514d92 100644
--- a/include/net/x25.h
+++ b/include/net/x25.h
@@ -161,6 +161,14 @@ struct x25_sock {
 	unsigned long 		vc_facil_mask;	/* inc_call facilities mask */
 };
 
+struct x25_forward {
+	struct list_head	node;
+	unsigned int		lci;
+	struct net_device	*dev1;
+	struct net_device	*dev2;
+	atomic_t		refcnt;
+};
+
 static inline struct x25_sock *x25_sk(const struct sock *sk)
 {
 	return (struct x25_sock *)sk;
@@ -198,6 +206,13 @@ extern int x25_negotiate_facilities(struct sk_buff *, struct sock *,
 				struct x25_dte_facilities *);
 extern void x25_limit_facilities(struct x25_facilities *, struct x25_neigh *);
 
+/* x25_forward.c */
+extern void x25_clear_forward_by_lci(unsigned int lci);
+extern void x25_clear_forward_by_dev(struct net_device *);
+extern int x25_forward_data(int, struct x25_neigh *, struct sk_buff *);
+extern int x25_forward_call(struct x25_address *, struct x25_neigh *,
+				struct sk_buff *, int);
+
 /* x25_in.c */
 extern int  x25_process_rx_frame(struct sock *, struct sk_buff *);
 extern int  x25_backlog_rcv(struct sock *, struct sk_buff *);
@@ -282,6 +297,8 @@ extern struct hlist_head x25_list;
 extern rwlock_t x25_list_lock;
 extern struct list_head x25_route_list;
 extern rwlock_t x25_route_list_lock;
+extern struct list_head x25_forward_list;
+extern rwlock_t x25_forward_list_lock;
 
 extern int x25_proc_init(void);
 extern void x25_proc_exit(void);
diff --git a/net/x25/Makefile b/net/x25/Makefile
index 587a71aa411d..a2c34ab6f194 100644
--- a/net/x25/Makefile
+++ b/net/x25/Makefile
@@ -6,5 +6,5 @@ obj-$(CONFIG_X25) += x25.o
 
 x25-y			:= af_x25.o x25_dev.o x25_facilities.o x25_in.o \
 			   x25_link.o x25_out.o x25_route.o x25_subr.o \
-			   x25_timer.o x25_proc.o
+			   x25_timer.o x25_proc.o x25_forward.o
 x25-$(CONFIG_SYSCTL)	+= sysctl_net_x25.o
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index b5c80b189902..0872025821c5 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -846,7 +846,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
 	struct x25_address source_addr, dest_addr;
 	struct x25_facilities facilities;
 	struct x25_dte_facilities dte_facilities;
-	int len, rc;
+	int len, addr_len, rc;
 
 	/*
 	 *	Remove the LCI and frame type.
@@ -857,7 +857,8 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
 	 *	Extract the X.25 addresses and convert them to ASCII strings,
 	 *	and remove them.
 	 */
-	skb_pull(skb, x25_addr_ntoa(skb->data, &source_addr, &dest_addr));
+	addr_len = x25_addr_ntoa(skb->data, &source_addr, &dest_addr);
+	skb_pull(skb, addr_len);
 
 	/*
 	 *	Get the length of the facilities, skip past them for the moment
@@ -873,11 +874,27 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
 	sk = x25_find_listener(&source_addr,skb);
 	skb_push(skb,len);
 
+	if (sk != NULL && sk_acceptq_is_full(sk)) {
+		goto out_sock_put;
+	}
+
 	/*
-	 *	We can't accept the Call Request.
+	 *	We dont have any listeners for this incoming call.
+	 *	Try forwarding it.
 	 */
-	if (sk == NULL || sk_acceptq_is_full(sk))
-		goto out_clear_request;
+	if (sk == NULL) {
+		skb_push(skb, addr_len + X25_STD_MIN_LEN);
+		if (x25_forward_call(&dest_addr, nb, skb, lci) > 0)
+		{
+			/* Call was forwarded, dont process it any more */
+			kfree_skb(skb);
+			rc = 1;
+			goto out;
+		} else {
+			/* No listeners, can't forward, clear the call */
+			goto out_clear_request;
+		}
+	}
 
 	/*
 	 *	Try to reach a compromise on the requested facilities.
@@ -1598,6 +1615,9 @@ void x25_kill_by_neigh(struct x25_neigh *nb)
 			x25_disconnect(s, ENETUNREACH, 0, 0);
 
 	write_unlock_bh(&x25_list_lock);
+
+	/* Remove any related forwards */
+	x25_clear_forward_by_dev(nb->dev);
 }
 
 static int __init x25_init(void)
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index 328d80f000ad..f099fd6a7c0e 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -67,9 +67,18 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
 		return x25_rx_call_request(skb, nb, lci);
 
 	/*
-	 *	Its not a Call Request, nor is it a control frame.
-	 *      Let caller throw it away.
+	 * 	Its not a Call Request, nor is it a control frame.
+	 *	Can we forward it?
 	 */
+
+	if (x25_forward_data(lci, nb, skb)) {
+		if (frametype == X25_CLEAR_CONFIRMATION) {
+			x25_clear_forward_by_lci(lci);
+		}
+		kfree_skb(skb);
+		return 1;
+	}
+
 /*
 	x25_transmit_clear_request(nb, lci, 0x0D);
 */
diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c
new file mode 100644
index 000000000000..d339e0c810a8
--- /dev/null
+++ b/net/x25/x25_forward.c
@@ -0,0 +1,163 @@
+/*
+ *	This module:
+ *		This module is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	History
+ *	03-01-2007	Added forwarding for x.25	Andrew Hendry
+ */
+#include <linux/if_arp.h>
+#include <linux/init.h>
+#include <net/x25.h>
+
+struct list_head x25_forward_list = LIST_HEAD_INIT(x25_forward_list);
+DEFINE_RWLOCK(x25_forward_list_lock);
+
+int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from,
+			struct sk_buff *skb, int lci)
+{
+	struct x25_route *rt;
+	struct x25_neigh *neigh_new = NULL;
+	struct list_head *entry;
+	struct x25_forward *x25_frwd, *new_frwd;
+	struct sk_buff *skbn;
+	short same_lci = 0;
+	int rc = 0;
+
+	if ((rt = x25_get_route(dest_addr)) != NULL) {
+
+		if ((neigh_new = x25_get_neigh(rt->dev)) == NULL) {
+			/* This shouldnt happen, if it occurs somehow
+			 * do something sensible
+			 */
+			goto out_put_route;
+		}
+
+		/* Avoid a loop. This is the normal exit path for a
+		 * system with only one x.25 iface and default route
+		 */
+		if (rt->dev == from->dev) {
+			goto out_put_nb;
+		}
+
+		/* Remote end sending a call request on an already
+		 * established LCI? It shouldnt happen, just in case..
+		 */
+		read_lock_bh(&x25_forward_list_lock);
+		list_for_each(entry, &x25_forward_list) {
+			x25_frwd = list_entry(entry, struct x25_forward, node);
+			if (x25_frwd->lci == lci) {
+				printk(KERN_WARNING "X.25: call request for lci which is already registered!, transmitting but not registering new pair\n");
+				same_lci = 1;
+			}
+		}
+		read_unlock_bh(&x25_forward_list_lock);
+
+		/* Save the forwarding details for future traffic */
+		if (!same_lci){
+			if ((new_frwd = kmalloc(sizeof(struct x25_forward),
+							GFP_ATOMIC)) == NULL){
+				rc = -ENOMEM;
+				goto out_put_nb;
+			}
+			new_frwd->lci = lci;
+			new_frwd->dev1 = rt->dev;
+			new_frwd->dev2 = from->dev;
+			write_lock_bh(&x25_forward_list_lock);
+			list_add(&new_frwd->node, &x25_forward_list);
+			write_unlock_bh(&x25_forward_list_lock);
+		}
+
+		/* Forward the call request */
+		if ( (skbn = skb_clone(skb, GFP_ATOMIC)) == NULL){
+			goto out_put_nb;
+		}
+		x25_transmit_link(skbn, neigh_new);
+		rc = 1;
+	}
+
+
+out_put_nb:
+	x25_neigh_put(neigh_new);
+
+out_put_route:
+	x25_route_put(rt);
+	return rc;
+}
+
+
+int x25_forward_data(int lci, struct x25_neigh *from, struct sk_buff *skb) {
+
+	struct x25_forward *frwd;
+	struct list_head *entry;
+	struct net_device *peer = NULL;
+	struct x25_neigh *nb;
+	struct sk_buff *skbn;
+	int rc = 0;
+
+	read_lock_bh(&x25_forward_list_lock);
+	list_for_each(entry, &x25_forward_list) {
+		frwd = list_entry(entry, struct x25_forward, node);
+		if (frwd->lci == lci) {
+			/* The call is established, either side can send */
+			if (from->dev == frwd->dev1) {
+				peer = frwd->dev2;
+			} else {
+				peer = frwd->dev1;
+			}
+			break;
+		}
+	}
+	read_unlock_bh(&x25_forward_list_lock);
+
+	if ( (nb = x25_get_neigh(peer)) == NULL)
+		goto out;
+
+	if ( (skbn = pskb_copy(skb, GFP_ATOMIC)) == NULL){
+		goto out;
+
+	}
+	x25_transmit_link(skbn, nb);
+
+	x25_neigh_put(nb);
+	rc = 1;
+out:
+	return rc;
+}
+
+void x25_clear_forward_by_lci(unsigned int lci)
+{
+	struct x25_forward *fwd;
+	struct list_head *entry, *tmp;
+
+	write_lock_bh(&x25_forward_list_lock);
+
+	list_for_each_safe(entry, tmp, &x25_forward_list) {
+		fwd = list_entry(entry, struct x25_forward, node);
+		if (fwd->lci == lci) {
+			list_del(&fwd->node);
+			kfree(fwd);
+		}
+	}
+	write_unlock_bh(&x25_forward_list_lock);
+}
+
+
+void x25_clear_forward_by_dev(struct net_device *dev)
+{
+	struct x25_forward *fwd;
+	struct list_head *entry, *tmp;
+
+	write_lock_bh(&x25_forward_list_lock);
+
+	list_for_each_safe(entry, tmp, &x25_forward_list) {
+		fwd = list_entry(entry, struct x25_forward, node);
+		if ((fwd->dev1 == dev) || (fwd->dev2 == dev)){
+			list_del(&fwd->node);
+			kfree(fwd);
+		}
+	}
+	write_unlock_bh(&x25_forward_list_lock);
+}
diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c
index 2a3fe986b245..883a848bca5b 100644
--- a/net/x25/x25_route.c
+++ b/net/x25/x25_route.c
@@ -119,6 +119,9 @@ void x25_route_device_down(struct net_device *dev)
 			__x25_remove_route(rt);
 	}
 	write_unlock_bh(&x25_route_list_lock);
+
+	/* Remove any related forwarding */
+	x25_clear_forward_by_dev(dev);
 }
 
 /*
-- 
cgit v1.2.3


From 39e21c0d34fe769d06839679fa920217359a58b0 Mon Sep 17 00:00:00 2001
From: Andrew Hendry <andrew.hendry@gmail.com>
Date: Thu, 8 Feb 2007 13:34:36 -0800
Subject: [X.25]: Adds /proc/sys/net/x25/x25_forward to control forwarding.

echo "1" > /proc/sys/net/x25/x25_forward
To turn on x25_forwarding, defaults to off
Requires the previous patch.

Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sysctl.h   | 3 ++-
 include/net/x25.h        | 1 +
 net/x25/af_x25.c         | 4 +++-
 net/x25/sysctl_net_x25.c | 8 ++++++++
 4 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 81480e613467..665412c4f4b9 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -699,7 +699,8 @@ enum {
 	NET_X25_CALL_REQUEST_TIMEOUT=2,
 	NET_X25_RESET_REQUEST_TIMEOUT=3,
 	NET_X25_CLEAR_REQUEST_TIMEOUT=4,
-	NET_X25_ACK_HOLD_BACK_TIMEOUT=5
+	NET_X25_ACK_HOLD_BACK_TIMEOUT=5,
+	NET_X25_FORWARD=6
 };
 
 /* /proc/sys/net/token-ring */
diff --git a/include/net/x25.h b/include/net/x25.h
index 3b1190514d92..fc3f03d976f8 100644
--- a/include/net/x25.h
+++ b/include/net/x25.h
@@ -180,6 +180,7 @@ extern int  sysctl_x25_call_request_timeout;
 extern int  sysctl_x25_reset_request_timeout;
 extern int  sysctl_x25_clear_request_timeout;
 extern int  sysctl_x25_ack_holdback_timeout;
+extern int  sysctl_x25_forward;
 
 extern int  x25_addr_ntoa(unsigned char *, struct x25_address *,
 			  struct x25_address *);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 0872025821c5..b37d894358ec 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -63,6 +63,7 @@ int sysctl_x25_call_request_timeout    = X25_DEFAULT_T21;
 int sysctl_x25_reset_request_timeout   = X25_DEFAULT_T22;
 int sysctl_x25_clear_request_timeout   = X25_DEFAULT_T23;
 int sysctl_x25_ack_holdback_timeout    = X25_DEFAULT_T2;
+int sysctl_x25_forward                 = 0;
 
 HLIST_HEAD(x25_list);
 DEFINE_RWLOCK(x25_list_lock);
@@ -884,7 +885,8 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
 	 */
 	if (sk == NULL) {
 		skb_push(skb, addr_len + X25_STD_MIN_LEN);
-		if (x25_forward_call(&dest_addr, nb, skb, lci) > 0)
+		if (sysctl_x25_forward &&
+				x25_forward_call(&dest_addr, nb, skb, lci) > 0)
 		{
 			/* Call was forwarded, dont process it any more */
 			kfree_skb(skb);
diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c
index aabda59c824e..2b2e7fd689f3 100644
--- a/net/x25/sysctl_net_x25.c
+++ b/net/x25/sysctl_net_x25.c
@@ -73,6 +73,14 @@ static struct ctl_table x25_table[] = {
 		.extra1 =	&min_timer,
 		.extra2 =	&max_timer,
 	},
+	{
+		.ctl_name =	NET_X25_FORWARD,
+		.procname =	"x25_forward",
+		.data = 	&sysctl_x25_forward,
+		.maxlen = 	sizeof(int),
+		.mode = 	0644,
+		.proc_handler = &proc_dointvec,
+	},
 	{ 0, },
 };
 
-- 
cgit v1.2.3


From 2356f4cb191100a5e92d537f13e5efdbc697e9cb Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Thu, 8 Feb 2007 13:37:42 -0800
Subject: [S390]: Rewrite of the IUCV base code, part 2

Add rewritten IUCV base code to net/iucv.

Signed-off-by: Frank Pavlic <fpavlic@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/s390/defconfig     |    1 +
 include/net/iucv/iucv.h |  415 ++++++++++++
 net/Kconfig             |    1 +
 net/Makefile            |    1 +
 net/iucv/Kconfig        |    7 +
 net/iucv/Makefile       |    5 +
 net/iucv/iucv.c         | 1619 +++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 2049 insertions(+)
 create mode 100644 include/net/iucv/iucv.h
 create mode 100644 net/iucv/Kconfig
 create mode 100644 net/iucv/Makefile
 create mode 100644 net/iucv/iucv.c

(limited to 'include')

diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 7729ca114cc9..46f2d4578e6b 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -179,6 +179,7 @@ CONFIG_XFRM=y
 # CONFIG_XFRM_USER is not set
 # CONFIG_XFRM_SUB_POLICY is not set
 CONFIG_NET_KEY=y
+CONFIG_IUCV=m
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 # CONFIG_IP_ADVANCED_ROUTER is not set
diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h
new file mode 100644
index 000000000000..746e7416261e
--- /dev/null
+++ b/include/net/iucv/iucv.h
@@ -0,0 +1,415 @@
+/*
+ *  drivers/s390/net/iucv.h
+ *    IUCV base support.
+ *
+ *  S390 version
+ *    Copyright 2000, 2006 IBM Corporation
+ *    Author(s):Alan Altmark (Alan_Altmark@us.ibm.com)
+ *		Xenia Tkatschow (xenia@us.ibm.com)
+ *    Rewritten for af_iucv:
+ *	Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ *
+ * Functionality:
+ * To explore any of the IUCV functions, one must first register their
+ * program using iucv_register(). Once your program has successfully
+ * completed a register, it can exploit the other functions.
+ * For furthur reference on all IUCV functionality, refer to the
+ * CP Programming Services book, also available on the web thru
+ * www.ibm.com/s390/vm/pubs, manual # SC24-5760
+ *
+ * Definition of Return Codes
+ * - All positive return codes including zero are reflected back
+ *   from CP. The definition of each return code can be found in
+ *   CP Programming Services book.
+ * - Return Code of:
+ *   -EINVAL: Invalid value
+ *   -ENOMEM: storage allocation failed
+ */
+
+#include <linux/types.h>
+#include <asm/debug.h>
+
+/*
+ * IUCV option flags usable by device drivers:
+ *
+ * IUCV_IPRMDATA  Indicates that your program can handle a message in the
+ *		  parameter list / a message is sent in the parameter list.
+ *		  Used for iucv_path_accept, iucv_path_connect,
+ *		  iucv_message_reply, iucv_message_send, iucv_message_send2way.
+ * IUCV_IPQUSCE	  Indicates that you do not want to receive messages on this
+ *		  path until an iucv_path_resume is issued.
+ *		  Used for iucv_path_accept, iucv_path_connect.
+ * IUCV_IPBUFLST  Indicates that an address list is used for the message data.
+ *		  Used for iucv_message_receive, iucv_message_send,
+ *		  iucv_message_send2way.
+ * IUCV_IPPRTY	  Specifies that you want to send priority messages.
+ *		  Used for iucv_path_accept, iucv_path_connect,
+ *		  iucv_message_reply, iucv_message_send, iucv_message_send2way.
+ * IUCV_IPSYNC	  Indicates a synchronous send request.
+ *		  Used for iucv_message_send, iucv_message_send2way.
+ * IUCV_IPANSLST  Indicates that an address list is used for the reply data.
+ *		  Used for iucv_message_reply, iucv_message_send2way.
+ * IUCV_IPLOCAL	  Specifies that the communication partner has to be on the
+ *		  local system. If local is specified no target class can be
+ *		  specified.
+ *		  Used for iucv_path_connect.
+ *
+ * All flags are defined in the input field IPFLAGS1 of each function
+ * and can be found in CP Programming Services.
+ */
+#define IUCV_IPRMDATA	0x80
+#define IUCV_IPQUSCE	0x40
+#define IUCV_IPBUFLST	0x40
+#define IUCV_IPPRTY	0x20
+#define IUCV_IPANSLST	0x08
+#define IUCV_IPSYNC	0x04
+#define IUCV_IPLOCAL	0x01
+
+/*
+ * iucv_array : Defines buffer array.
+ * Inside the array may be 31- bit addresses and 31-bit lengths.
+ * Use a pointer to an iucv_array as the buffer, reply or answer
+ * parameter on iucv_message_send, iucv_message_send2way, iucv_message_receive
+ * and iucv_message_reply if IUCV_IPBUFLST or IUCV_IPANSLST are used.
+ */
+struct iucv_array {
+	u32 address;
+	u32 length;
+} __attribute__ ((aligned (8)));
+
+extern struct bus_type iucv_bus;
+extern struct device *iucv_root;
+
+/*
+ * struct iucv_path
+ * pathid: 16 bit path identification
+ * msglim: 16 bit message limit
+ * flags: properties of the path: IPRMDATA, IPQUSCE, IPPRTY
+ * handler:  address of iucv handler structure
+ * private: private information of the handler associated with the path
+ * list: list_head for the iucv_handler path list.
+ */
+struct iucv_path {
+	u16 pathid;
+	u16 msglim;
+	u8  flags;
+	void *private;
+	struct iucv_handler *handler;
+	struct list_head list;
+};
+
+/*
+ * struct iucv_message
+ * id: 32 bit message id
+ * audit: 32 bit error information of purged or replied messages
+ * class: 32 bit target class of a message (source class for replies)
+ * tag: 32 bit tag to be associated with the message
+ * length: 32 bit length of the message / reply
+ * reply_size: 32 bit maximum allowed length of the reply
+ * rmmsg: 8 byte inline message
+ * flags: message properties (IUCV_IPPRTY)
+ */
+struct iucv_message {
+	u32 id;
+	u32 audit;
+	u32 class;
+	u32 tag;
+	u32 length;
+	u32 reply_size;
+	u8  rmmsg[8];
+	u8  flags;
+};
+
+/*
+ * struct iucv_handler
+ *
+ * A vector of functions that handle IUCV interrupts. Each functions gets
+ * a parameter area as defined by the CP Programming Services and private
+ * pointer that is provided by the user of the interface.
+ */
+struct iucv_handler {
+	 /*
+	  * The path_pending function is called after an iucv interrupt
+	  * type 0x01 has been received. The base code allocates a path
+	  * structure and "asks" the handler if this path belongs to the
+	  * handler. To accept the path the path_pending function needs
+	  * to call iucv_path_accept and return 0. If the callback returns
+	  * a value != 0 the iucv base code will continue with the next
+	  * handler. The order in which the path_pending functions are
+	  * called is the order of the registration of the iucv handlers
+	  * to the base code.
+	  */
+	int  (*path_pending)(struct iucv_path *, u8 ipvmid[8], u8 ipuser[16]);
+	/*
+	 * The path_complete function is called after an iucv interrupt
+	 * type 0x02 has been received for a path that has been established
+	 * for this handler with iucv_path_connect and got accepted by the
+	 * peer with iucv_path_accept.
+	 */
+	void (*path_complete)(struct iucv_path *, u8 ipuser[16]);
+	 /*
+	  * The path_severed function is called after an iucv interrupt
+	  * type 0x03 has been received. The communication peer shutdown
+	  * his end of the communication path. The path still exists and
+	  * remaining messages can be received until a iucv_path_sever
+	  * shuts down the other end of the path as well.
+	  */
+	void (*path_severed)(struct iucv_path *, u8 ipuser[16]);
+	/*
+	 * The path_quiesced function is called after an icuv interrupt
+	 * type 0x04 has been received. The communication peer has quiesced
+	 * the path. Delivery of messages is stopped until iucv_path_resume
+	 * has been called.
+	 */
+	void (*path_quiesced)(struct iucv_path *, u8 ipuser[16]);
+	/*
+	 * The path_resumed function is called after an icuv interrupt
+	 * type 0x05 has been received. The communication peer has resumed
+	 * the path.
+	 */
+	void (*path_resumed)(struct iucv_path *, u8 ipuser[16]);
+	/*
+	 * The message_pending function is called after an icuv interrupt
+	 * type 0x06 or type 0x07 has been received. A new message is
+	 * availabe and can be received with iucv_message_receive.
+	 */
+	void (*message_pending)(struct iucv_path *, struct iucv_message *);
+	/*
+	 * The message_complete function is called after an icuv interrupt
+	 * type 0x08 or type 0x09 has been received. A message send with
+	 * iucv_message_send2way has been replied to. The reply can be
+	 * received with iucv_message_receive.
+	 */
+	void (*message_complete)(struct iucv_path *, struct iucv_message *);
+
+	struct list_head list;
+	struct list_head paths;
+};
+
+/**
+ * iucv_register:
+ * @handler: address of iucv handler structure
+ * @smp: != 0 indicates that the handler can deal with out of order messages
+ *
+ * Registers a driver with IUCV.
+ *
+ * Returns 0 on success, -ENOMEM if the memory allocation for the pathid
+ * table failed, or -EIO if IUCV_DECLARE_BUFFER failed on all cpus.
+ */
+int iucv_register(struct iucv_handler *handler, int smp);
+
+/**
+ * iucv_unregister
+ * @handler:  address of iucv handler structure
+ * @smp: != 0 indicates that the handler can deal with out of order messages
+ *
+ * Unregister driver from IUCV.
+ */
+void iucv_unregister(struct iucv_handler *handle, int smp);
+
+/**
+ * iucv_path_alloc
+ * @msglim: initial message limit
+ * @flags: initial flags
+ * @gfp: kmalloc allocation flag
+ *
+ * Allocate a new path structure for use with iucv_connect.
+ *
+ * Returns NULL if the memory allocation failed or a pointer to the
+ * path structure.
+ */
+static inline struct iucv_path *iucv_path_alloc(u16 msglim, u8 flags, gfp_t gfp)
+{
+	struct iucv_path *path;
+
+	path = kzalloc(sizeof(struct iucv_path), gfp);
+	if (path) {
+		path->msglim = msglim;
+		path->flags = flags;
+	}
+	return path;
+}
+
+/**
+ * iucv_path_free
+ * @path: address of iucv path structure
+ *
+ * Frees a path structure.
+ */
+static inline void iucv_path_free(struct iucv_path *path)
+{
+	kfree(path);
+}
+
+/**
+ * iucv_path_accept
+ * @path: address of iucv path structure
+ * @handler: address of iucv handler structure
+ * @userdata: 16 bytes of data reflected to the communication partner
+ * @private: private data passed to interrupt handlers for this path
+ *
+ * This function is issued after the user received a connection pending
+ * external interrupt and now wishes to complete the IUCV communication path.
+ *
+ * Returns the result of the CP IUCV call.
+ */
+int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler,
+		     u8 userdata[16], void *private);
+
+/**
+ * iucv_path_connect
+ * @path: address of iucv path structure
+ * @handler: address of iucv handler structure
+ * @userid: 8-byte user identification
+ * @system: 8-byte target system identification
+ * @userdata: 16 bytes of data reflected to the communication partner
+ * @private: private data passed to interrupt handlers for this path
+ *
+ * This function establishes an IUCV path. Although the connect may complete
+ * successfully, you are not able to use the path until you receive an IUCV
+ * Connection Complete external interrupt.
+ *
+ * Returns the result of the CP IUCV call.
+ */
+int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
+		      u8 userid[8], u8 system[8], u8 userdata[16],
+		      void *private);
+
+/**
+ * iucv_path_quiesce:
+ * @path: address of iucv path structure
+ * @userdata: 16 bytes of data reflected to the communication partner
+ *
+ * This function temporarily suspends incoming messages on an IUCV path.
+ * You can later reactivate the path by invoking the iucv_resume function.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_path_quiesce(struct iucv_path *path, u8 userdata[16]);
+
+/**
+ * iucv_path_resume:
+ * @path: address of iucv path structure
+ * @userdata: 16 bytes of data reflected to the communication partner
+ *
+ * This function resumes incoming messages on an IUCV path that has
+ * been stopped with iucv_path_quiesce.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_path_resume(struct iucv_path *path, u8 userdata[16]);
+
+/**
+ * iucv_path_sever
+ * @path: address of iucv path structure
+ * @userdata: 16 bytes of data reflected to the communication partner
+ *
+ * This function terminates an IUCV path.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_path_sever(struct iucv_path *path, u8 userdata[16]);
+
+/**
+ * iucv_message_purge
+ * @path: address of iucv path structure
+ * @msg: address of iucv msg structure
+ * @srccls: source class of message
+ *
+ * Cancels a message you have sent.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg,
+		       u32 srccls);
+
+/**
+ * iucv_message_receive
+ * @path: address of iucv path structure
+ * @msg: address of iucv msg structure
+ * @flags: flags that affect how the message is received (IUCV_IPBUFLST)
+ * @buffer: address of data buffer or address of struct iucv_array
+ * @size: length of data buffer
+ * @residual:
+ *
+ * This function receives messages that are being sent to you over
+ * established paths. This function will deal with RMDATA messages
+ * embedded in struct iucv_message as well.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
+			 u8 flags, void *buffer, size_t size, size_t *residual);
+
+/**
+ * iucv_message_reject
+ * @path: address of iucv path structure
+ * @msg: address of iucv msg structure
+ *
+ * The reject function refuses a specified message. Between the time you
+ * are notified of a message and the time that you complete the message,
+ * the message may be rejected.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg);
+
+/**
+ * iucv_message_reply
+ * @path: address of iucv path structure
+ * @msg: address of iucv msg structure
+ * @flags: how the reply is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST)
+ * @reply: address of data buffer or address of struct iucv_array
+ * @size: length of reply data buffer
+ *
+ * This function responds to the two-way messages that you receive. You
+ * must identify completely the message to which you wish to reply. ie,
+ * pathid, msgid, and trgcls. Prmmsg signifies the data is moved into
+ * the parameter list.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg,
+		       u8 flags, void *reply, size_t size);
+
+/**
+ * iucv_message_send
+ * @path: address of iucv path structure
+ * @msg: address of iucv msg structure
+ * @flags: how the message is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST)
+ * @srccls: source class of message
+ * @buffer: address of data buffer or address of struct iucv_array
+ * @size: length of send buffer
+ *
+ * This function transmits data to another application. Data to be
+ * transmitted is in a buffer and this is a one-way message and the
+ * receiver will not reply to the message.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
+		      u8 flags, u32 srccls, void *buffer, size_t size);
+
+/**
+ * iucv_message_send2way
+ * @path: address of iucv path structure
+ * @msg: address of iucv msg structure
+ * @flags: how the message is sent and the reply is received
+ *	   (IUCV_IPRMDATA, IUCV_IPBUFLST, IUCV_IPPRTY, IUCV_ANSLST)
+ * @srccls: source class of message
+ * @buffer: address of data buffer or address of struct iucv_array
+ * @size: length of send buffer
+ * @ansbuf: address of answer buffer or address of struct iucv_array
+ * @asize: size of reply buffer
+ *
+ * This function transmits data to another application. Data to be
+ * transmitted is in a buffer. The receiver of the send is expected to
+ * reply to the message and a buffer is provided into which IUCV moves
+ * the reply to this message.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg,
+			  u8 flags, u32 srccls, void *buffer, size_t size,
+			  void *answer, size_t asize, size_t *residual);
diff --git a/net/Kconfig b/net/Kconfig
index 7dfc94920697..915657832d94 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -37,6 +37,7 @@ config NETDEBUG
 source "net/packet/Kconfig"
 source "net/unix/Kconfig"
 source "net/xfrm/Kconfig"
+source "net/iucv/Kconfig"
 
 config INET
 	bool "TCP/IP networking"
diff --git a/net/Makefile b/net/Makefile
index ad4d14f4bb29..4854ac506313 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_IP_SCTP)		+= sctp/
 obj-$(CONFIG_IEEE80211)		+= ieee80211/
 obj-$(CONFIG_TIPC)		+= tipc/
 obj-$(CONFIG_NETLABEL)		+= netlabel/
+obj-$(CONFIG_IUCV)		+= iucv/
 
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
diff --git a/net/iucv/Kconfig b/net/iucv/Kconfig
new file mode 100644
index 000000000000..45ecf0b62d52
--- /dev/null
+++ b/net/iucv/Kconfig
@@ -0,0 +1,7 @@
+config IUCV
+	tristate "IUCV support (VM only)"
+	depends on S390
+	help
+	  Select this option if you want to use inter-user communication under
+	  VM or VIF sockets. If you run on z/VM, say "Y" to enable a fast
+	  communication link between VM guests.
diff --git a/net/iucv/Makefile b/net/iucv/Makefile
new file mode 100644
index 000000000000..875941720d6a
--- /dev/null
+++ b/net/iucv/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for IUCV
+#
+
+obj-$(CONFIG_IUCV)	+= iucv.o
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
new file mode 100644
index 000000000000..1b10d576f222
--- /dev/null
+++ b/net/iucv/iucv.c
@@ -0,0 +1,1619 @@
+/*
+ * IUCV base infrastructure.
+ *
+ * Copyright 2001, 2006 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Author(s):
+ *    Original source:
+ *	Alan Altmark (Alan_Altmark@us.ibm.com)	Sept. 2000
+ *	Xenia Tkatschow (xenia@us.ibm.com)
+ *    2Gb awareness and general cleanup:
+ *	Fritz Elfert (elfert@de.ibm.com, felfert@millenux.com)
+ *    Rewritten for af_iucv:
+ *	Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * Documentation used:
+ *    The original source
+ *    CP Programming Service, IBM document # SC24-5760
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <net/iucv/iucv.h>
+#include <asm/atomic.h>
+#include <asm/ebcdic.h>
+#include <asm/io.h>
+#include <asm/s390_ext.h>
+#include <asm/s390_rdev.h>
+#include <asm/smp.h>
+
+/*
+ * FLAGS:
+ * All flags are defined in the field IPFLAGS1 of each function
+ * and can be found in CP Programming Services.
+ * IPSRCCLS - Indicates you have specified a source class.
+ * IPTRGCLS - Indicates you have specified a target class.
+ * IPFGPID  - Indicates you have specified a pathid.
+ * IPFGMID  - Indicates you have specified a message ID.
+ * IPNORPY  - Indicates a one-way message. No reply expected.
+ * IPALL    - Indicates that all paths are affected.
+ */
+#define IUCV_IPSRCCLS	0x01
+#define IUCV_IPTRGCLS	0x01
+#define IUCV_IPFGPID	0x02
+#define IUCV_IPFGMID	0x04
+#define IUCV_IPNORPY	0x10
+#define IUCV_IPALL	0x80
+
+static int iucv_bus_match (struct device *dev, struct device_driver *drv)
+{
+	return 0;
+}
+
+struct bus_type iucv_bus = {
+	.name = "iucv",
+	.match = iucv_bus_match,
+};
+
+struct device *iucv_root;
+static int iucv_available;
+
+/* General IUCV interrupt structure */
+struct iucv_irq_data {
+	u16 ippathid;
+	u8  ipflags1;
+	u8  iptype;
+	u32 res2[8];
+};
+
+struct iucv_work {
+	struct list_head list;
+	struct iucv_irq_data data;
+};
+
+static LIST_HEAD(iucv_work_queue);
+static DEFINE_SPINLOCK(iucv_work_lock);
+
+static struct iucv_irq_data *iucv_irq_data;
+static cpumask_t iucv_buffer_cpumask = CPU_MASK_NONE;
+static cpumask_t iucv_irq_cpumask = CPU_MASK_NONE;
+
+static void iucv_tasklet_handler(unsigned long);
+static DECLARE_TASKLET(iucv_tasklet, iucv_tasklet_handler,0);
+
+enum iucv_command_codes {
+	IUCV_QUERY = 0,
+	IUCV_RETRIEVE_BUFFER = 2,
+	IUCV_SEND = 4,
+	IUCV_RECEIVE = 5,
+	IUCV_REPLY = 6,
+	IUCV_REJECT = 8,
+	IUCV_PURGE = 9,
+	IUCV_ACCEPT = 10,
+	IUCV_CONNECT = 11,
+	IUCV_DECLARE_BUFFER = 12,
+	IUCV_QUIESCE = 13,
+	IUCV_RESUME = 14,
+	IUCV_SEVER = 15,
+	IUCV_SETMASK = 16,
+};
+
+/*
+ * Error messages that are used with the iucv_sever function. They get
+ * converted to EBCDIC.
+ */
+static char iucv_error_no_listener[16] = "NO LISTENER";
+static char iucv_error_no_memory[16] = "NO MEMORY";
+static char iucv_error_pathid[16] = "INVALID PATHID";
+
+/*
+ * iucv_handler_list: List of registered handlers.
+ */
+static LIST_HEAD(iucv_handler_list);
+
+/*
+ * iucv_path_table: an array of iucv_path structures.
+ */
+static struct iucv_path **iucv_path_table;
+static unsigned long iucv_max_pathid;
+
+/*
+ * iucv_lock: spinlock protecting iucv_handler_list and iucv_pathid_table
+ */
+static DEFINE_SPINLOCK(iucv_table_lock);
+
+/*
+ * iucv_tasklet_cpu: contains the number of the cpu executing the tasklet.
+ * Needed for iucv_path_sever called from tasklet.
+ */
+static int iucv_tasklet_cpu = -1;
+
+/*
+ * Mutex and wait queue for iucv_register/iucv_unregister.
+ */
+static DEFINE_MUTEX(iucv_register_mutex);
+
+/*
+ * Counter for number of non-smp capable handlers.
+ */
+static int iucv_nonsmp_handler;
+
+/*
+ * IUCV control data structure. Used by iucv_path_accept, iucv_path_connect,
+ * iucv_path_quiesce and iucv_path_sever.
+ */
+struct iucv_cmd_control {
+	u16 ippathid;
+	u8  ipflags1;
+	u8  iprcode;
+	u16 ipmsglim;
+	u16 res1;
+	u8  ipvmid[8];
+	u8  ipuser[16];
+	u8  iptarget[8];
+} __attribute__ ((packed,aligned(8)));
+
+/*
+ * Data in parameter list iucv structure. Used by iucv_message_send,
+ * iucv_message_send2way and iucv_message_reply.
+ */
+struct iucv_cmd_dpl {
+	u16 ippathid;
+	u8  ipflags1;
+	u8  iprcode;
+	u32 ipmsgid;
+	u32 iptrgcls;
+	u8  iprmmsg[8];
+	u32 ipsrccls;
+	u32 ipmsgtag;
+	u32 ipbfadr2;
+	u32 ipbfln2f;
+	u32 res;
+} __attribute__ ((packed,aligned(8)));
+
+/*
+ * Data in buffer iucv structure. Used by iucv_message_receive,
+ * iucv_message_reject, iucv_message_send, iucv_message_send2way
+ * and iucv_declare_cpu.
+ */
+struct iucv_cmd_db {
+	u16 ippathid;
+	u8  ipflags1;
+	u8  iprcode;
+	u32 ipmsgid;
+	u32 iptrgcls;
+	u32 ipbfadr1;
+	u32 ipbfln1f;
+	u32 ipsrccls;
+	u32 ipmsgtag;
+	u32 ipbfadr2;
+	u32 ipbfln2f;
+	u32 res;
+} __attribute__ ((packed,aligned(8)));
+
+/*
+ * Purge message iucv structure. Used by iucv_message_purge.
+ */
+struct iucv_cmd_purge {
+	u16 ippathid;
+	u8  ipflags1;
+	u8  iprcode;
+	u32 ipmsgid;
+	u8  ipaudit[3];
+	u8  res1[5];
+	u32 res2;
+	u32 ipsrccls;
+	u32 ipmsgtag;
+	u32 res3[3];
+} __attribute__ ((packed,aligned(8)));
+
+/*
+ * Set mask iucv structure. Used by iucv_enable_cpu.
+ */
+struct iucv_cmd_set_mask {
+	u8  ipmask;
+	u8  res1[2];
+	u8  iprcode;
+	u32 res2[9];
+} __attribute__ ((packed,aligned(8)));
+
+union iucv_param {
+	struct iucv_cmd_control ctrl;
+	struct iucv_cmd_dpl dpl;
+	struct iucv_cmd_db db;
+	struct iucv_cmd_purge purge;
+	struct iucv_cmd_set_mask set_mask;
+};
+
+/*
+ * Anchor for per-cpu IUCV command parameter block.
+ */
+static union iucv_param *iucv_param;
+
+/**
+ * iucv_call_b2f0
+ * @code: identifier of IUCV call to CP.
+ * @parm: pointer to a struct iucv_parm block
+ *
+ * Calls CP to execute IUCV commands.
+ *
+ * Returns the result of the CP IUCV call.
+ */
+static inline int iucv_call_b2f0(int command, union iucv_param *parm)
+{
+	register unsigned long reg0 asm ("0");
+	register unsigned long reg1 asm ("1");
+	int ccode;
+
+	reg0 = command;
+	reg1 = virt_to_phys(parm);
+	asm volatile(
+		"	.long 0xb2f01000\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (ccode), "=m" (*parm), "+d" (reg0), "+a" (reg1)
+		:  "m" (*parm) : "cc");
+	return (ccode == 1) ? parm->ctrl.iprcode : ccode;
+}
+
+/**
+ * iucv_query_maxconn
+ *
+ * Determines the maximum number of connections that may be established.
+ *
+ * Returns the maximum number of connections or -EPERM is IUCV is not
+ * available.
+ */
+static int iucv_query_maxconn(void)
+{
+	register unsigned long reg0 asm ("0");
+	register unsigned long reg1 asm ("1");
+	void *param;
+	int ccode;
+
+	param = kzalloc(sizeof(union iucv_param), GFP_KERNEL|GFP_DMA);
+	if (!param)
+		return -ENOMEM;
+	reg0 = IUCV_QUERY;
+	reg1 = (unsigned long) param;
+	asm volatile (
+		"	.long	0xb2f01000\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (ccode), "+d" (reg0), "+d" (reg1) : : "cc");
+	if (ccode == 0)
+		iucv_max_pathid = reg0;
+	kfree(param);
+	return ccode ? -EPERM : 0;
+}
+
+/**
+ * iucv_allow_cpu
+ * @data: unused
+ *
+ * Allow iucv interrupts on this cpu.
+ */
+static void iucv_allow_cpu(void *data)
+{
+	int cpu = smp_processor_id();
+	union iucv_param *parm;
+
+	/*
+	 * Enable all iucv interrupts.
+	 * ipmask contains bits for the different interrupts
+	 *	0x80 - Flag to allow nonpriority message pending interrupts
+	 *	0x40 - Flag to allow priority message pending interrupts
+	 *	0x20 - Flag to allow nonpriority message completion interrupts
+	 *	0x10 - Flag to allow priority message completion interrupts
+	 *	0x08 - Flag to allow IUCV control interrupts
+	 */
+	parm = percpu_ptr(iucv_param, smp_processor_id());
+	memset(parm, 0, sizeof(union iucv_param));
+	parm->set_mask.ipmask = 0xf8;
+	iucv_call_b2f0(IUCV_SETMASK, parm);
+
+	/* Set indication that iucv interrupts are allowed for this cpu. */
+	cpu_set(cpu, iucv_irq_cpumask);
+}
+
+/**
+ * iucv_block_cpu
+ * @data: unused
+ *
+ * Block iucv interrupts on this cpu.
+ */
+static void iucv_block_cpu(void *data)
+{
+	int cpu = smp_processor_id();
+	union iucv_param *parm;
+
+	/* Disable all iucv interrupts. */
+	parm = percpu_ptr(iucv_param, smp_processor_id());
+	memset(parm, 0, sizeof(union iucv_param));
+	iucv_call_b2f0(IUCV_SETMASK, parm);
+
+	/* Clear indication that iucv interrupts are allowed for this cpu. */
+	cpu_clear(cpu, iucv_irq_cpumask);
+}
+
+/**
+ * iucv_declare_cpu
+ * @data: unused
+ *
+ * Declare a interupt buffer on this cpu.
+ */
+static void iucv_declare_cpu(void *data)
+{
+	int cpu = smp_processor_id();
+	union iucv_param *parm;
+	int rc;
+
+	if (cpu_isset(cpu, iucv_buffer_cpumask))
+		return;
+
+	/* Declare interrupt buffer. */
+	parm = percpu_ptr(iucv_param, cpu);
+	memset(parm, 0, sizeof(union iucv_param));
+	parm->db.ipbfadr1 = virt_to_phys(percpu_ptr(iucv_irq_data, cpu));
+	rc = iucv_call_b2f0(IUCV_DECLARE_BUFFER, parm);
+	if (rc) {
+		char *err = "Unknown";
+		switch(rc) {
+		case 0x03:
+			err = "Directory error";
+			break;
+		case 0x0a:
+			err = "Invalid length";
+			break;
+		case 0x13:
+			err = "Buffer already exists";
+			break;
+		case 0x3e:
+			err = "Buffer overlap";
+			break;
+		case 0x5c:
+			err = "Paging or storage error";
+			break;
+		}
+		printk(KERN_WARNING "iucv_register: iucv_declare_buffer "
+		       "on cpu %i returned error 0x%02x (%s)\n", cpu, rc, err);
+		return;
+	}
+
+	/* Set indication that an iucv buffer exists for this cpu. */
+	cpu_set(cpu, iucv_buffer_cpumask);
+
+	if (iucv_nonsmp_handler == 0 || cpus_empty(iucv_irq_cpumask))
+		/* Enable iucv interrupts on this cpu. */
+		iucv_allow_cpu(NULL);
+	else
+		/* Disable iucv interrupts on this cpu. */
+		iucv_block_cpu(NULL);
+}
+
+/**
+ * iucv_retrieve_cpu
+ * @data: unused
+ *
+ * Retrieve interrupt buffer on this cpu.
+ */
+static void iucv_retrieve_cpu(void *data)
+{
+	int cpu = smp_processor_id();
+	union iucv_param *parm;
+
+	if (!cpu_isset(cpu, iucv_buffer_cpumask))
+		return;
+
+	/* Block iucv interrupts. */
+	iucv_block_cpu(NULL);
+
+	/* Retrieve interrupt buffer. */
+	parm = percpu_ptr(iucv_param, cpu);
+	iucv_call_b2f0(IUCV_RETRIEVE_BUFFER, parm);
+
+	/* Clear indication that an iucv buffer exists for this cpu. */
+	cpu_clear(cpu, iucv_buffer_cpumask);
+}
+
+/**
+ * iucv_setmask_smp
+ *
+ * Allow iucv interrupts on all cpus.
+ */
+static void iucv_setmask_mp(void)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		/* Enable all cpus with a declared buffer. */
+		if (cpu_isset(cpu, iucv_buffer_cpumask) &&
+		    !cpu_isset(cpu, iucv_irq_cpumask))
+			smp_call_function_on(iucv_allow_cpu, NULL, 0, 1, cpu);
+}
+
+/**
+ * iucv_setmask_up
+ *
+ * Allow iucv interrupts on a single cpus.
+ */
+static void iucv_setmask_up(void)
+{
+	cpumask_t cpumask;
+	int cpu;
+
+	/* Disable all cpu but the first in cpu_irq_cpumask. */
+	cpumask = iucv_irq_cpumask;
+	cpu_clear(first_cpu(iucv_irq_cpumask), cpumask);
+	for_each_cpu_mask(cpu, cpumask)
+		smp_call_function_on(iucv_block_cpu, NULL, 0, 1, cpu);
+}
+
+/**
+ * iucv_enable
+ *
+ * This function makes iucv ready for use. It allocates the pathid
+ * table, declares an iucv interrupt buffer and enables the iucv
+ * interrupts. Called when the first user has registered an iucv
+ * handler.
+ */
+static int iucv_enable(void)
+{
+	size_t alloc_size;
+	int cpu, rc;
+
+	rc = -ENOMEM;
+	alloc_size = iucv_max_pathid * sizeof(struct iucv_path);
+	iucv_path_table = kzalloc(alloc_size, GFP_KERNEL);
+	if (!iucv_path_table)
+		goto out;
+	/* Declare per cpu buffers. */
+	rc = -EIO;
+	for_each_online_cpu(cpu)
+		smp_call_function_on(iucv_declare_cpu, NULL, 0, 1, cpu);
+	if (cpus_empty(iucv_buffer_cpumask))
+		/* No cpu could declare an iucv buffer. */
+		goto out_path;
+	return 0;
+
+out_path:
+	kfree(iucv_path_table);
+out:
+	return rc;
+}
+
+/**
+ * iucv_disable
+ *
+ * This function shuts down iucv. It disables iucv interrupts, retrieves
+ * the iucv interrupt buffer and frees the pathid table. Called after the
+ * last user unregister its iucv handler.
+ */
+static void iucv_disable(void)
+{
+	on_each_cpu(iucv_retrieve_cpu, NULL, 0, 1);
+	kfree(iucv_path_table);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
+				     unsigned long action, void *hcpu)
+{
+	cpumask_t cpumask;
+	long cpu = (long) hcpu;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+		if (!percpu_populate(iucv_irq_data,
+				     sizeof(struct iucv_irq_data),
+				     GFP_KERNEL|GFP_DMA, cpu))
+			return NOTIFY_BAD;
+		if (!percpu_populate(iucv_param, sizeof(union iucv_param),
+				     GFP_KERNEL|GFP_DMA, cpu)) {
+			percpu_depopulate(iucv_irq_data, cpu);
+			return NOTIFY_BAD;
+		}
+		break;
+	case CPU_UP_CANCELED:
+	case CPU_DEAD:
+		percpu_depopulate(iucv_param, cpu);
+		percpu_depopulate(iucv_irq_data, cpu);
+		break;
+	case CPU_ONLINE:
+	case CPU_DOWN_FAILED:
+		smp_call_function_on(iucv_declare_cpu, NULL, 0, 1, cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+		cpumask = iucv_buffer_cpumask;
+		cpu_clear(cpu, cpumask);
+		if (cpus_empty(cpumask))
+			/* Can't offline last IUCV enabled cpu. */
+			return NOTIFY_BAD;
+		smp_call_function_on(iucv_retrieve_cpu, NULL, 0, 1, cpu);
+		if (cpus_empty(iucv_irq_cpumask))
+			smp_call_function_on(iucv_allow_cpu, NULL, 0, 1,
+					     first_cpu(iucv_buffer_cpumask));
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block iucv_cpu_notifier = {
+	.notifier_call = iucv_cpu_notify,
+};
+#endif
+
+/**
+ * iucv_sever_pathid
+ * @pathid: path identification number.
+ * @userdata: 16-bytes of user data.
+ *
+ * Sever an iucv path to free up the pathid. Used internally.
+ */
+static int iucv_sever_pathid(u16 pathid, u8 userdata[16])
+{
+	union iucv_param *parm;
+
+	parm = percpu_ptr(iucv_param, smp_processor_id());
+	memset(parm, 0, sizeof(union iucv_param));
+	if (userdata)
+		memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser));
+	parm->ctrl.ippathid = pathid;
+	return iucv_call_b2f0(IUCV_SEVER, parm);
+}
+
+/**
+ * __iucv_cleanup_pathid
+ * @dummy: unused dummy argument
+ *
+ * Nop function called via smp_call_function to force work items from
+ * pending external iucv interrupts to the work queue.
+ */
+static void __iucv_cleanup_pathid(void *dummy)
+{
+}
+
+/**
+ * iucv_cleanup_pathid
+ * @pathid: 16 bit pathid
+ *
+ * Function called after a path has been severed to find all remaining
+ * work items for the now stale pathid. The caller needs to hold the
+ * iucv_table_lock.
+ */
+static void iucv_cleanup_pathid(u16 pathid)
+{
+	struct iucv_work *p, *n;
+
+	/*
+	 * Path is severed, the pathid can be reused immediatly on
+	 * a iucv connect or a connection pending interrupt.
+	 * iucv_path_connect and connection pending interrupt will
+	 * wait until the iucv_table_lock is released before the
+	 * recycled pathid enters the system.
+	 * Force remaining interrupts to the work queue, then
+	 * scan the work queue for items of this path.
+	 */
+	smp_call_function(__iucv_cleanup_pathid, NULL, 0, 1);
+	spin_lock_irq(&iucv_work_lock);
+	list_for_each_entry_safe(p, n, &iucv_work_queue, list) {
+		/* Remove work items for pathid except connection pending */
+		if (p->data.ippathid == pathid && p->data.iptype != 0x01) {
+			list_del(&p->list);
+			kfree(p);
+		}
+	}
+	spin_unlock_irq(&iucv_work_lock);
+}
+
+/**
+ * iucv_register:
+ * @handler: address of iucv handler structure
+ * @smp: != 0 indicates that the handler can deal with out of order messages
+ *
+ * Registers a driver with IUCV.
+ *
+ * Returns 0 on success, -ENOMEM if the memory allocation for the pathid
+ * table failed, or -EIO if IUCV_DECLARE_BUFFER failed on all cpus.
+ */
+int iucv_register(struct iucv_handler *handler, int smp)
+{
+	int rc;
+
+	if (!iucv_available)
+		return -ENOSYS;
+	mutex_lock(&iucv_register_mutex);
+	if (!smp)
+		iucv_nonsmp_handler++;
+	if (list_empty(&iucv_handler_list)) {
+		rc = iucv_enable();
+		if (rc)
+			goto out_mutex;
+	} else if (!smp && iucv_nonsmp_handler == 1)
+		iucv_setmask_up();
+	INIT_LIST_HEAD(&handler->paths);
+
+	spin_lock_irq(&iucv_table_lock);
+	list_add_tail(&handler->list, &iucv_handler_list);
+	spin_unlock_irq(&iucv_table_lock);
+	rc = 0;
+out_mutex:
+	mutex_unlock(&iucv_register_mutex);
+	return rc;
+}
+
+/**
+ * iucv_unregister
+ * @handler:  address of iucv handler structure
+ * @smp: != 0 indicates that the handler can deal with out of order messages
+ *
+ * Unregister driver from IUCV.
+ */
+void iucv_unregister(struct iucv_handler *handler, int smp)
+{
+	struct iucv_path *p, *n;
+
+	mutex_lock(&iucv_register_mutex);
+	spin_lock_bh(&iucv_table_lock);
+	/* Remove handler from the iucv_handler_list. */
+	list_del_init(&handler->list);
+	/* Sever all pathids still refering to the handler. */
+	list_for_each_entry_safe(p, n, &handler->paths, list) {
+		iucv_sever_pathid(p->pathid, NULL);
+		iucv_path_table[p->pathid] = NULL;
+		list_del(&p->list);
+		iucv_cleanup_pathid(p->pathid);
+		iucv_path_free(p);
+	}
+	spin_unlock_bh(&iucv_table_lock);
+	if (!smp)
+		iucv_nonsmp_handler--;
+	if (list_empty(&iucv_handler_list))
+		iucv_disable();
+	else if (!smp && iucv_nonsmp_handler == 0)
+		iucv_setmask_mp();
+	mutex_unlock(&iucv_register_mutex);
+}
+
+/**
+ * iucv_path_accept
+ * @path: address of iucv path structure
+ * @handler: address of iucv handler structure
+ * @userdata: 16 bytes of data reflected to the communication partner
+ * @private: private data passed to interrupt handlers for this path
+ *
+ * This function is issued after the user received a connection pending
+ * external interrupt and now wishes to complete the IUCV communication path.
+ *
+ * Returns the result of the CP IUCV call.
+ */
+int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler,
+		     u8 userdata[16], void *private)
+{
+	union iucv_param *parm;
+	int rc;
+
+	local_bh_disable();
+	/* Prepare parameter block. */
+	parm = percpu_ptr(iucv_param, smp_processor_id());
+	memset(parm, 0, sizeof(union iucv_param));
+	parm->ctrl.ippathid = path->pathid;
+	parm->ctrl.ipmsglim = path->msglim;
+	if (userdata)
+		memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser));
+	parm->ctrl.ipflags1 = path->flags;
+
+	rc = iucv_call_b2f0(IUCV_ACCEPT, parm);
+	if (!rc) {
+		path->private = private;
+		path->msglim = parm->ctrl.ipmsglim;
+		path->flags = parm->ctrl.ipflags1;
+	}
+	local_bh_enable();
+	return rc;
+}
+
+/**
+ * iucv_path_connect
+ * @path: address of iucv path structure
+ * @handler: address of iucv handler structure
+ * @userid: 8-byte user identification
+ * @system: 8-byte target system identification
+ * @userdata: 16 bytes of data reflected to the communication partner
+ * @private: private data passed to interrupt handlers for this path
+ *
+ * This function establishes an IUCV path. Although the connect may complete
+ * successfully, you are not able to use the path until you receive an IUCV
+ * Connection Complete external interrupt.
+ *
+ * Returns the result of the CP IUCV call.
+ */
+int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
+		      u8 userid[8], u8 system[8], u8 userdata[16],
+		      void *private)
+{
+	union iucv_param *parm;
+	int rc;
+
+	preempt_disable();
+	if (iucv_tasklet_cpu != smp_processor_id())
+		spin_lock_bh(&iucv_table_lock);
+	parm = percpu_ptr(iucv_param, smp_processor_id());
+	memset(parm, 0, sizeof(union iucv_param));
+	parm->ctrl.ipmsglim = path->msglim;
+	parm->ctrl.ipflags1 = path->flags;
+	if (userid) {
+		memcpy(parm->ctrl.ipvmid, userid, sizeof(parm->ctrl.ipvmid));
+		ASCEBC(parm->ctrl.ipvmid, sizeof(parm->ctrl.ipvmid));
+		EBC_TOUPPER(parm->ctrl.ipvmid, sizeof(parm->ctrl.ipvmid));
+	}
+	if (system) {
+		memcpy(parm->ctrl.iptarget, system,
+		       sizeof(parm->ctrl.iptarget));
+		ASCEBC(parm->ctrl.iptarget, sizeof(parm->ctrl.iptarget));
+		EBC_TOUPPER(parm->ctrl.iptarget, sizeof(parm->ctrl.iptarget));
+	}
+	if (userdata)
+		memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser));
+
+	rc = iucv_call_b2f0(IUCV_CONNECT, parm);
+	if (!rc) {
+		if (parm->ctrl.ippathid < iucv_max_pathid) {
+			path->pathid = parm->ctrl.ippathid;
+			path->msglim = parm->ctrl.ipmsglim;
+			path->flags = parm->ctrl.ipflags1;
+			path->handler = handler;
+			path->private = private;
+			list_add_tail(&path->list, &handler->paths);
+			iucv_path_table[path->pathid] = path;
+		} else {
+			iucv_sever_pathid(parm->ctrl.ippathid,
+					  iucv_error_pathid);
+			rc = -EIO;
+		}
+	}
+	if (iucv_tasklet_cpu != smp_processor_id())
+		spin_unlock_bh(&iucv_table_lock);
+	preempt_enable();
+	return rc;
+}
+
+/**
+ * iucv_path_quiesce:
+ * @path: address of iucv path structure
+ * @userdata: 16 bytes of data reflected to the communication partner
+ *
+ * This function temporarily suspends incoming messages on an IUCV path.
+ * You can later reactivate the path by invoking the iucv_resume function.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_path_quiesce(struct iucv_path *path, u8 userdata[16])
+{
+	union iucv_param *parm;
+	int rc;
+
+	local_bh_disable();
+	parm = percpu_ptr(iucv_param, smp_processor_id());
+	memset(parm, 0, sizeof(union iucv_param));
+	if (userdata)
+		memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser));
+	parm->ctrl.ippathid = path->pathid;
+	rc = iucv_call_b2f0(IUCV_QUIESCE, parm);
+	local_bh_enable();
+	return rc;
+}
+
+/**
+ * iucv_path_resume:
+ * @path: address of iucv path structure
+ * @userdata: 16 bytes of data reflected to the communication partner
+ *
+ * This function resumes incoming messages on an IUCV path that has
+ * been stopped with iucv_path_quiesce.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_path_resume(struct iucv_path *path, u8 userdata[16])
+{
+	union iucv_param *parm;
+	int rc;
+
+	local_bh_disable();
+	parm = percpu_ptr(iucv_param, smp_processor_id());
+	memset(parm, 0, sizeof(union iucv_param));
+	if (userdata)
+		memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser));
+	parm->ctrl.ippathid = path->pathid;
+	rc = iucv_call_b2f0(IUCV_RESUME, parm);
+	local_bh_enable();
+	return rc;
+}
+
+/**
+ * iucv_path_sever
+ * @path: address of iucv path structure
+ * @userdata: 16 bytes of data reflected to the communication partner
+ *
+ * This function terminates an IUCV path.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_path_sever(struct iucv_path *path, u8 userdata[16])
+{
+	int rc;
+
+
+	preempt_disable();
+	if (iucv_tasklet_cpu != smp_processor_id())
+		spin_lock_bh(&iucv_table_lock);
+	rc = iucv_sever_pathid(path->pathid, userdata);
+	if (!rc) {
+		iucv_path_table[path->pathid] = NULL;
+		list_del_init(&path->list);
+		iucv_cleanup_pathid(path->pathid);
+	}
+	if (iucv_tasklet_cpu != smp_processor_id())
+		spin_unlock_bh(&iucv_table_lock);
+	preempt_enable();
+	return rc;
+}
+
+/**
+ * iucv_message_purge
+ * @path: address of iucv path structure
+ * @msg: address of iucv msg structure
+ * @srccls: source class of message
+ *
+ * Cancels a message you have sent.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg,
+		       u32 srccls)
+{
+	union iucv_param *parm;
+	int rc;
+
+	local_bh_disable();
+	parm = percpu_ptr(iucv_param, smp_processor_id());
+	memset(parm, 0, sizeof(union iucv_param));
+	parm->purge.ippathid = path->pathid;
+	parm->purge.ipmsgid = msg->id;
+	parm->purge.ipsrccls = srccls;
+	parm->purge.ipflags1 = IUCV_IPSRCCLS | IUCV_IPFGMID | IUCV_IPFGPID;
+	rc = iucv_call_b2f0(IUCV_PURGE, parm);
+	if (!rc) {
+		msg->audit = (*(u32 *) &parm->purge.ipaudit) >> 8;
+		msg->tag = parm->purge.ipmsgtag;
+	}
+	local_bh_enable();
+	return rc;
+}
+
+/**
+ * iucv_message_receive
+ * @path: address of iucv path structure
+ * @msg: address of iucv msg structure
+ * @flags: how the message is received (IUCV_IPBUFLST)
+ * @buffer: address of data buffer or address of struct iucv_array
+ * @size: length of data buffer
+ * @residual:
+ *
+ * This function receives messages that are being sent to you over
+ * established paths. This function will deal with RMDATA messages
+ * embedded in struct iucv_message as well.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
+			 u8 flags, void *buffer, size_t size, size_t *residual)
+{
+	union iucv_param *parm;
+	struct iucv_array *array;
+	u8 *rmmsg;
+	size_t copy;
+	int rc;
+
+	if (msg->flags & IUCV_IPRMDATA) {
+		/*
+		 * Message is 8 bytes long and has been stored to the
+		 * message descriptor itself.
+		 */
+		rc = (size < 8) ? 5 : 0;
+		if (residual)
+			*residual = abs(size - 8);
+		rmmsg = msg->rmmsg;
+		if (flags & IUCV_IPBUFLST) {
+			/* Copy to struct iucv_array. */
+			size = (size < 8) ? size : 8;
+			for (array = buffer; size > 0; array++) {
+				copy = min_t(size_t, size, array->length);
+				memcpy((u8 *)(addr_t) array->address,
+				       rmmsg, copy);
+				rmmsg += copy;
+				size -= copy;
+			}
+		} else {
+			/* Copy to direct buffer. */
+			memcpy(buffer, rmmsg, min_t(size_t, size, 8));
+		}
+		return 0;
+	}
+
+	local_bh_disable();
+	parm = percpu_ptr(iucv_param, smp_processor_id());
+	memset(parm, 0, sizeof(union iucv_param));
+	parm->db.ipbfadr1 = (u32)(addr_t) buffer;
+	parm->db.ipbfln1f = (u32) size;
+	parm->db.ipmsgid = msg->id;
+	parm->db.ippathid = path->pathid;
+	parm->db.iptrgcls = msg->class;
+	parm->db.ipflags1 = (flags | IUCV_IPFGPID |
+			     IUCV_IPFGMID | IUCV_IPTRGCLS);
+	rc = iucv_call_b2f0(IUCV_RECEIVE, parm);
+	if (!rc || rc == 5) {
+		msg->flags = parm->db.ipflags1;
+		if (residual)
+			*residual = parm->db.ipbfln1f;
+	}
+	local_bh_enable();
+	return rc;
+}
+
+/**
+ * iucv_message_reject
+ * @path: address of iucv path structure
+ * @msg: address of iucv msg structure
+ *
+ * The reject function refuses a specified message. Between the time you
+ * are notified of a message and the time that you complete the message,
+ * the message may be rejected.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg)
+{
+	union iucv_param *parm;
+	int rc;
+
+	local_bh_disable();
+	parm = percpu_ptr(iucv_param, smp_processor_id());
+	memset(parm, 0, sizeof(union iucv_param));
+	parm->db.ippathid = path->pathid;
+	parm->db.ipmsgid = msg->id;
+	parm->db.iptrgcls = msg->class;
+	parm->db.ipflags1 = (IUCV_IPTRGCLS | IUCV_IPFGMID | IUCV_IPFGPID);
+	rc = iucv_call_b2f0(IUCV_REJECT, parm);
+	local_bh_enable();
+	return rc;
+}
+
+/**
+ * iucv_message_reply
+ * @path: address of iucv path structure
+ * @msg: address of iucv msg structure
+ * @flags: how the reply is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST)
+ * @reply: address of reply data buffer or address of struct iucv_array
+ * @size: length of reply data buffer
+ *
+ * This function responds to the two-way messages that you receive. You
+ * must identify completely the message to which you wish to reply. ie,
+ * pathid, msgid, and trgcls. Prmmsg signifies the data is moved into
+ * the parameter list.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg,
+		       u8 flags, void *reply, size_t size)
+{
+	union iucv_param *parm;
+	int rc;
+
+	local_bh_disable();
+	parm = percpu_ptr(iucv_param, smp_processor_id());
+	memset(parm, 0, sizeof(union iucv_param));
+	if (flags & IUCV_IPRMDATA) {
+		parm->dpl.ippathid = path->pathid;
+		parm->dpl.ipflags1 = flags;
+		parm->dpl.ipmsgid = msg->id;
+		parm->dpl.iptrgcls = msg->class;
+		memcpy(parm->dpl.iprmmsg, reply, min_t(size_t, size, 8));
+	} else {
+		parm->db.ipbfadr1 = (u32)(addr_t) reply;
+		parm->db.ipbfln1f = (u32) size;
+		parm->db.ippathid = path->pathid;
+		parm->db.ipflags1 = flags;
+		parm->db.ipmsgid = msg->id;
+		parm->db.iptrgcls = msg->class;
+	}
+	rc = iucv_call_b2f0(IUCV_REPLY, parm);
+	local_bh_enable();
+	return rc;
+}
+
+/**
+ * iucv_message_send
+ * @path: address of iucv path structure
+ * @msg: address of iucv msg structure
+ * @flags: how the message is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST)
+ * @srccls: source class of message
+ * @buffer: address of send buffer or address of struct iucv_array
+ * @size: length of send buffer
+ *
+ * This function transmits data to another application. Data to be
+ * transmitted is in a buffer and this is a one-way message and the
+ * receiver will not reply to the message.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
+		      u8 flags, u32 srccls, void *buffer, size_t size)
+{
+	union iucv_param *parm;
+	int rc;
+
+	local_bh_disable();
+	parm = percpu_ptr(iucv_param, smp_processor_id());
+	memset(parm, 0, sizeof(union iucv_param));
+	if (flags & IUCV_IPRMDATA) {
+		/* Message of 8 bytes can be placed into the parameter list. */
+		parm->dpl.ippathid = path->pathid;
+		parm->dpl.ipflags1 = flags | IUCV_IPNORPY;
+		parm->dpl.iptrgcls = msg->class;
+		parm->dpl.ipsrccls = srccls;
+		parm->dpl.ipmsgtag = msg->tag;
+		memcpy(parm->dpl.iprmmsg, buffer, 8);
+	} else {
+		parm->db.ipbfadr1 = (u32)(addr_t) buffer;
+		parm->db.ipbfln1f = (u32) size;
+		parm->db.ippathid = path->pathid;
+		parm->db.ipflags1 = flags | IUCV_IPNORPY;
+		parm->db.iptrgcls = msg->class;
+		parm->db.ipsrccls = srccls;
+		parm->db.ipmsgtag = msg->tag;
+	}
+	rc = iucv_call_b2f0(IUCV_SEND, parm);
+	if (!rc)
+		msg->id = parm->db.ipmsgid;
+	local_bh_enable();
+	return rc;
+}
+
+/**
+ * iucv_message_send2way
+ * @path: address of iucv path structure
+ * @msg: address of iucv msg structure
+ * @flags: how the message is sent and the reply is received
+ *	   (IUCV_IPRMDATA, IUCV_IPBUFLST, IUCV_IPPRTY, IUCV_ANSLST)
+ * @srccls: source class of message
+ * @buffer: address of send buffer or address of struct iucv_array
+ * @size: length of send buffer
+ * @ansbuf: address of answer buffer or address of struct iucv_array
+ * @asize: size of reply buffer
+ *
+ * This function transmits data to another application. Data to be
+ * transmitted is in a buffer. The receiver of the send is expected to
+ * reply to the message and a buffer is provided into which IUCV moves
+ * the reply to this message.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg,
+			  u8 flags, u32 srccls, void *buffer, size_t size,
+			  void *answer, size_t asize, size_t *residual)
+{
+	union iucv_param *parm;
+	int rc;
+
+	local_bh_disable();
+	parm = percpu_ptr(iucv_param, smp_processor_id());
+	memset(parm, 0, sizeof(union iucv_param));
+	if (flags & IUCV_IPRMDATA) {
+		parm->dpl.ippathid = path->pathid;
+		parm->dpl.ipflags1 = path->flags;	/* priority message */
+		parm->dpl.iptrgcls = msg->class;
+		parm->dpl.ipsrccls = srccls;
+		parm->dpl.ipmsgtag = msg->tag;
+		parm->dpl.ipbfadr2 = (u32)(addr_t) answer;
+		parm->dpl.ipbfln2f = (u32) asize;
+		memcpy(parm->dpl.iprmmsg, buffer, 8);
+	} else {
+		parm->db.ippathid = path->pathid;
+		parm->db.ipflags1 = path->flags;	/* priority message */
+		parm->db.iptrgcls = msg->class;
+		parm->db.ipsrccls = srccls;
+		parm->db.ipmsgtag = msg->tag;
+		parm->db.ipbfadr1 = (u32)(addr_t) buffer;
+		parm->db.ipbfln1f = (u32) size;
+		parm->db.ipbfadr2 = (u32)(addr_t) answer;
+		parm->db.ipbfln2f = (u32) asize;
+	}
+	rc = iucv_call_b2f0(IUCV_SEND, parm);
+	if (!rc)
+		msg->id = parm->db.ipmsgid;
+	local_bh_enable();
+	return rc;
+}
+
+/**
+ * iucv_path_pending
+ * @data: Pointer to external interrupt buffer
+ *
+ * Process connection pending work item. Called from tasklet while holding
+ * iucv_table_lock.
+ */
+struct iucv_path_pending {
+	u16 ippathid;
+	u8  ipflags1;
+	u8  iptype;
+	u16 ipmsglim;
+	u16 res1;
+	u8  ipvmid[8];
+	u8  ipuser[16];
+	u32 res3;
+	u8  ippollfg;
+	u8  res4[3];
+} __attribute__ ((packed));
+
+static void iucv_path_pending(struct iucv_irq_data *data)
+{
+	struct iucv_path_pending *ipp = (void *) data;
+	struct iucv_handler *handler;
+	struct iucv_path *path;
+	char *error;
+
+	BUG_ON(iucv_path_table[ipp->ippathid]);
+	/* New pathid, handler found. Create a new path struct. */
+	error = iucv_error_no_memory;
+	path = iucv_path_alloc(ipp->ipmsglim, ipp->ipflags1, GFP_ATOMIC);
+	if (!path)
+		goto out_sever;
+	path->pathid = ipp->ippathid;
+	iucv_path_table[path->pathid] = path;
+	EBCASC(ipp->ipvmid, 8);
+
+	/* Call registered handler until one is found that wants the path. */
+	list_for_each_entry(handler, &iucv_handler_list, list) {
+		if (!handler->path_pending)
+			continue;
+		/*
+		 * Add path to handler to allow a call to iucv_path_sever
+		 * inside the path_pending function. If the handler returns
+		 * an error remove the path from the handler again.
+		 */
+		list_add(&path->list, &handler->paths);
+		path->handler = handler;
+		if (!handler->path_pending(path, ipp->ipvmid, ipp->ipuser))
+			return;
+		list_del(&path->list);
+		path->handler = NULL;
+	}
+	/* No handler wanted the path. */
+	iucv_path_table[path->pathid] = NULL;
+	iucv_path_free(path);
+	error = iucv_error_no_listener;
+out_sever:
+	iucv_sever_pathid(ipp->ippathid, error);
+}
+
+/**
+ * iucv_path_complete
+ * @data: Pointer to external interrupt buffer
+ *
+ * Process connection complete work item. Called from tasklet while holding
+ * iucv_table_lock.
+ */
+struct iucv_path_complete {
+	u16 ippathid;
+	u8  ipflags1;
+	u8  iptype;
+	u16 ipmsglim;
+	u16 res1;
+	u8  res2[8];
+	u8  ipuser[16];
+	u32 res3;
+	u8  ippollfg;
+	u8  res4[3];
+} __attribute__ ((packed));
+
+static void iucv_path_complete(struct iucv_irq_data *data)
+{
+	struct iucv_path_complete *ipc = (void *) data;
+	struct iucv_path *path = iucv_path_table[ipc->ippathid];
+
+	BUG_ON(!path || !path->handler);
+	if (path->handler->path_complete)
+		path->handler->path_complete(path, ipc->ipuser);
+}
+
+/**
+ * iucv_path_severed
+ * @data: Pointer to external interrupt buffer
+ *
+ * Process connection severed work item. Called from tasklet while holding
+ * iucv_table_lock.
+ */
+struct iucv_path_severed {
+	u16 ippathid;
+	u8  res1;
+	u8  iptype;
+	u32 res2;
+	u8  res3[8];
+	u8  ipuser[16];
+	u32 res4;
+	u8  ippollfg;
+	u8  res5[3];
+} __attribute__ ((packed));
+
+static void iucv_path_severed(struct iucv_irq_data *data)
+{
+	struct iucv_path_severed *ips = (void *) data;
+	struct iucv_path *path = iucv_path_table[ips->ippathid];
+
+	BUG_ON(!path || !path->handler);
+	if (path->handler->path_severed)
+		path->handler->path_severed(path, ips->ipuser);
+	else {
+		iucv_sever_pathid(path->pathid, NULL);
+		iucv_path_table[path->pathid] = NULL;
+		list_del_init(&path->list);
+		iucv_cleanup_pathid(path->pathid);
+		iucv_path_free(path);
+	}
+}
+
+/**
+ * iucv_path_quiesced
+ * @data: Pointer to external interrupt buffer
+ *
+ * Process connection quiesced work item. Called from tasklet while holding
+ * iucv_table_lock.
+ */
+struct iucv_path_quiesced {
+	u16 ippathid;
+	u8  res1;
+	u8  iptype;
+	u32 res2;
+	u8  res3[8];
+	u8  ipuser[16];
+	u32 res4;
+	u8  ippollfg;
+	u8  res5[3];
+} __attribute__ ((packed));
+
+static void iucv_path_quiesced(struct iucv_irq_data *data)
+{
+	struct iucv_path_quiesced *ipq = (void *) data;
+	struct iucv_path *path = iucv_path_table[ipq->ippathid];
+
+	BUG_ON(!path || !path->handler);
+	if (path->handler->path_quiesced)
+		path->handler->path_quiesced(path, ipq->ipuser);
+}
+
+/**
+ * iucv_path_resumed
+ * @data: Pointer to external interrupt buffer
+ *
+ * Process connection resumed work item. Called from tasklet while holding
+ * iucv_table_lock.
+ */
+struct iucv_path_resumed {
+	u16 ippathid;
+	u8  res1;
+	u8  iptype;
+	u32 res2;
+	u8  res3[8];
+	u8  ipuser[16];
+	u32 res4;
+	u8  ippollfg;
+	u8  res5[3];
+} __attribute__ ((packed));
+
+static void iucv_path_resumed(struct iucv_irq_data *data)
+{
+	struct iucv_path_resumed *ipr = (void *) data;
+	struct iucv_path *path = iucv_path_table[ipr->ippathid];
+
+	BUG_ON(!path || !path->handler);
+	if (path->handler->path_resumed)
+		path->handler->path_resumed(path, ipr->ipuser);
+}
+
+/**
+ * iucv_message_complete
+ * @data: Pointer to external interrupt buffer
+ *
+ * Process message complete work item. Called from tasklet while holding
+ * iucv_table_lock.
+ */
+struct iucv_message_complete {
+	u16 ippathid;
+	u8  ipflags1;
+	u8  iptype;
+	u32 ipmsgid;
+	u32 ipaudit;
+	u8  iprmmsg[8];
+	u32 ipsrccls;
+	u32 ipmsgtag;
+	u32 res;
+	u32 ipbfln2f;
+	u8  ippollfg;
+	u8  res2[3];
+} __attribute__ ((packed));
+
+static void iucv_message_complete(struct iucv_irq_data *data)
+{
+	struct iucv_message_complete *imc = (void *) data;
+	struct iucv_path *path = iucv_path_table[imc->ippathid];
+	struct iucv_message msg;
+
+	BUG_ON(!path || !path->handler);
+	if (path->handler->message_complete) {
+		msg.flags = imc->ipflags1;
+		msg.id = imc->ipmsgid;
+		msg.audit = imc->ipaudit;
+		memcpy(msg.rmmsg, imc->iprmmsg, 8);
+		msg.class = imc->ipsrccls;
+		msg.tag = imc->ipmsgtag;
+		msg.length = imc->ipbfln2f;
+		path->handler->message_complete(path, &msg);
+	}
+}
+
+/**
+ * iucv_message_pending
+ * @data: Pointer to external interrupt buffer
+ *
+ * Process message pending work item. Called from tasklet while holding
+ * iucv_table_lock.
+ */
+struct iucv_message_pending {
+	u16 ippathid;
+	u8  ipflags1;
+	u8  iptype;
+	u32 ipmsgid;
+	u32 iptrgcls;
+	union {
+		u32 iprmmsg1_u32;
+		u8  iprmmsg1[4];
+	} ln1msg1;
+	union {
+		u32 ipbfln1f;
+		u8  iprmmsg2[4];
+	} ln1msg2;
+	u32 res1[3];
+	u32 ipbfln2f;
+	u8  ippollfg;
+	u8  res2[3];
+} __attribute__ ((packed));
+
+static void iucv_message_pending(struct iucv_irq_data *data)
+{
+	struct iucv_message_pending *imp = (void *) data;
+	struct iucv_path *path = iucv_path_table[imp->ippathid];
+	struct iucv_message msg;
+
+	BUG_ON(!path || !path->handler);
+	if (path->handler->message_pending) {
+		msg.flags = imp->ipflags1;
+		msg.id = imp->ipmsgid;
+		msg.class = imp->iptrgcls;
+		if (imp->ipflags1 & IUCV_IPRMDATA) {
+			memcpy(msg.rmmsg, imp->ln1msg1.iprmmsg1, 8);
+			msg.length = 8;
+		} else
+			msg.length = imp->ln1msg2.ipbfln1f;
+		msg.reply_size = imp->ipbfln2f;
+		path->handler->message_pending(path, &msg);
+	}
+}
+
+/**
+ * iucv_tasklet_handler:
+ *
+ * This tasklet loops over the queue of irq buffers created by
+ * iucv_external_interrupt, calls the appropriate action handler
+ * and then frees the buffer.
+ */
+static void iucv_tasklet_handler(unsigned long ignored)
+{
+	typedef void iucv_irq_fn(struct iucv_irq_data *);
+	static iucv_irq_fn *irq_fn[] = {
+		[0x01] = iucv_path_pending,
+		[0x02] = iucv_path_complete,
+		[0x03] = iucv_path_severed,
+		[0x04] = iucv_path_quiesced,
+		[0x05] = iucv_path_resumed,
+		[0x06] = iucv_message_complete,
+		[0x07] = iucv_message_complete,
+		[0x08] = iucv_message_pending,
+		[0x09] = iucv_message_pending,
+	};
+	struct iucv_work *p;
+
+	/* Serialize tasklet, iucv_path_sever and iucv_path_connect. */
+	spin_lock(&iucv_table_lock);
+	iucv_tasklet_cpu = smp_processor_id();
+
+	spin_lock_irq(&iucv_work_lock);
+	while (!list_empty(&iucv_work_queue)) {
+		p = list_entry(iucv_work_queue.next, struct iucv_work, list);
+		list_del_init(&p->list);
+		spin_unlock_irq(&iucv_work_lock);
+		irq_fn[p->data.iptype](&p->data);
+		kfree(p);
+		spin_lock_irq(&iucv_work_lock);
+	}
+	spin_unlock_irq(&iucv_work_lock);
+
+	iucv_tasklet_cpu = -1;
+	spin_unlock(&iucv_table_lock);
+}
+
+/**
+ * iucv_external_interrupt
+ * @code: irq code
+ *
+ * Handles external interrupts coming in from CP.
+ * Places the interrupt buffer on a queue and schedules iucv_tasklet_handler().
+ */
+static void iucv_external_interrupt(u16 code)
+{
+	struct iucv_irq_data *p;
+	struct iucv_work *work;
+
+	p = percpu_ptr(iucv_irq_data, smp_processor_id());
+	if (p->ippathid >= iucv_max_pathid) {
+		printk(KERN_WARNING "iucv_do_int: Got interrupt with "
+		       "pathid %d > max_connections (%ld)\n",
+		       p->ippathid, iucv_max_pathid - 1);
+		iucv_sever_pathid(p->ippathid, iucv_error_no_listener);
+		return;
+	}
+	if (p->iptype  < 0x01 || p->iptype > 0x09) {
+		printk(KERN_ERR "iucv_do_int: unknown iucv interrupt\n");
+		return;
+	}
+	work = kmalloc(sizeof(struct iucv_work), GFP_ATOMIC);
+	if (!work) {
+		printk(KERN_WARNING "iucv_external_interrupt: out of memory\n");
+		return;
+	}
+	memcpy(&work->data, p, sizeof(work->data));
+	spin_lock(&iucv_work_lock);
+	list_add_tail(&work->list, &iucv_work_queue);
+	spin_unlock(&iucv_work_lock);
+	tasklet_schedule(&iucv_tasklet);
+}
+
+/**
+ * iucv_init
+ *
+ * Allocates and initializes various data structures.
+ */
+static int iucv_init(void)
+{
+	int rc;
+
+	if (!MACHINE_IS_VM) {
+		rc = -EPROTONOSUPPORT;
+		goto out;
+	}
+	rc = iucv_query_maxconn();
+	if (rc)
+		goto out;
+	rc = register_external_interrupt (0x4000, iucv_external_interrupt);
+	if (rc)
+		goto out;
+	rc = bus_register(&iucv_bus);
+	if (rc)
+		goto out_int;
+	iucv_root = s390_root_dev_register("iucv");
+	if (IS_ERR(iucv_root)) {
+		rc = PTR_ERR(iucv_root);
+		goto out_bus;
+	}
+	/* Note: GFP_DMA used used to get memory below 2G */
+	iucv_irq_data = percpu_alloc(sizeof(struct iucv_irq_data),
+				     GFP_KERNEL|GFP_DMA);
+	if (!iucv_irq_data) {
+		rc = -ENOMEM;
+		goto out_root;
+	}
+	/* Allocate parameter blocks. */
+	iucv_param = percpu_alloc(sizeof(union iucv_param),
+				  GFP_KERNEL|GFP_DMA);
+	if (!iucv_param) {
+		rc = -ENOMEM;
+		goto out_extint;
+	}
+	register_hotcpu_notifier(&iucv_cpu_notifier);
+	ASCEBC(iucv_error_no_listener, 16);
+	ASCEBC(iucv_error_no_memory, 16);
+	ASCEBC(iucv_error_pathid, 16);
+	iucv_available = 1;
+	return 0;
+
+out_extint:
+	percpu_free(iucv_irq_data);
+out_root:
+	s390_root_dev_unregister(iucv_root);
+out_bus:
+	bus_unregister(&iucv_bus);
+out_int:
+	unregister_external_interrupt(0x4000, iucv_external_interrupt);
+out:
+	return rc;
+}
+
+/**
+ * iucv_exit
+ *
+ * Frees everything allocated from iucv_init.
+ */
+static void iucv_exit(void)
+{
+	struct iucv_work *p, *n;
+
+	spin_lock_irq(&iucv_work_lock);
+	list_for_each_entry_safe(p, n, &iucv_work_queue, list)
+		kfree(p);
+	spin_unlock_irq(&iucv_work_lock);
+	unregister_hotcpu_notifier(&iucv_cpu_notifier);
+	percpu_free(iucv_param);
+	percpu_free(iucv_irq_data);
+	s390_root_dev_unregister(iucv_root);
+	bus_unregister(&iucv_bus);
+	unregister_external_interrupt(0x4000, iucv_external_interrupt);
+}
+
+subsys_initcall(iucv_init);
+module_exit(iucv_exit);
+
+/**
+ * Export all public stuff
+ */
+EXPORT_SYMBOL (iucv_bus);
+EXPORT_SYMBOL (iucv_root);
+EXPORT_SYMBOL (iucv_register);
+EXPORT_SYMBOL (iucv_unregister);
+EXPORT_SYMBOL (iucv_path_accept);
+EXPORT_SYMBOL (iucv_path_connect);
+EXPORT_SYMBOL (iucv_path_quiesce);
+EXPORT_SYMBOL (iucv_path_sever);
+EXPORT_SYMBOL (iucv_message_purge);
+EXPORT_SYMBOL (iucv_message_receive);
+EXPORT_SYMBOL (iucv_message_reject);
+EXPORT_SYMBOL (iucv_message_reply);
+EXPORT_SYMBOL (iucv_message_send);
+EXPORT_SYMBOL (iucv_message_send2way);
+
+MODULE_AUTHOR("(C) 2001 IBM Corp. by Fritz Elfert (felfert@millenux.com)");
+MODULE_DESCRIPTION("Linux for S/390 IUCV lowlevel driver");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From eac3731bd04c7131478722a3c148b78774553116 Mon Sep 17 00:00:00 2001
From: Jennifer Hunt <jenhunt@us.ibm.com>
Date: Thu, 8 Feb 2007 13:51:54 -0800
Subject: [S390]: Add AF_IUCV socket support

From: Jennifer Hunt <jenhunt@us.ibm.com>

This patch adds AF_IUCV socket support.

Signed-off-by: Frank Pavlic <fpavlic@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/s390/defconfig        |    1 +
 include/linux/net.h        |    2 +-
 include/linux/socket.h     |    4 +-
 include/net/iucv/af_iucv.h |  106 +++++
 net/iucv/Kconfig           |    8 +
 net/iucv/Makefile          |    1 +
 net/iucv/af_iucv.c         | 1077 ++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 1197 insertions(+), 2 deletions(-)
 create mode 100644 include/net/iucv/af_iucv.h
 create mode 100644 net/iucv/af_iucv.c

(limited to 'include')

diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 46f2d4578e6b..1406400bf3ea 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -180,6 +180,7 @@ CONFIG_XFRM=y
 # CONFIG_XFRM_SUB_POLICY is not set
 CONFIG_NET_KEY=y
 CONFIG_IUCV=m
+CONFIG_AFIUCV=m
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 # CONFIG_IP_ADVANCED_ROUTER is not set
diff --git a/include/linux/net.h b/include/linux/net.h
index f28d8a2e2c91..4db21e63d8d2 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -24,7 +24,7 @@
 struct poll_table_struct;
 struct inode;
 
-#define NPROTO		32		/* should be enough for now..	*/
+#define NPROTO		33		/* should be enough for now..	*/
 
 #define SYS_SOCKET	1		/* sys_socket(2)		*/
 #define SYS_BIND	2		/* sys_bind(2)			*/
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 92cd38efad7f..fcd35a210e7f 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -187,7 +187,8 @@ struct ucred {
 #define AF_LLC		26	/* Linux LLC			*/
 #define AF_TIPC		30	/* TIPC sockets			*/
 #define AF_BLUETOOTH	31	/* Bluetooth sockets 		*/
-#define AF_MAX		32	/* For now.. */
+#define AF_IUCV		32	/* IUCV sockets			*/
+#define AF_MAX		33	/* For now.. */
 
 /* Protocol families, same as address families. */
 #define PF_UNSPEC	AF_UNSPEC
@@ -220,6 +221,7 @@ struct ucred {
 #define PF_LLC		AF_LLC
 #define PF_TIPC		AF_TIPC
 #define PF_BLUETOOTH	AF_BLUETOOTH
+#define PF_IUCV		AF_IUCV
 #define PF_MAX		AF_MAX
 
 /* Maximum queue length specifiable by listen.  */
diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h
new file mode 100644
index 000000000000..04d1abb72d25
--- /dev/null
+++ b/include/net/iucv/af_iucv.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2006 IBM Corporation
+ * IUCV protocol stack for Linux on zSeries
+ * Version 1.0
+ * Author(s): Jennifer Hunt <jenhunt@us.ibm.com>
+ *
+ */
+
+#ifndef __AFIUCV_H
+#define __AFIUCV_H
+
+#include <asm/types.h>
+#include <asm/byteorder.h>
+#include <linux/list.h>
+#include <linux/poll.h>
+#include <linux/socket.h>
+
+#ifndef AF_IUCV
+#define AF_IUCV		32
+#define PF_IUCV		AF_IUCV
+#endif
+
+/* Connection and socket states */
+enum {
+	IUCV_CONNECTED = 1,
+	IUCV_OPEN,
+	IUCV_BOUND,
+	IUCV_LISTEN,
+	IUCV_SEVERED,
+	IUCV_DISCONN,
+	IUCV_CLOSED
+};
+
+#define IUCV_QUEUELEN_DEFAULT	65535
+#define IUCV_CONN_TIMEOUT	(HZ * 40)
+#define IUCV_DISCONN_TIMEOUT	(HZ * 2)
+#define IUCV_CONN_IDLE_TIMEOUT	(HZ * 60)
+#define IUCV_BUFSIZE_DEFAULT	32768
+
+/* IUCV socket address */
+struct sockaddr_iucv {
+	sa_family_t	siucv_family;
+	unsigned short	siucv_port;		/* Reserved */
+	unsigned int	siucv_addr;		/* Reserved */
+	char		siucv_nodeid[8];	/* Reserved */
+	char		siucv_user_id[8];	/* Guest User Id */
+	char		siucv_name[8];		/* Application Name */
+};
+
+
+/* Common socket structures and functions */
+
+#define iucv_sk(__sk) ((struct iucv_sock *) __sk)
+
+struct iucv_sock {
+	struct sock		sk;
+	char			src_user_id[8];
+	char			src_name[8];
+	char			dst_user_id[8];
+	char			dst_name[8];
+	struct list_head	accept_q;
+	struct sock		*parent;
+	struct iucv_path	*path;
+	struct sk_buff_head	send_skb_q;
+	unsigned int		send_tag;
+};
+
+struct iucv_sock_list {
+	struct hlist_head head;
+	rwlock_t	  lock;
+	atomic_t	  autobind_name;
+};
+
+static void iucv_sock_destruct(struct sock *sk);
+static void iucv_sock_cleanup_listen(struct sock *parent);
+static void iucv_sock_kill(struct sock *sk);
+static void iucv_sock_close(struct sock *sk);
+static int  iucv_sock_create(struct socket *sock, int proto);
+static int  iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
+			int addr_len);
+static int  iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
+			      int alen, int flags);
+static int  iucv_sock_listen(struct socket *sock, int backlog);
+static int  iucv_sock_accept(struct socket *sock, struct socket *newsock,
+			     int flags);
+static int  iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
+			      int *len, int peer);
+static int  iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
+			      struct msghdr *msg, size_t len);
+static int  iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
+			      struct msghdr *msg, size_t len, int flags);
+unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
+			    poll_table *wait);
+static int iucv_sock_release(struct socket *sock);
+static int iucv_sock_shutdown(struct socket *sock, int how);
+
+void iucv_sock_link(struct iucv_sock_list *l, struct sock *s);
+void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s);
+int  iucv_sock_wait_state(struct sock *sk, int state, int state2,
+			  unsigned long timeo);
+int  iucv_sock_wait_cnt(struct sock *sk, unsigned long timeo);
+void iucv_accept_enqueue(struct sock *parent, struct sock *sk);
+void iucv_accept_unlink(struct sock *sk);
+struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock);
+
+#endif /* __IUCV_H */
diff --git a/net/iucv/Kconfig b/net/iucv/Kconfig
index 45ecf0b62d52..f8fcc3d10327 100644
--- a/net/iucv/Kconfig
+++ b/net/iucv/Kconfig
@@ -5,3 +5,11 @@ config IUCV
 	  Select this option if you want to use inter-user communication under
 	  VM or VIF sockets. If you run on z/VM, say "Y" to enable a fast
 	  communication link between VM guests.
+
+config AFIUCV
+	tristate "AF_IUCV support (VM only)"
+	depends on IUCV
+	help
+	  Select this option if you want to use inter-user communication under
+	  VM or VIF sockets. If you run on z/VM, say "Y" to enable a fast
+	  communication link between VM guests.
diff --git a/net/iucv/Makefile b/net/iucv/Makefile
index 875941720d6a..7bfdc8532675 100644
--- a/net/iucv/Makefile
+++ b/net/iucv/Makefile
@@ -3,3 +3,4 @@
 #
 
 obj-$(CONFIG_IUCV)	+= iucv.o
+obj-$(CONFIG_AFIUCV)	+= af_iucv.o
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
new file mode 100644
index 000000000000..acc94214bde6
--- /dev/null
+++ b/net/iucv/af_iucv.c
@@ -0,0 +1,1077 @@
+/*
+ *  linux/net/iucv/af_iucv.c
+ *
+ *  IUCV protocol stack for Linux on zSeries
+ *
+ *  Copyright 2006 IBM Corporation
+ *
+ *  Author(s):	Jennifer Hunt <jenhunt@us.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <net/sock.h>
+#include <asm/ebcdic.h>
+#include <asm/cpcmd.h>
+#include <linux/kmod.h>
+
+#include <net/iucv/iucv.h>
+#include <net/iucv/af_iucv.h>
+
+#define CONFIG_IUCV_SOCK_DEBUG 1
+
+#define IPRMDATA 0x80
+#define VERSION "1.0"
+
+static char iucv_userid[80];
+
+static struct proto_ops iucv_sock_ops;
+
+static struct proto iucv_proto = {
+	.name		= "AF_IUCV",
+	.owner		= THIS_MODULE,
+	.obj_size	= sizeof(struct iucv_sock),
+};
+
+/* Call Back functions */
+static void iucv_callback_rx(struct iucv_path *, struct iucv_message *);
+static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *);
+static void iucv_callback_connack(struct iucv_path *, u8 ipuser[16]);
+static int iucv_callback_connreq(struct iucv_path *, u8 ipvmid[8], u8 ipuser[16]);
+static void iucv_callback_connrej(struct iucv_path *, u8 ipuser[16]);
+
+static struct iucv_sock_list iucv_sk_list = {
+	.lock = RW_LOCK_UNLOCKED,
+	.autobind_name = ATOMIC_INIT(0)
+};
+
+static struct iucv_handler af_iucv_handler = {
+	.path_pending	  = iucv_callback_connreq,
+	.path_complete	  = iucv_callback_connack,
+	.path_severed	  = iucv_callback_connrej,
+	.message_pending  = iucv_callback_rx,
+	.message_complete = iucv_callback_txdone
+};
+
+static inline void high_nmcpy(unsigned char *dst, char *src)
+{
+       memcpy(dst, src, 8);
+}
+
+static inline void low_nmcpy(unsigned char *dst, char *src)
+{
+       memcpy(&dst[8], src, 8);
+}
+
+/* Timers */
+static void iucv_sock_timeout(unsigned long arg)
+{
+	struct sock *sk = (struct sock *)arg;
+
+	bh_lock_sock(sk);
+	sk->sk_err = ETIMEDOUT;
+	sk->sk_state_change(sk);
+	bh_unlock_sock(sk);
+
+	iucv_sock_kill(sk);
+	sock_put(sk);
+}
+
+static void iucv_sock_clear_timer(struct sock *sk)
+{
+	sk_stop_timer(sk, &sk->sk_timer);
+}
+
+static void iucv_sock_init_timer(struct sock *sk)
+{
+	init_timer(&sk->sk_timer);
+	sk->sk_timer.function = iucv_sock_timeout;
+	sk->sk_timer.data = (unsigned long)sk;
+}
+
+static struct sock *__iucv_get_sock_by_name(char *nm)
+{
+	struct sock *sk;
+	struct hlist_node *node;
+
+	sk_for_each(sk, node, &iucv_sk_list.head)
+		if (!memcmp(&iucv_sk(sk)->src_name, nm, 8))
+			return sk;
+
+	return NULL;
+}
+
+static void iucv_sock_destruct(struct sock *sk)
+{
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_write_queue);
+}
+
+/* Cleanup Listen */
+static void iucv_sock_cleanup_listen(struct sock *parent)
+{
+	struct sock *sk;
+
+	/* Close non-accepted connections */
+	while ((sk = iucv_accept_dequeue(parent, NULL))) {
+		iucv_sock_close(sk);
+		iucv_sock_kill(sk);
+	}
+
+	parent->sk_state = IUCV_CLOSED;
+	sock_set_flag(parent, SOCK_ZAPPED);
+}
+
+/* Kill socket */
+static void iucv_sock_kill(struct sock *sk)
+{
+	if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
+		return;
+
+	iucv_sock_unlink(&iucv_sk_list, sk);
+	sock_set_flag(sk, SOCK_DEAD);
+	sock_put(sk);
+}
+
+/* Close an IUCV socket */
+static void iucv_sock_close(struct sock *sk)
+{
+	unsigned char user_data[16];
+	struct iucv_sock *iucv = iucv_sk(sk);
+	int err;
+
+	iucv_sock_clear_timer(sk);
+	lock_sock(sk);
+
+	switch(sk->sk_state) {
+	case IUCV_LISTEN:
+		iucv_sock_cleanup_listen(sk);
+		break;
+
+	case IUCV_CONNECTED:
+	case IUCV_DISCONN:
+		err = 0;
+		if (iucv->path) {
+			low_nmcpy(user_data, iucv->src_name);
+			high_nmcpy(user_data, iucv->dst_name);
+			ASCEBC(user_data, sizeof(user_data));
+			err = iucv_path_sever(iucv->path, user_data);
+			iucv_path_free(iucv->path);
+			iucv->path = NULL;
+		}
+
+		sk->sk_state = IUCV_CLOSED;
+		sk->sk_state_change(sk);
+		sk->sk_err = ECONNRESET;
+		sk->sk_state_change(sk);
+
+		skb_queue_purge(&iucv->send_skb_q);
+
+		sock_set_flag(sk, SOCK_ZAPPED);
+		break;
+
+	default:
+		sock_set_flag(sk, SOCK_ZAPPED);
+		break;
+	};
+
+	release_sock(sk);
+	iucv_sock_kill(sk);
+}
+
+static void iucv_sock_init(struct sock *sk, struct sock *parent)
+{
+	if (parent)
+		sk->sk_type = parent->sk_type;
+}
+
+static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
+{
+	struct sock *sk;
+
+	sk = sk_alloc(PF_IUCV, prio, &iucv_proto, 1);
+	if (!sk)
+		return NULL;
+
+	sock_init_data(sock, sk);
+	INIT_LIST_HEAD(&iucv_sk(sk)->accept_q);
+	skb_queue_head_init(&iucv_sk(sk)->send_skb_q);
+	iucv_sk(sk)->send_tag = 0;
+
+	sk->sk_destruct = iucv_sock_destruct;
+	sk->sk_sndtimeo = IUCV_CONN_TIMEOUT;
+	sk->sk_allocation = GFP_DMA;
+
+	sock_reset_flag(sk, SOCK_ZAPPED);
+
+	sk->sk_protocol = proto;
+	sk->sk_state	= IUCV_OPEN;
+
+	iucv_sock_init_timer(sk);
+
+	iucv_sock_link(&iucv_sk_list, sk);
+	return sk;
+}
+
+/* Create an IUCV socket */
+static int iucv_sock_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+
+	if (sock->type != SOCK_STREAM)
+		return -ESOCKTNOSUPPORT;
+
+	sock->state = SS_UNCONNECTED;
+	sock->ops = &iucv_sock_ops;
+
+	sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL);
+	if (!sk)
+		return -ENOMEM;
+
+	iucv_sock_init(sk, NULL);
+
+	return 0;
+}
+
+void iucv_sock_link(struct iucv_sock_list *l, struct sock *sk)
+{
+	write_lock_bh(&l->lock);
+	sk_add_node(sk, &l->head);
+	write_unlock_bh(&l->lock);
+}
+
+void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *sk)
+{
+	write_lock_bh(&l->lock);
+	sk_del_node_init(sk);
+	write_unlock_bh(&l->lock);
+}
+
+void iucv_accept_enqueue(struct sock *parent, struct sock *sk)
+{
+	sock_hold(sk);
+	list_add_tail(&iucv_sk(sk)->accept_q, &iucv_sk(parent)->accept_q);
+	iucv_sk(sk)->parent = parent;
+	parent->sk_ack_backlog++;
+}
+
+void iucv_accept_unlink(struct sock *sk)
+{
+	list_del_init(&iucv_sk(sk)->accept_q);
+	iucv_sk(sk)->parent->sk_ack_backlog--;
+	iucv_sk(sk)->parent = NULL;
+	sock_put(sk);
+}
+
+struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock)
+{
+	struct iucv_sock *isk, *n;
+	struct sock *sk;
+
+	list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q){
+		sk = (struct sock *) isk;
+		lock_sock(sk);
+
+		if (sk->sk_state == IUCV_CLOSED) {
+			release_sock(sk);
+			iucv_accept_unlink(sk);
+			continue;
+		}
+
+		if (sk->sk_state == IUCV_CONNECTED ||
+		    sk->sk_state == IUCV_SEVERED ||
+		    !newsock) {
+			iucv_accept_unlink(sk);
+			if (newsock)
+				sock_graft(sk, newsock);
+
+			if (sk->sk_state == IUCV_SEVERED)
+				sk->sk_state = IUCV_DISCONN;
+
+			release_sock(sk);
+			return sk;
+		}
+
+		release_sock(sk);
+	}
+	return NULL;
+}
+
+int iucv_sock_wait_state(struct sock *sk, int state, int state2,
+			 unsigned long timeo)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	int err = 0;
+
+	add_wait_queue(sk->sk_sleep, &wait);
+	while (sk->sk_state != state && sk->sk_state != state2) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (!timeo) {
+			err = -EAGAIN;
+			break;
+		}
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeo);
+			break;
+		}
+
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+
+		err = sock_error(sk);
+		if (err)
+			break;
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+	return err;
+}
+
+/* Bind an unbound socket */
+static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
+			  int addr_len)
+{
+	struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr;
+	struct sock *sk = sock->sk;
+	struct iucv_sock *iucv;
+	int err;
+
+	/* Verify the input sockaddr */
+	if (!addr || addr->sa_family != AF_IUCV)
+		return -EINVAL;
+
+	lock_sock(sk);
+	if (sk->sk_state != IUCV_OPEN) {
+		err = -EBADFD;
+		goto done;
+	}
+
+	write_lock_bh(&iucv_sk_list.lock);
+
+	iucv = iucv_sk(sk);
+	if (__iucv_get_sock_by_name(sa->siucv_name)) {
+		err = -EADDRINUSE;
+		goto done_unlock;
+	}
+	if (iucv->path) {
+		err = 0;
+		goto done_unlock;
+	}
+
+	/* Bind the socket */
+	memcpy(iucv->src_name, sa->siucv_name, 8);
+
+	/* Copy the user id */
+	memcpy(iucv->src_user_id, iucv_userid, 8);
+	sk->sk_state = IUCV_BOUND;
+	err = 0;
+
+done_unlock:
+	/* Release the socket list lock */
+	write_unlock_bh(&iucv_sk_list.lock);
+done:
+	release_sock(sk);
+	return err;
+}
+
+/* Automatically bind an unbound socket */
+static int iucv_sock_autobind(struct sock *sk)
+{
+	struct iucv_sock *iucv = iucv_sk(sk);
+	char query_buffer[80];
+	char name[12];
+	int err = 0;
+
+	/* Set the userid and name */
+	cpcmd("QUERY USERID", query_buffer, sizeof(query_buffer), &err);
+	if (unlikely(err))
+		return -EPROTO;
+
+	memcpy(iucv->src_user_id, query_buffer, 8);
+
+	write_lock_bh(&iucv_sk_list.lock);
+
+	sprintf(name, "%08x", atomic_inc_return(&iucv_sk_list.autobind_name));
+	while (__iucv_get_sock_by_name(name)) {
+		sprintf(name, "%08x",
+			atomic_inc_return(&iucv_sk_list.autobind_name));
+	}
+
+	write_unlock_bh(&iucv_sk_list.lock);
+
+	memcpy(&iucv->src_name, name, 8);
+
+	return err;
+}
+
+/* Connect an unconnected socket */
+static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
+			     int alen, int flags)
+{
+	struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr;
+	struct sock *sk = sock->sk;
+	struct iucv_sock *iucv;
+	unsigned char user_data[16];
+	int err;
+
+	if (addr->sa_family != AF_IUCV || alen < sizeof(struct sockaddr_iucv))
+		return -EINVAL;
+
+	if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND)
+		return -EBADFD;
+
+	if (sk->sk_type != SOCK_STREAM)
+		return -EINVAL;
+
+	iucv = iucv_sk(sk);
+
+	if (sk->sk_state == IUCV_OPEN) {
+		err = iucv_sock_autobind(sk);
+		if (unlikely(err))
+			return err;
+	}
+
+	lock_sock(sk);
+
+	/* Set the destination information */
+	memcpy(iucv_sk(sk)->dst_user_id, sa->siucv_user_id, 8);
+	memcpy(iucv_sk(sk)->dst_name, sa->siucv_name, 8);
+
+	high_nmcpy(user_data, sa->siucv_name);
+	low_nmcpy(user_data, iucv_sk(sk)->src_name);
+	ASCEBC(user_data, sizeof(user_data));
+
+	iucv = iucv_sk(sk);
+	/* Create path. */
+	iucv->path = iucv_path_alloc(IUCV_QUEUELEN_DEFAULT,
+				     IPRMDATA, GFP_KERNEL);
+	err = iucv_path_connect(iucv->path, &af_iucv_handler,
+				sa->siucv_user_id, NULL, user_data, sk);
+	if (err) {
+		iucv_path_free(iucv->path);
+		iucv->path = NULL;
+		err = -ECONNREFUSED;
+		goto done;
+	}
+
+	if (sk->sk_state != IUCV_CONNECTED) {
+		err = iucv_sock_wait_state(sk, IUCV_CONNECTED, IUCV_DISCONN,
+				sock_sndtimeo(sk, flags & O_NONBLOCK));
+	}
+
+	if (sk->sk_state == IUCV_DISCONN) {
+		release_sock(sk);
+		return -ECONNREFUSED;
+	}
+done:
+	release_sock(sk);
+	return err;
+}
+
+/* Move a socket into listening state. */
+static int iucv_sock_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+	int err;
+
+	lock_sock(sk);
+
+	err = -EINVAL;
+	if (sk->sk_state != IUCV_BOUND || sock->type != SOCK_STREAM)
+		goto done;
+
+	sk->sk_max_ack_backlog = backlog;
+	sk->sk_ack_backlog = 0;
+	sk->sk_state = IUCV_LISTEN;
+	err = 0;
+
+done:
+	release_sock(sk);
+	return err;
+}
+
+/* Accept a pending connection */
+static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
+			    int flags)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct sock *sk = sock->sk, *nsk;
+	long timeo;
+	int err = 0;
+
+	lock_sock(sk);
+
+	if (sk->sk_state != IUCV_LISTEN) {
+		err = -EBADFD;
+		goto done;
+	}
+
+	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+
+	/* Wait for an incoming connection */
+	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	while (!(nsk = iucv_accept_dequeue(sk, newsock))){
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!timeo) {
+			err = -EAGAIN;
+			break;
+		}
+
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+
+		if (sk->sk_state != IUCV_LISTEN) {
+			err = -EBADFD;
+			break;
+		}
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeo);
+			break;
+		}
+	}
+
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+
+	if (err)
+		goto done;
+
+	newsock->state = SS_CONNECTED;
+
+done:
+	release_sock(sk);
+	return err;
+}
+
+static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
+			     int *len, int peer)
+{
+	struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr;
+	struct sock *sk = sock->sk;
+
+	addr->sa_family = AF_IUCV;
+	*len = sizeof(struct sockaddr_iucv);
+
+	if (peer) {
+		memcpy(siucv->siucv_user_id, iucv_sk(sk)->dst_user_id, 8);
+		memcpy(siucv->siucv_name, &iucv_sk(sk)->dst_name, 8);
+	} else {
+		memcpy(siucv->siucv_user_id, iucv_sk(sk)->src_user_id, 8);
+		memcpy(siucv->siucv_name, iucv_sk(sk)->src_name, 8);
+	}
+	memset(&siucv->siucv_port, 0, sizeof(siucv->siucv_port));
+	memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr));
+	memset(siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid));
+
+	return 0;
+}
+
+static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
+			     struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct iucv_sock *iucv = iucv_sk(sk);
+	struct sk_buff *skb;
+	struct iucv_message txmsg;
+	int err;
+
+	err = sock_error(sk);
+	if (err)
+		return err;
+
+	if (msg->msg_flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	lock_sock(sk);
+
+	if (sk->sk_shutdown & SEND_SHUTDOWN) {
+		err = -EPIPE;
+		goto out;
+	}
+
+	if (sk->sk_state == IUCV_CONNECTED){
+		if(!(skb = sock_alloc_send_skb(sk, len,
+				       msg->msg_flags & MSG_DONTWAIT,
+				       &err)))
+			return err;
+
+		if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)){
+			err = -EFAULT;
+			goto fail;
+		}
+
+		txmsg.class = 0;
+		txmsg.tag = iucv->send_tag++;
+		memcpy(skb->cb, &txmsg.tag, 4);
+		skb_queue_tail(&iucv->send_skb_q, skb);
+		err = iucv_message_send(iucv->path, &txmsg, 0, 0,
+					(void *) skb->data, skb->len);
+		if (err) {
+			if (err == 3)
+				printk(KERN_ERR "AF_IUCV msg limit exceeded\n");
+			skb_unlink(skb, &iucv->send_skb_q);
+			err = -EPIPE;
+			goto fail;
+		}
+
+	} else {
+		err = -ENOTCONN;
+		goto out;
+	}
+
+	release_sock(sk);
+	return len;
+
+fail:
+	kfree_skb(skb);
+out:
+	release_sock(sk);
+	return err;
+}
+
+static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
+			     struct msghdr *msg, size_t len, int flags)
+{
+	int noblock = flags & MSG_DONTWAIT;
+	struct sock *sk = sock->sk;
+	int target, copied = 0;
+	struct sk_buff *skb;
+	int err = 0;
+
+	if (flags & (MSG_OOB))
+		return -EOPNOTSUPP;
+
+	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
+
+	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	if (!skb) {
+		if (sk->sk_shutdown & RCV_SHUTDOWN)
+			return 0;
+		return err;
+	}
+
+	copied = min_t(unsigned int, skb->len, len);
+
+	if (memcpy_toiovec(msg->msg_iov, skb->data, copied)) {
+		skb_queue_head(&sk->sk_receive_queue, skb);
+		if (copied == 0)
+			return -EFAULT;
+	}
+
+	len -= copied;
+
+	/* Mark read part of skb as used */
+	if (!(flags & MSG_PEEK)) {
+		skb_pull(skb, copied);
+
+		if (skb->len) {
+			skb_queue_head(&sk->sk_receive_queue, skb);
+			goto done;
+		}
+
+		kfree_skb(skb);
+	} else
+		skb_queue_head(&sk->sk_receive_queue, skb);
+
+done:
+	return err ? : copied;
+}
+
+static inline unsigned int iucv_accept_poll(struct sock *parent)
+{
+	struct iucv_sock *isk, *n;
+	struct sock *sk;
+
+	list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q){
+		sk = (struct sock *) isk;
+
+		if (sk->sk_state == IUCV_CONNECTED)
+			return POLLIN | POLLRDNORM;
+	}
+
+	return 0;
+}
+
+unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
+			    poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	unsigned int mask = 0;
+
+	poll_wait(file, sk->sk_sleep, wait);
+
+	if (sk->sk_state == IUCV_LISTEN)
+		return iucv_accept_poll(sk);
+
+	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+		mask |= POLLERR;
+
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		mask |= POLLRDHUP;
+
+	if (sk->sk_shutdown == SHUTDOWN_MASK)
+		mask |= POLLHUP;
+
+	if (!skb_queue_empty(&sk->sk_receive_queue) ||
+			(sk->sk_shutdown & RCV_SHUTDOWN))
+		mask |= POLLIN | POLLRDNORM;
+
+	if (sk->sk_state == IUCV_CLOSED)
+		mask |= POLLHUP;
+
+	if (sock_writeable(sk))
+		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+	else
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+	return mask;
+}
+
+static int iucv_sock_shutdown(struct socket *sock, int how)
+{
+	struct sock *sk = sock->sk;
+	struct iucv_sock *iucv = iucv_sk(sk);
+	struct iucv_message txmsg;
+	int err = 0;
+	u8 prmmsg[8] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01};
+
+	how++;
+
+	if ((how & ~SHUTDOWN_MASK) || !how)
+		return -EINVAL;
+
+	lock_sock(sk);
+	switch(sk->sk_state) {
+	case IUCV_CLOSED:
+		err = -ENOTCONN;
+		goto fail;
+
+	default:
+		sk->sk_shutdown |= how;
+		break;
+	}
+
+	if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) {
+		txmsg.class = 0;
+		txmsg.tag = 0;
+		err = iucv_message_send(iucv->path, &txmsg, IUCV_IPRMDATA, 0,
+					(void *) prmmsg, 8);
+		if (err) {
+			switch(err) {
+			case 1:
+				err = -ENOTCONN;
+				break;
+			case 2:
+				err = -ECONNRESET;
+				break;
+			default:
+				err = -ENOTCONN;
+				break;
+			}
+		}
+	}
+
+	if (how == RCV_SHUTDOWN || how == SHUTDOWN_MASK) {
+		err = iucv_path_quiesce(iucv_sk(sk)->path, NULL);
+		if (err)
+			err = -ENOTCONN;
+
+		skb_queue_purge(&sk->sk_receive_queue);
+	}
+
+	/* Wake up anyone sleeping in poll */
+	sk->sk_state_change(sk);
+
+fail:
+	release_sock(sk);
+	return err;
+}
+
+static int iucv_sock_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	if (!sk)
+		return 0;
+
+	iucv_sock_close(sk);
+
+	/* Unregister with IUCV base support */
+	if (iucv_sk(sk)->path) {
+		iucv_path_sever(iucv_sk(sk)->path, NULL);
+		iucv_path_free(iucv_sk(sk)->path);
+		iucv_sk(sk)->path = NULL;
+	}
+
+	if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime){
+		lock_sock(sk);
+		err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0,
+					   sk->sk_lingertime);
+		release_sock(sk);
+	}
+
+	sock_orphan(sk);
+	iucv_sock_kill(sk);
+	return err;
+}
+
+/* Callback wrappers - called from iucv base support */
+static int iucv_callback_connreq(struct iucv_path *path,
+				 u8 ipvmid[8], u8 ipuser[16])
+{
+	unsigned char user_data[16];
+	unsigned char nuser_data[16];
+	unsigned char src_name[8];
+	struct hlist_node *node;
+	struct sock *sk, *nsk;
+	struct iucv_sock *iucv, *niucv;
+	int err;
+
+	memcpy(src_name, ipuser, 8);
+	EBCASC(src_name, 8);
+	/* Find out if this path belongs to af_iucv. */
+	read_lock(&iucv_sk_list.lock);
+	iucv = NULL;
+	sk_for_each(sk, node, &iucv_sk_list.head)
+		if (sk->sk_state == IUCV_LISTEN &&
+		    !memcmp(&iucv_sk(sk)->src_name, src_name, 8)) {
+			/*
+			 * Found a listening socket with
+			 * src_name == ipuser[0-7].
+			 */
+			iucv = iucv_sk(sk);
+			break;
+		}
+	read_unlock(&iucv_sk_list.lock);
+	if (!iucv)
+		/* No socket found, not one of our paths. */
+		return -EINVAL;
+
+	bh_lock_sock(sk);
+
+	/* Check if parent socket is listening */
+	low_nmcpy(user_data, iucv->src_name);
+	high_nmcpy(user_data, iucv->dst_name);
+	ASCEBC(user_data, sizeof(user_data));
+	if (sk->sk_state != IUCV_LISTEN) {
+		err = iucv_path_sever(path, user_data);
+		goto fail;
+	}
+
+	/* Check for backlog size */
+	if (sk_acceptq_is_full(sk)) {
+		err = iucv_path_sever(path, user_data);
+		goto fail;
+	}
+
+	/* Create the new socket */
+	nsk = iucv_sock_alloc(NULL, SOCK_STREAM, GFP_ATOMIC);
+	if (!nsk){
+		err = iucv_path_sever(path, user_data);
+		goto fail;
+	}
+
+	niucv = iucv_sk(nsk);
+	iucv_sock_init(nsk, sk);
+
+	/* Set the new iucv_sock */
+	memcpy(niucv->dst_name, ipuser + 8, 8);
+	EBCASC(niucv->dst_name, 8);
+	memcpy(niucv->dst_user_id, ipvmid, 8);
+	memcpy(niucv->src_name, iucv->src_name, 8);
+	memcpy(niucv->src_user_id, iucv->src_user_id, 8);
+	niucv->path = path;
+
+	/* Call iucv_accept */
+	high_nmcpy(nuser_data, ipuser + 8);
+	memcpy(nuser_data + 8, niucv->src_name, 8);
+	ASCEBC(nuser_data + 8, 8);
+
+	path->msglim = IUCV_QUEUELEN_DEFAULT;
+	err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk);
+	if (err){
+		err = iucv_path_sever(path, user_data);
+		goto fail;
+	}
+
+	iucv_accept_enqueue(sk, nsk);
+
+	/* Wake up accept */
+	nsk->sk_state = IUCV_CONNECTED;
+	sk->sk_data_ready(sk, 1);
+	err = 0;
+fail:
+	bh_unlock_sock(sk);
+	return 0;
+}
+
+static void iucv_callback_connack(struct iucv_path *path, u8 ipuser[16])
+{
+	struct sock *sk = path->private;
+
+	sk->sk_state = IUCV_CONNECTED;
+	sk->sk_state_change(sk);
+}
+
+static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
+{
+	struct sock *sk = path->private;
+	struct sk_buff *skb;
+	int rc;
+
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		return;
+
+	skb = alloc_skb(msg->length, GFP_ATOMIC | GFP_DMA);
+	if (!skb) {
+		iucv_message_reject(path, msg);
+		return;
+	}
+
+	if (msg->flags & IPRMDATA) {
+		skb->data = NULL;
+		skb->len = 0;
+	} else {
+		rc = iucv_message_receive(path, msg, 0, skb->data,
+					  msg->length, NULL);
+		if (rc) {
+			kfree_skb(skb);
+			return;
+		}
+
+		skb->h.raw = skb->data;
+		skb->nh.raw = skb->data;
+		skb->len = msg->length;
+	}
+
+	if (sock_queue_rcv_skb(sk, skb))
+		kfree_skb(skb);
+}
+
+static void iucv_callback_txdone(struct iucv_path *path,
+				 struct iucv_message *msg)
+{
+	struct sock *sk = path->private;
+	struct sk_buff *this;
+	struct sk_buff_head *list = &iucv_sk(sk)->send_skb_q;
+	struct sk_buff *list_skb = list->next;
+	unsigned long flags;
+
+	spin_lock_irqsave(&list->lock, flags);
+
+	do {
+		this = list_skb;
+		list_skb = list_skb->next;
+	} while (memcmp(&msg->tag, this->cb, 4));
+
+	spin_unlock_irqrestore(&list->lock, flags);
+
+	skb_unlink(this, &iucv_sk(sk)->send_skb_q);
+	kfree_skb(this);
+}
+
+static void iucv_callback_connrej(struct iucv_path *path, u8 ipuser[16])
+{
+	struct sock *sk = path->private;
+
+	if (!list_empty(&iucv_sk(sk)->accept_q))
+		sk->sk_state = IUCV_SEVERED;
+	else
+		sk->sk_state = IUCV_DISCONN;
+
+	sk->sk_state_change(sk);
+}
+
+static struct proto_ops iucv_sock_ops = {
+	.family		= PF_IUCV,
+	.owner		= THIS_MODULE,
+	.release	= iucv_sock_release,
+	.bind		= iucv_sock_bind,
+	.connect	= iucv_sock_connect,
+	.listen		= iucv_sock_listen,
+	.accept		= iucv_sock_accept,
+	.getname	= iucv_sock_getname,
+	.sendmsg	= iucv_sock_sendmsg,
+	.recvmsg	= iucv_sock_recvmsg,
+	.poll		= iucv_sock_poll,
+	.ioctl		= sock_no_ioctl,
+	.mmap		= sock_no_mmap,
+	.socketpair	= sock_no_socketpair,
+	.shutdown	= iucv_sock_shutdown,
+	.setsockopt	= sock_no_setsockopt,
+	.getsockopt	= sock_no_getsockopt
+};
+
+static struct net_proto_family iucv_sock_family_ops = {
+	.family	= AF_IUCV,
+	.owner	= THIS_MODULE,
+	.create	= iucv_sock_create,
+};
+
+static int afiucv_init(void)
+{
+	int err;
+
+	if (!MACHINE_IS_VM) {
+		printk(KERN_ERR "AF_IUCV connection needs VM as base\n");
+		err = -EPROTONOSUPPORT;
+		goto out;
+	}
+	cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err);
+	if (unlikely(err)) {
+		printk(KERN_ERR "AF_IUCV needs the VM userid\n");
+		err = -EPROTONOSUPPORT;
+		goto out;
+	}
+
+	err = iucv_register(&af_iucv_handler, 0);
+	if (err)
+		goto out;
+	err = proto_register(&iucv_proto, 0);
+	if (err)
+		goto out_iucv;
+	err = sock_register(&iucv_sock_family_ops);
+	if (err)
+		goto out_proto;
+	printk(KERN_INFO "AF_IUCV lowlevel driver initialized\n");
+	return 0;
+
+out_proto:
+	proto_unregister(&iucv_proto);
+out_iucv:
+	iucv_unregister(&af_iucv_handler, 0);
+out:
+	return err;
+}
+
+static void __exit afiucv_exit(void)
+{
+	sock_unregister(PF_IUCV);
+	proto_unregister(&iucv_proto);
+	iucv_unregister(&af_iucv_handler, 0);
+
+	printk(KERN_INFO "AF_IUCV lowlevel driver unloaded\n");
+}
+
+module_init(afiucv_init);
+module_exit(afiucv_exit);
+
+MODULE_AUTHOR("Jennifer Hunt <jenhunt@us.ibm.com>");
+MODULE_DESCRIPTION("IUCV Sockets ver " VERSION);
+MODULE_VERSION(VERSION);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_IUCV);
-- 
cgit v1.2.3


From dbca9b2750e3b1ee6f56a616160ccfc12e8b161f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 8 Feb 2007 14:16:46 -0800
Subject: [NET]: change layout of ehash table

ehash table layout is currently this one :

First half of this table is used by sockets not in TIME_WAIT state
Second half of it is used by sockets in TIME_WAIT state.

This is non optimal because of for a given hash or socket, the two chain heads
are located in separate cache lines.
Moreover the locks of the second half are never used.

If instead of this halving, we use two list heads in inet_ehash_bucket instead
of only one, we probably can avoid one cache miss, and reduce ram usage,
particularly if sizeof(rwlock_t) is big (various CONFIG_DEBUG_SPINLOCK,
CONFIG_DEBUG_LOCK_ALLOC settings). So we still halves the table but we keep
together related chains to speedup lookups and socket state change.

In this patch I did not try to align struct inet_ehash_bucket, but a future
patch could try to make this structure have a convenient size (a power of two
or a multiple of L1_CACHE_SIZE).
I guess rwlock will just vanish as soon as RCU is plugged into ehash :) , so
maybe we dont need to scratch our heads to align the bucket...

Note : In case struct inet_ehash_bucket is not a power of two, we could
probably change alloc_large_system_hash() (in case it use __get_free_pages())
to free the unused space. It currently allocates a big zone, but the last
quarter of it could be freed. Again, this should be a temporary 'problem'.

Patch tested on ipv4 tcp only, but should be OK for IPV6 and DCCP.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_hashtables.h | 10 +++++-----
 net/dccp/proto.c              |  4 ++--
 net/ipv4/inet_diag.c          |  2 +-
 net/ipv4/inet_hashtables.c    |  2 +-
 net/ipv4/inet_timewait_sock.c |  4 ++--
 net/ipv4/tcp.c                |  7 ++++---
 net/ipv4/tcp_ipv4.c           |  4 ++--
 net/ipv6/inet6_hashtables.c   |  4 ++--
 8 files changed, 19 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 34cc76e3ddb4..d27ee8c0da3f 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -34,12 +34,13 @@
 #include <asm/byteorder.h>
 
 /* This is for all connections with a full identity, no wildcards.
- * New scheme, half the table is for TIME_WAIT, the other half is
- * for the rest.  I'll experiment with dynamic table growth later.
+ * One chain is dedicated to TIME_WAIT sockets.
+ * I'll experiment with dynamic table growth later.
  */
 struct inet_ehash_bucket {
 	rwlock_t	  lock;
 	struct hlist_head chain;
+	struct hlist_head twchain;
 };
 
 /* There are a few simple rules, which allow for local port reuse by
@@ -97,8 +98,7 @@ struct inet_hashinfo {
 	 *
 	 *          TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
 	 *
-	 * First half of the table is for sockets not in TIME_WAIT, second half
-	 * is for TIME_WAIT sockets only.
+	 * TIME_WAIT sockets use a separate chain (twchain).
 	 */
 	struct inet_ehash_bucket	*ehash;
 
@@ -369,7 +369,7 @@ static inline struct sock *
 	}
 
 	/* Must check for a TIME_WAIT'er before going to listener hash. */
-	sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
+	sk_for_each(sk, node, &head->twchain) {
 		if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
 			goto hit;
 	}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 63b3fa20e14b..48438565d70f 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1024,7 +1024,6 @@ static int __init dccp_init(void)
 	do {
 		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
 					sizeof(struct inet_ehash_bucket);
-		dccp_hashinfo.ehash_size >>= 1;
 		while (dccp_hashinfo.ehash_size &
 		       (dccp_hashinfo.ehash_size - 1))
 			dccp_hashinfo.ehash_size--;
@@ -1037,9 +1036,10 @@ static int __init dccp_init(void)
 		goto out_free_bind_bucket_cachep;
 	}
 
-	for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
+	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
 		rwlock_init(&dccp_hashinfo.ehash[i].lock);
 		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
+		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
 	}
 
 	bhash_order = ehash_order;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 9cd53addb784..8aa7d51e6881 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -778,7 +778,7 @@ next_normal:
 			struct inet_timewait_sock *tw;
 
 			inet_twsk_for_each(tw, node,
-				    &hashinfo->ehash[i + hashinfo->ehash_size].chain) {
+				    &head->twchain) {
 
 				if (num < s_num)
 					goto next_dying;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 8c79c8a4ea5c..150ace18dc75 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -212,7 +212,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 	write_lock(&head->lock);
 
 	/* Check TIME-WAIT sockets first. */
-	sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
+	sk_for_each(sk2, node, &head->twchain) {
 		tw = inet_twsk(sk2);
 
 		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 9f414e35c488..a73cf93cee36 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -78,8 +78,8 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	if (__sk_del_node_init(sk))
 		sock_prot_dec_use(sk->sk_prot);
 
-	/* Step 3: Hash TW into TIMEWAIT half of established hash table. */
-	inet_twsk_add_node(tw, &(ehead + hashinfo->ehash_size)->chain);
+	/* Step 3: Hash TW into TIMEWAIT chain. */
+	inet_twsk_add_node(tw, &ehead->twchain);
 	atomic_inc(&tw->tw_refcnt);
 
 	write_unlock(&ehead->lock);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b67e0dd743be..5bd43d7294fd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2415,10 +2415,11 @@ void __init tcp_init(void)
 					&tcp_hashinfo.ehash_size,
 					NULL,
 					0);
-	tcp_hashinfo.ehash_size = (1 << tcp_hashinfo.ehash_size) >> 1;
-	for (i = 0; i < (tcp_hashinfo.ehash_size << 1); i++) {
+	tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size;
+	for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
 		rwlock_init(&tcp_hashinfo.ehash[i].lock);
 		INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain);
+		INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain);
 	}
 
 	tcp_hashinfo.bhash =
@@ -2475,7 +2476,7 @@ void __init tcp_init(void)
 
 	printk(KERN_INFO "TCP: Hash tables configured "
 	       "(established %d bind %d)\n",
-	       tcp_hashinfo.ehash_size << 1, tcp_hashinfo.bhash_size);
+	       tcp_hashinfo.ehash_size, tcp_hashinfo.bhash_size);
 
 	tcp_register_congestion_control(&tcp_reno);
 }
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 383e4b52dbde..f51d6404c61c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2051,7 +2051,7 @@ static void *established_get_first(struct seq_file *seq)
 		}
 		st->state = TCP_SEQ_STATE_TIME_WAIT;
 		inet_twsk_for_each(tw, node,
-				   &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
+				   &tcp_hashinfo.ehash[st->bucket].twchain) {
 			if (tw->tw_family != st->family) {
 				continue;
 			}
@@ -2107,7 +2107,7 @@ get_tw:
 	}
 
 	st->state = TCP_SEQ_STATE_TIME_WAIT;
-	tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
+	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
 	goto get_tw;
 found:
 	cur = sk;
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b7e5bae0e347..e61116949bee 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -79,7 +79,7 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo,
 			goto hit; /* You sunk my battleship! */
 	}
 	/* Must check for a TIME_WAIT'er before going to listener hash. */
-	sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
+	sk_for_each(sk, node, &head->twchain) {
 		const struct inet_timewait_sock *tw = inet_twsk(sk);
 
 		if(*((__portpair *)&(tw->tw_dport))	== ports	&&
@@ -183,7 +183,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 	write_lock(&head->lock);
 
 	/* Check TIME-WAIT sockets first. */
-	sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
+	sk_for_each(sk2, node, &head->twchain) {
 		const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2);
 
 		tw = inet_twsk(sk2);
-- 
cgit v1.2.3


From 42c05f6e6e3d57495054a4cae35850b3f7d1c343 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 8 Feb 2007 16:01:09 -0800
Subject: [ATM]: atmarp.h needs to always include linux/types.h

To provide the __be* types, even for userspace includes.

Reported by Andrew Walrond.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atmarp.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/atmarp.h b/include/linux/atmarp.h
index ee108f9e9cb7..231f4bdec730 100644
--- a/include/linux/atmarp.h
+++ b/include/linux/atmarp.h
@@ -6,9 +6,7 @@
 #ifndef _LINUX_ATMARP_H
 #define _LINUX_ATMARP_H
 
-#ifdef __KERNEL__
 #include <linux/types.h>
-#endif
 #include <linux/atmapi.h>
 #include <linux/atmioc.h>
 
-- 
cgit v1.2.3