diff options
author | Hannes Frederic Sowa <hannes@stressinduktion.org> | 2014-11-05 00:23:04 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-11-05 22:01:21 -0500 |
commit | e5a2c899957659cd1a9f789bc462f9c0b35f5150 (patch) | |
tree | 4c9b8a6f89d961daf9ada9f5ee95f8b371ce3a04 | |
parent | 2c99cd914d4fed9160d98849c9dd38034616768e (diff) |
fast_hash: avoid indirect function calls
By default the arch_fast_hash hashing function pointers are initialized
to jhash(2). If during boot-up a CPU with SSE4.2 is detected they get
updated to the CRC32 ones. This dispatching scheme incurs a function
pointer lookup and indirect call for every hashing operation.
rhashtable as a user of arch_fast_hash e.g. stores pointers to hashing
functions in its structure, too, causing two indirect branches per
hashing operation.
Using alternative_call we can get away with one of those indirect branches.
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | arch/x86/include/asm/hash.h | 51 | ||||
-rw-r--r-- | arch/x86/lib/hash.c | 29 | ||||
-rw-r--r-- | include/asm-generic/hash.h | 36 | ||||
-rw-r--r-- | include/linux/hash.h | 34 | ||||
-rw-r--r-- | lib/Makefile | 2 | ||||
-rw-r--r-- | lib/hash.c | 39 |
6 files changed, 98 insertions, 93 deletions
diff --git a/arch/x86/include/asm/hash.h b/arch/x86/include/asm/hash.h index e8c58f88b1d4..a881d784f044 100644 --- a/arch/x86/include/asm/hash.h +++ b/arch/x86/include/asm/hash.h @@ -1,7 +1,48 @@ -#ifndef _ASM_X86_HASH_H -#define _ASM_X86_HASH_H +#ifndef __ASM_X86_HASH_H +#define __ASM_X86_HASH_H -struct fast_hash_ops; -extern void setup_arch_fast_hash(struct fast_hash_ops *ops); +#include <linux/cpufeature.h> +#include <asm/alternative.h> -#endif /* _ASM_X86_HASH_H */ +u32 __intel_crc4_2_hash(const void *data, u32 len, u32 seed); +u32 __intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed); + +/* + * non-inline versions of jhash so gcc does not need to generate + * duplicate code in every object file + */ +u32 __jhash(const void *data, u32 len, u32 seed); +u32 __jhash2(const u32 *data, u32 len, u32 seed); + +/* + * for documentation of these functions please look into + * <include/asm-generic/hash.h> + */ + +static inline u32 arch_fast_hash(const void *data, u32 len, u32 seed) +{ + u32 hash; + + alternative_call(__jhash, __intel_crc4_2_hash, X86_FEATURE_XMM4_2, +#ifdef CONFIG_X86_64 + "=a" (hash), "D" (data), "S" (len), "d" (seed)); +#else + "=a" (hash), "a" (data), "d" (len), "c" (seed)); +#endif + return hash; +} + +static inline u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed) +{ + u32 hash; + + alternative_call(__jhash2, __intel_crc4_2_hash2, X86_FEATURE_XMM4_2, +#ifdef CONFIG_X86_64 + "=a" (hash), "D" (data), "S" (len), "d" (seed)); +#else + "=a" (hash), "a" (data), "d" (len), "c" (seed)); +#endif + return hash; +} + +#endif /* __ASM_X86_HASH_H */ diff --git a/arch/x86/lib/hash.c b/arch/x86/lib/hash.c index ff4fa51a5b1f..e14327198835 100644 --- a/arch/x86/lib/hash.c +++ b/arch/x86/lib/hash.c @@ -31,13 +31,13 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <linux/hash.h> -#include <linux/init.h> - #include <asm/processor.h> #include <asm/cpufeature.h> #include <asm/hash.h> +#include <linux/hash.h> +#include <linux/jhash.h> + static inline u32 crc32_u32(u32 crc, u32 val) { #ifdef CONFIG_AS_CRC32 @@ -48,7 +48,7 @@ static inline u32 crc32_u32(u32 crc, u32 val) return crc; } -static u32 intel_crc4_2_hash(const void *data, u32 len, u32 seed) +u32 __intel_crc4_2_hash(const void *data, u32 len, u32 seed) { const u32 *p32 = (const u32 *) data; u32 i, tmp = 0; @@ -71,22 +71,27 @@ static u32 intel_crc4_2_hash(const void *data, u32 len, u32 seed) return seed; } +EXPORT_SYMBOL(__intel_crc4_2_hash); -static u32 intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed) +u32 __intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed) { - const u32 *p32 = (const u32 *) data; u32 i; for (i = 0; i < len; i++) - seed = crc32_u32(seed, *p32++); + seed = crc32_u32(seed, *data++); return seed; } +EXPORT_SYMBOL(__intel_crc4_2_hash2); -void __init setup_arch_fast_hash(struct fast_hash_ops *ops) +u32 __jhash(const void *data, u32 len, u32 seed) { - if (cpu_has_xmm4_2) { - ops->hash = intel_crc4_2_hash; - ops->hash2 = intel_crc4_2_hash2; - } + return jhash(data, len, seed); +} +EXPORT_SYMBOL(__jhash); + +u32 __jhash2(const u32 *data, u32 len, u32 seed) +{ + return jhash2(data, len, seed); } +EXPORT_SYMBOL(__jhash2); diff --git a/include/asm-generic/hash.h b/include/asm-generic/hash.h index b6312843dbd9..3c82760ff2a4 100644 --- a/include/asm-generic/hash.h +++ b/include/asm-generic/hash.h @@ -1,9 +1,41 @@ #ifndef __ASM_GENERIC_HASH_H #define __ASM_GENERIC_HASH_H -struct fast_hash_ops; -static inline void setup_arch_fast_hash(struct fast_hash_ops *ops) +#include <linux/jhash.h> + +/** + * arch_fast_hash - Caclulates a hash over a given buffer that can have + * arbitrary size. This function will eventually use an + * architecture-optimized hashing implementation if + * available, and trades off distribution for speed. + * + * @data: buffer to hash + * @len: length of buffer in bytes + * @seed: start seed + * + * Returns 32bit hash. + */ +static inline u32 arch_fast_hash(const void *data, u32 len, u32 seed) +{ + return jhash(data, len, seed); +} + +/** + * arch_fast_hash2 - Caclulates a hash over a given buffer that has a + * size that is of a multiple of 32bit words. This + * function will eventually use an architecture- + * optimized hashing implementation if available, + * and trades off distribution for speed. + * + * @data: buffer to hash (must be 32bit padded) + * @len: number of 32bit words + * @seed: start seed + * + * Returns 32bit hash. + */ +static inline u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed) { + return jhash2(data, len, seed); } #endif /* __ASM_GENERIC_HASH_H */ diff --git a/include/linux/hash.h b/include/linux/hash.h index d0494c399392..6e8fb028848c 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h @@ -84,38 +84,4 @@ static inline u32 hash32_ptr(const void *ptr) return (u32)val; } -struct fast_hash_ops { - u32 (*hash)(const void *data, u32 len, u32 seed); - u32 (*hash2)(const u32 *data, u32 len, u32 seed); -}; - -/** - * arch_fast_hash - Caclulates a hash over a given buffer that can have - * arbitrary size. This function will eventually use an - * architecture-optimized hashing implementation if - * available, and trades off distribution for speed. - * - * @data: buffer to hash - * @len: length of buffer in bytes - * @seed: start seed - * - * Returns 32bit hash. - */ -extern u32 arch_fast_hash(const void *data, u32 len, u32 seed); - -/** - * arch_fast_hash2 - Caclulates a hash over a given buffer that has a - * size that is of a multiple of 32bit words. This - * function will eventually use an architecture- - * optimized hashing implementation if available, - * and trades off distribution for speed. - * - * @data: buffer to hash (must be 32bit padded) - * @len: number of 32bit words - * @seed: start seed - * - * Returns 32bit hash. - */ -extern u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed); - #endif /* _LINUX_HASH_H */ diff --git a/lib/Makefile b/lib/Makefile index 7512dc978f18..04e53dd16070 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ - percpu-refcount.o percpu_ida.o hash.o rhashtable.o + percpu-refcount.o percpu_ida.o rhashtable.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o diff --git a/lib/hash.c b/lib/hash.c deleted file mode 100644 index fea973f4bd57..000000000000 --- a/lib/hash.c +++ /dev/null @@ -1,39 +0,0 @@ -/* General purpose hashing library - * - * That's a start of a kernel hashing library, which can be extended - * with further algorithms in future. arch_fast_hash{2,}() will - * eventually resolve to an architecture optimized implementation. - * - * Copyright 2013 Francesco Fusco <ffusco@redhat.com> - * Copyright 2013 Daniel Borkmann <dborkman@redhat.com> - * Copyright 2013 Thomas Graf <tgraf@redhat.com> - * Licensed under the GNU General Public License, version 2.0 (GPLv2) - */ - -#include <linux/jhash.h> -#include <linux/hash.h> -#include <linux/cache.h> - -static struct fast_hash_ops arch_hash_ops __read_mostly = { - .hash = jhash, - .hash2 = jhash2, -}; - -u32 arch_fast_hash(const void *data, u32 len, u32 seed) -{ - return arch_hash_ops.hash(data, len, seed); -} -EXPORT_SYMBOL_GPL(arch_fast_hash); - -u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed) -{ - return arch_hash_ops.hash2(data, len, seed); -} -EXPORT_SYMBOL_GPL(arch_fast_hash2); - -static int __init hashlib_init(void) -{ - setup_arch_fast_hash(&arch_hash_ops); - return 0; -} -early_initcall(hashlib_init); |