From 994fcca7f1c93f4e78008d5806c68067d3b75487 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 28 Aug 2019 00:10:54 +0100 Subject: siphash: add cryptographically secure PRF commit 2c956a60778cbb6a27e0c7a8a52a91378c90e1d1 upstream. SipHash is a 64-bit keyed hash function that is actually a cryptographically secure PRF, like HMAC. Except SipHash is super fast, and is meant to be used as a hashtable keyed lookup function, or as a general PRF for short input use cases, such as sequence numbers or RNG chaining. For the first usage: There are a variety of attacks known as "hashtable poisoning" in which an attacker forms some data such that the hash of that data will be the same, and then preceeds to fill up all entries of a hashbucket. This is a realistic and well-known denial-of-service vector. Currently hashtables use jhash, which is fast but not secure, and some kind of rotating key scheme (or none at all, which isn't good). SipHash is meant as a replacement for jhash in these cases. There are a modicum of places in the kernel that are vulnerable to hashtable poisoning attacks, either via userspace vectors or network vectors, and there's not a reliable mechanism inside the kernel at the moment to fix it. The first step toward fixing these issues is actually getting a secure primitive into the kernel for developers to use. Then we can, bit by bit, port things over to it as deemed appropriate. While SipHash is extremely fast for a cryptographically secure function, it is likely a bit slower than the insecure jhash, and so replacements will be evaluated on a case-by-case basis based on whether or not the difference in speed is negligible and whether or not the current jhash usage poses a real security risk. For the second usage: A few places in the kernel are using MD5 or SHA1 for creating secure sequence numbers, syn cookies, port numbers, or fast random numbers. SipHash is a faster and more fitting, and more secure replacement for MD5 in those situations. Replacing MD5 and SHA1 with SipHash for these uses is obvious and straight-forward, and so is submitted along with this patch series. There shouldn't be much of a debate over its efficacy. Dozens of languages are already using this internally for their hash tables and PRFs. Some of the BSDs already use this in their kernels. SipHash is a widely known high-speed solution to a widely known set of problems, and it's time we catch-up. Signed-off-by: Jason A. Donenfeld Reviewed-by: Jean-Philippe Aumasson Cc: Linus Torvalds Cc: Eric Biggers Cc: David Laight Cc: Eric Dumazet Signed-off-by: David S. Miller [bwh: Backported to 4.4 as dependency of commits df453700e8d8 "inet: switch IP ID generator to siphash" and 3c79107631db "netfilter: ctnetlink: don't use conntrack/expect object addresses as id": - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- Documentation/siphash.txt | 100 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 Documentation/siphash.txt (limited to 'Documentation') diff --git a/Documentation/siphash.txt b/Documentation/siphash.txt new file mode 100644 index 000000000000..e8e6ddbbaab4 --- /dev/null +++ b/Documentation/siphash.txt @@ -0,0 +1,100 @@ + SipHash - a short input PRF +----------------------------------------------- +Written by Jason A. Donenfeld + +SipHash is a cryptographically secure PRF -- a keyed hash function -- that +performs very well for short inputs, hence the name. It was designed by +cryptographers Daniel J. Bernstein and Jean-Philippe Aumasson. It is intended +as a replacement for some uses of: `jhash`, `md5_transform`, `sha_transform`, +and so forth. + +SipHash takes a secret key filled with randomly generated numbers and either +an input buffer or several input integers. It spits out an integer that is +indistinguishable from random. You may then use that integer as part of secure +sequence numbers, secure cookies, or mask it off for use in a hash table. + +1. Generating a key + +Keys should always be generated from a cryptographically secure source of +random numbers, either using get_random_bytes or get_random_once: + +siphash_key_t key; +get_random_bytes(&key, sizeof(key)); + +If you're not deriving your key from here, you're doing it wrong. + +2. Using the functions + +There are two variants of the function, one that takes a list of integers, and +one that takes a buffer: + +u64 siphash(const void *data, size_t len, const siphash_key_t *key); + +And: + +u64 siphash_1u64(u64, const siphash_key_t *key); +u64 siphash_2u64(u64, u64, const siphash_key_t *key); +u64 siphash_3u64(u64, u64, u64, const siphash_key_t *key); +u64 siphash_4u64(u64, u64, u64, u64, const siphash_key_t *key); +u64 siphash_1u32(u32, const siphash_key_t *key); +u64 siphash_2u32(u32, u32, const siphash_key_t *key); +u64 siphash_3u32(u32, u32, u32, const siphash_key_t *key); +u64 siphash_4u32(u32, u32, u32, u32, const siphash_key_t *key); + +If you pass the generic siphash function something of a constant length, it +will constant fold at compile-time and automatically choose one of the +optimized functions. + +3. Hashtable key function usage: + +struct some_hashtable { + DECLARE_HASHTABLE(hashtable, 8); + siphash_key_t key; +}; + +void init_hashtable(struct some_hashtable *table) +{ + get_random_bytes(&table->key, sizeof(table->key)); +} + +static inline hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input) +{ + return &table->hashtable[siphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)]; +} + +You may then iterate like usual over the returned hash bucket. + +4. Security + +SipHash has a very high security margin, with its 128-bit key. So long as the +key is kept secret, it is impossible for an attacker to guess the outputs of +the function, even if being able to observe many outputs, since 2^128 outputs +is significant. + +Linux implements the "2-4" variant of SipHash. + +5. Struct-passing Pitfalls + +Often times the XuY functions will not be large enough, and instead you'll +want to pass a pre-filled struct to siphash. When doing this, it's important +to always ensure the struct has no padding holes. The easiest way to do this +is to simply arrange the members of the struct in descending order of size, +and to use offsetendof() instead of sizeof() for getting the size. For +performance reasons, if possible, it's probably a good thing to align the +struct to the right boundary. Here's an example: + +const struct { + struct in6_addr saddr; + u32 counter; + u16 dport; +} __aligned(SIPHASH_ALIGNMENT) combined = { + .saddr = *(struct in6_addr *)saddr, + .counter = counter, + .dport = dport +}; +u64 h = siphash(&combined, offsetofend(typeof(combined), dport), &secret); + +6. Resources + +Read the SipHash paper if you're interested in learning more: +https://131002.net/siphash/siphash.pdf -- cgit v1.2.3 From 71b951c85b3b36480260a31419126b81f27db733 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 28 Aug 2019 00:11:00 +0100 Subject: siphash: implement HalfSipHash1-3 for hash tables commit 1ae2324f732c9c4e2fa4ebd885fa1001b70d52e1 upstream. HalfSipHash, or hsiphash, is a shortened version of SipHash, which generates 32-bit outputs using a weaker 64-bit key. It has *much* lower security margins, and shouldn't be used for anything too sensitive, but it could be used as a hashtable key function replacement, if the output is never exposed, and if the security requirement is not too high. The goal is to make this something that performance-critical jhash users would be willing to use. On 64-bit machines, HalfSipHash1-3 is slower than SipHash1-3, so we alias SipHash1-3 to HalfSipHash1-3 on those systems. 64-bit x86_64: [ 0.509409] test_siphash: SipHash2-4 cycles: 4049181 [ 0.510650] test_siphash: SipHash1-3 cycles: 2512884 [ 0.512205] test_siphash: HalfSipHash1-3 cycles: 3429920 [ 0.512904] test_siphash: JenkinsHash cycles: 978267 So, we map hsiphash() -> SipHash1-3 32-bit x86: [ 0.509868] test_siphash: SipHash2-4 cycles: 14812892 [ 0.513601] test_siphash: SipHash1-3 cycles: 9510710 [ 0.515263] test_siphash: HalfSipHash1-3 cycles: 3856157 [ 0.515952] test_siphash: JenkinsHash cycles: 1148567 So, we map hsiphash() -> HalfSipHash1-3 hsiphash() is roughly 3 times slower than jhash(), but comes with a considerable security improvement. Signed-off-by: Jason A. Donenfeld Reviewed-by: Jean-Philippe Aumasson Signed-off-by: David S. Miller [bwh: Backported to 4.4 to avoid regression for WireGuard with only half the siphash API present] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- Documentation/siphash.txt | 75 +++++++++++ include/linux/siphash.h | 57 +++++++- lib/siphash.c | 321 +++++++++++++++++++++++++++++++++++++++++++++- lib/test_siphash.c | 98 +++++++++++++- 4 files changed, 546 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/siphash.txt b/Documentation/siphash.txt index e8e6ddbbaab4..908d348ff777 100644 --- a/Documentation/siphash.txt +++ b/Documentation/siphash.txt @@ -98,3 +98,78 @@ u64 h = siphash(&combined, offsetofend(typeof(combined), dport), &secret); Read the SipHash paper if you're interested in learning more: https://131002.net/siphash/siphash.pdf + + +~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + +HalfSipHash - SipHash's insecure younger cousin +----------------------------------------------- +Written by Jason A. Donenfeld + +On the off-chance that SipHash is not fast enough for your needs, you might be +able to justify using HalfSipHash, a terrifying but potentially useful +possibility. HalfSipHash cuts SipHash's rounds down from "2-4" to "1-3" and, +even scarier, uses an easily brute-forcable 64-bit key (with a 32-bit output) +instead of SipHash's 128-bit key. However, this may appeal to some +high-performance `jhash` users. + +Danger! + +Do not ever use HalfSipHash except for as a hashtable key function, and only +then when you can be absolutely certain that the outputs will never be +transmitted out of the kernel. This is only remotely useful over `jhash` as a +means of mitigating hashtable flooding denial of service attacks. + +1. Generating a key + +Keys should always be generated from a cryptographically secure source of +random numbers, either using get_random_bytes or get_random_once: + +hsiphash_key_t key; +get_random_bytes(&key, sizeof(key)); + +If you're not deriving your key from here, you're doing it wrong. + +2. Using the functions + +There are two variants of the function, one that takes a list of integers, and +one that takes a buffer: + +u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key); + +And: + +u32 hsiphash_1u32(u32, const hsiphash_key_t *key); +u32 hsiphash_2u32(u32, u32, const hsiphash_key_t *key); +u32 hsiphash_3u32(u32, u32, u32, const hsiphash_key_t *key); +u32 hsiphash_4u32(u32, u32, u32, u32, const hsiphash_key_t *key); + +If you pass the generic hsiphash function something of a constant length, it +will constant fold at compile-time and automatically choose one of the +optimized functions. + +3. Hashtable key function usage: + +struct some_hashtable { + DECLARE_HASHTABLE(hashtable, 8); + hsiphash_key_t key; +}; + +void init_hashtable(struct some_hashtable *table) +{ + get_random_bytes(&table->key, sizeof(table->key)); +} + +static inline hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input) +{ + return &table->hashtable[hsiphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)]; +} + +You may then iterate like usual over the returned hash bucket. + +4. Performance + +HalfSipHash is roughly 3 times slower than JenkinsHash. For many replacements, +this will not be a problem, as the hashtable lookup isn't the bottleneck. And +in general, this is probably a good sacrifice to make for the security and DoS +resistance of HalfSipHash. diff --git a/include/linux/siphash.h b/include/linux/siphash.h index feeb29cd113e..fa7a6b9cedbf 100644 --- a/include/linux/siphash.h +++ b/include/linux/siphash.h @@ -5,7 +5,9 @@ * SipHash: a fast short-input PRF * https://131002.net/siphash/ * - * This implementation is specifically for SipHash2-4. + * This implementation is specifically for SipHash2-4 for a secure PRF + * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for + * hashtables. */ #ifndef _LINUX_SIPHASH_H @@ -82,4 +84,57 @@ static inline u64 siphash(const void *data, size_t len, return ___siphash_aligned(data, len, key); } +#define HSIPHASH_ALIGNMENT __alignof__(unsigned long) +typedef struct { + unsigned long key[2]; +} hsiphash_key_t; + +u32 __hsiphash_aligned(const void *data, size_t len, + const hsiphash_key_t *key); +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +u32 __hsiphash_unaligned(const void *data, size_t len, + const hsiphash_key_t *key); +#endif + +u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key); +u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key); +u32 hsiphash_3u32(const u32 a, const u32 b, const u32 c, + const hsiphash_key_t *key); +u32 hsiphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d, + const hsiphash_key_t *key); + +static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len, + const hsiphash_key_t *key) +{ + if (__builtin_constant_p(len) && len == 4) + return hsiphash_1u32(le32_to_cpu(data[0]), key); + if (__builtin_constant_p(len) && len == 8) + return hsiphash_2u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), + key); + if (__builtin_constant_p(len) && len == 12) + return hsiphash_3u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), + le32_to_cpu(data[2]), key); + if (__builtin_constant_p(len) && len == 16) + return hsiphash_4u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), + le32_to_cpu(data[2]), le32_to_cpu(data[3]), + key); + return __hsiphash_aligned(data, len, key); +} + +/** + * hsiphash - compute 32-bit hsiphash PRF value + * @data: buffer to hash + * @size: size of @data + * @key: the hsiphash key + */ +static inline u32 hsiphash(const void *data, size_t len, + const hsiphash_key_t *key) +{ +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) + return __hsiphash_unaligned(data, len, key); +#endif + return ___hsiphash_aligned(data, len, key); +} + #endif /* _LINUX_SIPHASH_H */ diff --git a/lib/siphash.c b/lib/siphash.c index c43cf406e71b..3ae58b4edad6 100644 --- a/lib/siphash.c +++ b/lib/siphash.c @@ -5,7 +5,9 @@ * SipHash: a fast short-input PRF * https://131002.net/siphash/ * - * This implementation is specifically for SipHash2-4. + * This implementation is specifically for SipHash2-4 for a secure PRF + * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for + * hashtables. */ #include @@ -230,3 +232,320 @@ u64 siphash_3u32(const u32 first, const u32 second, const u32 third, POSTAMBLE } EXPORT_SYMBOL(siphash_3u32); + +#if BITS_PER_LONG == 64 +/* Note that on 64-bit, we make HalfSipHash1-3 actually be SipHash1-3, for + * performance reasons. On 32-bit, below, we actually implement HalfSipHash1-3. + */ + +#define HSIPROUND SIPROUND +#define HPREAMBLE(len) PREAMBLE(len) +#define HPOSTAMBLE \ + v3 ^= b; \ + HSIPROUND; \ + v0 ^= b; \ + v2 ^= 0xff; \ + HSIPROUND; \ + HSIPROUND; \ + HSIPROUND; \ + return (v0 ^ v1) ^ (v2 ^ v3); + +u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) +{ + const u8 *end = data + len - (len % sizeof(u64)); + const u8 left = len & (sizeof(u64) - 1); + u64 m; + HPREAMBLE(len) + for (; data != end; data += sizeof(u64)) { + m = le64_to_cpup(data); + v3 ^= m; + HSIPROUND; + v0 ^= m; + } +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 + if (left) + b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & + bytemask_from_count(left))); +#else + switch (left) { + case 7: b |= ((u64)end[6]) << 48; + case 6: b |= ((u64)end[5]) << 40; + case 5: b |= ((u64)end[4]) << 32; + case 4: b |= le32_to_cpup(data); break; + case 3: b |= ((u64)end[2]) << 16; + case 2: b |= le16_to_cpup(data); break; + case 1: b |= end[0]; + } +#endif + HPOSTAMBLE +} +EXPORT_SYMBOL(__hsiphash_aligned); + +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +u32 __hsiphash_unaligned(const void *data, size_t len, + const hsiphash_key_t *key) +{ + const u8 *end = data + len - (len % sizeof(u64)); + const u8 left = len & (sizeof(u64) - 1); + u64 m; + HPREAMBLE(len) + for (; data != end; data += sizeof(u64)) { + m = get_unaligned_le64(data); + v3 ^= m; + HSIPROUND; + v0 ^= m; + } +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 + if (left) + b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & + bytemask_from_count(left))); +#else + switch (left) { + case 7: b |= ((u64)end[6]) << 48; + case 6: b |= ((u64)end[5]) << 40; + case 5: b |= ((u64)end[4]) << 32; + case 4: b |= get_unaligned_le32(end); break; + case 3: b |= ((u64)end[2]) << 16; + case 2: b |= get_unaligned_le16(end); break; + case 1: b |= end[0]; + } +#endif + HPOSTAMBLE +} +EXPORT_SYMBOL(__hsiphash_unaligned); +#endif + +/** + * hsiphash_1u32 - compute 64-bit hsiphash PRF value of a u32 + * @first: first u32 + * @key: the hsiphash key + */ +u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key) +{ + HPREAMBLE(4) + b |= first; + HPOSTAMBLE +} +EXPORT_SYMBOL(hsiphash_1u32); + +/** + * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32 + * @first: first u32 + * @second: second u32 + * @key: the hsiphash key + */ +u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key) +{ + u64 combined = (u64)second << 32 | first; + HPREAMBLE(8) + v3 ^= combined; + HSIPROUND; + v0 ^= combined; + HPOSTAMBLE +} +EXPORT_SYMBOL(hsiphash_2u32); + +/** + * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32 + * @first: first u32 + * @second: second u32 + * @third: third u32 + * @key: the hsiphash key + */ +u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third, + const hsiphash_key_t *key) +{ + u64 combined = (u64)second << 32 | first; + HPREAMBLE(12) + v3 ^= combined; + HSIPROUND; + v0 ^= combined; + b |= third; + HPOSTAMBLE +} +EXPORT_SYMBOL(hsiphash_3u32); + +/** + * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32 + * @first: first u32 + * @second: second u32 + * @third: third u32 + * @forth: forth u32 + * @key: the hsiphash key + */ +u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third, + const u32 forth, const hsiphash_key_t *key) +{ + u64 combined = (u64)second << 32 | first; + HPREAMBLE(16) + v3 ^= combined; + HSIPROUND; + v0 ^= combined; + combined = (u64)forth << 32 | third; + v3 ^= combined; + HSIPROUND; + v0 ^= combined; + HPOSTAMBLE +} +EXPORT_SYMBOL(hsiphash_4u32); +#else +#define HSIPROUND \ + do { \ + v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \ + v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \ + v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \ + v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \ + } while (0) + +#define HPREAMBLE(len) \ + u32 v0 = 0; \ + u32 v1 = 0; \ + u32 v2 = 0x6c796765U; \ + u32 v3 = 0x74656462U; \ + u32 b = ((u32)(len)) << 24; \ + v3 ^= key->key[1]; \ + v2 ^= key->key[0]; \ + v1 ^= key->key[1]; \ + v0 ^= key->key[0]; + +#define HPOSTAMBLE \ + v3 ^= b; \ + HSIPROUND; \ + v0 ^= b; \ + v2 ^= 0xff; \ + HSIPROUND; \ + HSIPROUND; \ + HSIPROUND; \ + return v1 ^ v3; + +u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) +{ + const u8 *end = data + len - (len % sizeof(u32)); + const u8 left = len & (sizeof(u32) - 1); + u32 m; + HPREAMBLE(len) + for (; data != end; data += sizeof(u32)) { + m = le32_to_cpup(data); + v3 ^= m; + HSIPROUND; + v0 ^= m; + } + switch (left) { + case 3: b |= ((u32)end[2]) << 16; + case 2: b |= le16_to_cpup(data); break; + case 1: b |= end[0]; + } + HPOSTAMBLE +} +EXPORT_SYMBOL(__hsiphash_aligned); + +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +u32 __hsiphash_unaligned(const void *data, size_t len, + const hsiphash_key_t *key) +{ + const u8 *end = data + len - (len % sizeof(u32)); + const u8 left = len & (sizeof(u32) - 1); + u32 m; + HPREAMBLE(len) + for (; data != end; data += sizeof(u32)) { + m = get_unaligned_le32(data); + v3 ^= m; + HSIPROUND; + v0 ^= m; + } + switch (left) { + case 3: b |= ((u32)end[2]) << 16; + case 2: b |= get_unaligned_le16(end); break; + case 1: b |= end[0]; + } + HPOSTAMBLE +} +EXPORT_SYMBOL(__hsiphash_unaligned); +#endif + +/** + * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32 + * @first: first u32 + * @key: the hsiphash key + */ +u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key) +{ + HPREAMBLE(4) + v3 ^= first; + HSIPROUND; + v0 ^= first; + HPOSTAMBLE +} +EXPORT_SYMBOL(hsiphash_1u32); + +/** + * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32 + * @first: first u32 + * @second: second u32 + * @key: the hsiphash key + */ +u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key) +{ + HPREAMBLE(8) + v3 ^= first; + HSIPROUND; + v0 ^= first; + v3 ^= second; + HSIPROUND; + v0 ^= second; + HPOSTAMBLE +} +EXPORT_SYMBOL(hsiphash_2u32); + +/** + * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32 + * @first: first u32 + * @second: second u32 + * @third: third u32 + * @key: the hsiphash key + */ +u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third, + const hsiphash_key_t *key) +{ + HPREAMBLE(12) + v3 ^= first; + HSIPROUND; + v0 ^= first; + v3 ^= second; + HSIPROUND; + v0 ^= second; + v3 ^= third; + HSIPROUND; + v0 ^= third; + HPOSTAMBLE +} +EXPORT_SYMBOL(hsiphash_3u32); + +/** + * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32 + * @first: first u32 + * @second: second u32 + * @third: third u32 + * @forth: forth u32 + * @key: the hsiphash key + */ +u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third, + const u32 forth, const hsiphash_key_t *key) +{ + HPREAMBLE(16) + v3 ^= first; + HSIPROUND; + v0 ^= first; + v3 ^= second; + HSIPROUND; + v0 ^= second; + v3 ^= third; + HSIPROUND; + v0 ^= third; + v3 ^= forth; + HSIPROUND; + v0 ^= forth; + HPOSTAMBLE +} +EXPORT_SYMBOL(hsiphash_4u32); +#endif diff --git a/lib/test_siphash.c b/lib/test_siphash.c index d972acfc15e4..a6d854d933bf 100644 --- a/lib/test_siphash.c +++ b/lib/test_siphash.c @@ -7,7 +7,9 @@ * SipHash: a fast short-input PRF * https://131002.net/siphash/ * - * This implementation is specifically for SipHash2-4. + * This implementation is specifically for SipHash2-4 for a secure PRF + * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for + * hashtables. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -18,8 +20,8 @@ #include #include -/* Test vectors taken from official reference source available at: - * https://131002.net/siphash/siphash24.c +/* Test vectors taken from reference source available at: + * https://github.com/veorq/SipHash */ static const siphash_key_t test_key_siphash = @@ -50,6 +52,64 @@ static const u64 test_vectors_siphash[64] = { 0x958a324ceb064572ULL }; +#if BITS_PER_LONG == 64 +static const hsiphash_key_t test_key_hsiphash = + {{ 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL }}; + +static const u32 test_vectors_hsiphash[64] = { + 0x050fc4dcU, 0x7d57ca93U, 0x4dc7d44dU, + 0xe7ddf7fbU, 0x88d38328U, 0x49533b67U, + 0xc59f22a7U, 0x9bb11140U, 0x8d299a8eU, + 0x6c063de4U, 0x92ff097fU, 0xf94dc352U, + 0x57b4d9a2U, 0x1229ffa7U, 0xc0f95d34U, + 0x2a519956U, 0x7d908b66U, 0x63dbd80cU, + 0xb473e63eU, 0x8d297d1cU, 0xa6cce040U, + 0x2b45f844U, 0xa320872eU, 0xdae6c123U, + 0x67349c8cU, 0x705b0979U, 0xca9913a5U, + 0x4ade3b35U, 0xef6cd00dU, 0x4ab1e1f4U, + 0x43c5e663U, 0x8c21d1bcU, 0x16a7b60dU, + 0x7a8ff9bfU, 0x1f2a753eU, 0xbf186b91U, + 0xada26206U, 0xa3c33057U, 0xae3a36a1U, + 0x7b108392U, 0x99e41531U, 0x3f1ad944U, + 0xc8138825U, 0xc28949a6U, 0xfaf8876bU, + 0x9f042196U, 0x68b1d623U, 0x8b5114fdU, + 0xdf074c46U, 0x12cc86b3U, 0x0a52098fU, + 0x9d292f9aU, 0xa2f41f12U, 0x43a71ed0U, + 0x73f0bce6U, 0x70a7e980U, 0x243c6d75U, + 0xfdb71513U, 0xa67d8a08U, 0xb7e8f148U, + 0xf7a644eeU, 0x0f1837f2U, 0x4b6694e0U, + 0xb7bbb3a8U +}; +#else +static const hsiphash_key_t test_key_hsiphash = + {{ 0x03020100U, 0x07060504U }}; + +static const u32 test_vectors_hsiphash[64] = { + 0x5814c896U, 0xe7e864caU, 0xbc4b0e30U, + 0x01539939U, 0x7e059ea6U, 0x88e3d89bU, + 0xa0080b65U, 0x9d38d9d6U, 0x577999b1U, + 0xc839caedU, 0xe4fa32cfU, 0x959246eeU, + 0x6b28096cU, 0x66dd9cd6U, 0x16658a7cU, + 0xd0257b04U, 0x8b31d501U, 0x2b1cd04bU, + 0x06712339U, 0x522aca67U, 0x911bb605U, + 0x90a65f0eU, 0xf826ef7bU, 0x62512debU, + 0x57150ad7U, 0x5d473507U, 0x1ec47442U, + 0xab64afd3U, 0x0a4100d0U, 0x6d2ce652U, + 0x2331b6a3U, 0x08d8791aU, 0xbc6dda8dU, + 0xe0f6c934U, 0xb0652033U, 0x9b9851ccU, + 0x7c46fb7fU, 0x732ba8cbU, 0xf142997aU, + 0xfcc9aa1bU, 0x05327eb2U, 0xe110131cU, + 0xf9e5e7c0U, 0xa7d708a6U, 0x11795ab1U, + 0x65671619U, 0x9f5fff91U, 0xd89c5267U, + 0x007783ebU, 0x95766243U, 0xab639262U, + 0x9c7e1390U, 0xc368dda6U, 0x38ddc455U, + 0xfa13d379U, 0x979ea4e8U, 0x53ecd77eU, + 0x2ee80657U, 0x33dbb66aU, 0xae3f0577U, + 0x88b4c4ccU, 0x3e7f480bU, 0x74c1ebf8U, + 0x87178304U +}; +#endif + static int __init siphash_test_init(void) { u8 in[64] __aligned(SIPHASH_ALIGNMENT); @@ -70,6 +130,16 @@ static int __init siphash_test_init(void) pr_info("siphash self-test unaligned %u: FAIL\n", i + 1); ret = -EINVAL; } + if (hsiphash(in, i, &test_key_hsiphash) != + test_vectors_hsiphash[i]) { + pr_info("hsiphash self-test aligned %u: FAIL\n", i + 1); + ret = -EINVAL; + } + if (hsiphash(in_unaligned + 1, i, &test_key_hsiphash) != + test_vectors_hsiphash[i]) { + pr_info("hsiphash self-test unaligned %u: FAIL\n", i + 1); + ret = -EINVAL; + } } if (siphash_1u64(0x0706050403020100ULL, &test_key_siphash) != test_vectors_siphash[8]) { @@ -115,6 +185,28 @@ static int __init siphash_test_init(void) pr_info("siphash self-test 4u32: FAIL\n"); ret = -EINVAL; } + if (hsiphash_1u32(0x03020100U, &test_key_hsiphash) != + test_vectors_hsiphash[4]) { + pr_info("hsiphash self-test 1u32: FAIL\n"); + ret = -EINVAL; + } + if (hsiphash_2u32(0x03020100U, 0x07060504U, &test_key_hsiphash) != + test_vectors_hsiphash[8]) { + pr_info("hsiphash self-test 2u32: FAIL\n"); + ret = -EINVAL; + } + if (hsiphash_3u32(0x03020100U, 0x07060504U, + 0x0b0a0908U, &test_key_hsiphash) != + test_vectors_hsiphash[12]) { + pr_info("hsiphash self-test 3u32: FAIL\n"); + ret = -EINVAL; + } + if (hsiphash_4u32(0x03020100U, 0x07060504U, + 0x0b0a0908U, 0x0f0e0d0cU, &test_key_hsiphash) != + test_vectors_hsiphash[16]) { + pr_info("hsiphash self-test 4u32: FAIL\n"); + ret = -EINVAL; + } if (!ret) pr_info("self-tests: pass\n"); return ret; -- cgit v1.2.3 From 721a2f80a4cbe486e0a20b64a4e85b9092f2a10e Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 19 Aug 2019 15:52:35 +0000 Subject: x86/CPU/AMD: Clear RDRAND CPUID bit on AMD family 15h/16h [ Upstream commit c49a0a80137c7ca7d6ced4c812c9e07a949f6f24 ] There have been reports of RDRAND issues after resuming from suspend on some AMD family 15h and family 16h systems. This issue stems from a BIOS not performing the proper steps during resume to ensure RDRAND continues to function properly. RDRAND support is indicated by CPUID Fn00000001_ECX[30]. This bit can be reset by clearing MSR C001_1004[62]. Any software that checks for RDRAND support using CPUID, including the kernel, will believe that RDRAND is not supported. Update the CPU initialization to clear the RDRAND CPUID bit for any family 15h and 16h processor that supports RDRAND. If it is known that the family 15h or family 16h system does not have an RDRAND resume issue or that the system will not be placed in suspend, the "rdrand=force" kernel parameter can be used to stop the clearing of the RDRAND CPUID bit. Additionally, update the suspend and resume path to save and restore the MSR C001_1004 value to ensure that the RDRAND CPUID setting remains in place after resuming from suspend. Note, that clearing the RDRAND CPUID bit does not prevent a processor that normally supports the RDRAND instruction from executing it. So any code that determined the support based on family and model won't #UD. Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov Cc: Andrew Cooper Cc: Andrew Morton Cc: Chen Yu Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: "linux-doc@vger.kernel.org" Cc: "linux-pm@vger.kernel.org" Cc: Nathan Chancellor Cc: Paolo Bonzini Cc: Pavel Machek Cc: "Rafael J. Wysocki" Cc: Cc: Thomas Gleixner Cc: "x86@kernel.org" Link: https://lkml.kernel.org/r/7543af91666f491547bd86cebb1e17c66824ab9f.1566229943.git.thomas.lendacky@amd.com [sl: adjust context in docs] Signed-off-by: Sasha Levin --- Documentation/kernel-parameters.txt | 7 +++ arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/amd.c | 66 ++++++++++++++++++++++++++++ arch/x86/power/cpu.c | 86 +++++++++++++++++++++++++++++++------ 4 files changed, 147 insertions(+), 13 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7a9fd54a0186..5b94c0bfba85 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3415,6 +3415,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Run specified binary instead of /init from the ramdisk, used for early userspace startup. See initrd. + rdrand= [X86] + force - Override the decision by the kernel to hide the + advertisement of RDRAND support (this affects + certain AMD processors because of buggy BIOS + support, specifically around the suspend/resume + path). + reboot= [KNL] Format (x86 or x86_64): [w[arm] | c[old] | h[ard] | s[oft] | g[pio]] \ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index d4f5b8209393..30183770132a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -311,6 +311,7 @@ #define MSR_AMD64_PATCH_LEVEL 0x0000008b #define MSR_AMD64_TSC_RATIO 0xc0000104 #define MSR_AMD64_NB_CFG 0xc001001f +#define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_PATCH_LOADER 0xc0010020 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 6f2483292de0..424d8a636615 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -684,6 +684,64 @@ static void init_amd_ln(struct cpuinfo_x86 *c) msr_set_bit(MSR_AMD64_DE_CFG, 31); } +static bool rdrand_force; + +static int __init rdrand_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "force")) + rdrand_force = true; + else + return -EINVAL; + + return 0; +} +early_param("rdrand", rdrand_cmdline); + +static void clear_rdrand_cpuid_bit(struct cpuinfo_x86 *c) +{ + /* + * Saving of the MSR used to hide the RDRAND support during + * suspend/resume is done by arch/x86/power/cpu.c, which is + * dependent on CONFIG_PM_SLEEP. + */ + if (!IS_ENABLED(CONFIG_PM_SLEEP)) + return; + + /* + * The nordrand option can clear X86_FEATURE_RDRAND, so check for + * RDRAND support using the CPUID function directly. + */ + if (!(cpuid_ecx(1) & BIT(30)) || rdrand_force) + return; + + msr_clear_bit(MSR_AMD64_CPUID_FN_1, 62); + + /* + * Verify that the CPUID change has occurred in case the kernel is + * running virtualized and the hypervisor doesn't support the MSR. + */ + if (cpuid_ecx(1) & BIT(30)) { + pr_info_once("BIOS may not properly restore RDRAND after suspend, but hypervisor does not support hiding RDRAND via CPUID.\n"); + return; + } + + clear_cpu_cap(c, X86_FEATURE_RDRAND); + pr_info_once("BIOS may not properly restore RDRAND after suspend, hiding RDRAND via CPUID. Use rdrand=force to reenable.\n"); +} + +static void init_amd_jg(struct cpuinfo_x86 *c) +{ + /* + * Some BIOS implementations do not restore proper RDRAND support + * across suspend and resume. Check on whether to hide the RDRAND + * instruction support via CPUID. + */ + clear_rdrand_cpuid_bit(c); +} + static void init_amd_bd(struct cpuinfo_x86 *c) { u64 value; @@ -711,6 +769,13 @@ static void init_amd_bd(struct cpuinfo_x86 *c) wrmsrl_safe(0xc0011021, value); } } + + /* + * Some BIOS implementations do not restore proper RDRAND support + * across suspend and resume. Check on whether to hide the RDRAND + * instruction support via CPUID. + */ + clear_rdrand_cpuid_bit(c); } static void init_amd_zn(struct cpuinfo_x86 *c) @@ -755,6 +820,7 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x10: init_amd_gh(c); break; case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; + case 0x16: init_amd_jg(c); break; case 0x17: init_amd_zn(c); break; } diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index d5f64996394a..2e5052b2d238 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -23,7 +24,7 @@ #include #include #include -#include +#include #ifdef CONFIG_X86_32 __visible unsigned long saved_context_ebx; @@ -347,15 +348,14 @@ static int __init bsp_pm_check_init(void) core_initcall(bsp_pm_check_init); -static int msr_init_context(const u32 *msr_id, const int total_num) +static int msr_build_context(const u32 *msr_id, const int num) { - int i = 0; + struct saved_msrs *saved_msrs = &saved_context.saved_msrs; struct saved_msr *msr_array; + int total_num; + int i, j; - if (saved_context.saved_msrs.array || saved_context.saved_msrs.num > 0) { - pr_err("x86/pm: MSR quirk already applied, please check your DMI match table.\n"); - return -EINVAL; - } + total_num = saved_msrs->num + num; msr_array = kmalloc_array(total_num, sizeof(struct saved_msr), GFP_KERNEL); if (!msr_array) { @@ -363,19 +363,30 @@ static int msr_init_context(const u32 *msr_id, const int total_num) return -ENOMEM; } - for (i = 0; i < total_num; i++) { - msr_array[i].info.msr_no = msr_id[i]; + if (saved_msrs->array) { + /* + * Multiple callbacks can invoke this function, so copy any + * MSR save requests from previous invocations. + */ + memcpy(msr_array, saved_msrs->array, + sizeof(struct saved_msr) * saved_msrs->num); + + kfree(saved_msrs->array); + } + + for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) { + msr_array[i].info.msr_no = msr_id[j]; msr_array[i].valid = false; msr_array[i].info.reg.q = 0; } - saved_context.saved_msrs.num = total_num; - saved_context.saved_msrs.array = msr_array; + saved_msrs->num = total_num; + saved_msrs->array = msr_array; return 0; } /* - * The following section is a quirk framework for problematic BIOSen: + * The following sections are a quirk framework for problematic BIOSen: * Sometimes MSRs are modified by the BIOSen after suspended to * RAM, this might cause unexpected behavior after wakeup. * Thus we save/restore these specified MSRs across suspend/resume @@ -390,7 +401,7 @@ static int msr_initialize_bdw(const struct dmi_system_id *d) u32 bdw_msr_id[] = { MSR_IA32_THERM_CONTROL }; pr_info("x86/pm: %s detected, MSR saving is needed during suspending.\n", d->ident); - return msr_init_context(bdw_msr_id, ARRAY_SIZE(bdw_msr_id)); + return msr_build_context(bdw_msr_id, ARRAY_SIZE(bdw_msr_id)); } static struct dmi_system_id msr_save_dmi_table[] = { @@ -405,9 +416,58 @@ static struct dmi_system_id msr_save_dmi_table[] = { {} }; +static int msr_save_cpuid_features(const struct x86_cpu_id *c) +{ + u32 cpuid_msr_id[] = { + MSR_AMD64_CPUID_FN_1, + }; + + pr_info("x86/pm: family %#hx cpu detected, MSR saving is needed during suspending.\n", + c->family); + + return msr_build_context(cpuid_msr_id, ARRAY_SIZE(cpuid_msr_id)); +} + +static const struct x86_cpu_id msr_save_cpu_table[] = { + { + .vendor = X86_VENDOR_AMD, + .family = 0x15, + .model = X86_MODEL_ANY, + .feature = X86_FEATURE_ANY, + .driver_data = (kernel_ulong_t)msr_save_cpuid_features, + }, + { + .vendor = X86_VENDOR_AMD, + .family = 0x16, + .model = X86_MODEL_ANY, + .feature = X86_FEATURE_ANY, + .driver_data = (kernel_ulong_t)msr_save_cpuid_features, + }, + {} +}; + +typedef int (*pm_cpu_match_t)(const struct x86_cpu_id *); +static int pm_cpu_check(const struct x86_cpu_id *c) +{ + const struct x86_cpu_id *m; + int ret = 0; + + m = x86_match_cpu(msr_save_cpu_table); + if (m) { + pm_cpu_match_t fn; + + fn = (pm_cpu_match_t)m->driver_data; + ret = fn(m); + } + + return ret; +} + static int pm_check_save_msr(void) { dmi_check_system(msr_save_dmi_table); + pm_cpu_check(msr_save_cpu_table); + return 0; } -- cgit v1.2.3