x86, mm: dont use non-temporal stores in pagecache accesses

Impact: standardize IO on cached ops On modern CPUs it is almost always a bad idea to use non-temporal stores, as the regression in this commit has shown it: 30d697f: x86: fix performance regression in write() syscall The kernel simply has no good information about whether using non-temporal stores is a good idea or not - and trying to add heuristics only increases complexity and inserts fragility. The regression on cached write()s took very long to be found - over two years. So dont take any chances and let the hardware decide how it makes use of its caches. The only exception is drivers/gpu/drm/i915/i915_gem.c: there were we are absolutely sure that another entity (the GPU) will pick up the dirty data immediately and that the CPU will not touch that data before the GPU will. Also, keep the _nocache() primitives to make it easier for people to experiment with these details. There may be more clear-cut cases where non-cached copies can be used, outside of filemap.c. Cc: Salman Qazi <sqazi@google.com> Cc: Nick Piggin <npiggin@suse.de> Cc: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Ingo Molnar <mingo@elte.hu> 2009-03-02 11:00:57 +0100
committer: Ingo Molnar <mingo@elte.hu> 2009-03-02 11:06:49 +0100
commit: f180053694b43d5714bf56cb95499a3c32ff155c (patch)
tree: 00286fcc88d2842629b039da4009a1332b3a1719 /arch/x86/include/asm
parent: 34754b69a6f87aa6aa2860525a82f12532f83afd (diff)
2 files changed, 9 insertions, 20 deletions
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index a0ba61386972..5e06259e90e5 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -157,7 +157,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
 }
 
 static __always_inline unsigned long __copy_from_user_nocache(void *to,
-		const void __user *from, unsigned long n, unsigned long total)
+				const void __user *from, unsigned long n)
 {
 	might_fault();
 	if (__builtin_constant_p(n)) {
@@ -180,7 +180,7 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to,
 
 static __always_inline unsigned long
 __copy_from_user_inatomic_nocache(void *to, const void __user *from,
-				  unsigned long n, unsigned long total)
+				  unsigned long n)
 {
        return __copy_from_user_ll_nocache_nozero(to, from, n);
 }
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index dcaa0404cf7b..8cc687326eb8 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -188,29 +188,18 @@ __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size)
 extern long __copy_user_nocache(void *dst, const void __user *src,
 				unsigned size, int zerorest);
 
-static inline int __copy_from_user_nocache(void *dst, const void __user *src,
-				   unsigned size, unsigned long total)
+static inline int
+__copy_from_user_nocache(void *dst, const void __user *src, unsigned size)
 {
 	might_sleep();
-	/*
-	 * In practice this limit means that large file write()s
-	 * which get chunked to 4K copies get handled via
-	 * non-temporal stores here. Smaller writes get handled
-	 * via regular __copy_from_user():
-	 */
-	if (likely(total >= PAGE_SIZE))
-		return __copy_user_nocache(dst, src, size, 1);
-	else
-		return __copy_from_user(dst, src, size);
+	return __copy_user_nocache(dst, src, size, 1);
 }
 
-static inline int __copy_from_user_inatomic_nocache(void *dst,
-	    const void __user *src, unsigned size, unsigned total)
+static inline int
+__copy_from_user_inatomic_nocache(void *dst, const void __user *src,
+				  unsigned size)
 {
-	if (likely(total >= PAGE_SIZE))
-		return __copy_user_nocache(dst, src, size, 0);
-	else
-		return __copy_from_user_inatomic(dst, src, size);
+	return __copy_user_nocache(dst, src, size, 0);
 }
 
 unsigned long
author	Ingo Molnar <mingo@elte.hu>	2009-03-02 11:00:57 +0100
committer	Ingo Molnar <mingo@elte.hu>	2009-03-02 11:06:49 +0100
commit	f180053694b43d5714bf56cb95499a3c32ff155c (patch)
tree	00286fcc88d2842629b039da4009a1332b3a1719 /arch/x86/include/asm
parent	34754b69a6f87aa6aa2860525a82f12532f83afd (diff)