path: root/mm
author    Greg Kroah-Hartman <gregkh@google.com>    2018-08-15 18:20:41 +0200
committer Greg Kroah-Hartman <gregkh@google.com>    2018-08-15 18:20:41 +0200
commit    f057ff937754efc42d56bee825187b2ce6c36958 (patch)
tree      a3e7e80d30998b70b74a82bbc11118bb07acf4fd /mm
parent    a5fc66599b6143999290603a51da11868f7d36a9 (diff)
parent    30a97c1e2dc39f45d9deeeccc2733278fc285d5e (diff)
Merge 4.4.148 into android-4.4
Changes in 4.4.148
	ext4: fix check to prevent initializing reserved inodes
	tpm: fix race condition in tpm_common_write()
	ipv4+ipv6: Make INET*_ESP select CRYPTO_ECHAINIV
	fork: unconditionally clear stack on fork
	parisc: Enable CONFIG_MLONGCALLS by default
	parisc: Define mb() and add memory barriers to assembler unlock sequences
	xen/netfront: don't cache skb_shinfo()
	ACPI / LPSS: Add missing prv_offset setting for byt/cht PWM devices
	scsi: sr: Avoid that opening a CD-ROM hangs with runtime power management enabled
	root dentries need RCU-delayed freeing
	fix mntput/mntput race
	fix __legitimize_mnt()/mntput() race
	IB/core: Make testing MR flags for writability a static inline function
	IB/mlx4: Mark user MR as writable if actual virtual memory is writable
	IB/ocrdma: fix out of bounds access to local buffer
	ARM: dts: imx6sx: fix irq for pcie bridge
	x86/paravirt: Fix spectre-v2 mitigations for paravirt guests
	x86/speculation: Protect against userspace-userspace spectreRSB
	kprobes/x86: Fix %p uses in error messages
	x86/irqflags: Provide a declaration for native_save_fl
	x86/speculation/l1tf: Increase 32bit PAE __PHYSICAL_PAGE_SHIFT
	x86/mm: Move swap offset/type up in PTE to work around erratum
	x86/mm: Fix swap entry comment and macro
	mm: x86: move _PAGE_SWP_SOFT_DIRTY from bit 7 to bit 1
	x86/speculation/l1tf: Change order of offset/type in swap entry
	x86/speculation/l1tf: Protect swap entries against L1TF
	x86/speculation/l1tf: Protect PROT_NONE PTEs against speculation
	x86/speculation/l1tf: Make sure the first page is always reserved
	x86/speculation/l1tf: Add sysfs reporting for l1tf
	mm: Add vm_insert_pfn_prot()
	mm: fix cache mode tracking in vm_insert_mixed()
	x86/speculation/l1tf: Disallow non privileged high MMIO PROT_NONE mappings
	x86/speculation/l1tf: Limit swap file size to MAX_PA/2
	x86/bugs: Move the l1tf function and define pr_fmt properly
	x86/speculation/l1tf: Extend 64bit swap file size limit
	x86/cpufeatures: Add detection of L1D cache flush support.
	x86/speculation/l1tf: Protect PAE swap entries against L1TF
	x86/speculation/l1tf: Fix up pte->pfn conversion for PAE
	x86/speculation/l1tf: Invert all not present mappings
	x86/speculation/l1tf: Make pmd/pud_mknotpresent() invert
	x86/mm/pat: Make set_memory_np() L1TF safe
	x86/mm/kmmio: Make the tracer robust against L1TF
	x86/speculation/l1tf: Fix up CPU feature flags
	x86/init: fix build with CONFIG_SWAP=n
	x86/speculation/l1tf: Unbreak !__HAVE_ARCH_PFN_MODIFY_ALLOWED architectures
	Linux 4.4.148

Change-Id: I83c857d9d9d74ee47e61d15eb411f276f057ba3d
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Diffstat (limited to 'mm')
-rw-r--r--  mm/memory.c    62
-rw-r--r--  mm/mprotect.c  49
-rw-r--r--  mm/swapfile.c  46
3 files changed, 130 insertions, 27 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 177cb7d111a9..d5bb1465d30c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1605,8 +1605,29 @@ out:
int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn)
{
+ return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_insert_pfn);
+
+/**
+ * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @pfn: source kernel pfn
+ * @pgprot: pgprot flags for the inserted page
+ *
+ * This is exactly like vm_insert_pfn, except that it allows drivers
+ * to override pgprot on a per-page basis.
+ *
+ * This only makes sense for IO mappings, and it makes no sense for
+ * cow mappings. In general, using multiple vmas is preferable;
+ * vm_insert_pfn_prot should only be used if using multiple VMAs is
+ * impractical.
+ */
+int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn, pgprot_t pgprot)
+{
int ret;
- pgprot_t pgprot = vma->vm_page_prot;
/*
* Technically, architectures with pte_special can avoid all these
* restrictions (same for remap_pfn_range). However we would like
@@ -1624,19 +1645,29 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
if (track_pfn_insert(vma, &pgprot, pfn))
return -EINVAL;
+ if (!pfn_modify_allowed(pfn, pgprot))
+ return -EACCES;
+
ret = insert_pfn(vma, addr, pfn, pgprot);
return ret;
}
-EXPORT_SYMBOL(vm_insert_pfn);
+EXPORT_SYMBOL(vm_insert_pfn_prot);
int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn)
{
+ pgprot_t pgprot = vma->vm_page_prot;
+
BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));
if (addr < vma->vm_start || addr >= vma->vm_end)
return -EFAULT;
+ if (track_pfn_insert(vma, &pgprot, pfn))
+ return -EINVAL;
+
+ if (!pfn_modify_allowed(pfn, pgprot))
+ return -EACCES;
/*
* If we don't have pte special, then we have to use the pfn_valid()
@@ -1649,9 +1680,9 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
struct page *page;
page = pfn_to_page(pfn);
- return insert_page(vma, addr, page, vma->vm_page_prot);
+ return insert_page(vma, addr, page, pgprot);
}
- return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
+ return insert_pfn(vma, addr, pfn, pgprot);
}
EXPORT_SYMBOL(vm_insert_mixed);
@@ -1666,6 +1697,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
{
pte_t *pte;
spinlock_t *ptl;
+ int err = 0;
pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
if (!pte)
@@ -1673,12 +1705,16 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
arch_enter_lazy_mmu_mode();
do {
BUG_ON(!pte_none(*pte));
+ if (!pfn_modify_allowed(pfn, prot)) {
+ err = -EACCES;
+ break;
+ }
set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
pfn++;
} while (pte++, addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
pte_unmap_unlock(pte - 1, ptl);
- return 0;
+ return err;
}
static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
@@ -1687,6 +1723,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
{
pmd_t *pmd;
unsigned long next;
+ int err;
pfn -= addr >> PAGE_SHIFT;
pmd = pmd_alloc(mm, pud, addr);
@@ -1695,9 +1732,10 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
VM_BUG_ON(pmd_trans_huge(*pmd));
do {
next = pmd_addr_end(addr, end);
- if (remap_pte_range(mm, pmd, addr, next,
- pfn + (addr >> PAGE_SHIFT), prot))
- return -ENOMEM;
+ err = remap_pte_range(mm, pmd, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot);
+ if (err)
+ return err;
} while (pmd++, addr = next, addr != end);
return 0;
}
@@ -1708,6 +1746,7 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
{
pud_t *pud;
unsigned long next;
+ int err;
pfn -= addr >> PAGE_SHIFT;
pud = pud_alloc(mm, pgd, addr);
@@ -1715,9 +1754,10 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
return -ENOMEM;
do {
next = pud_addr_end(addr, end);
- if (remap_pmd_range(mm, pud, addr, next,
- pfn + (addr >> PAGE_SHIFT), prot))
- return -ENOMEM;
+ err = remap_pmd_range(mm, pud, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot);
+ if (err)
+ return err;
} while (pud++, addr = next, addr != end);
return 0;
}
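The vm_insert_pfn_prot() kerneldoc in the hunk above describes a per-page pgprot override intended for IO mappings. As a rough illustration (not part of this patch), a driver ->fault() handler on the 4.4 API might use it as follows; example_addr_to_pfn() and example_pfn_is_mmio() are hypothetical helpers, and the -EACCES branch corresponds to the new pfn_modify_allowed() check:

#include <linux/mm.h>

static int example_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	unsigned long addr = (unsigned long)vmf->virtual_address;
	unsigned long pfn = example_addr_to_pfn(vma, addr);	/* hypothetical */
	/* Choose an uncached pgprot only for MMIO pages, per page. */
	pgprot_t prot = example_pfn_is_mmio(pfn) ?		/* hypothetical */
			pgprot_noncached(vma->vm_page_prot) :
			vma->vm_page_prot;
	int ret = vm_insert_pfn_prot(vma, addr, pfn, prot);

	if (ret == -ENOMEM)
		return VM_FAULT_OOM;
	if (ret)	/* includes the new -EACCES from pfn_modify_allowed() */
		return VM_FAULT_SIGBUS;
	return VM_FAULT_NOPAGE;
}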
diff --git a/mm/mprotect.c b/mm/mprotect.c
index b8849a3930a0..fcd678c3bd24 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -255,6 +255,42 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
return pages;
}
+static int prot_none_pte_entry(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
+ 0 : -EACCES;
+}
+
+static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask,
+ unsigned long addr, unsigned long next,
+ struct mm_walk *walk)
+{
+ return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
+ 0 : -EACCES;
+}
+
+static int prot_none_test(unsigned long addr, unsigned long next,
+ struct mm_walk *walk)
+{
+ return 0;
+}
+
+static int prot_none_walk(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, unsigned long newflags)
+{
+ pgprot_t new_pgprot = vm_get_page_prot(newflags);
+ struct mm_walk prot_none_walk = {
+ .pte_entry = prot_none_pte_entry,
+ .hugetlb_entry = prot_none_hugetlb_entry,
+ .test_walk = prot_none_test,
+ .mm = current->mm,
+ .private = &new_pgprot,
+ };
+
+ return walk_page_range(start, end, &prot_none_walk);
+}
+
int
mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
unsigned long start, unsigned long end, unsigned long newflags)
@@ -273,6 +309,19 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
}
/*
+ * Do PROT_NONE PFN permission checks here when we can still
+ * bail out without undoing a lot of state. This is a rather
+ * uncommon case, so doesn't need to be very optimized.
+ */
+ if (arch_has_pfn_modify_check() &&
+ (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) &&
+ (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) {
+ error = prot_none_walk(vma, start, end, newflags);
+ if (error)
+ return error;
+ }
+
+ /*
* If we make a private mapping writable we increase our commit;
* but (without finer accounting) cannot reduce our commit if we
* make it unwritable again. hugetlb mapping were accounted for
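The mprotect_fixup() hunk above runs the PROT_NONE PFN check before any VMA state is changed, so an affected mapping simply fails the syscall. From userspace, the observable effect is that mprotect(PROT_NONE) on a high PFNMAP/MIXEDMAP mapping can now return EACCES on L1TF-affected hardware. A minimal sketch, assuming a hypothetical PFNMAP character device /dev/example:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	int fd = open("/dev/example", O_RDWR);	/* hypothetical device */
	void *p;

	if (fd < 0)
		return 1;
	p = mmap(NULL, psz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	/* With this series, EACCES here comes from prot_none_walk(). */
	if (mprotect(p, psz, PROT_NONE) < 0)
		printf("mprotect: %s\n", strerror(errno));
	munmap(p, psz);
	close(fd);
	return 0;
}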
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 80ba8711cd61..623c77c1327b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2206,6 +2206,35 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
return 0;
}
+
+/*
+ * Find out how many pages are allowed for a single swap device. There
+ * are two limiting factors:
+ * 1) the number of bits for the swap offset in the swp_entry_t type, and
+ * 2) the number of bits in the swap pte, as defined by the different
+ * architectures.
+ *
+ * In order to find the largest possible bit mask, a swap entry with
+ * swap type 0 and swap offset ~0UL is created, encoded to a swap pte,
+ * decoded to a swp_entry_t again, and finally the swap offset is
+ * extracted.
+ *
+ * This will mask all the bits from the initial ~0UL mask that can't
+ * be encoded in either the swp_entry_t or the architecture definition
+ * of a swap pte.
+ */
+unsigned long generic_max_swapfile_size(void)
+{
+ return swp_offset(pte_to_swp_entry(
+ swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
+}
+
+/* Can be overridden by an architecture for additional checks. */
+__weak unsigned long max_swapfile_size(void)
+{
+ return generic_max_swapfile_size();
+}
+
static unsigned long read_swap_header(struct swap_info_struct *p,
union swap_header *swap_header,
struct inode *inode)
@@ -2241,22 +2270,7 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
p->cluster_next = 1;
p->cluster_nr = 0;
- /*
- * Find out how many pages are allowed for a single swap
- * device. There are two limiting factors: 1) the number
- * of bits for the swap offset in the swp_entry_t type, and
- * 2) the number of bits in the swap pte as defined by the
- * different architectures. In order to find the
- * largest possible bit mask, a swap entry with swap type 0
- * and swap offset ~0UL is created, encoded to a swap pte,
- * decoded to a swp_entry_t again, and finally the swap
- * offset is extracted. This will mask all the bits from
- * the initial ~0UL mask that can't be encoded in either
- * the swp_entry_t or the architecture definition of a
- * swap pte.
- */
- maxpages = swp_offset(pte_to_swp_entry(
- swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
+ maxpages = max_swapfile_size();
last_page = swap_header->info.last_page;
if (!last_page) {
pr_warn("Empty swap-file\n");
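generic_max_swapfile_size() above derives the limit purely from the architecture's swap pte encoding by round-tripping swp_entry(0, ~0UL) through swp_entry_to_pte()/pte_to_swp_entry(). Because max_swapfile_size() is declared __weak, an architecture can clamp the result further; a sketch of such an override (not the actual x86 L1TF code from this series, and arch_swap_pfn_limit() is a hypothetical helper):

#include <linux/kernel.h>
#include <linux/swapfile.h>

unsigned long max_swapfile_size(void)
{
	unsigned long pages = generic_max_swapfile_size();

	/*
	 * Hypothetical clamp: refuse swap offsets that would encode a PFN
	 * above what the architecture considers safe (on x86 this role is
	 * played by the MAX_PA/2 limit used as an L1TF workaround).
	 */
	pages = min(pages, arch_swap_pfn_limit());	/* hypothetical */

	return pages;
}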