summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/vm/numa_memory_policy.txt33
-rw-r--r--include/linux/mempolicy.h1
-rw-r--r--mm/mempolicy.c12
3 files changed, 28 insertions, 18 deletions
diff --git a/Documentation/vm/numa_memory_policy.txt b/Documentation/vm/numa_memory_policy.txt
index 8242f52d0f22..dd4986497996 100644
--- a/Documentation/vm/numa_memory_policy.txt
+++ b/Documentation/vm/numa_memory_policy.txt
@@ -302,31 +302,30 @@ MEMORY POLICIES AND CPUSETS
Memory policies work within cpusets as described above. For memory policies
that require a node or set of nodes, the nodes are restricted to the set of
-nodes whose memories are allowed by the cpuset constraints. If the
-intersection of the set of nodes specified for the policy and the set of nodes
-allowed by the cpuset is the empty set, the policy is considered invalid and
-cannot be installed.
+nodes whose memories are allowed by the cpuset constraints. If the nodemask
+specified for the policy contains nodes that are not allowed by the cpuset, or
+the intersection of the set of nodes specified for the policy and the set of
+nodes with memory is the empty set, the policy is considered invalid
+and cannot be installed.
The interaction of memory policies and cpusets can be problematic for a
couple of reasons:
-1) the memory policy APIs take physical node id's as arguments. However, the
- memory policy APIs do not provide a way to determine what nodes are valid
- in the context where the application is running. An application MAY consult
- the cpuset file system [directly or via an out of tree, and not generally
- available, libcpuset API] to obtain this information, but then the
- application must be aware that it is running in a cpuset and use what are
- intended primarily as administrative APIs.
-
- However, as long as the policy specifies at least one node that is valid
- in the controlling cpuset, the policy can be used.
+1) the memory policy APIs take physical node id's as arguments. As mentioned
+ above, it is illegal to specify nodes that are not allowed in the cpuset.
+ The application must query the allowed nodes using the get_mempolicy()
+ API with the MPOL_F_MEMS_ALLOWED flag to determine the allowed nodes and
+ restrict itself to those nodes. However, the resources available to a
+ cpuset can be changed by the system administrator, or a workload manager
+ application, at any time. So, a task may still get errors attempting to
+ specify policy nodes, and must query the allowed memories again.
2) when tasks in two cpusets share access to a memory region, such as shared
memory segments created by shmget() or mmap() with the MAP_ANONYMOUS and
MAP_SHARED flags, and any of the tasks install shared policy on the region,
only nodes whose memories are allowed in both cpusets may be used in the
- policies. Again, obtaining this information requires "stepping outside"
- the memory policy APIs, as well as knowing in what cpusets other task might
- be attaching to the shared region, to use the cpuset information.
+ policies. Obtaining this information requires "stepping outside" the
+ memory policy APIs to use the cpuset information and requires that one
+ know in what cpusets other tasks might be attaching to the shared region.
Furthermore, if the cpusets' allowed memory sets are disjoint, "local"
allocation is the only valid policy.
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index a020eb2d4e2a..7e9698ec839b 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -19,6 +19,7 @@
/* Flags for get_mempolicy */
#define MPOL_F_NODE (1<<0) /* return next IL mode instead of node mask */
#define MPOL_F_ADDR (1<<1) /* look up vma using address */
+#define MPOL_F_MEMS_ALLOWED (1<<2) /* return allowed memories */
/* Flags for mbind */
#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 3a45b7dd0a09..ccbdb22147bb 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -526,8 +526,18 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask,
struct mempolicy *pol = current->mempolicy;
cpuset_update_task_memory_state();
- if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
+ if (flags &
+ ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_F_MEMS_ALLOWED))
return -EINVAL;
+
+ if (flags & MPOL_F_MEMS_ALLOWED) {
+ if (flags & (MPOL_F_NODE|MPOL_F_ADDR))
+ return -EINVAL;
+ *policy = 0; /* just so it's initialized */
+ *nmask = cpuset_current_mems_allowed;
+ return 0;
+ }
+
if (flags & MPOL_F_ADDR) {
down_read(&mm->mmap_sem);
vma = find_vma_intersection(mm, addr, addr+1);