diff options
author | Tejun Heo <tj@kernel.org> | 2015-06-24 16:58:23 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-24 17:49:45 -0700 |
commit | c2b42d3cadbffbf5117ccdbcb3a2fc47c0d59bae (patch) | |
tree | 499feeac186fae24e36057206b51f3ca1a0bc016 /mm | |
parent | f4b90b70b7a4f5c29c442399ffd531332356e1f5 (diff) |
memcg: convert mem_cgroup->under_oom from atomic_t to int
memcg->under_oom tracks whether the memcg is under OOM conditions and is
an atomic_t counter managed with mem_cgroup_[un]mark_under_oom(). While
atomic_t appears to be simple synchronization-wise, when used as a
synchronization construct like here, it's trickier and more error-prone
due to weak memory ordering rules, especially around atomic_read(), and
the false sense of security it gives.
For example, both non-trivial read sites of memcg->under_oom are a bit
problematic, although neither is actually broken.
* mem_cgroup_oom_register_event()
It isn't explicit what guarantees the memory ordering between event
addition and the memcg->under_oom check. This isn't broken only because
memcg_oom_lock is used for both the event list and memcg->oom_lock.
* memcg_oom_recover()
The lockless test doesn't have any explanation of why it would be
safe.
mem_cgroup_[un]mark_under_oom() are very cold paths and there's no point
in avoiding locking memcg_oom_lock there. This patch converts
memcg->under_oom from atomic_t to int, puts its modifications under
memcg_oom_lock and documents why the lockless test in
memcg_oom_recover() is safe.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 29 |
1 files changed, 21 insertions, 8 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6a5f5d59f7d7..e65f7b0131d3 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -285,8 +285,9 @@ struct mem_cgroup { */ bool use_hierarchy; + /* protected by memcg_oom_lock */ bool oom_lock; - atomic_t under_oom; + int under_oom; int swappiness; /* OOM-Killer disable */ @@ -1809,8 +1810,10 @@ static void mem_cgroup_mark_under_oom(struct mem_cgroup *memcg) { struct mem_cgroup *iter; + spin_lock(&memcg_oom_lock); for_each_mem_cgroup_tree(iter, memcg) - atomic_inc(&iter->under_oom); + iter->under_oom++; + spin_unlock(&memcg_oom_lock); } static void mem_cgroup_unmark_under_oom(struct mem_cgroup *memcg) @@ -1819,11 +1822,13 @@ static void mem_cgroup_unmark_under_oom(struct mem_cgroup *memcg) /* * When a new child is created while the hierarchy is under oom, - * mem_cgroup_oom_lock() may not be called. We have to use - * atomic_add_unless() here. + * mem_cgroup_oom_lock() may not be called. Watch for underflow. */ + spin_lock(&memcg_oom_lock); for_each_mem_cgroup_tree(iter, memcg) - atomic_add_unless(&iter->under_oom, -1, 0); + if (iter->under_oom > 0) + iter->under_oom--; + spin_unlock(&memcg_oom_lock); } static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq); @@ -1851,7 +1856,15 @@ static int memcg_oom_wake_function(wait_queue_t *wait, static void memcg_oom_recover(struct mem_cgroup *memcg) { - if (memcg && atomic_read(&memcg->under_oom)) + /* + * For the following lockless ->under_oom test, the only required + * guarantee is that it must see the state asserted by an OOM when + * this function is called as a result of userland actions + * triggered by the notification of the OOM. This is trivially + * achieved by invoking mem_cgroup_mark_under_oom() before + * triggering notification. 
+ */ + if (memcg && memcg->under_oom) __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg); } @@ -3860,7 +3873,7 @@ static int mem_cgroup_oom_register_event(struct mem_cgroup *memcg, list_add(&event->list, &memcg->oom_notify); /* already in OOM ? */ - if (atomic_read(&memcg->under_oom)) + if (memcg->under_oom) eventfd_signal(eventfd, 1); spin_unlock(&memcg_oom_lock); @@ -3889,7 +3902,7 @@ static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v) struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(sf)); seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable); - seq_printf(sf, "under_oom %d\n", (bool)atomic_read(&memcg->under_oom)); + seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom); return 0; } |