author    Linux Build Service Account <lnxbuild@localhost>  2016-10-03 10:34:53 -0700
committer Gerrit - the friendly Code Review server <code-review@localhost>  2016-10-03 10:34:53 -0700
commit    abc3610d33c396660dde542e1900edd77a4d8a12
tree      f67f2631d6e994b21e067b37bdd851d17af01241
parent    a9849c7d7fffb1c6cc211d7dc48eb7b428207fde
parent    6b520c7ac72604b16eb68274d23efe4ff49444f4
Merge "blk-mq: use static mapping"
-rw-r--r--  block/blk-mq-cpumap.c |  16
-rw-r--r--  block/blk-mq.c        | 106
-rw-r--r--  block/blk-mq.h        |   2
3 files changed, 39 insertions, 85 deletions
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 8764c241e5bb..8cf06af3036e 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -31,8 +31,8 @@ static int get_first_sibling(unsigned int cpu)
return cpu;
}
-int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
- const struct cpumask *online_mask)
+static int blk_mq_update_queue_map(unsigned int *map,
+ unsigned int nr_queues, const struct cpumask *online_mask)
{
unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
cpumask_var_t cpus;
@@ -52,18 +52,14 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
queue = 0;
for_each_possible_cpu(i) {
- if (!cpumask_test_cpu(i, online_mask)) {
- map[i] = 0;
- continue;
- }
-
/*
* Easy case - we have equal or more hardware queues. Or
* there are no thread siblings to take into account. Do
* 1:1 if enough, or sequential mapping if less.
*/
- if (nr_queues >= nr_cpus || nr_cpus == nr_uniq_cpus) {
- map[i] = cpu_to_queue_index(nr_cpus, nr_queues, queue);
+ if (nr_queues >= nr_cpu_ids) {
+ map[i] = cpu_to_queue_index(nr_cpu_ids, nr_queues,
+ queue);
queue++;
continue;
}
@@ -75,7 +71,7 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
*/
first_sibling = get_first_sibling(i);
if (first_sibling == i) {
- map[i] = cpu_to_queue_index(nr_uniq_cpus, nr_queues,
+ map[i] = cpu_to_queue_index(nr_cpu_ids, nr_queues,
queue);
queue++;
} else
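
For reference, cpu_to_queue_index() is the helper both branches above call into; a minimal sketch of the static spread it produces (the helper body matches the in-tree block/blk-mq-cpumap.c of this era; the example numbers are illustrative only):

static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues,
                              const int cpu)
{
        /* Spread CPUs evenly: CPUs 0..nr_cpus-1 map onto queues
         * 0..nr_queues-1 in contiguous blocks. */
        return cpu * nr_queues / nr_cpus;
}

/* Example with nr_cpu_ids = 8, nr_queues = 4:
 *   CPU:   0 1 2 3 4 5 6 7
 *   queue: 0 0 1 1 2 2 3 3
 * After this patch the map depends only on nr_cpu_ids, so it is
 * identical no matter which CPUs happen to be online. */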
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6d6f8feb48c0..8398e18d4139 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1783,10 +1783,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
INIT_LIST_HEAD(&__ctx->rq_list);
__ctx->queue = q;
- /* If the cpu isn't online, the cpu is mapped to first hctx */
- if (!cpu_online(i))
- continue;
-
hctx = q->mq_ops->map_queue(q, i);
/*
@@ -1820,12 +1816,9 @@ static void blk_mq_map_swqueue(struct request_queue *q,
* Map software to hardware queues
*/
queue_for_each_ctx(q, ctx, i) {
- /* If the cpu isn't online, the cpu is mapped to first hctx */
- if (!cpumask_test_cpu(i, online_mask))
- continue;
-
hctx = q->mq_ops->map_queue(q, i);
- cpumask_set_cpu(i, hctx->cpumask);
+ if (cpumask_test_cpu(i, online_mask))
+ cpumask_set_cpu(i, hctx->cpumask);
ctx->index_hw = hctx->nr_ctx;
hctx->ctxs[hctx->nr_ctx++] = ctx;
}
@@ -1863,17 +1856,22 @@ static void blk_mq_map_swqueue(struct request_queue *q,
/*
* Initialize batch roundrobin counts
+	 * Set next_cpu only for those hctxs that have an online CPU
+	 * in their cpumask. For an hctx whose cpumask mixes online
+	 * and offline CPUs, this always picks an online one. For an
+	 * hctx whose CPUs are all offline, the cpumask is updated
+	 * later by blk_mq_queue_reinit_notify().
*/
- hctx->next_cpu = cpumask_first(hctx->cpumask);
- hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
+ if (cpumask_first(hctx->cpumask) < nr_cpu_ids) {
+ hctx->next_cpu = cpumask_first(hctx->cpumask);
+ hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
+ }
}
queue_for_each_ctx(q, ctx, i) {
- if (!cpumask_test_cpu(i, online_mask))
- continue;
-
hctx = q->mq_ops->map_queue(q, i);
- cpumask_set_cpu(i, hctx->tags->cpumask);
+ if (cpumask_test_cpu(i, online_mask))
+ cpumask_set_cpu(i, hctx->tags->cpumask);
}
}
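
The new < nr_cpu_ids check works because cpumask_first() returns a value >= nr_cpu_ids when the mask is empty. A sketch of the guard with the lookup hoisted into a local (a cosmetic variant for clarity, not the patch's literal code):

unsigned int first = cpumask_first(hctx->cpumask);

/* cpumask_first() yields >= nr_cpu_ids for an empty mask, so an
 * hctx whose CPUs are all offline is skipped here and keeps its
 * old next_cpu until the hotplug notifier refills its cpumask. */
if (first < nr_cpu_ids) {
        hctx->next_cpu = first;
        hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
}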
@@ -2101,38 +2099,13 @@ void blk_mq_free_queue(struct request_queue *q)
blk_mq_free_hw_queues(q, set);
}
-/* Basically redo blk_mq_init_queue with queue frozen */
-static void blk_mq_queue_reinit(struct request_queue *q,
- const struct cpumask *online_mask)
-{
- WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
-
- blk_mq_sysfs_unregister(q);
-
- blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, online_mask);
-
- /*
- * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
- * we should change hctx numa_node according to new topology (this
- * involves free and re-allocate memory, worthy doing?)
- */
-
- blk_mq_map_swqueue(q, online_mask);
-
- blk_mq_sysfs_register(q);
-}
-
static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
unsigned long action, void *hcpu)
{
struct request_queue *q;
+ struct blk_mq_hw_ctx *hctx;
+ int i;
int cpu = (unsigned long)hcpu;
- /*
- * New online cpumask which is going to be set in this hotplug event.
- * Declare this cpumasks as global as cpu-hotplug operation is invoked
- * one-by-one and dynamically allocating this could result in a failure.
- */
- static struct cpumask online_new;
/*
* Before hotadded cpu starts handling requests, new mappings must
@@ -2154,44 +2127,31 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_DEAD:
case CPU_UP_CANCELED:
- cpumask_copy(&online_new, cpu_online_mask);
+ mutex_lock(&all_q_mutex);
+ list_for_each_entry(q, &all_q_list, all_q_node) {
+ queue_for_each_hw_ctx(q, hctx, i) {
+ cpumask_clear_cpu(cpu, hctx->cpumask);
+ cpumask_clear_cpu(cpu, hctx->tags->cpumask);
+ }
+ }
+ mutex_unlock(&all_q_mutex);
break;
case CPU_UP_PREPARE:
- cpumask_copy(&online_new, cpu_online_mask);
- cpumask_set_cpu(cpu, &online_new);
+ /* Update hctx->cpumask for newly onlined CPUs */
+ mutex_lock(&all_q_mutex);
+ list_for_each_entry(q, &all_q_list, all_q_node) {
+ queue_for_each_hw_ctx(q, hctx, i) {
+ cpumask_set_cpu(cpu, hctx->cpumask);
+ hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
+ cpumask_set_cpu(cpu, hctx->tags->cpumask);
+ }
+ }
+ mutex_unlock(&all_q_mutex);
break;
default:
return NOTIFY_OK;
}
- mutex_lock(&all_q_mutex);
-
- /*
- * We need to freeze and reinit all existing queues. Freezing
- * involves synchronous wait for an RCU grace period and doing it
- * one by one may take a long time. Start freezing all queues in
- * one swoop and then wait for the completions so that freezing can
- * take place in parallel.
- */
- list_for_each_entry(q, &all_q_list, all_q_node)
- blk_mq_freeze_queue_start(q);
- list_for_each_entry(q, &all_q_list, all_q_node) {
- blk_mq_freeze_queue_wait(q);
-
- /*
- * timeout handler can't touch hw queue during the
- * reinitialization
- */
- del_timer_sync(&q->timeout);
- }
-
- list_for_each_entry(q, &all_q_list, all_q_node)
- blk_mq_queue_reinit(q, &online_new);
-
- list_for_each_entry(q, &all_q_list, all_q_node)
- blk_mq_unfreeze_queue(q);
-
- mutex_unlock(&all_q_mutex);
return NOTIFY_OK;
}
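
For context, blk_mq_queue_reinit_notify() sits on the pre-4.10 CPU hotplug notifier chain; this diff does not touch the registration site. A self-contained sketch of how a callback of this shape is wired up (the demo_* names are hypothetical):

#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/printk.h>

/* Hypothetical demo callback: reacts to the same transitions the
 * reworked blk_mq_queue_reinit_notify() handles above. */
static int demo_cpu_notify(struct notifier_block *nb,
                           unsigned long action, void *hcpu)
{
        int cpu = (unsigned long)hcpu;

        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_UP_PREPARE:
                /* CPU is coming up: add it to the per-hctx masks. */
                pr_info("demo: cpu %d up, set in cpumasks\n", cpu);
                break;
        case CPU_DEAD:
        case CPU_UP_CANCELED:
                /* CPU is gone (or failed to come up): clear it. */
                pr_info("demo: cpu %d down, cleared from cpumasks\n", cpu);
                break;
        }
        return NOTIFY_OK;
}

static struct notifier_block demo_cpu_nb = {
        .notifier_call = demo_cpu_notify,
};

/* At init time (cf. blk_mq_init() in kernels of this era):
 *      register_hotcpu_notifier(&demo_cpu_nb);
 */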
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 713820b47b31..7fd11bcaa409 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -48,8 +48,6 @@ void blk_mq_disable_hotplug(void);
* CPU -> queue mappings
*/
extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set);
-extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
- const struct cpumask *online_mask);
extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
/*