From 450adcbe518ab3a3953d8475309525d22de77cba Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 1 Mar 2011 13:40:54 -0500 Subject: blk-throttle: Do not use kblockd workqueue for throtl work o Dominik Klein reported a system hang issue while doing some blkio throttling testing. https://lkml.org/lkml/2011/2/24/173 o Some tracing revealed that CFQ was not dispatching any more jobs as queue unplug was not happening. And queue unplug was not happening because unplug work was not being called as there was one throttling work on same cpu which as not finished yet. And throttling work had not finished as it was tyring to dispatch a bio to CFQ but all the request descriptors were consume to it was put to sleep. o So basically it is a cyclic dependecny between CFQ unplug work and throtl dispatch work. Tejun suggested that use separate workqueue for such cases. o This patch uses a separate workqueue for throttle related work and does not rely on kblockd workqueue anymore. Cc: stable@kernel.org Reported-by: Dominik Klein Signed-off-by: Vivek Goyal Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4d18ff34670a..dd8cd0f47e3a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1088,7 +1088,6 @@ static inline void put_dev_sector(Sector p) struct work_struct; int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); -int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay); #ifdef CONFIG_BLK_CGROUP /* @@ -1136,7 +1135,6 @@ static inline uint64_t rq_io_start_time_ns(struct request *req) extern int blk_throtl_init(struct request_queue *q); extern void blk_throtl_exit(struct request_queue *q); extern int blk_throtl_bio(struct request_queue *q, struct bio **bio); -extern void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay); extern void throtl_shutdown_timer_wq(struct request_queue *q); #else /* CONFIG_BLK_DEV_THROTTLING */ static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio) @@ -1146,7 +1144,6 @@ static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio) static inline int blk_throtl_init(struct request_queue *q) { return 0; } static inline int blk_throtl_exit(struct request_queue *q) { return 0; } -static inline void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) {} static inline void throtl_shutdown_timer_wq(struct request_queue *q) {} #endif /* CONFIG_BLK_DEV_THROTTLING */ -- cgit v1.2.3 From 77bd70e9009eab6dbdef3ee08afe87ab26df8dac Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 4 Feb 2011 14:57:43 +0000 Subject: mfd: Don't suspend WM8994 if the CODEC is not suspended ASoC supports keeping the audio subsysetm active over suspend in order to support use cases such as audio passthrough from a cellular modem with the main CPU suspended. Ensure that we don't power down the CODEC when this is happening by checking to see if VMID is up and skipping suspend and resume when it is. If the CODEC has suspended then it'll turn VMID off before the core suspend() gets called. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/wm8994-core.c | 18 ++++++++++++++++++ include/linux/mfd/wm8994/core.h | 1 + 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c index 41233c7fa581..f4016a075fd6 100644 --- a/drivers/mfd/wm8994-core.c +++ b/drivers/mfd/wm8994-core.c @@ -246,6 +246,16 @@ static int wm8994_suspend(struct device *dev) struct wm8994 *wm8994 = dev_get_drvdata(dev); int ret; + /* Don't actually go through with the suspend if the CODEC is + * still active (eg, for audio passthrough from CP. */ + ret = wm8994_reg_read(wm8994, WM8994_POWER_MANAGEMENT_1); + if (ret < 0) { + dev_err(dev, "Failed to read power status: %d\n", ret); + } else if (ret & WM8994_VMID_SEL_MASK) { + dev_dbg(dev, "CODEC still active, ignoring suspend\n"); + return 0; + } + /* GPIO configuration state is saved here since we may be configuring * the GPIO alternate functions even if we're not using the gpiolib * driver for them. @@ -261,6 +271,8 @@ static int wm8994_suspend(struct device *dev) if (ret < 0) dev_err(dev, "Failed to save LDO registers: %d\n", ret); + wm8994->suspended = true; + ret = regulator_bulk_disable(wm8994->num_supplies, wm8994->supplies); if (ret != 0) { @@ -276,6 +288,10 @@ static int wm8994_resume(struct device *dev) struct wm8994 *wm8994 = dev_get_drvdata(dev); int ret; + /* We may have lied to the PM core about suspending */ + if (!wm8994->suspended) + return 0; + ret = regulator_bulk_enable(wm8994->num_supplies, wm8994->supplies); if (ret != 0) { @@ -298,6 +314,8 @@ static int wm8994_resume(struct device *dev) if (ret < 0) dev_err(dev, "Failed to restore GPIO registers: %d\n", ret); + wm8994->suspended = false; + return 0; } #endif diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h index 3fd36845ca45..ef4f0b6083a3 100644 --- a/include/linux/mfd/wm8994/core.h +++ b/include/linux/mfd/wm8994/core.h @@ -71,6 +71,7 @@ struct wm8994 { u16 irq_masks_cache[WM8994_NUM_IRQ_REGS]; /* Used over suspend/resume */ + bool suspended; u16 ldo_regs[WM8994_NUM_LDO_REGS]; u16 gpio_regs[WM8994_NUM_GPIO_REGS]; -- cgit v1.2.3 From 1654e7411a1ad4999fe7890ef51d2a2bbb1fcf76 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 2 Mar 2011 08:48:05 -0500 Subject: block: add @force_kblockd to __blk_run_queue() __blk_run_queue() automatically either calls q->request_fn() directly or schedules kblockd depending on whether the function is recursed. blk-flush implementation needs to be able to explicitly choose kblockd. Add @force_kblockd. All the current users are converted to specify %false for the parameter and this patch doesn't introduce any behavior change. stable: This is prerequisite for fixing ide oops caused by the new blk-flush implementation. Signed-off-by: Tejun Heo Cc: Jan Beulich Cc: James Bottomley Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/blk-core.c | 11 ++++++----- block/blk-flush.c | 2 +- block/cfq-iosched.c | 6 +++--- block/elevator.c | 4 ++-- drivers/scsi/scsi_lib.c | 2 +- drivers/scsi/scsi_transport_fc.c | 2 +- include/linux/blkdev.h | 2 +- 7 files changed, 15 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 792ece276160..518dd423a5fe 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -352,7 +352,7 @@ void blk_start_queue(struct request_queue *q) WARN_ON(!irqs_disabled()); queue_flag_clear(QUEUE_FLAG_STOPPED, q); - __blk_run_queue(q); + __blk_run_queue(q, false); } EXPORT_SYMBOL(blk_start_queue); @@ -403,13 +403,14 @@ EXPORT_SYMBOL(blk_sync_queue); /** * __blk_run_queue - run a single device queue * @q: The queue to run + * @force_kblockd: Don't run @q->request_fn directly. Use kblockd. * * Description: * See @blk_run_queue. This variant must be called with the queue lock * held and interrupts disabled. * */ -void __blk_run_queue(struct request_queue *q) +void __blk_run_queue(struct request_queue *q, bool force_kblockd) { blk_remove_plug(q); @@ -423,7 +424,7 @@ void __blk_run_queue(struct request_queue *q) * Only recurse once to avoid overrunning the stack, let the unplug * handling reinvoke the handler shortly if we already got there. */ - if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { + if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { q->request_fn(q); queue_flag_clear(QUEUE_FLAG_REENTER, q); } else { @@ -446,7 +447,7 @@ void blk_run_queue(struct request_queue *q) unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - __blk_run_queue(q); + __blk_run_queue(q, false); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_run_queue); @@ -1053,7 +1054,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq, drive_stat_acct(rq, 1); __elv_add_request(q, rq, where, 0); - __blk_run_queue(q); + __blk_run_queue(q, false); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_insert_request); diff --git a/block/blk-flush.c b/block/blk-flush.c index 54b123d6563e..56adaa8d55cd 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -69,7 +69,7 @@ static void blk_flush_complete_seq_end_io(struct request_queue *q, * queue. Kick the queue in those cases. */ if (was_empty && next_rq) - __blk_run_queue(q); + __blk_run_queue(q, false); } static void pre_flush_end_io(struct request *rq, int error) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 7be4c7959625..ea83a4f0c27d 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3355,7 +3355,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfqd->busy_queues > 1) { cfq_del_timer(cfqd, cfqq); cfq_clear_cfqq_wait_request(cfqq); - __blk_run_queue(cfqd->queue); + __blk_run_queue(cfqd->queue, false); } else { cfq_blkiocg_update_idle_time_stats( &cfqq->cfqg->blkg); @@ -3370,7 +3370,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, * this new queue is RT and the current one is BE */ cfq_preempt_queue(cfqd, cfqq); - __blk_run_queue(cfqd->queue); + __blk_run_queue(cfqd->queue, false); } } @@ -3731,7 +3731,7 @@ static void cfq_kick_queue(struct work_struct *work) struct request_queue *q = cfqd->queue; spin_lock_irq(q->queue_lock); - __blk_run_queue(cfqd->queue); + __blk_run_queue(cfqd->queue, false); spin_unlock_irq(q->queue_lock); } diff --git a/block/elevator.c b/block/elevator.c index 2569512830d3..236e93c1f46c 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -602,7 +602,7 @@ void elv_quiesce_start(struct request_queue *q) */ elv_drain_elevator(q); while (q->rq.elvpriv) { - __blk_run_queue(q); + __blk_run_queue(q, false); spin_unlock_irq(q->queue_lock); msleep(10); spin_lock_irq(q->queue_lock); @@ -651,7 +651,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) * with anything. There's no point in delaying queue * processing. */ - __blk_run_queue(q); + __blk_run_queue(q, false); break; case ELEVATOR_INSERT_SORT: diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 9045c52abd25..fb2bb35c62cb 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -443,7 +443,7 @@ static void scsi_run_queue(struct request_queue *q) &sdev->request_queue->queue_flags); if (flagset) queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue); - __blk_run_queue(sdev->request_queue); + __blk_run_queue(sdev->request_queue, false); if (flagset) queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue); spin_unlock(sdev->request_queue->queue_lock); diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 998c01be3234..5c3ccfc6b622 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3829,7 +3829,7 @@ fc_bsg_goose_queue(struct fc_rport *rport) !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags); if (flagset) queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q); - __blk_run_queue(rport->rqst_q); + __blk_run_queue(rport->rqst_q, false); if (flagset) queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q); spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index dd8cd0f47e3a..d5063e1b5555 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -699,7 +699,7 @@ extern void blk_start_queue(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); -extern void __blk_run_queue(struct request_queue *); +extern void __blk_run_queue(struct request_queue *q, bool force_kblockd); extern void blk_run_queue(struct request_queue *); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, -- cgit v1.2.3 From f009918a1c1bbf8607b8aab3959876913a30193a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 28 Feb 2011 03:27:53 +0000 Subject: RxRPC: Fix v1 keys commit 339412841d7 (RxRPC: Allow key payloads to be passed in XDR form) broke klog for me. I notice the v1 key struct had a kif_version field added: -struct rxkad_key { - u16 security_index; /* RxRPC header security index */ - u16 ticket_len; /* length of ticket[] */ - u32 expiry; /* time at which expires */ - u32 kvno; /* key version number */ - u8 session_key[8]; /* DES session key */ - u8 ticket[0]; /* the encrypted ticket */ -}; +struct rxrpc_key_data_v1 { + u32 kif_version; /* 1 */ + u16 security_index; + u16 ticket_length; + u32 expiry; /* time_t */ + u32 kvno; + u8 session_key[8]; + u8 ticket[0]; +}; However the code in rxrpc_instantiate strips it away: data += sizeof(kver); datalen -= sizeof(kver); Removing kif_version fixes my problem. Signed-off-by: Anton Blanchard Signed-off-by: David Howells Signed-off-by: David S. Miller --- include/keys/rxrpc-type.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/keys/rxrpc-type.h b/include/keys/rxrpc-type.h index 5cb86c307f5d..fc4875433817 100644 --- a/include/keys/rxrpc-type.h +++ b/include/keys/rxrpc-type.h @@ -99,7 +99,6 @@ struct rxrpc_key_token { * structure of raw payloads passed to add_key() or instantiate key */ struct rxrpc_key_data_v1 { - u32 kif_version; /* 1 */ u16 security_index; u16 ticket_length; u32 expiry; /* time_t */ -- cgit v1.2.3 From 2d3a8497f8cc5aca14b722cd37d51f6c15ff9f74 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 3 Mar 2011 10:53:20 -0500 Subject: blktrace: Remove blk_fill_rwbs_rq. If we enable trace events to trace block actions, We use blk_fill_rwbs_rq to analyze the corresponding actions in request's cmd_flags, but we only choose the minor 2 bits from it, so most of other flags(e.g, REQ_SYNC) are missing. For example, with a sync write we get: write_test-2409 [001] 160.013869: block_rq_insert: 3,64 W 0 () 258135 + = 8 [write_test] Since now we have integrated the flags of both bio and request, it is safe to pass rq->cmd_flags directly to blk_fill_rwbs and blk_fill_rwbs_rq isn't needed any more. With this patch, after a sync write we get: write_test-2417 [000] 226.603878: block_rq_insert: 3,64 WS 0 () 258135 += 8 [write_test] Signed-off-by: Tao Ma Acked-by: Jeff Moyer Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 1 - include/trace/events/block.h | 6 +++--- kernel/trace/blktrace.c | 16 ---------------- 3 files changed, 3 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 3395cf7130f5..b22fb0d3db0f 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -245,7 +245,6 @@ static inline int blk_cmd_buf_len(struct request *rq) extern void blk_dump_cmd(char *buf, struct request *rq); extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes); -extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq); #endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */ diff --git a/include/trace/events/block.h b/include/trace/events/block.h index aba421d68f6f..78f18adb49c8 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -31,7 +31,7 @@ DECLARE_EVENT_CLASS(block_rq_with_error, 0 : blk_rq_sectors(rq); __entry->errors = rq->errors; - blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); blk_dump_cmd(__get_str(cmd), rq); ), @@ -118,7 +118,7 @@ DECLARE_EVENT_CLASS(block_rq, __entry->bytes = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? blk_rq_bytes(rq) : 0; - blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); blk_dump_cmd(__get_str(cmd), rq); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -563,7 +563,7 @@ TRACE_EVENT(block_rq_remap, __entry->nr_sector = blk_rq_sectors(rq); __entry->old_dev = dev; __entry->old_sector = from; - blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); ), TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index d95721f33702..cbafed7d4f38 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1827,21 +1827,5 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) rwbs[i] = '\0'; } -void blk_fill_rwbs_rq(char *rwbs, struct request *rq) -{ - int rw = rq->cmd_flags & 0x03; - int bytes; - - if (rq->cmd_flags & REQ_DISCARD) - rw |= REQ_DISCARD; - - if (rq->cmd_flags & REQ_SECURE) - rw |= REQ_SECURE; - - bytes = blk_rq_bytes(rq); - - blk_fill_rwbs(rwbs, rw, bytes); -} - #endif /* CONFIG_EVENT_TRACING */ -- cgit v1.2.3 From e3e89cc535223433a619d0969db3fa05cdd946b8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 4 Mar 2011 09:23:30 -0800 Subject: Mark ptrace_{traceme,attach,detach} static They are only used inside kernel/ptrace.c, and have been for a long time. We don't want to go back to the bad-old-days when architectures did things on their own, so make them static and private. Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 3 --- kernel/ptrace.c | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 092a04f874a8..a1147e5dd245 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -102,11 +102,8 @@ extern long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data); -extern int ptrace_traceme(void); extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); -extern int ptrace_attach(struct task_struct *tsk); -extern int ptrace_detach(struct task_struct *, unsigned int); extern void ptrace_disable(struct task_struct *); extern int ptrace_check_attach(struct task_struct *task, int kill); extern int ptrace_request(struct task_struct *child, long request, diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1708b1e2972d..e2302e40b360 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -163,7 +163,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) return !err; } -int ptrace_attach(struct task_struct *task) +static int ptrace_attach(struct task_struct *task) { int retval; @@ -219,7 +219,7 @@ out: * Performs checks and sets PT_PTRACED. * Should be used by all ptrace implementations for PTRACE_TRACEME. */ -int ptrace_traceme(void) +static int ptrace_traceme(void) { int ret = -EPERM; @@ -293,7 +293,7 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) return false; } -int ptrace_detach(struct task_struct *child, unsigned int data) +static int ptrace_detach(struct task_struct *child, unsigned int data) { bool dead = false; -- cgit v1.2.3 From 60bf8bf8815e6adea4c1d0423578c3b8000e2ec8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 4 Mar 2011 12:24:28 -0800 Subject: libceph: fix msgr backoff With commit f363e45f we replaced a bunch of hacky workqueue mutual exclusion logic with the WQ_NON_REENTRANT flag. One pieces of fallout is that the exponential backoff breaks in certain cases: * con_work attempts to connect. * we get an immediate failure, and the socket state change handler queues immediate work. * con_work calls con_fault, we decide to back off, but can't queue delayed work. In this case, we add a BACKOFF bit to make con_work reschedule delayed work next time it runs (which should be immediately). Signed-off-by: Sage Weil --- include/linux/ceph/messenger.h | 1 + net/ceph/messenger.c | 30 ++++++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index c3011beac30d..eb31e108a64d 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -123,6 +123,7 @@ struct ceph_msg_pos { #define SOCK_CLOSED 11 /* socket state changed to closed */ #define OPENING 13 /* open connection w/ (possibly new) peer */ #define DEAD 14 /* dead, about to kfree */ +#define BACKOFF 15 /* * A single connection with another host. diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 6bd5025f6220..46fbc422ba74 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1949,6 +1949,19 @@ static void con_work(struct work_struct *work) work.work); mutex_lock(&con->mutex); + if (test_and_clear_bit(BACKOFF, &con->state)) { + dout("con_work %p backing off\n", con); + if (queue_delayed_work(ceph_msgr_wq, &con->work, + round_jiffies_relative(con->delay))) { + dout("con_work %p backoff %lu\n", con, con->delay); + mutex_unlock(&con->mutex); + return; + } else { + con->ops->put(con); + dout("con_work %p FAILED to back off %lu\n", con, + con->delay); + } + } if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ dout("con_work CLOSED\n"); @@ -2017,11 +2030,24 @@ static void ceph_fault(struct ceph_connection *con) con->delay = BASE_DELAY_INTERVAL; else if (con->delay < MAX_DELAY_INTERVAL) con->delay *= 2; - dout("fault queueing %p delay %lu\n", con, con->delay); con->ops->get(con); if (queue_delayed_work(ceph_msgr_wq, &con->work, - round_jiffies_relative(con->delay)) == 0) + round_jiffies_relative(con->delay))) { + dout("fault queued %p delay %lu\n", con, con->delay); + } else { con->ops->put(con); + dout("fault failed to queue %p delay %lu, backoff\n", + con, con->delay); + /* + * In many cases we see a socket state change + * while con_work is running and end up + * queuing (non-delayed) work, such that we + * can't backoff with a delay. Set a flag so + * that when con_work restarts we schedule the + * delay then. + */ + set_bit(BACKOFF, &con->state); + } } out_unlock: -- cgit v1.2.3 From e76661d0a59e53e5cc4dccbe4b755d1dc8a968ec Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 3 Mar 2011 10:10:15 -0800 Subject: libceph: fix msgr keepalive flag There was some broken keepalive code using a dead variable. Shift to using the proper bit flag. Signed-off-by: Sage Weil --- include/linux/ceph/messenger.h | 1 - net/ceph/messenger.c | 9 ++++----- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index eb31e108a64d..31d91a64838b 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -161,7 +161,6 @@ struct ceph_connection { struct list_head out_queue; struct list_head out_sent; /* sending or sent but unacked */ u64 out_seq; /* last message queued for send */ - bool out_keepalive_pending; u64 in_seq, in_seq_acked; /* last message received, acked */ diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 46fbc422ba74..3252ea974e8f 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -336,7 +336,6 @@ static void reset_connection(struct ceph_connection *con) ceph_msg_put(con->out_msg); con->out_msg = NULL; } - con->out_keepalive_pending = false; con->in_seq = 0; con->in_seq_acked = 0; } @@ -2019,10 +2018,10 @@ static void ceph_fault(struct ceph_connection *con) /* Requeue anything that hasn't been acked */ list_splice_init(&con->out_sent, &con->out_queue); - /* If there are no messages in the queue, place the connection - * in a STANDBY state (i.e., don't try to reconnect just yet). */ - if (list_empty(&con->out_queue) && !con->out_keepalive_pending) { - dout("fault setting STANDBY\n"); + /* If there are no messages queued or keepalive pending, place + * the connection in a STANDBY state */ + if (list_empty(&con->out_queue) && + !test_bit(KEEPALIVE_PENDING, &con->state)) { set_bit(STANDBY, &con->state); } else { /* retry after a delay. */ -- cgit v1.2.3 From 2f5f9486f8c12e3aa40fe3775a18cb14efc5cea2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 4 Mar 2011 17:36:29 -0800 Subject: mm: change alloc_pages_vma to pass down the policy node for local policy Currently alloc_pages_vma() always uses the local node as policy node for the LOCAL policy. Pass this node down as an argument instead. No behaviour change from this patch, but will be needed for followons. Acked-by: Andrea Arcangeli Signed-off-by: Andi Kleen Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 9 +++++---- mm/huge_memory.c | 2 +- mm/mempolicy.c | 11 +++++------ 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0b84c61607e8..37b8af5db091 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -332,16 +332,17 @@ alloc_pages(gfp_t gfp_mask, unsigned int order) return alloc_pages_current(gfp_mask, order); } extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, - struct vm_area_struct *vma, unsigned long addr); + struct vm_area_struct *vma, unsigned long addr, + int node); #else #define alloc_pages(gfp_mask, order) \ alloc_pages_node(numa_node_id(), gfp_mask, order) -#define alloc_pages_vma(gfp_mask, order, vma, addr) \ +#define alloc_pages_vma(gfp_mask, order, vma, addr, node) \ alloc_pages(gfp_mask, order) #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) -#define alloc_page_vma(gfp_mask, vma, addr) \ - alloc_pages_vma(gfp_mask, 0, vma, addr) +#define alloc_page_vma(gfp_mask, vma, addr) \ + alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id()) extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 3e29781ee762..c7c2cd925599 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -653,7 +653,7 @@ static inline struct page *alloc_hugepage_vma(int defrag, unsigned long haddr) { return alloc_pages_vma(alloc_hugepage_gfpmask(defrag), - HPAGE_PMD_ORDER, vma, haddr); + HPAGE_PMD_ORDER, vma, haddr, numa_node_id()); } #ifndef CONFIG_NUMA diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 49355a970be2..25a5a9146619 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1524,10 +1524,9 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy) } /* Return a zonelist indicated by gfp for node representing a mempolicy */ -static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy) +static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy, + int nd) { - int nd = numa_node_id(); - switch (policy->mode) { case MPOL_PREFERRED: if (!(policy->flags & MPOL_F_LOCAL)) @@ -1679,7 +1678,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr, zl = node_zonelist(interleave_nid(*mpol, vma, addr, huge_page_shift(hstate_vma(vma))), gfp_flags); } else { - zl = policy_zonelist(gfp_flags, *mpol); + zl = policy_zonelist(gfp_flags, *mpol, numa_node_id()); if ((*mpol)->mode == MPOL_BIND) *nodemask = &(*mpol)->v.nodes; } @@ -1820,7 +1819,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, */ struct page * alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, - unsigned long addr) + unsigned long addr, int node) { struct mempolicy *pol = get_vma_policy(current, vma, addr); struct zonelist *zl; @@ -1836,7 +1835,7 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, put_mems_allowed(); return page; } - zl = policy_zonelist(gfp, pol); + zl = policy_zonelist(gfp, pol, node); if (unlikely(mpol_needs_cond_ref(pol))) { /* * slow path: ref counted shared policy -- cgit v1.2.3 From 236344d6b417d05a3080477639234fd9ca97568d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 4 Mar 2011 17:36:30 -0800 Subject: mm: add alloc_page_vma_node() Add a alloc_page_vma_node that allows passing the "local" node in. Used in a followon patch. Acked-by: Andrea Arcangeli Signed-off-by: Andi Kleen Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 37b8af5db091..dca31761b311 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -343,6 +343,8 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) #define alloc_page_vma(gfp_mask, vma, addr) \ alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id()) +#define alloc_page_vma_node(gfp_mask, vma, addr, node) \ + alloc_pages_vma(gfp_mask, 0, vma, addr, node) extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); -- cgit v1.2.3