Re: [PATCH 4/5] rcusync: introduce rcu_sync_struct->exclusive mode

From: Peter Zijlstra
Date: Fri Oct 04 2013 - 15:30:10 EST


On Fri, Oct 04, 2013 at 08:46:40PM +0200, Oleg Nesterov wrote:
> Add rcu_sync_struct->exclusive boolean set by rcu_sync_init(),
> it obviously controls the exclusiveness of rcu_sync_enter().
> This is what percpu_down_write() actually wants.
>
> We turn ->gp_wait into "struct completion gp_comp", it is used
> as a resource counter in "exclusive" mode. Otherwise we only use
> its completion->wait member for wait_event/wake_up_all. We never
> mix the completion/wait_queue_head_t operations.
>
> Note: it would be cleaner to do __complete_locked() under
> ->rss_lock in rcu_sync_exit() in the "else" branch, but we don't
> have this trivial helper.

Something equivalent using available functions would be:

rss->gp_comp.done++;
__wake_up_locked_key(&rss->gp_comp.wait, TASK_NORMAL, NULL);
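
Just to sketch where that would sit (the surrounding shape of
rcu_sync_exit() below is assumed from the changelog (drop the count
under ->rss_lock, release the exclusive waiter in the "else" branch),
not quoted from the patch):

	void rcu_sync_exit(struct rcu_sync_struct *rss)
	{
		spin_lock_irq(&rss->rss_lock);
		if (!--rss->gp_count) {
			/* ... queue/replay the callback as before ... */
		} else if (rss->exclusive) {
			/* open-coded __complete_locked(&rss->gp_comp) */
			rss->gp_comp.done++;
			__wake_up_locked_key(&rss->gp_comp.wait, TASK_NORMAL, NULL);
		}
		spin_unlock_irq(&rss->rss_lock);
	}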


> struct rcu_sync_struct {
> 	int			gp_state;
> 	int			gp_count;
> -	wait_queue_head_t	gp_wait;
> +	struct completion	gp_comp;
>
> 	int			cb_state;
> 	struct rcu_head		cb_head;
>
> +	bool			exclusive;
> 	struct rcu_sync_ops	*ops;
> };

I suppose we have a hole before or after cb_state to fit exclusive in;
as it stands, it looks like we're going to create another hole before
the *ops pointer.
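
To illustrate (a sketch only, not from the patch; assuming a 64-bit build
with typical !DEBUG_SPINLOCK sizes), moving the bool up next to cb_state
would reuse the existing hole instead of adding a new one:

	struct rcu_sync_struct {
		int			gp_state;
		int			gp_count;
		struct completion	gp_comp;

		int			cb_state;
		bool			exclusive;	/* sits in the hole before cb_head */
		struct rcu_head		cb_head;

		struct rcu_sync_ops	*ops;
	};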

> @@ -4,7 +4,7 @@
> enum { GP_IDLE = 0, GP_PENDING, GP_PASSED };
> enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
>
> -#define rss_lock gp_wait.lock
> +#define rss_lock gp_comp.wait.lock

Should we, for convenience, also do:

#define rss_wait gp_comp.wait
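
Purely as illustration (hypothetical use sites, mirroring how rss_lock is
used), that would let the waits read:

	wait_event(rss->rss_wait, rss->gp_state == GP_PASSED);

rather than spelling out gp_comp.wait everywhere.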

> #ifdef CONFIG_PROVE_RCU
> #define __INIT_HELD(func) .held = func,
> @@ -33,11 +33,13 @@ struct rcu_sync_ops rcu_sync_ops_array[] = {
> 	},
> };
>
> -void rcu_sync_init(struct rcu_sync_struct *rss, enum rcu_sync_type type)
> +void rcu_sync_init(struct rcu_sync_struct *rss,
> +		   enum rcu_sync_type type, bool excl)
> {
> 	memset(rss, 0, sizeof(*rss));
> -	init_waitqueue_head(&rss->gp_wait);
> +	init_completion(&rss->gp_comp);
> 	rss->ops = rcu_sync_ops_array + type;
> +	rss->exclusive = excl;
> }
>
> void rcu_sync_enter(struct rcu_sync_struct *rss)
> @@ -56,9 +58,13 @@ void rcu_sync_enter(struct rcu_sync_struct *rss)
> 	if (need_sync) {
> 		rss->ops->sync();
> 		rss->gp_state = GP_PASSED;
> -		wake_up_all(&rss->gp_wait);
> +		if (!rss->exclusive)
> +			wake_up_all(&rss->gp_comp.wait);
> 	} else if (need_wait) {
> -		wait_event(rss->gp_wait, rss->gp_state == GP_PASSED);
> +		if (!rss->exclusive)
> +			wait_event(rss->gp_comp.wait, rss->gp_state == GP_PASSED);
> +		else
> +			wait_for_completion(&rss->gp_comp);

I'm still not entirely sure why we need the completion; we already have
the gp_count variable and a waitqueue; together those should be able to
implement the condition/semaphore variable, no?

wait_for_completion:

	spin_lock_irq(&rss->rss_lock);
	if (rss->gp_count > 0) {
		__wait_event_locked(rss->gp_wait, !rss->gp_count,
				    TASK_UNINTERRUPTIBLE, 1, 0);
	}
	spin_unlock_irq(&rss->rss_lock);


complete:

	bool excl = rss->exclusive;

	spin_lock_irq(&rss->rss_lock);
	if (!--rss->gp_count) {
		__wake_up_locked_key(&rss->gp_wait, TASK_NORMAL, NULL);
	}
	spin_unlock_irq(&rss->rss_lock);


All we would need would be something like:

--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -422,7 +422,7 @@
 })
 
 
-#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \
+#define __wait_event_locked(wq, condition, state, exclusive, irq) \
 ({ \
 	int __ret = 0; \
 	DEFINE_WAIT(__wait); \
@@ -431,8 +431,8 @@ do { \
 	do { \
 		if (likely(list_empty(&__wait.task_list))) \
 			__add_wait_queue_tail(&(wq), &__wait); \
-		set_current_state(TASK_INTERRUPTIBLE); \
-		if (signal_pending(current)) { \
+		set_current_state(state); \
+		if (___wait_signal_pending(state)) { \
 			__ret = -ERESTARTSYS; \
 			break; \
 		} \
@@ -451,6 +451,8 @@ do { \
 	__ret; \
 })
 
+#define __wait_event_interruptible_locked(wq, condition, exclusive, irq)\
+	__wait_event_locked(wq, condition, TASK_INTERRUPTIBLE, exclusive, irq)

/**
* wait_event_interruptible_locked - sleep until a condition gets true