[RFC PATCH 03/18] sched/isolation: Use RCU to delay successive housekeeping cpumask updates

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Even though there are 2 separate sets of housekeeping cpumasks for access
and update, it is possible that the set of cpumasks being updated is
still in use by callers of the housekeeping functions, resulting in a
caller observing an intermediate cpumask between the new and old ones.

To reduce the chance of this, we need to introduce a delay between
successive housekeeping cpumask updates. One simple way is to make
use of the RCU grace period delay. Callers of the housekeeping APIs
can optionally hold rcu_read_lock() to eliminate the chance of using
an intermediate housekeeping cpumask.

Signed-off-by: Waiman Long <longman@xxxxxxxxxx>
---
 kernel/sched/isolation.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index ee396ae13719..f26708667754 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -23,6 +23,9 @@ EXPORT_SYMBOL_GPL(housekeeping_overridden);
  * The housekeeping cpumasks can now be dynamically updated at run time.
  * Two set of cpumasks are kept. One set can be used while the other set are
  * being updated concurrently.
+ *
+ * rcu_read_lock() can optionally be held by housekeeping API callers to
+ * ensure stability of the cpumasks.
  */
 static DEFINE_RAW_SPINLOCK(cpumask_lock);
 struct housekeeping {
@@ -34,6 +37,8 @@ struct housekeeping {
 
 static struct housekeeping housekeeping;
 static bool sched_tick_offload_inited;
+static struct rcu_head rcu_gp[HK_TYPE_MAX];
+static unsigned long update_flags;
 
 bool housekeeping_enabled(enum hk_type type)
 {
@@ -267,6 +272,18 @@ static int __init housekeeping_isolcpus_setup(char *str)
 }
 __setup("isolcpus=", housekeeping_isolcpus_setup);
 
+/*
+ * Bits in update_flags can only be turned on with cpumask_lock held and
+ * cleared by this RCU callback function.
+ */
+static void rcu_gp_end(struct rcu_head *rcu)
+{
+	int type = rcu - rcu_gp;
+
+	/* Atomically clear the corresponding flag bit */
+	clear_bit(type, &update_flags);
+}
+
 /**
  * housekeeping_exclude_cpumask - Update housekeeping cpumasks to exclude only the given cpumask
  * @cpumask:  new cpumask to be excluded from housekeeping cpumasks
@@ -306,8 +323,21 @@ int housekeeping_exclude_cpumask(struct cpumask *cpumask, unsigned long hk_flags
 	}
 #endif
 
+retry:
+	/*
+	 * If the RCU grace period for the previous update with conflicting
+	 * flag bits hasn't been completed yet, we have to wait for it.
+	 */
+	while (READ_ONCE(update_flags) & hk_flags)
+		synchronize_rcu();
+
 	raw_spin_lock(&cpumask_lock);
 
+	if (READ_ONCE(update_flags) & hk_flags) {
+		raw_spin_unlock(&cpumask_lock);
+		goto retry;
+	}
+
 	for_each_set_bit(type, &hk_flags, HK_TYPE_MAX) {
 		int idx = ++housekeeping.seq_nrs[type] & 1;
 		struct cpumask *dst_cpumask = housekeeping.cpumasks[type][idx];
@@ -320,8 +350,11 @@ int housekeeping_exclude_cpumask(struct cpumask *cpumask, unsigned long hk_flags
 			housekeeping.flags |= BIT(type);
 		}
 		WRITE_ONCE(housekeeping.cpumask_ptrs[type], dst_cpumask);
+		set_bit(type, &update_flags);
 	}
 	raw_spin_unlock(&cpumask_lock);
+	for_each_set_bit(type, &hk_flags, HK_TYPE_MAX)
+		call_rcu(&rcu_gp[type], rcu_gp_end);
 
 	if (!housekeeping.flags && static_key_enabled(&housekeeping_overridden))
 		static_key_disable(&housekeeping_overridden.key);
-- 
2.50.0





[Index of Archives]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux