From 09735f0624b494c0959f3327af009283567af320 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 10 Jun 2025 13:27:13 +0530 Subject: [PATCH 1/5] smp: Fix typo in comment for raw_smp_processor_id() The comment in `smp.h` incorrectly refers to `raw_processor_id()` instead of the correct function name `raw_smp_processor_id()`. Suggested-by: Boqun Feng Signed-off-by: Viresh Kumar Signed-off-by: Thomas Gleixner Reviewed-by: Boqun Feng Link: https://lore.kernel.org/all/d096779819962c305b85cd12bda41b593e0981aa.1749536622.git.viresh.kumar@linaro.org --- include/linux/smp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/smp.h b/include/linux/smp.h index f1aa0952e8c3..bea8d2826e09 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -234,7 +234,7 @@ static inline int get_boot_cpu_id(void) #endif /* !SMP */ /** - * raw_processor_id() - get the current (unstable) CPU id + * raw_smp_processor_id() - get the current (unstable) CPU id * * For then you know what you are doing and need an unstable * CPU id. From 5f295519b42f100c735a1e8e1a70060e26f30c3f Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Sun, 22 Jun 2025 20:00:06 -0400 Subject: [PATCH 2/5] smp: Improve locality in smp_call_function_any() smp_call_function_any() tries to make a local call as it's the cheapest option, or switches to a CPU in the same node. If neither is possible, the algorithm gives up and searches for any online CPU in @mask, in numerical order. Instead, it can search for the best CPU based on NUMA locality, including the 2nd nearest hop (a set of equidistant nodes), and higher. sched_numa_find_nth_cpu() does exactly that, and also helps to drop most of the housekeeping code.
Signed-off-by: Yury Norov [NVIDIA] Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250623000010.10124-2-yury.norov@gmail.com --- kernel/smp.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/kernel/smp.c b/kernel/smp.c index 974f3a3962e8..7c8cfab0ce55 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -741,32 +741,19 @@ EXPORT_SYMBOL_GPL(smp_call_function_single_async); * * Selection preference: * 1) current cpu if in @mask - * 2) any cpu of current node if in @mask - * 3) any other online cpu in @mask + * 2) nearest cpu in @mask, based on NUMA topology */ int smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, void *info, int wait) { unsigned int cpu; - const struct cpumask *nodemask; int ret; /* Try for same CPU (cheapest) */ cpu = get_cpu(); - if (cpumask_test_cpu(cpu, mask)) - goto call; + if (!cpumask_test_cpu(cpu, mask)) + cpu = sched_numa_find_nth_cpu(mask, 0, cpu_to_node(cpu)); - /* Try for same node. */ - nodemask = cpumask_of_node(cpu_to_node(cpu)); - for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids; - cpu = cpumask_next_and(cpu, nodemask, mask)) { - if (cpu_online(cpu)) - goto call; - } - - /* Any online will do: smp_call_function_single handles nr_cpu_ids. */ - cpu = cpumask_any_and(mask, cpu_online_mask); -call: ret = smp_call_function_single(cpu, func, info, wait); put_cpu(); return ret; From 976e0e3103e463725e19a5493d02ce7b7b380663 Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Sun, 22 Jun 2025 20:00:07 -0400 Subject: [PATCH 3/5] smp: Use cpumask_any_and_but() in smp_call_function_many_cond() smp_call_function_many_cond() open-codes cpumask_any_and_but(). Use the helper instead.
Signed-off-by: Yury Norov [NVIDIA] Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250623000010.10124-3-yury.norov@gmail.com --- kernel/smp.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kernel/smp.c b/kernel/smp.c index 7c8cfab0ce55..5871acf3cd45 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -807,13 +807,8 @@ static void smp_call_function_many_cond(const struct cpumask *mask, run_local = true; /* Check if we need remote execution, i.e., any CPU excluding this one. */ - cpu = cpumask_first_and(mask, cpu_online_mask); - if (cpu == this_cpu) - cpu = cpumask_next_and(cpu, mask, cpu_online_mask); - if (cpu < nr_cpu_ids) + if (cpumask_any_and_but(mask, cpu_online_mask, this_cpu) < nr_cpu_ids) { run_remote = true; - - if (run_remote) { cfd = this_cpu_ptr(&cfd_data); cpumask_and(cfd->cpumask, mask, cpu_online_mask); __cpumask_clear_cpu(this_cpu, cfd->cpumask); From e0e9506523fea415e0d5abaa103fd67dc8a39696 Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Sun, 22 Jun 2025 20:00:09 -0400 Subject: [PATCH 4/5] smp: Defer check for local execution in smp_call_function_many_cond() Defer check for local execution to the actual place where it is needed, which removes the extra local variable. Signed-off-by: Yury Norov [NVIDIA] Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250623000010.10124-5-yury.norov@gmail.com --- kernel/smp.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/kernel/smp.c b/kernel/smp.c index 5871acf3cd45..99d1fd0e9e0e 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -779,7 +779,6 @@ static void smp_call_function_many_cond(const struct cpumask *mask, bool wait = scf_flags & SCF_WAIT; int nr_cpus = 0; bool run_remote = false; - bool run_local = false; lockdep_assert_preemption_disabled(); @@ -801,11 +800,6 @@ static void smp_call_function_many_cond(const struct cpumask *mask, */ WARN_ON_ONCE(!in_task()); - /* Check if we need local execution. 
*/ - if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask) && - (!cond_func || cond_func(this_cpu, info))) - run_local = true; - /* Check if we need remote execution, i.e., any CPU excluding this one. */ if (cpumask_any_and_but(mask, cpu_online_mask, this_cpu) < nr_cpu_ids) { run_remote = true; @@ -851,7 +845,9 @@ static void smp_call_function_many_cond(const struct cpumask *mask, send_call_function_ipi_mask(cfd->cpumask_ipi); } - if (run_local) { + /* Check if we need local execution. */ + if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask) && + (!cond_func || cond_func(this_cpu, info))) { unsigned long flags; local_irq_save(flags); From 946a7281982530d333eaee62bd1726f25908b3a9 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 2 Jul 2025 13:52:54 -0400 Subject: [PATCH 5/5] smp: Wait only if work was enqueued Whenever work is enqueued for a remote CPU, smp_call_function_many_cond() may need to wait for that work to be completed. However, if no work is enqueued for a remote CPU, because cond_func() evaluated to false for all CPUs, there is no need to wait. Set run_remote only if work was enqueued on remote CPUs. Document the difference between "work enqueued" and "CPU needs to be woken up". Suggested-by: Jann Horn Signed-off-by: Rik van Riel Signed-off-by: Thomas Gleixner Reviewed-by: Yury Norov (NVIDIA) Link: https://lore.kernel.org/all/20250703203019.11331ac3@fangorn --- kernel/smp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/smp.c b/kernel/smp.c index 99d1fd0e9e0e..c5e1da7a88da 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -802,7 +802,6 @@ static void smp_call_function_many_cond(const struct cpumask *mask, /* Check if we need remote execution, i.e., any CPU excluding this one.
*/ if (cpumask_any_and_but(mask, cpu_online_mask, this_cpu) < nr_cpu_ids) { - run_remote = true; cfd = this_cpu_ptr(&cfd_data); cpumask_and(cfd->cpumask, mask, cpu_online_mask); __cpumask_clear_cpu(this_cpu, cfd->cpumask); @@ -816,6 +815,9 @@ static void smp_call_function_many_cond(const struct cpumask *mask, continue; } + /* Work is enqueued on a remote CPU. */ + run_remote = true; + csd_lock(csd); if (wait) csd->node.u_flags |= CSD_TYPE_SYNC; @@ -827,6 +829,10 @@ static void smp_call_function_many_cond(const struct cpumask *mask, #endif trace_csd_queue_cpu(cpu, _RET_IP_, func, csd); + /* + * Kick the remote CPU if this is the first work + * item enqueued. + */ if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) { __cpumask_set_cpu(cpu, cfd->cpumask_ipi); nr_cpus++;