Skip to content

Commit

Permalink
workqueue: Initialize unbound CPU pods later in the boot
Browse files Browse the repository at this point in the history
During boot, to initialize unbound CPU pods, wq_pod_init() was called from
workqueue_init(). This is early enough for NUMA nodes to be set up but
before SMP is brought up and CPU topology information is populated.

Workqueue is in the process of improving CPU locality for unbound workqueues
and will need access to topology information during pod init. This adds a
new init function workqueue_init_topology() which is called after CPU
topology information is available and replaces wq_pod_init().

As unbound CPU pods are now initialized after workqueues are activated, we
need to revisit the workqueues to apply the pod configuration. Workqueues
which are created before workqueue_init_topology() are set up so that they
always use the default worker pool. After pods are set up in
workqueue_init_topology(), wq_update_pod() is called on all existing
workqueues to update the pool associations accordingly.

Note that wq_update_pod_attrs_buf allocation is moved to
workqueue_init_early(). This isn't necessary right now but enables further
generalization of pod handling in the future.

This patch changes the initialization sequence but the end result should be
the same.

Signed-off-by: Tejun Heo <tj@kernel.org>
  • Loading branch information
htejun committed Aug 8, 2023
1 parent a86feae commit 2930155
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 27 deletions.
1 change: 1 addition & 0 deletions include/linux/workqueue.h
Original file line number Diff line number Diff line change
Expand Up @@ -672,5 +672,6 @@ int workqueue_offline_cpu(unsigned int cpu);

void __init workqueue_init_early(void);
void __init workqueue_init(void);
void __init workqueue_init_topology(void);

#endif
1 change: 1 addition & 0 deletions init/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1540,6 +1540,7 @@ static noinline void __init kernel_init_freeable(void)
smp_init();
sched_init_smp();

workqueue_init_topology();
padata_init();
page_alloc_init_late();

Expand Down
68 changes: 41 additions & 27 deletions kernel/workqueue.c
Original file line number Diff line number Diff line change
Expand Up @@ -6256,17 +6256,15 @@ static inline void wq_watchdog_init(void) { }

#endif /* CONFIG_WQ_WATCHDOG */

static void wq_pod_init(void);

/**
* workqueue_init_early - early init for workqueue subsystem
*
* This is the first half of two-staged workqueue subsystem initialization
* and invoked as soon as the bare basics - memory allocation, cpumasks and
* idr are up. It sets up all the data structures and system workqueues
* and allows early boot code to create workqueues and queue/cancel work
* items. Actual work item execution starts only after kthreads can be
* created and scheduled right before early initcalls.
* This is the first step of three-staged workqueue subsystem initialization and
* invoked as soon as the bare basics - memory allocation, cpumasks and idr are
* up. It sets up all the data structures and system workqueues and allows early
* boot code to create workqueues and queue/cancel work items. Actual work item
* execution starts only after kthreads can be created and scheduled right
* before early initcalls.
*/
void __init workqueue_init_early(void)
{
Expand All @@ -6284,6 +6282,9 @@ void __init workqueue_init_early(void)

pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);

wq_update_pod_attrs_buf = alloc_workqueue_attrs();
BUG_ON(!wq_update_pod_attrs_buf);

/* initialize CPU pools */
for_each_possible_cpu(cpu) {
struct worker_pool *pool;
Expand Down Expand Up @@ -6381,11 +6382,11 @@ static void __init wq_cpu_intensive_thresh_init(void)
/**
* workqueue_init - bring workqueue subsystem fully online
*
* This is the latter half of two-staged workqueue subsystem initialization
* and invoked as soon as kthreads can be created and scheduled.
* Workqueues have been created and work items queued on them, but there
* are no kworkers executing the work items yet. Populate the worker pools
* with the initial workers and enable future kworker creations.
* This is the second step of three-staged workqueue subsystem initialization
* and invoked as soon as kthreads can be created and scheduled. Workqueues have
* been created and work items queued on them, but there are no kworkers
* executing the work items yet. Populate the worker pools with the initial
* workers and enable future kworker creations.
*/
void __init workqueue_init(void)
{
Expand All @@ -6395,26 +6396,19 @@ void __init workqueue_init(void)

wq_cpu_intensive_thresh_init();

/*
* It'd be simpler to initialize pods in workqueue_init_early() but CPU
* to node mapping may not be available that early on some archs such as
* power and arm64. As per-cpu pools created previously could be missing
* node hint and unbound pool pod affinity, fix them up.
*
* Also, while iterating workqueues, create rescuers if requested.
*/
wq_pod_init();

mutex_lock(&wq_pool_mutex);

/*
* Per-cpu pools created earlier could be missing node hint. Fix them
* up. Also, create a rescuer for workqueues that requested it.
*/
for_each_possible_cpu(cpu) {
for_each_cpu_worker_pool(pool, cpu) {
pool->node = cpu_to_node(cpu);
}
}

list_for_each_entry(wq, &workqueues, list) {
wq_update_pod(wq, smp_processor_id(), smp_processor_id(), true);
WARN(init_rescuer(wq),
"workqueue: failed to create early rescuer for %s",
wq->name);
Expand All @@ -6437,8 +6431,16 @@ void __init workqueue_init(void)
wq_watchdog_init();
}

static void __init wq_pod_init(void)
/**
* workqueue_init_topology - initialize CPU pods for unbound workqueues
*
* This is the third step of three-staged workqueue subsystem initialization and
* invoked after SMP and topology information are fully initialized. It
* initializes the unbound CPU pods accordingly.
*/
void __init workqueue_init_topology(void)
{
struct workqueue_struct *wq;
cpumask_var_t *tbl;
int node, cpu;

Expand All @@ -6452,8 +6454,7 @@ static void __init wq_pod_init(void)
}
}

wq_update_pod_attrs_buf = alloc_workqueue_attrs();
BUG_ON(!wq_update_pod_attrs_buf);
mutex_lock(&wq_pool_mutex);

/*
* We want masks of possible CPUs of each node which isn't readily
Expand All @@ -6474,6 +6475,19 @@ static void __init wq_pod_init(void)

wq_pod_cpus = tbl;
wq_pod_enabled = true;

/*
* Workqueues allocated earlier would have all CPUs sharing the default
* worker pool. Explicitly call wq_update_pod() on all workqueue and CPU
* combinations to apply per-pod sharing.
*/
list_for_each_entry(wq, &workqueues, list) {
for_each_online_cpu(cpu) {
wq_update_pod(wq, cpu, cpu, true);
}
}

mutex_unlock(&wq_pool_mutex);
}

void __warn_flushing_systemwide_wq(void)
Expand Down

0 comments on commit 2930155

Please sign in to comment.