Analysis of kernel source code based on aarch64, part 5: the idle process (process 0)

1. Reference

linux – Process No. 0, Process No. 1, Process No. 2 – Flowing Light – Blog Park (cnblogs.com)

Linux Process No. 0, Process No. 1, Process No. 2 – CSDN Blog

2. Creation of the idle process

start_kernel
--> arch_call_rest_init
--> rest_init
--> cpu_startup_entry
--> while(1) { do_idle(); }

After start_kernel completes system initialization it never returns; it ends up in an infinite loop that calls do_idle(). In other words, once initialization is finished, the thread that executed start_kernel degenerates into the idle process.
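The tail of this call chain is easy to see in the scheduler's idle code. Below is a condensed sketch of cpu_startup_entry() as it appears in kernel/sched/idle.c; the exact contents vary slightly between kernel versions:

/* kernel/sched/idle.c (condensed; details depend on the kernel version) */
void cpu_startup_entry(enum cpuhp_state state)
{
	arch_cpu_idle_prepare();
	cpuhp_online_idle(state);
	while (1)
		do_idle();	/* the boot thread now runs the idle loop forever */
}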

3. idle process control block

The idle process's control block (PCB) is init_task, a global variable defined in init/init_task.c:

/*
 * Set up the first task table, touch at your own risk!. Base=0,
 * limit=0x1fffff (=2MB)
 */
struct task_struct init_task
#ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK
__init_task_data
#endif
__aligned(L1_CACHE_BYTES)
= {
#ifdef CONFIG_THREAD_INFO_IN_TASK
	.thread_info = INIT_THREAD_INFO(init_task),
	.stack_refcount = REFCOUNT_INIT(1),
#endif
	.__state = 0,
	.stack = init_stack, // init_stack is the statically allocated kernel stack; its placement is defined in the linker script
	.usage = REFCOUNT_INIT(2),
	.flags = PF_KTHREAD,
	.prio = MAX_PRIO - 20,
	.static_prio = MAX_PRIO - 20,
	.normal_prio = MAX_PRIO - 20,
	.policy = SCHED_NORMAL,
	.cpus_ptr = &init_task.cpus_mask,
	.user_cpus_ptr = NULL,
	.cpus_mask = CPU_MASK_ALL,
	.nr_cpus_allowed= NR_CPUS,
	.mm = NULL,
	.active_mm = &init_mm,
	.restart_block = {
		.fn = do_no_restart_syscall,
	},
	.se = {
		.group_node = LIST_HEAD_INIT(init_task.se.group_node),
	},
	.rt = {
		.run_list = LIST_HEAD_INIT(init_task.rt.run_list),
		.time_slice = RR_TIMESLICE,
	},
	.tasks = LIST_HEAD_INIT(init_task.tasks),
#ifdef CONFIG_SMP
	.pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO),
#endif
#ifdef CONFIG_CGROUP_SCHED
	.sched_task_group = &root_task_group,
#endif
	.ptraced = LIST_HEAD_INIT(init_task.ptraced),
	.ptrace_entry = LIST_HEAD_INIT(init_task.ptrace_entry),
	.real_parent = &init_task,
	.parent = &init_task,
	.children = LIST_HEAD_INIT(init_task.children),
	.sibling = LIST_HEAD_INIT(init_task.sibling),
	.group_leader = &init_task,
	RCU_POINTER_INITIALIZER(real_cred, &init_cred),
	RCU_POINTER_INITIALIZER(cred, &init_cred),
	.comm = INIT_TASK_COMM, // #define INIT_TASK_COMM "swapper" - the name of process 0
	.thread = INIT_THREAD,
	.fs = &init_fs,
	.files = &init_files,
#ifdef CONFIG_IO_URING
	.io_uring = NULL,
#endif
	.signal = &init_signals,
	.sighand = &init_sighand,
	.nsproxy = &init_nsproxy,
	.pending = {
		.list = LIST_HEAD_INIT(init_task.pending.list),
		.signal = {{0}}
	},
	.blocked = {{0}},
	.alloc_lock = __SPIN_LOCK_UNLOCKED(init_task.alloc_lock),
	.journal_info = NULL,
	INIT_CPU_TIMERS(init_task)
	.pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
	.timer_slack_ns = 50000, /* 50 usec default slack */
	.thread_pid = &init_struct_pid,
	.thread_group = LIST_HEAD_INIT(init_task.thread_group),
	.thread_node = LIST_HEAD_INIT(init_signals.thread_head),
#ifdef CONFIG_AUDIT
	.loginuid = INVALID_UID,
	.sessionid = AUDIT_SID_UNSET,
#endif
#ifdef CONFIG_PERF_EVENTS
	.perf_event_mutex = __MUTEX_INITIALIZER(init_task.perf_event_mutex),
	.perf_event_list = LIST_HEAD_INIT(init_task.perf_event_list),
#endif
#ifdef CONFIG_PREEMPT_RCU
	.rcu_read_lock_nesting = 0,
	.rcu_read_unlock_special.s = 0,
	.rcu_node_entry = LIST_HEAD_INIT(init_task.rcu_node_entry),
	.rcu_blocked_node = NULL,
#endif
#ifdef CONFIG_TASKS_RCU
	.rcu_tasks_holdout = false,
	.rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list),
	.rcu_tasks_idle_cpu = -1,
#endif
#ifdef CONFIG_TASKS_TRACE_RCU
	.trc_reader_nesting = 0,
	.trc_reader_special.s = 0,
	.trc_holdout_list = LIST_HEAD_INIT(init_task.trc_holdout_list),
	.trc_blkd_node = LIST_HEAD_INIT(init_task.trc_blkd_node),
#endif
#ifdef CONFIG_CPUSETS
	.mems_allowed_seq = SEQCNT_SPINLOCK_ZERO(init_task.mems_allowed_seq,
						 &init_task.alloc_lock),
#endif
#ifdef CONFIG_RT_MUTEXES
	.pi_waiters = RB_ROOT_CACHED,
	.pi_top_task = NULL,
#endif
	INIT_PREV_CPUTIME(init_task)
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
	.vtime.seqcount = SEQCNT_ZERO(init_task.vtime_seqcount),
	.vtime.starttime = 0,
	.vtime.state = VTIME_SYS,
#endif
#ifdef CONFIG_NUMA_BALANCING
	.numa_preferred_nid = NUMA_NO_NODE,
	.numa_group = NULL,
	.numa_faults = NULL,
#endif
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
	.kasan_depth = 1,
#endif
#ifdef CONFIG_KCSAN
	.kcsan_ctx = {
		.scoped_accesses = {LIST_POISON1, NULL},
	},
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
	.softirqs_enabled = 1,
#endif
#ifdef CONFIG_LOCKDEP
	.lockdep_depth = 0, /* no locks held yet */
	.curr_chain_key = INITIAL_CHAIN_KEY,
	.lockdep_recursion = 0,
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	.ret_stack = NULL,
	.tracing_graph_pause = ATOMIC_INIT(0),
#endif
#if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPTION)
	.trace_recursion = 0,
#endif
#ifdef CONFIG_LIVEPATCH
	.patch_state = KLP_UNDEFINED,
#endif
#ifdef CONFIG_SECURITY
	.security = NULL,
#endif
#ifdef CONFIG_SECCOMP_FILTER
	.seccomp = { .filter_count = ATOMIC_INIT(0) },
#endif
};
EXPORT_SYMBOL(init_task);
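A few of the values above are worth decoding. Using the kernel's priority macros (paraphrased below from include/linux/sched/prio.h; check your kernel version), the three priority fields work out to the default nice-0 priority, and .comm gives process 0 its well-known name "swapper":

/* Paraphrase of include/linux/sched/prio.h:
 *   MAX_RT_PRIO  = 100
 *   NICE_WIDTH   = 40
 *   MAX_PRIO     = MAX_RT_PRIO + NICE_WIDTH      = 140
 *   DEFAULT_PRIO = MAX_RT_PRIO + NICE_WIDTH / 2  = 120
 *
 * Hence .prio = .static_prio = .normal_prio = MAX_PRIO - 20 = 120,
 * i.e. init_task starts out with the ordinary nice-0 priority; the
 * scheduler later switches it to the idle scheduling class (see
 * init_idle() in kernel/sched/core.c).
 */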

4. Source code analysis

1. Set up the PCB of process 0

/* Code location: /arch/arm64/kernel/head.S */
/*
 * Initialize CPU registers with task-specific and cpu-specific context.
 *
 * Create a final frame record at task_pt_regs(current)->stackframe, so
 * that the unwinder can identify the final frame record of any task by
 * its location in the task stack. We reserve the entire pt_regs space
 * for consistency with user tasks and kthreads.
 */
.macro init_cpu_task tsk, tmp1, tmp2
	msr	sp_el0, \tsk			// sp_el0 holds the current task_struct pointer

	ldr	\tmp1, [\tsk, #TSK_STACK]	// sp = tsk->stack + THREAD_SIZE - PT_REGS_SIZE
	add	sp, \tmp1, #THREAD_SIZE
	sub	sp, sp, #PT_REGS_SIZE

	stp	xzr, xzr, [sp, #S_STACKFRAME]	// zero the final frame record (fp/lr)
	add	x29, sp, #S_STACKFRAME

	scs_load_current

	adr_l	\tmp1, __per_cpu_offset		// set this CPU's per-cpu base offset
	ldr	w\tmp2, [\tsk, #TSK_TI_CPU]
	ldr	\tmp1, [\tmp1, \tmp2, lsl #3]
	set_this_cpu_offset \tmp1
.endm

/*
 * The following fragment of code is executed with the MMU enabled.
 *
 * x0 = __pa(KERNEL_START)
 */
SYM_FUNC_START_LOCAL(__primary_switched)
	adr_l	x4, init_task
	init_cpu_task x4, x5, x6
	…
	bl	start_kernel
	ASM_BUG()
SYM_FUNC_END(__primary_switched)

The __primary_switched function loads the address of init_task into sp_el0 (and points sp at the top of init_task's kernel stack) and then calls start_kernel. From this point on, init_task serves as the process control block of process 0, and process 0 is running.
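To make the stack arithmetic in init_cpu_task concrete, here is a small userspace sketch (hypothetical names and placeholder constants, not kernel code) that mimics how the initial sp is derived from the task's stack:

#include <stdio.h>

/* Placeholder values for illustration only: the real THREAD_SIZE and
 * PT_REGS_SIZE come from the kernel configuration and from
 * sizeof(struct pt_regs) on arm64. */
#define THREAD_SIZE   (16 * 1024)
#define PT_REGS_SIZE  336

static unsigned char fake_init_stack[THREAD_SIZE];  /* stand-in for the kernel's init_stack */

int main(void)
{
	/* init_cpu_task: sp = tsk->stack + THREAD_SIZE - PT_REGS_SIZE */
	unsigned char *sp = fake_init_stack + THREAD_SIZE - PT_REGS_SIZE;

	printf("stack base : %p\n", (void *)fake_init_stack);
	printf("initial sp : %p  (top of the stack area, minus room for pt_regs)\n",
	       (void *)sp);
	return 0;
}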

2. Set the magic number at the bottom of the stack

A magic number is written at the end of the kernel stack so that stack overflows can be detected later.

/* The function is defined in /init/main.c */

asmlinkage __visible void __init __no_sanitize_address __noreturn start_kernel(void)
{
	char *command_line;
	char *after_dashes;

	set_task_stack_end_magic(&init_task);
	…
}
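set_task_stack_end_magic() itself is tiny. Condensed from kernel/fork.c (details may differ slightly between versions), it writes STACK_END_MAGIC at end_of_stack(), i.e. at the lowest address of the stack area, and the kernel later checks that word to detect an overflow:

/* Condensed from kernel/fork.c; may differ slightly between kernel versions. */
void set_task_stack_end_magic(struct task_struct *tsk)
{
	unsigned long *stackend;

	stackend = end_of_stack(tsk);	/* lowest address of the stack area */
	*stackend = STACK_END_MAGIC;	/* 0x57AC6E9D, checked on overflow */
}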

3. Other configurations

For detailed configuration, see the Linux source code.

5. Get the current PCB

/* The function is defined in the arch/arm64/include/asm/current.h file */

/*
 * We don't use read_sysreg() as we want the compiler to cache the value where
 * possible.
 */
static __always_inline struct task_struct *get_current(void)
{
	unsigned long sp_el0;

	asm ("mrs %0, sp_el0" : "=r" (sp_el0));

	return (struct task_struct *)sp_el0;
}

#define current get_current()
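Because sp_el0 always holds the task_struct pointer of the task currently running on the CPU (set up for process 0 by init_cpu_task and rewritten on every context switch), any kernel code can simply use current. As a quick illustration, here is a minimal sketch of an out-of-tree module (hypothetical module, standard module APIs) that prints the comm and PID of whichever process loads and unloads it:

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>

static int __init current_demo_init(void)
{
	/* current expands to get_current(), i.e. an mrs of sp_el0 on arm64 */
	pr_info("current_demo: loaded by %s (pid %d)\n",
		current->comm, task_pid_nr(current));
	return 0;
}

static void __exit current_demo_exit(void)
{
	pr_info("current_demo: unloaded by %s (pid %d)\n",
		current->comm, task_pid_nr(current));
}

module_init(current_demo_init);
module_exit(current_demo_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Tiny demo of the current macro");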