Understand the process deeply by printing information in the process control block

1. You can see process-related information through the top command.

Execute the top command in the terminal:

Explanation of the meaning of the above parameters:

Parameters	Meaning
10:13:26	Indicates the current time
up 3 min	System running time
1 users	Number of currently logged in users
load average: 0.73, 1.00, 0.47	System load, i.e. the average length of the run queue. The three values are the averages over the last 1, 5, and 15 minutes.
total	Total number of processes
running	Number of running processes
sleeping	Number of sleeping processes
stopped	Number of stopped processes
zombie	Number of zombie processes
%CPU	Percentage of CPU occupied
us	Percentage of CPU occupied by user space
sy	The percentage of CPU occupied by the kernel space
ni	The percentage of CPU occupied by processes that have changed priorities in the user process space
id	Idle CPU percentage
wa	Percentage of CPU time waiting for input and output
hi	Percentage of CPU occupied by hard interrupts
si	Percentage of CPU occupied by soft interrupts
st	Steal time: on a virtualized system, the percentage of CPU time taken away from this virtual machine by the hypervisor
KiB Mem	Total physical memory
used	The total amount of physical memory used
free	Total amount of free memory
buff/cache	Amount of memory used as kernel cache
KiB Swap	Total amount of swap area
avail Mem	Represents the amount of physical memory available for the next allocation of the process
PID	Process id
USER	Username of the process owner
PR	Priority
NI	Nice value. A negative value means higher priority, a positive value means lower priority
VIRT	The total amount of virtual memory used by the process, in kb. VIRT=SWAP + RES
RES	The size of the physical memory used by the process that has not been swapped out, in kb. RES=CODE + DATA
SHR	Shared memory size, unit kb
S	Process status. D=Uninterruptible sleep, R=Running, S=Sleeping, T=Traced/Stopped, Z=Zombie
%CPU	Percentage of CPU time used by the process since the last screen update
%MEM	Percentage of physical memory used by the process
TIME+	Total CPU time used by the process, in units of 1/100 second
COMMAND	Command name/command line

2. By printing the fields in task_struct, you can see more information. Print at least 10 fields, take screenshots, and include the relevant kernel source code to show your understanding of the process control block.

(1) Code execution

task_struct.c code:

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h> //task structure
#include <linux/fdtable.h> //files
#include <linux/init_task.h>
#include <linux/fs_struct.h>

MODULE_LICENSE("GPL"); //License
 
//entry function
/*
 * print_pcb - module entry point: walk the global task list and log selected
 * task_struct fields of every process on it.
 *
 * The walk starts at init_task (the PCB of process 0), which serves as the
 * list head and is therefore not printed itself. One newline-terminated log
 * record is emitted per process, followed by the number of processes seen.
 *
 * Returns 0 so that loading the module succeeds.
 */
static int __init print_pcb(void) //__init is provided by <linux/init.h>
{
        struct task_struct *p; //PCB currently being visited
        int count = 0; //number of processes found on the list

        printk(KERN_INFO "begin...\n");

        //Tasks can be created or exit while we walk the list, so hold the RCU read lock for the whole traversal.
        rcu_read_lock();

        //->tasks links all processes together; the iterator converts each list node back into its enclosing task_struct.
        list_for_each_entry_rcu(p, &init_task.tasks, tasks) {
                //An exiting task has already dropped ->files and ->fs, so they must be checked before use.
                //NOTE: they are read without task_lock(), so the values are only a best-effort snapshot.
                struct files_struct *files = p->files;
                struct fs_struct *fs = p->fs;

                count++;

                //state is a long and flags/ptrace are unsigned, so the specifiers must match.
                //-1 is printed for count/umask when the corresponding structure is gone.
                //The trailing \n makes each record appear in the log immediately, as its own line.
                printk(KERN_INFO "pid: %d; state: %ld; flags:%u;ptrace:%u;prior: %d; static_pri: %d;normal_prio:%d; parent_pid: %d; count: %d; umask : %d\n",
                       p->pid, p->state, p->flags, p->ptrace,
                       p->prio, p->static_prio, p->normal_prio,
                       rcu_dereference(p->parent)->pid,
                       files ? atomic_read(&files->count) : -1,
                       fs ? fs->umask : -1);
        }

        rcu_read_unlock();

        printk(KERN_INFO "Number of processes:%d\n", count);

        return 0;
}
 
/*
 * exit_pcb - module exit point; it only logs that the module is being removed.
 */
static void __exit exit_pcb(void)
{
        //The message must end in \n so that it is flushed to the log as a complete line.
        printk(KERN_INFO "Exiting...\n");
}
 
//Register the module's entry and exit points: print_pcb is the entry function, exit_pcb the exit function.
//module_init()/module_exit() are provided by <linux/module.h>.
module_init(print_pcb);
module_exit(exit_pcb);

Makefile code:

obj-m := task_struct.o

# Comments are kept on their own lines: a trailing "#comment" after a value
# would leave the spaces before it inside the variable.

# Directory that contains this module's sources
CURRENT_PATH := $(shell pwd)
# Release string of the running kernel
LINUX_KERNEL := $(shell uname -r)
# Header/build tree matching the running kernel
LINUX_KERNEL_PATH := /usr/src/linux-headers-$(LINUX_KERNEL)

.PHONY: all clean

# Build the module: -C enters the kernel build tree, M= tells it to build
# the sources in this directory and place the results here.
all:
	$(MAKE) -C $(LINUX_KERNEL_PATH) M=$(CURRENT_PATH) modules

# Remove the files generated by the build
clean:
	$(MAKE) -C $(LINUX_KERNEL_PATH) M=$(CURRENT_PATH) clean

Run results:

(2) Source code learning

/*
 * Abridged excerpt of the kernel's struct task_struct (the process control
 * block). Lines consisting of "..." mark members that were left out.
 */
struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/*
	 * For reasons of header soup (see current_thread_info()), this
	 * must be the first element of task_struct.
	 */
	struct thread_info thread_info; //holds fields that need frequent, fast access
#endif
	/* -1 unrunnable, 0 runnable, >0 stopped: */
	volatile long state; //current state of the task

	/*
	 * This begins the randomizable portion of task_struct. Only
	 * scheduling-critical items should be added above here.
	 */
	randomized_struct_fields_start

	void *stack; //kernel stack of the task
	refcount_t usage;
	/* Per task flags (PF_*), defined further below: */
	unsigned int flags; //per-task flag bits
	unsigned int ptrace; //ptrace state, used to implement tracing and breakpoint debugging

...

//closes a conditional block whose opening lies in the part omitted above
#endif
	int on_rq;

	int prio; //dynamic priority
	int static_prio; //static priority
	int normal_prio; //normal priority
	unsigned int rt_priority; //real-time priority

...

	unsigned int policy; //scheduling policy of the task

...

	struct sched_info sched_info; //per-task run statistics kept by the scheduler

	struct list_head tasks; /* doubly linked list node that chains all processes together; this is what makes it possible to traverse every process */

...

	pid_t pid; //process identifier
	pid_t tgid; //thread group id

...

	/* Real parent process: */
	struct task_struct __rcu *real_parent; //the process that created this one; if that process no longer exists, points to the init process

	/* Recipient of SIGCHLD, wait4() reports: */
	struct task_struct __rcu *parent; //current parent, usually the same as real_parent

	/*
	 * Children/sibling form the list of natural children:
	 */
	struct list_head children; //list of child processes
	struct list_head sibling; //node in the parent's list of children
	struct task_struct *group_leader; //leader thread of the thread group

...

	unsigned long nvcsw; //number of voluntary (active) context switches
	unsigned long nivcsw; //number of involuntary (passive) context switches

...

	struct fs_struct *fs; //filesystem information of the task (for example its umask)

	/* Open file information: */
	struct files_struct *files; //open-file table: all files opened by the process are reachable from here

...

	/* Signal handlers: */
	struct signal_struct *signal; /* signal descriptor tracking the shared pending signal queue; all threads of one thread group point to the same descriptor */
	struct sighand_struct *sighand; //signal handler descriptor
	sigset_t blocked; //mask of blocked signals
	sigset_t real_blocked; //temporary mask of blocked signals
	/* Restored if set_restore_sigmask() was used: */
	sigset_t saved_sigmask; //saved signal mask, kept so that it can be restored later

...

};

- pid is the process identifier.
- tgid is the thread group ID. Because the Linux kernel implements threads as tasks of their own, all threads of a process form a thread group, and the group needs a leader; the tgid is the pid of that leader. **A process is a thread group, so all threads of a process have the same tgid.** When the program starts running, there is only one main thread, and the tgid of this main thread is equal to its pid. When other threads are created, they inherit the tgid of the main thread. In this way, the kernel can use tgid to know which thread group, and therefore which process, a given task belongs to. When we use the ps command or interfaces such as getpid() to query the process ID, the kernel returns exactly this tgid.

3. Understanding of process control blocks

New understanding of process control block:

1. In the kernel, the process control block is described by the structure task_struct.

2. The system perceives the existence of a process through its PCB. The PCB is the only sign that the process exists and is running.

3. The PCB is a data structure that is read and written frequently in the kernel, so it must stay resident in memory.

My knowledge:

The process control block is a very important part of a process. If the process is compared to a person, then the process control block is that person's heart, controlling the various organs of the body.

4. Print relevant information of a specific process

Method 1: Use the method of passing parameters to the kernel module

Additional knowledge points:

module_param(name, type, perm)

Function: Kernel module parameter passing

parameter:

@name variable name/parameter name

@type parameter data type, short, ushort (unsigned short integer), int, uint, charp (character pointer)

@perm permissions of the parameter's entry in sysfs. The parameter is normally supplied only when the module is loaded and does not need to be changed afterwards, so perm is generally set to 0 (no sysfs entry).

Modify the above task_struct.c code as follows:

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h> //task structure
#include <linux/fdtable.h> //files
#include <linux/init_task.h>
#include <linux/fs_struct.h>

MODULE_LICENSE("GPL"); //License

int my=0;
module_param(my, int, 0);

//entry function
/*
 * print_pcb - module entry point: find the process whose pid equals the
 * module parameter `my` and log selected fields of its task_struct.
 *
 * The walk starts at init_task (the PCB of process 0), which serves as the
 * list head and is never compared itself, so my=0 matches nothing. A message
 * is logged when no process with the requested pid is found.
 *
 * Returns 0 so that loading the module succeeds.
 */
static int __init print_pcb(void) //__init is provided by <linux/init.h>
{
        struct task_struct *p; //PCB currently being visited
        bool found = false;

        printk(KERN_INFO "begin...\n");

        //Tasks can be created or exit while we walk the list, so hold the RCU read lock for the whole traversal.
        rcu_read_lock();

        //->tasks links all processes together; the iterator converts each list node back into its enclosing task_struct.
        list_for_each_entry_rcu(p, &init_task.tasks, tasks) {
                struct files_struct *files;
                struct fs_struct *fs;

                if (p->pid != my)
                        continue;

                //An exiting task has already dropped ->files and ->fs, so they must be checked before use.
                //NOTE: they are read without task_lock(), so the values are only a best-effort snapshot.
                files = p->files;
                fs = p->fs;

                //state is a long and flags/ptrace are unsigned, so the specifiers must match.
                //-1 is printed for count/umask when the corresponding structure is gone.
                printk(KERN_INFO "pid: %d; state: %ld; flags:%u;ptrace:%u;prior: %d; static_pri: %d;normal_prio:%d; parent_pid: %d; count: %d; umask : %d\n",
                       p->pid, p->state, p->flags, p->ptrace,
                       p->prio, p->static_prio, p->normal_prio,
                       rcu_dereference(p->parent)->pid,
                       files ? atomic_read(&files->count) : -1,
                       fs ? fs->umask : -1);

                //pids are unique on this list, so there is nothing more to look for.
                found = true;
                break;
        }

        rcu_read_unlock();

        if (!found)
                printk(KERN_INFO "no process with pid %d found\n", my);

        return 0;
}
 
/*
 * exit_pcb - module exit point; it only logs that the module is being removed.
 */
static void __exit exit_pcb(void)
{
        //The message must end in \n so that it is flushed to the log as a complete line.
        printk(KERN_INFO "Exiting...\n");
}
 
//Register the module's entry and exit points: print_pcb is the entry function, exit_pcb the exit function.
//module_init()/module_exit() are provided by <linux/module.h>.
module_init(print_pcb);
module_exit(exit_pcb);

Execute the following instructions in sequence:

make

//When loading, if a parameter is passed, the variable value is the passed value, otherwise it is the default initialization value

sudo insmod task_struct.ko my=1 //Here, take the process with pid as 1 as an example

dmesg

Method 2: Specify the printing process directly in the code

Modify the above task_struct.c code as follows:

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h> //task structure
#include <linux/fdtable.h> //files
#include <linux/init_task.h>
#include <linux/fs_struct.h>

MODULE_LICENSE("GPL"); //License

static int my = 1; //pid of the process to print. 1 is only an example value: replace it with the pid you want.

//entry function
/*
 * print_pcb - module entry point: find the process whose pid equals the
 * file-level variable `my` and log selected fields of its task_struct.
 *
 * The walk starts at init_task (the PCB of process 0), which serves as the
 * list head and is never compared itself, so a value of 0 matches nothing.
 * A message is logged when no process with the requested pid is found.
 *
 * Returns 0 so that loading the module succeeds.
 */
static int __init print_pcb(void) //__init is provided by <linux/init.h>
{
        struct task_struct *p; //PCB currently being visited
        bool found = false;

        printk(KERN_INFO "begin...\n");

        //Tasks can be created or exit while we walk the list, so hold the RCU read lock for the whole traversal.
        rcu_read_lock();

        //->tasks links all processes together; the iterator converts each list node back into its enclosing task_struct.
        list_for_each_entry_rcu(p, &init_task.tasks, tasks) {
                struct files_struct *files;
                struct fs_struct *fs;

                if (p->pid != my)
                        continue;

                //An exiting task has already dropped ->files and ->fs, so they must be checked before use.
                //NOTE: they are read without task_lock(), so the values are only a best-effort snapshot.
                files = p->files;
                fs = p->fs;

                //state is a long and flags/ptrace are unsigned, so the specifiers must match.
                //-1 is printed for count/umask when the corresponding structure is gone.
                printk(KERN_INFO "pid: %d; state: %ld; flags:%u;ptrace:%u;prior: %d; static_pri: %d;normal_prio:%d; parent_pid: %d; count: %d; umask : %d\n",
                       p->pid, p->state, p->flags, p->ptrace,
                       p->prio, p->static_prio, p->normal_prio,
                       rcu_dereference(p->parent)->pid,
                       files ? atomic_read(&files->count) : -1,
                       fs ? fs->umask : -1);

                //pids are unique on this list, so there is nothing more to look for.
                found = true;
                break;
        }

        rcu_read_unlock();

        if (!found)
                printk(KERN_INFO "no process with pid %d found\n", my);

        return 0;
}
 
/*
 * exit_pcb - module exit point; it only logs that the module is being removed.
 */
static void __exit exit_pcb(void)
{
        //The message must end in \n so that it is flushed to the log as a complete line.
        printk(KERN_INFO "Exiting...\n");
}
 
//Register the module's entry and exit points: print_pcb is the entry function, exit_pcb the exit function.
//module_init()/module_exit() are provided by <linux/module.h>.
module_init(print_pcb);
module_exit(exit_pcb);

**Doubt:** With either of the above methods, the information for the process with pid 1 appeared in the log only after the module had been unloaded.

After inserting the module dmesg:

dmesg after uninstalling the module:

**Solved:** The `\n` had not been added at the end of the printk format string. After adding it, the message is printed right away (the code above has already been modified). It seems that output without a trailing `\n` stays in a buffer and is written to the log only once a `\n` (newline) arrives, and that each newline-terminated message is recorded as one line in the log.