Driver development: implementing process assembly and disassembly through the kernel

The author's previous article, "Driver Development: Kernel MDL Read and Write Process Memory", briefly introduced how to read and write process memory through MDL mapping. Building on that example, this chapter implements remote process disassembly, which is one of the most common features of ARK tools. A feature of this kind is usually split into two parts: the kernel part is only responsible for reading and writing raw bytes, while the application-layer part uses a disassembly engine to decode those bytes. Here we will use the Capstone engine to achieve this.

First comes the driver part. The driver implementation is boilerplate that rarely changes: it only has to provide one read function and one write function. The complete code is as follows:

// authorship
// right to sign one's name on a piece of work
// PowerBy: LyShark
// Email: [email protected]
#include <ntifs.h>
#include <windef.h>

// IOCTL codes handled by the dispatch routine: buffered I/O, function 0x800 = read, 0x801 = write.
#define READ_PROCESS_CODE CTL_CODE(FILE_DEVICE_UNKNOWN,0x800,METHOD_BUFFERED,FILE_ALL_ACCESS)
#define WRITE_PROCESS_CODE CTL_CODE(FILE_DEVICE_UNKNOWN,0x801,METHOD_BUFFERED,FILE_ALL_ACCESS)

// NT device object name. Every backslash must be escaped inside the string literal.
#define DEVICENAME L"\\Device\\ReadWriteDevice"
// Symbolic link placed in the \?? (DosDevices) object directory so that a
// user-mode client can open the device as "\\.\ReadWriteSymbolName".
#define SYMBOLNAME L"\\??\\ReadWriteSymbolName"

typedef struct
{<!-- -->
DWORD pid; // process PID
UINT64 address; // Read and write address
DWORD size; // read and write length
BYTE* data; // read and write data set
}ProcessData;

// Read wrapper: copies ProcessData->size bytes located at ProcessData->address in
// the process identified by ProcessData->pid into the caller buffer ProcessData->data.
//
// The bytes are first staged in a nonpaged pool buffer while attached to the
// target process, then copied to ProcessData->data after detaching.
//
// Returns TRUE only when both copies succeeded; FALSE on invalid arguments,
// unknown PID, allocation failure or any access fault.
//
// NOTE(review): ProcessData->data is treated as a user-mode pointer that is valid
// in the context of the requesting thread — confirm this routine is only reached
// from the top-level IOCTL dispatch.
// NOTE(review): ProcessData->size is not capped here; a caller can request an
// arbitrarily large nonpaged allocation.
BOOLEAN ReadProcessMemory(ProcessData* ProcessData)
{
    BOOLEAN bRet = TRUE;
    PEPROCESS process = NULL;
    BYTE* GetProcessData = NULL;
    KAPC_STATE stack = { 0 };

    // Reject requests that cannot be served before taking any resource
    if (ProcessData == NULL || ProcessData->data == NULL || ProcessData->size == 0)
    {
        return FALSE;
    }

    // Convert PID to EProcess (takes a reference that must be released)
    if (!NT_SUCCESS(PsLookupProcessByProcessId((HANDLE)(ULONG_PTR)ProcessData->pid, &process)) || process == NULL)
    {
        return FALSE;
    }

    // Allocate the staging buffer; ExAllocatePool reports failure by returning NULL
    GetProcessData = ExAllocatePool(NonPagedPool, ProcessData->size);
    if (GetProcessData == NULL)
    {
        ObDereferenceObject(process);
        return FALSE;
    }

    // Attach to the target process so that its user addresses become current
    KeStackAttachProcess(process, &stack);

    __try
    {
        // Check that the source range is a readable user-mode range
        ProbeForRead((PVOID)(ULONG_PTR)ProcessData->address, ProcessData->size, 1);

        // Copy target memory into the staging buffer
        RtlCopyMemory(GetProcessData, (PVOID)(ULONG_PTR)ProcessData->address, ProcessData->size);
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        bRet = FALSE;
    }

    // Detach first, then drop the process reference
    KeUnstackDetachProcess(&stack);
    ObDereferenceObject(process);

    // Hand the data to the caller only when the read succeeded, so that
    // uninitialised pool contents are never disclosed
    if (bRet)
    {
        __try
        {
            ProbeForWrite(ProcessData->data, ProcessData->size, 1);
            RtlCopyMemory(ProcessData->data, GetProcessData, ProcessData->size);
        }
        __except (EXCEPTION_EXECUTE_HANDLER)
        {
            bRet = FALSE;
        }
    }

    // Free the staging buffer
    ExFreePool(GetProcessData);
    return bRet;
}

// MDL write wrapper: copies ProcessData->size bytes from the caller buffer
// ProcessData->data to ProcessData->address inside the process identified by
// ProcessData->pid.
//
// The source bytes are staged in nonpaged pool, then the target range is
// described by an MDL, locked, mapped into system space and overwritten
// through that mapping.
//
// Returns TRUE when the bytes were written; FALSE on invalid arguments,
// unknown PID, allocation failure, or any fault while probing, locking or mapping.
//
// NOTE(review): the pages are locked with IoReadAccess so that read-only pages
// (for example code) can still be patched through the system mapping;
// presumably this also bypasses copy-on-write for shared image pages — confirm
// before using it on modules shared between processes.
BOOLEAN WriteProcessMemory(ProcessData* ProcessData)
{
    BOOLEAN bRet = FALSE;
    BOOLEAN locked = FALSE;
    PEPROCESS process = NULL;
    BYTE* GetProcessData = NULL;
    BYTE* ChangeProcessData = NULL;
    PMDL mdl = NULL;
    KAPC_STATE stack = { 0 };

    if (ProcessData == NULL || ProcessData->data == NULL || ProcessData->size == 0)
    {
        return FALSE;
    }

    // Convert PID to EProcess (takes a reference that must be released)
    if (!NT_SUCCESS(PsLookupProcessByProcessId((HANDLE)(ULONG_PTR)ProcessData->pid, &process)) || process == NULL)
    {
        return FALSE;
    }

    // Allocate the staging buffer; ExAllocatePool reports failure by returning NULL
    GetProcessData = ExAllocatePool(NonPagedPool, ProcessData->size);
    if (GetProcessData == NULL)
    {
        ObDereferenceObject(process);
        return FALSE;
    }

    // Stage the caller's bytes before attaching: ProcessData->data is only
    // meaningful in the requesting process's address space
    __try
    {
        ProbeForRead(ProcessData->data, ProcessData->size, 1);
        RtlCopyMemory(GetProcessData, ProcessData->data, ProcessData->size);
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        ExFreePool(GetProcessData);
        ObDereferenceObject(process);
        return FALSE;
    }

    // Attach to the target process
    KeStackAttachProcess(process, &stack);

    // Allocate an MDL describing the target range
    mdl = IoAllocateMdl((PVOID)(ULONG_PTR)ProcessData->address, ProcessData->size, FALSE, FALSE, NULL);
    if (mdl != NULL)
    {
        __try
        {
            // The target is pageable user memory, so the pages have to be probed
            // and locked; MmBuildMdlForNonPagedPool is only valid for nonpaged buffers
            MmProbeAndLockPages(mdl, UserMode, IoReadAccess);
            locked = TRUE;

            // Map the locked pages into system space and make the mapping writable
            ChangeProcessData = MmMapLockedPagesSpecifyCache(mdl, KernelMode, MmCached, NULL, FALSE, NormalPagePriority);
            if (ChangeProcessData != NULL && NT_SUCCESS(MmProtectMdlSystemAddress(mdl, PAGE_READWRITE)))
            {
                // Start copying
                RtlCopyMemory(ChangeProcessData, GetProcessData, ProcessData->size);
                bRet = TRUE;
            }
        }
        __except (EXCEPTION_EXECUTE_HANDLER)
        {
            bRet = FALSE;
        }

        // Release in reverse order of acquisition: mapping, page lock, MDL
        if (ChangeProcessData != NULL)
        {
            MmUnmapLockedPages(ChangeProcessData, mdl);
        }
        if (locked)
        {
            MmUnlockPages(mdl);
        }
        IoFreeMdl(mdl);
    }

    // Detach, close the reference and free the staging buffer on every path
    KeUnstackDetachProcess(&stack);
    ObDereferenceObject(process);
    ExFreePool(GetProcessData);

    return bRet;
}

// Common dispatch routine for IRP_MJ_CREATE, IRP_MJ_CLOSE and IRP_MJ_DEVICE_CONTROL.
//
// Create and close are completed with STATUS_SUCCESS. For device control the
// buffered request is validated and forwarded to ReadProcessMemory or
// WriteProcessMemory according to the IOCTL code.
//
// Returns the status the IRP was completed with: STATUS_SUCCESS,
// STATUS_INVALID_PARAMETER (input buffer too small), STATUS_INVALID_DEVICE_REQUEST
// (unknown IOCTL) or STATUS_UNSUCCESSFUL (the read/write helper failed).
NTSTATUS DriverIrpCtl(PDEVICE_OBJECT device, PIRP pirp)
{
    PIO_STACK_LOCATION stack = IoGetCurrentIrpStackLocation(pirp);
    NTSTATUS status = STATUS_SUCCESS;
    ULONG_PTR information = 0;

    UNREFERENCED_PARAMETER(device);

    switch (stack->MajorFunction)
    {
    case IRP_MJ_CREATE:
    case IRP_MJ_CLOSE:
    {
        break;
    }

    case IRP_MJ_DEVICE_CONTROL:
    {
        // Get the application layer pass value
        ProcessData* request = pirp->AssociatedIrp.SystemBuffer;
        ULONG inputLength = stack->Parameters.DeviceIoControl.InputBufferLength;
        ULONG outputLength = stack->Parameters.DeviceIoControl.OutputBufferLength;
        BOOLEAN ok = FALSE;

        // The system buffer is only as large as the caller made it; never
        // read a request that is shorter than the structure
        if (request == NULL || inputLength < sizeof(ProcessData))
        {
            status = STATUS_INVALID_PARAMETER;
            break;
        }

        DbgPrint("Process ID: %d | Read and write address: %p | Read and write length: %d \n",
                 request->pid, (PVOID)(ULONG_PTR)request->address, request->size);

        switch (stack->Parameters.DeviceIoControl.IoControlCode)
        {
        // read function
        case READ_PROCESS_CODE:
        {
            ok = ReadProcessMemory(request);
            status = ok ? STATUS_SUCCESS : STATUS_UNSUCCESSFUL;
            break;
        }
        // write function
        case WRITE_PROCESS_CODE:
        {
            ok = WriteProcessMemory(request);
            status = ok ? STATUS_SUCCESS : STATUS_UNSUCCESSFUL;
            break;
        }
        default:
        {
            status = STATUS_INVALID_DEVICE_REQUEST;
            break;
        }
        }

        // Information is the number of bytes copied back to the caller's output
        // buffer, so it must never exceed that buffer's length
        if (ok && outputLength >= sizeof(ProcessData))
        {
            information = sizeof(ProcessData);
        }
        break;
    }

    default:
    {
        break;
    }
    }

    // The IRP must not be touched after completion, so return the local status
    pirp->IoStatus.Status = status;
    pirp->IoStatus.Information = information;
    IoCompleteRequest(pirp, IO_NO_INCREMENT);
    return status;
}

// Unload routine: removes the symbolic link and the device object, provided a
// device object exists for this driver.
VOID UnDriver(PDRIVER_OBJECT driver)
{
    if (driver->DeviceObject)
    {
        UNICODE_STRING SymbolName;
        RtlInitUnicodeString(&SymbolName, SYMBOLNAME);

        // remove the symbolic link first, then the device it points to
        IoDeleteSymbolicLink(&SymbolName);
        IoDeleteDevice(driver->DeviceObject);
    }
}

// Driver entry point: creates the device object and its symbolic link, then
// registers the dispatch and unload routines.
//
// Returns STATUS_SUCCESS when both the device and the link were created;
// otherwise the failing status is returned so that the driver is not left
// loaded without a usable device.
NTSTATUS DriverEntry(IN PDRIVER_OBJECT Driver, PUNICODE_STRING RegistryPath)
{
    NTSTATUS status = STATUS_SUCCESS;
    PDEVICE_OBJECT device = NULL;
    UNICODE_STRING DeviceName;
    UNICODE_STRING SymbolName;

    UNREFERENCED_PARAMETER(RegistryPath);

    DbgPrint("[LyShark] hello lyshark.com\n");

    // Initialize the device and link names
    RtlInitUnicodeString(&DeviceName, DEVICENAME);
    RtlInitUnicodeString(&SymbolName, SYMBOLNAME);

    // create device; no device extension is used, so its size is 0
    status = IoCreateDevice(Driver, 0, &DeviceName, FILE_DEVICE_UNKNOWN, FILE_DEVICE_SECURE_OPEN, FALSE, &device);
    if (!NT_SUCCESS(status))
    {
        return status;
    }

    // create symbolic link; delete the device again if that fails
    status = IoCreateSymbolicLink(&SymbolName, &DeviceName);
    if (!NT_SUCCESS(status))
    {
        IoDeleteDevice(device);
        return status;
    }

    // dispatch function initialization
    Driver->MajorFunction[IRP_MJ_CREATE] = DriverIrpCtl;
    Driver->MajorFunction[IRP_MJ_CLOSE] = DriverIrpCtl;
    Driver->MajorFunction[IRP_MJ_DEVICE_CONTROL] = DriverIrpCtl;

    // Uninstall the driver
    Driver->DriverUnload = UnDriver;

    return STATUS_SUCCESS;
}

The driver above is very simple and its key parts have been commented, so adapting this kind of driver is not difficult. Next comes the focus of this lesson: the Capstone disassembly engine. Capstone is a lightweight, multi-platform, multi-architecture disassembly framework that aims to be the ultimate disassembly engine for binary analysis and reverse engineering in the security community. It supports disassembly for many architectures and is highly recommended.

  • Disassembly engine download address: https://cdn.lyshark.com/sdk/capstone_msvc12.zip

If you want to use this disassembly engine, the first step is to call cs_open(). The official explanation is to open a handle. The parameters of this open function are as follows;

  • Parameter 1: The target architecture; CS_ARCH_X86 selects the x86 instruction set
  • Parameter 2: Execution bits CS_MODE_32 is 32-bit mode, CS_MODE_64 is 64-bit
  • Parameter 3: A pointer that receives the opened handle, e.g. &dasm_handle

The second and most important step is to call the cs_disasm() disassembly function. The explanation of this function is as follows;

  • Parameter 1: Specify the dasm_handle disassembly handle
  • Parameter 2: Specify the dataset you want to disassemble or a buffer
  • Parameter 3: Specify the length you want to disassemble 64
  • Parameter 4: The starting position of the output memory address 0x401000
  • Parameter 5: The number of instructions to disassemble; 0 (the usual choice) means disassemble everything in the buffer
  • Parameter 6: A pointer for output data

If these two functions can be understood, then the following disassembly of the complete code can be understood.

#define _CRT_SECURE_NO_WARNINGS
#include <Windows.h>
#include <iostream>
#include <inttypes.h>
#include <capstone/capstone.h>

#pragma comment(lib,"capstone64.lib")

// IOCTL codes for the read and write requests; the driver switches on these
// same values, so both definitions have to stay identical.
#define READ_PROCESS_CODE CTL_CODE(FILE_DEVICE_UNKNOWN,0x800,METHOD_BUFFERED,FILE_ALL_ACCESS)
#define WRITE_PROCESS_CODE CTL_CODE(FILE_DEVICE_UNKNOWN,0x801,METHOD_BUFFERED,FILE_ALL_ACCESS)

// Request block passed to the driver through DeviceIoControl.
typedef struct
{
    DWORD pid;      // target process PID
    UINT64 address; // address inside the target process
    DWORD size;     // number of bytes to transfer
    BYTE* data;     // local buffer that receives or supplies the bytes
}ProcessData;

int main(int argc, char* argv[])
{<!-- -->
// connect to the driver
HANDLE handle = CreateFileA("\\ReadWriteSymbolName", GENERIC_READ | GENERIC_WRITE, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);

ProcessData data;
DWORD dwSize = 0;

// Specify the process that needs to read and write
data.pid = 6932;
data.address = 0x401000;
data.size = 64;

// Read machine code to BYTE byte array
data.data = new BYTE[data.size];
DeviceIoControl(handle, READ_PROCESS_CODE, &data, sizeof(data), &data, sizeof(data), &dwSize, NULL);
for (int i = 0; i < data.size; i ++ )
{<!-- -->
printf("0x X ", data. data[i]);
}

printf("\
");

// start disassembly
csh dasm_handle;
cs_insn *insn;
size_t count;

// open the handle
if (cs_open(CS_ARCH_X86, CS_MODE_32, & dasm_handle) != CS_ERR_OK)
{<!-- -->
return 0;
}

// Disassembly code
count = cs_disasm(dasm_handle, (unsigned char *)data.data, data.size, data.address, 0, &insn);

if (count > 0)
{<!-- -->
size_t index;
for (index = 0; index < count; index ++ )
{<!-- -->
/*
for (int x = 0; x < insn[index].size; x ++ )
{
printf("Machine code: %d -> X \
", x, insn[index].bytes[x]);
}
*/

printf("Address: 0x%"PRIx64" | Length: %d Disassembly: %s %s \
", insn[index].address, insn[index].size, insn[index].mnemonic, insn[index ].op_str);
}
cs_free(insn, count);
}
cs_close( & dasm_handle);

getchar();
CloseHandle(handle);
return 0;
}

Load WinDDK.sys with a driver loading tool and then run this program. You will see the correct output: the disassembly of the 64 bytes starting at the specified address.

Having covered disassembly, we now turn to assembling instructions into memory. The assembly engine used here is XEDParse, which is small and concise; the well-known x64dbg uses this engine for its assemble-and-replace feature. Using it is very simple: you only need to pass a filled-in structure to the XEDParseAssemble() function to perform the conversion. The complete code is as follows.

  • Assembly engine download address: https://cdn.lyshark.com/sdk/XEDParse.zip
#define _CRT_SECURE_NO_WARNINGS
#include <Windows.h>
#include <iostream>

// XEDParse exposes a C interface, so its header is included with C linkage.
extern "C"
{
#include "D:/XEDParse/XEDParse.h"
#pragma comment(lib, "D:/XEDParse/XEDParse_x64.lib")
}

using namespace std;

// IOCTL codes for the read and write requests; the driver switches on these
// same values, so both definitions have to stay identical.
#define READ_PROCESS_CODE CTL_CODE(FILE_DEVICE_UNKNOWN,0x800,METHOD_BUFFERED,FILE_ALL_ACCESS)
#define WRITE_PROCESS_CODE CTL_CODE(FILE_DEVICE_UNKNOWN,0x801,METHOD_BUFFERED,FILE_ALL_ACCESS)

// Request block passed to the driver through DeviceIoControl.
typedef struct
{
    DWORD pid;      // target process PID
    UINT64 address; // address inside the target process
    DWORD size;     // number of bytes to transfer
    BYTE* data;     // local buffer that receives or supplies the bytes
}ProcessData;

int main(int argc, char* argv[])
{<!-- -->
// connect to the driver
HANDLE handle = CreateFileA("\\ReadWriteSymbolName", GENERIC_READ | GENERIC_WRITE, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);

ProcessData data;
DWORD dwSize = 0;

// Specify the process that needs to read and write
data.pid = 6932;
data.address = 0x401000;
data.size = 0;

XEDPARSE xed = {<!-- --> 0 };
xed.x64 = FALSE;

// Enter an assembly instruction and convert
scanf_s("%llx", &xed.cip);
gets_s(xed.instr, XEDPARSE_MAXBUFSIZE);
if (XEDPARSE_OK != XEDParseAssemble( & xed))
{<!-- -->
printf("Command error: %s\
", xed.error);
}

// generate heap
data.data = new BYTE[xed.dest_size];

// set the length
data.size = xed.dest_size;

for (size_t i = 0; i < xed.dest_size; i ++ )
{<!-- -->
// replace in the heap
printf(" X ", xed. dest[i]);
data.data[i] = xed.dest[i];
}

// Call the controller and write to the remote memory
DeviceIoControl(handle, WRITE_PROCESS_CODE, &data, sizeof(data), &data, sizeof(data), &dwSize, NULL);

printf("[LyShark] instruction set has been replaced. \
");
getchar();
CloseHandle(handle);
return 0;
}

Load WinDDK.sys with a driver loading tool and then run this program. You will see the correct output, and you can open an ARK (anti-rootkit) tool to verify whether the rewrite succeeded.

Open the ARK tool and check whether the machine code of the instruction mov eax,1 has been written at the target address. As shown in the figure below, it has been written correctly.