Question number 5 on page 35 of Practical Reverse Engineering is as follows:
Decompile the following kernel routines in Windows:
- KeInitializeDpc
- KeInitializeApc
- ObFastDereferenceObject (explain its calling convention)
- KeInitializeQueue
- KxWaitForLockChainValid
- KeReadyThread
- KiInitializeTSS
- RtlValidateUnicodeString
KeInitializeDpc
Inside ntoskrnl.exe, KeInitializeDpc has the following prototype:
VOID NTAPI KeInitializeDpc(
PRKDPC Dpc,
PKDEFERRED_ROUTINE DeferredRoutine,
PVOID DeferredContext);
This has a parameter for the KDPC struct, which contains a LIST_ENTRY. These are defined as:
typedef struct _LIST_ENTRY {
struct _LIST_ENTRY *Flink; /* 0x0 */
struct _LIST_ENTRY *Blink; /* 0x8 */
} LIST_ENTRY, *PLIST_ENTRY;
typedef struct _KDPC
{
UCHAR Type; /* 0x0 */
UCHAR Importance; /* 0x1 */
WORD Number; /* 0x2 */
BYTE Unknown[4]; /* 0x4 */
LIST_ENTRY DpcListEntry; /* 0x8 */
PVOID DeferredRoutine; /* 0x18 */
PVOID DeferredContext; /* 0x20 */
PVOID SystemArgument1; /* 0x28 */
PVOID SystemArgument2; /* 0x30 */
PVOID DpcData; /* 0x38 */
} KDPC, *PKDPC;
Here is the disassembly:
; KeInitializeDpc -- rcx = Dpc, rdx = DeferredRoutine, r8 = DeferredContext
KeInitializeDpc:
xor eax, eax ; rax = 0 (a 32-bit write also clears the upper half)
mov dword ptr [rcx], 113h ; one store for offsets 0x0-0x3: bytes 13h, 01h, 00h, 00h
mov [rcx+18h], rdx ; qword at +0x18 = DeferredRoutine
mov [rcx+38h], rax ; qword at +0x38 = 0
mov [rcx+10h], rax ; qword at +0x10 = 0
mov [rcx+20h], r8 ; qword at +0x20 = DeferredContext
retn
The first MOV is an optimization which sets the first 3 variables in the struct, as it sets a dword to 0x113 (0b100010011). Everything else lines up easily enough. Here is the fully decompiled function.
/*
 * Fills in a caller-supplied KDPC: header constants, the routine and its
 * context, and two zeroed pointer fields. Nothing else in the structure is
 * touched.
 */
VOID NTAPI KeInitializeDpc(
    PRKDPC Dpc,
    PKDEFERRED_ROUTINE DeferredRoutine,
    PVOID DeferredContext)
{
    /*
     * mov dword ptr [rcx], 113h
     * A single 32-bit store covers the three header fields; in memory the
     * value is the byte sequence 13 01 00 00.
     */
    Dpc->Type = 0x13;
    Dpc->Importance = 0x01;
    Dpc->Number = 0x0000;

    /* mov [rcx+18h], rdx */
    Dpc->DeferredRoutine = DeferredRoutine;

    /* mov [rcx+38h], rax   (rax was zeroed by xor eax, eax) */
    Dpc->DpcData = NULL;

    /* mov [rcx+10h], rax */
    Dpc->DpcListEntry.Blink = NULL;

    /* mov [rcx+20h], r8 */
    Dpc->DeferredContext = DeferredContext;
}
KeInitializeApc
Inside ntoskrnl.exe, KeInitializeApc has the following prototype:
/*
 * Argument passing as seen in the disassembly: the first four arguments
 * arrive in rcx, rdx, r8d and r9; the remaining four are read from the
 * stack at [rsp+28h], [rsp+30h], [rsp+38h] and [rsp+40h].
 */
VOID NTAPI KeInitializeApc(
    _In_ PKAPC Apc,
    _In_ PKTHREAD Thread,
    _In_ KAPC_ENVIRONMENT TargetEnvironment,
    _In_ PKKERNEL_ROUTINE KernelRoutine,
    _In_opt_ PKRUNDOWN_ROUTINE RundownRoutine,
    _In_ PKNORMAL_ROUTINE NormalRoutine,
    _In_ KPROCESSOR_MODE Mode,
    _In_ PVOID Context);
Here is the KAPC struct with added offsets:
typedef struct _KAPC
{
UCHAR Type; /* 0x0 */
UCHAR SpareByte0; /* 0x1 */
UCHAR Size; /* 0x2 */
UCHAR SpareByte1; /* 0x3 */
ULONG SpareLong0; /* 0x4 */
PKTHREAD Thread; /* 0x8 */
LIST_ENTRY ApcListEntry; /* 0x10 */
PVOID KernelRoutine; /* 0x20 */
PVOID RundownRoutine; /* 0x28 */
PVOID NormalRoutine; /* 0x30 */
PVOID NormalContext; /* 0x38 */
PVOID SystemArgument1; /* 0x40 */
PVOID SystemArgument2; /* 0x48 */
CHAR ApcStateIndex; /* 0x50 */
CHAR ApcMode; /* 0x51 */
UCHAR Inserted; /* 0x52 */
} KAPC, *PKAPC;
And here is the disassembly:
; KeInitializeApc -- rcx = Apc, rdx = Thread, r8d = TargetEnvironment, r9 = KernelRoutine;
; arguments 5-8 are read from the stack at [rsp+28h], [rsp+30h], [rsp+38h], [rsp+40h]
KeInitializeApc:
mov byte ptr [rcx], 12h ; byte at Apc+0x0 = 12h
mov byte ptr [rcx+2], 58h ; byte at Apc+0x2 = 58h
cmp r8d, 2
jz short loc_1400BAAAF ; TargetEnvironment == 2: take the index from the thread instead
mov [rcx+50h], r8b ; byte at Apc+0x50 = low byte of TargetEnvironment
loc_1400BAA71:
mov rax, [rsp+28h] ; rax = 5th argument (RundownRoutine)
mov [rcx+8], rdx ; qword at Apc+0x8 = Thread
xor edx, edx ; rdx = 0 from here on; Thread is no longer needed
mov [rcx+28h], rax ; qword at Apc+0x28 = RundownRoutine
mov rax, [rsp+30h] ; rax = 6th argument (NormalRoutine)
mov [rcx+20h], r9 ; qword at Apc+0x20 = KernelRoutine
mov [rcx+30h], rax ; qword at Apc+0x30 = NormalRoutine
test rax, rax
jnz short loc_1400BAA9D ; NormalRoutine != 0: copy Mode and Context
mov [rcx+51h], dl ; byte at Apc+0x51 = 0
mov [rcx+38h], rdx ; qword at Apc+0x38 = 0
loc_1400BAA99:
mov [rcx+52h], dl ; byte at Apc+0x52 = 0 (dl is zero on both paths)
retn
loc_1400BAA9D:
mov al, [rsp+38h] ; al = low byte of the 7th argument (Mode)
mov [rcx+51h], al ; byte at Apc+0x51 = Mode
mov rax, [rsp+40h] ; rax = 8th argument (Context)
mov [rcx+38h], rax ; qword at Apc+0x38 = Context
jmp short loc_1400BAA99
loc_1400BAAAF:
mov al, [rdx+242h] ; al = byte at Thread+0x242
mov [rcx+50h], al ; byte at Apc+0x50 = that byte
jmp short loc_1400BAA71
This routine contains a couple of if statements, but otherwise it just writes the arguments and some constants into the struct.
/*
 * Initializes a caller-supplied KAPC.
 *
 * Apc               - structure to fill in (rcx)
 * Thread            - stored in Apc->Thread; also the source of the state
 *                     index when TargetEnvironment is CurrentApcEnvironment (rdx)
 * TargetEnvironment - stored (low byte) in Apc->ApcStateIndex unless it is
 *                     CurrentApcEnvironment (r8d)
 * KernelRoutine     - stored as-is (r9)
 * RundownRoutine    - stored as-is ([rsp+28h])
 * NormalRoutine     - stored as-is; when it is NULL, Mode and Context are
 *                     ignored and zero is stored instead ([rsp+30h])
 * Mode, Context     - stored only when NormalRoutine is non-NULL
 *                     ([rsp+38h], [rsp+40h])
 */
VOID NTAPI KeInitializeApc(
    _In_ PKAPC Apc,
    _In_ PKTHREAD Thread,
    _In_ KAPC_ENVIRONMENT TargetEnvironment,
    _In_ PKKERNEL_ROUTINE KernelRoutine,
    _In_opt_ PKRUNDOWN_ROUTINE RundownRoutine,
    _In_ PKNORMAL_ROUTINE NormalRoutine,
    _In_ KPROCESSOR_MODE Mode,
    _In_ PVOID Context)
{
    Apc->Type = 0x12;                               /* mov byte ptr [rcx], 12h */
    Apc->Size = 0x58;                               /* mov byte ptr [rcx+2], 58h */

    /* cmp r8d, 2 / jz loc_1400BAAAF */
    if ((DWORD)TargetEnvironment == CurrentApcEnvironment)
        Apc->ApcStateIndex = Thread->ApcStateIndex; /* mov al, [rdx+242h] / mov [rcx+50h], al */
    else
        Apc->ApcStateIndex = TargetEnvironment;     /* mov [rcx+50h], r8b */

    Apc->Thread = Thread;                           /* mov [rcx+8], rdx */
    Apc->RundownRoutine = RundownRoutine;           /* mov [rcx+28h], rax */
    Apc->KernelRoutine = KernelRoutine;             /* mov [rcx+20h], r9 */
    Apc->NormalRoutine = NormalRoutine;             /* mov [rcx+30h], rax */

    /* test rax, rax / jnz loc_1400BAA9D */
    if (NormalRoutine != 0)
    {
        Apc->ApcMode = Mode;                        /* mov [rcx+51h], al */
        Apc->NormalContext = Context;               /* mov [rcx+38h], rax */
    }
    else
    {
        Apc->ApcMode = 0;                           /* mov [rcx+51h], dl */
        Apc->NormalContext = 0;                     /* mov [rcx+38h], rdx */
    }

    Apc->Inserted = 0;                              /* mov [rcx+52h], dl */
}
ObFastDereferenceObject
Inside ntoskrnl.exe, ObFastDereferenceObject has the following prototype:
void __fastcall ObFastDereferenceObject(
_In_ PEX_FAST_REF FastRef,
_In_ PVOID Object
)
Here is the struct that is passed in the first argument:
typedef struct _EX_FAST_REF
{
union
{
PVOID Object;
ULONG RefCnt: 4;
UINT64 RefCnt;
};
} EX_FAST_REF, *PEX_FAST_REF;
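Here is the disassembly. On x64 Windows there is only one calling convention, so `__fastcall` changes nothing here: FastRef arrives in RCX and Object in RDX, and the slow path forwards Object in RCX to ObfDereferenceObject with a tail jump rather than a call. The `prefetchw` at the top asks the CPU to fetch the cache line holding *FastRef in anticipation of a write: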
; ObFastDereferenceObject -- rcx = FastRef, rdx = Object
ObFastDereferenceObject:
mov r9, rcx ; keep FastRef in r9; rcx is reused as scratch below
prefetchw byte ptr [rcx] ; prefetch *FastRef with intent to write
mov rax, [rcx] ; rax = current 64-bit value of *FastRef
mov r8, rax
xor r8, rdx ; r8 = value ^ Object
cmp r8, 0Fh
jnb short loc_140062C29 ; (value ^ Object) >= 0Fh, unsigned: slow path
loc_140062C1D:
lea r8, [rax+1] ; r8 = value + 1
lock cmpxchg [r9], r8 ; if *FastRef == rax then *FastRef = r8, else rax = *FastRef
jnz short loc_140062C31 ; ZF clear: the exchange did not happen, re-check
retn ; exchange succeeded
loc_140062C29:
mov rcx, rdx ; first argument = Object
jmp ObfDereferenceObject ; tail call
loc_140062C31:
mov rcx, rax ; rax was reloaded by the failed cmpxchg
xor rcx, rdx
cmp rcx, 0Fh
jb short loc_140062C1D ; (value ^ Object) < 0Fh: retry with the fresh value
jmp short loc_140062C29
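The function is a compare-and-swap retry loop. It reads the 64-bit value stored in *FastRef and, as long as the XOR of that value with Object is below 0xF (so the two can differ only in the low four bits), tries to atomically replace the value with value + 1. If the exchange succeeds, the function returns. If it fails, the test is repeated with the value that was actually found in memory. As soon as the test fails, the function tail-calls ObfDereferenceObject(Object) instead.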
/*
 * Fast path: while the value stored in *FastRef matches Object in
 * everything but its low four bits, try to atomically add one to the stored
 * value and return on success.
 * Slow path: otherwise hand Object to ObfDereferenceObject.
 *
 * FastRef - slot holding the pointer plus low-bit count (rcx)
 * Object  - object pointer to compare against (rdx)
 */
void __fastcall ObFastDereferenceObject(
    _In_ PEX_FAST_REF FastRef,
    _In_ PVOID Object
)
{
    ULONG_PTR Old = (ULONG_PTR)FastRef->Object;     /* mov rax, [rcx] */
    ULONG_PTR Seen;

    /* xor r8, rdx / cmp r8, 0Fh / jnb loc_140062C29 (unsigned, strictly below) */
    while ((Old ^ (ULONG_PTR)Object) < 0x0F)
    {
        /* lea r8, [rax+1] / lock cmpxchg [r9], r8 */
        Seen = (ULONG_PTR)InterlockedCompareExchangePointer(
            &FastRef->Object, (PVOID)(Old + 1), (PVOID)Old);

        if (Seen == Old)
            return;                                 /* exchange happened: retn */

        /* The failed cmpxchg left the current memory value in rax; re-test it. */
        Old = Seen;
    }

    /* mov rcx, rdx / jmp ObfDereferenceObject */
    ObfDereferenceObject(Object);
}
KeInitializeQueue
Inside ntoskrnl.exe, KeInitializeQueue has the following prototype:
VOID NTAPI KeInitializeQueue(
_Out_ PRKQUEUE Queue,
_In_ ULONG Count);
Here are the relevant structs which make up our Queue parameter:
typedef struct _DISPATCHER_HEADER
{
union
{
struct
{
UCHAR Type;
union
{
UCHAR Abandoned;
UCHAR Absolute;
UCHAR NpxIrql;
UCHAR Signalling;
};
union
{
UCHAR Size;
UCHAR Hand;
};
union
{
UCHAR Inserted;
UCHAR DebugActive;
UCHAR DpcActive;
};
};
LONG Lock;
};
LONG SignalState;
LIST_ENTRY WaitListHead;
} DISPATCHER_HEADER, *PDISPATCHER_HEADER;
typedef struct _KQUEUE {
DISPATCHER_HEADER Header; /* 0x0 */
LIST_ENTRY EntryListHead; /* 0x18 */
ULONG CurrentCount; /* 0x28 */
ULONG MaximumCount; /* 0x2c */
LIST_ENTRY ThreadListHead; /* 0x30 */
} KQUEUE, *PKQUEUE, *RESTRICTED_POINTER PRKQUEUE;
The disassembly for the function is:
; KeInitializeQueue -- rcx = Queue, edx = Count
KeInitializeQueue:
mov word ptr [rcx], 4 ; 16-bit store: byte at +0x0 = 4, byte at +0x1 = 0
mov byte ptr [rcx+2], 10h ; byte at +0x2 = 10h
lea rax, [rcx+8] ; rax = address of the list head at +0x8
xor r8d, r8d ; r8d = 0
mov [rcx+4], r8d ; dword at +0x4 = 0
mov [rax+8], rax ; second pointer of the head points at the head itself
mov [rax], rax ; first pointer too: the list is empty
lea rax, [rcx+18h] ; same self-linking for the list head at +0x18
mov [rax+8], rax
mov [rax], rax
lea rax, [rcx+30h] ; and for the list head at +0x30
mov [rax+8], rax
mov [rax], rax
mov [rcx+28h], r8d ; dword at +0x28 = 0
test edx, edx
jz short loc_1400DF8A9 ; Count == 0: use KeNumberProcessors_0 instead
mov [rcx+2Ch], edx ; dword at +0x2C = Count
retn
loc_1400DF8A9:
mov eax, cs:KeNumberProcessors_0
mov [rcx+2Ch], eax ; dword at +0x2C = KeNumberProcessors_0
retn
This function is again just basically filling in a struct with some constants.
/*
 * Initializes a caller-supplied KQUEUE: header constants, three empty
 * (self-linked) lists, a zero current count, and the maximum count.
 *
 * Queue - structure to fill in (rcx)
 * Count - maximum count; 0 selects KeNumberProcessors (edx)
 */
VOID NTAPI KeInitializeQueue(
    _Out_ PRKQUEUE Queue,
    _In_ ULONG Count)
{
    /* mov word ptr [rcx], 4 -- a 16-bit store, so the byte after Type is zeroed too */
    Queue->Header.Type = 4;
    Queue->Header.Abandoned = FALSE;
    Queue->Header.Size = 0x10;                      /* mov byte ptr [rcx+2], 10h */
    Queue->Header.SignalState = 0;                  /* mov [rcx+4], r8d */

    /* lea rax, [rcx+8] / mov [rax+8], rax / mov [rax], rax */
    Queue->Header.WaitListHead.Blink = &Queue->Header.WaitListHead;
    Queue->Header.WaitListHead.Flink = &Queue->Header.WaitListHead;

    /* lea rax, [rcx+18h] / mov [rax+8], rax / mov [rax], rax */
    Queue->EntryListHead.Blink = &Queue->EntryListHead;
    Queue->EntryListHead.Flink = &Queue->EntryListHead;

    /* lea rax, [rcx+30h] / mov [rax+8], rax / mov [rax], rax */
    Queue->ThreadListHead.Blink = &Queue->ThreadListHead;
    Queue->ThreadListHead.Flink = &Queue->ThreadListHead;

    Queue->CurrentCount = 0;                        /* mov [rcx+28h], r8d */

    /* test edx, edx / jz loc_1400DF8A9 */
    if (Count == 0)
        Queue->MaximumCount = KeNumberProcessors;   /* mov eax, cs:KeNumberProcessors_0 / mov [rcx+2Ch], eax */
    else
        Queue->MaximumCount = Count;                /* mov [rcx+2Ch], edx */
}
KxWaitForLockChainValid
Inside ntoskrnl.exe, KxWaitForLockChainValid has the following prototype:
PKSPIN_LOCK_QUEUE KxWaitForLockChainValid(
__inout PKSPIN_LOCK_QUEUE LockQueue);
Here is the definition for the struct parameter:
typedef struct _KSPIN_LOCK_QUEUE
{
struct _KSPIN_LOCK_QUEUE * volatile Next;
PKSPIN_LOCK volatile Lock;
} KSPIN_LOCK_QUEUE, *PKSPIN_LOCK_QUEUE;
Here is the disassembly of the function:
; KxWaitForLockChainValid -- rcx = LockQueue; returns in rax the non-zero qword read from [LockQueue]
KxWaitForLockChainValid:
mov [rsp+8], rbx ; save rbx in the caller-provided home space
push rdi
sub rsp, 20h
mov rdi, rcx ; rdi = LockQueue, kept across the call below
xor ebx, ebx ; ebx = spin counter, starts at 0
loc_1400DA7F7:
inc ebx
test cs:HvlLongSpinCountMask, ebx
jz loc_14019DCAC ; (counter & HvlLongSpinCountMask) == 0: take the out-of-line path
loc_1400DA805:
pause
loc_1400DA807:
mov rax, [rdi] ; rax = qword at LockQueue+0
test rax, rax
jz short loc_1400DA7F7 ; still zero: spin again
mov rbx, [rsp+28h+8] ; restore rbx
add rsp, 20h
pop rdi
retn ; rax = the non-zero value just read
loc_14019DCAC:
mov eax, cs:HvlEnlightenments
test al, 40h
jz loc_1400DA805 ; bit 40h clear: just pause like the common path
mov ecx, ebx ; argument = spin counter
call HvlNotifyLongSpinWait
nop
jmp loc_1400DA807 ; skip the pause and re-read [rdi]
This is a spin-wait loop: it keeps re-reading LockQueue->Next until the pointer becomes non-NULL, then returns it. It's interesting that the last label is in a distant memory area. This is usually an indication of an optimization by the compiler that the code is rarely used.
/*
 * Spins until LockQueue->Next becomes non-NULL and returns that pointer.
 *
 * On each iteration the spin counter is incremented. When the counter has
 * no bits in common with HvlLongSpinCountMask and bit 0x40 of
 * HvlEnlightenments is set, HvlNotifyLongSpinWait is called with the
 * counter; in every other case the loop executes a PAUSE instead.
 */
PKSPIN_LOCK_QUEUE KxWaitForLockChainValid(
    __inout PKSPIN_LOCK_QUEUE LockQueue)
{
    UINT32 i = 0;                                   /* xor ebx, ebx */
    PKSPIN_LOCK_QUEUE Next;

    do                                              /* loc_1400DA7F7 */
    {
        ++i;                                        /* inc ebx */

        /* test cs:HvlLongSpinCountMask, ebx / jz loc_14019DCAC */
        /* test al, 40h / jz loc_1400DA805 */
        if ((i & HvlLongSpinCountMask) == 0 && (HvlEnlightenments & 0x40) != 0)
            HvlNotifyLongSpinWait(i);               /* mov ecx, ebx / call */
        else
            _mm_pause();                            /* pause */

        Next = LockQueue->Next;                     /* mov rax, [rdi] */
    } while (Next == 0);                            /* test rax, rax / jz loc_1400DA7F7 */

    return Next;                                    /* value is still in rax at retn */
}
KeReadyThread
Inside ntoskrnl.exe, KeReadyThread has the following prototype:
VOID NTAPI KeReadyThread(_In_ PKTHREAD Thread);
Here is the disassembly:
; KeReadyThread -- rcx = Thread
KeReadyThread:
push rbx
sub rsp, 20h
mov rdx, [rcx+0B8h] ; rdx = pointer stored at Thread+0xB8
mov rbx, rcx ; rbx = Thread, preserved across the calls
mov eax, [rdx+234h] ; eax = dword at offset 0x234 of the pointed-to object
test al, 7
jnz short loc_1400F6684 ; any of the low three bits set: extra check first
loc_1400F6676:
mov rcx, rbx ; argument = Thread
call KiFastReadyThread
loc_1400F667E:
add rsp, 20h
pop rbx
retn
loc_1400F6684:
call KiInSwapSingleProcess ; rcx is still Thread and rdx the pointer loaded above
test al, al
jnz short loc_1400F667E ; non-zero result: return without readying
jmp short loc_1400F6676
Until I calculate the offsets the struct values are unknown.
/*
 * Readies Thread via KiFastReadyThread, unless any of the low three bits of
 * the field at +0x234 (reached through the pointer at Thread+0xB8) are set
 * and KiInSwapSingleProcess returns non-zero, in which case nothing more is
 * done.
 *
 * The field names are placeholders until the real offsets are resolved.
 */
VOID NTAPI KeReadyThread(_In_ PKTHREAD Thread)
{
    /* mov rdx, [rcx+0B8h] -- the value at +0xB8 is itself a pointer */
    /* mov eax, [rdx+234h] */
    /* test al, 7 / jnz loc_1400F6684 -- a bit test, not an equality test */
    if ((Thread->UnknownB8->Unknown234 & 7) != 0)
    {
        /*
         * call KiInSwapSingleProcess / test al, al / jnz loc_1400F667E
         * NOTE(review): at the call, rdx still holds Thread->UnknownB8, so the
         * callee may take it as a second argument -- confirm against its body.
         */
        if (KiInSwapSingleProcess(Thread))
            return;
    }

    KiFastReadyThread(Thread);                      /* mov rcx, rbx / call KiFastReadyThread */
}
KiInitializeTSS
Inside ntoskrnl.exe, KiInitializeTSS has the following prototype:
VOID NTAPI KiInitializeTSS(_In_ PKTSS Tss);
This has a parameter for the PKTSS struct. It is defined as:
typedef struct _KTSS
{
WORD Backlink;
WORD Reserved0;
ULONG Esp0;
WORD Ss0; /* 0x8 */
WORD Reserved1;
ULONG NotUsed1[4];
ULONG CR3;
ULONG Eip;
ULONG EFlags;
ULONG Eax;
ULONG Ecx;
ULONG Edx;
ULONG Ebx;
ULONG Esp;
ULONG Ebp;
ULONG Esi;
ULONG Edi;
WORD Es;
WORD Reserved2;
WORD Cs;
WORD Reserved3;
WORD Ss;
WORD Reserved4;
WORD Ds;
WORD Reserved5;
WORD Fs;
WORD Reserved6;
WORD Gs;
WORD Reserved7;
WORD LDT; /* 0x60 */
WORD Reserved8;
WORD Flags; /* 0x64 */
WORD IoMapBase; /* 0x66 */
KiIoAccessMap IoMaps[1];
UCHAR IntDirectionMap[32]; /* 0x208c */
} KTSS, *PKTSS;
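Here is the disassembly. Unlike the other routines in this write-up, it comes from a 32-bit x86 build: the argument is read from the stack at [ebp+8] and removed by `ret 4`: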
; KiInitializeTSS -- 32-bit code; the single argument (Tss) is at [ebp+8] and is popped by ret 4
KiInitializeTSS:
mov edi, edi ; two-byte no-op at function entry
push ebp
mov ebp, esp
mov eax, dword ptr [ebp+8] ; eax = Tss
and word ptr [eax+64h], 0 ; word at +0x64 = 0
and word ptr [eax+60h], 0 ; word at +0x60 = 0
mov word ptr [eax+66h], 20ACh ; word at +0x66 = 20ACh
mov word ptr [eax+8], 10h ; word at +0x8 = 10h
pop ebp
ret 4 ; callee removes the 4-byte argument
This function fills in the structure with constants.
/*
 * Writes four 16-bit fields of the caller-supplied KTSS and leaves the rest
 * of the structure untouched.
 */
VOID NTAPI KiInitializeTSS(_In_ PKTSS Tss)
{
    /* and word ptr [eax+64h], 0 -- AND with zero is simply a 16-bit clear */
    Tss->Flags = 0x0000;

    /* and word ptr [eax+60h], 0 */
    Tss->LDT = 0x0000;

    /*
     * mov word ptr [eax+66h], 20ACh
     * 0x20AC is where the structure ends: IntDirectionMap starts at 0x208C
     * and is 32 bytes long.
     */
    Tss->IoMapBase = sizeof(KTSS);

    /* mov word ptr [eax+8], 10h */
    Tss->Ss0 = 0x10;
}
RtlValidateUnicodeString
Inside ntoskrnl.exe, RtlValidateUnicodeString has the following prototype:
NTSTATUS NTAPI RtlValidateUnicodeString(
_In_ ULONG Flags,
_In_ PCUNICODE_STRING UnicodeString);
The UNICODE_STRING struct in a 64-bit system context is defined as:
typedef struct _UNICODE_STRING {
USHORT Length; /* 0x0 */
USHORT MaximumLength; /* 0x2 */
DWORD Reserved; /* 0x4 */
PWSTR Buffer; /* 0x8 */
} UNICODE_STRING, *PUNICODE_STRING;
Here's the disassembly of the function:
; RtlValidateUnicodeString -- ecx = Flags, rdx = UnicodeString; status returned in eax
RtlValidateUnicodeString:
xor eax, eax ; eax = 0, the value returned on every success path
test ecx, ecx
jnz short loc_1400D23BB ; Flags != 0: fail
test rdx, rdx
jz short locret_1400D23BA ; UnicodeString == 0: return 0
movzx r8d, word ptr [rdx] ; r8w = word at +0x0
test r8b, 1
jnz short loc_1400D23BB ; odd: fail
movzx ecx, word ptr [rdx+2] ; cx = word at +0x2
test cl, 1
jnz short loc_1400D23BB ; odd: fail
cmp r8w, cx
ja short loc_1400D23BB ; word at +0x0 > word at +0x2, unsigned: fail
mov r9d, 0FFFEh
cmp cx, r9w
ja short loc_1400D23BB ; word at +0x2 > 0FFFEh: fail
cmp [rdx+8], rax ; rax is still 0, so this tests the qword at +0x8 for zero
jz loc_14019BAF4 ; zero pointer: out-of-line checks
locret_1400D23BA:
retn
loc_1400D23BB:
mov eax, 0C000000Dh
retn
loc_14019BAF4:
test r8w, r8w
jnz loc_1400D23BB ; non-zero word at +0x0 with a zero pointer: fail
test cx, cx
jz locret_1400D23BA ; both words zero: return 0
jmp loc_1400D23BB
The function, true to its name, follows the traditional validation pattern of executing tests and returning false (NTSTATUS: INVALID_PARAMETER) or true (NTSTATUS: SUCCESS) depending on if the conditions are met or not. Note that the last test case in the main body of the function can jump to a distant memory space for more tests, an optimization that likely means it is rarely branched to.
/* test ecx, ecx */
if (Flags != 0)
return STATUS_INVALID_PARAMETER; /* mov eax, 0C000000Dh */
/* test rdx, rdx */
if (!UnicodeString)
return STATUS_SUCCESS; /* xor eax, eax */
/* movzx r8d, word ptr [rdx] */
/* test r8b, 1 */
if (UnicodeString->Length & 1 != 0)
return STATUS_INVALID_PARAMETER;
/* movzx ecx, word ptr [rdx+2] */
/* test cl, 1 */
if (UnicodeString->MaximumLength & 1 != 0)
return STATUS_INVALID_PARAMETER;
/* cmp r8w, cx */
if (UnicodeString->Length > UnicodeString.MaximumLength)
return STATUS_INVALID_PARAMETER;
/* mov r9d, 0FFFEh */
/* cmp cx, r9w */
if (UnicodeString->MaximumLength > 65534)
return STATUS_INVALID_PARAMETER;
/* cmp [rdx+8], rax */
if (UnicodeString->Buffer == 0)
{
/* test r8w, r8w */
if (UnicodeString->Length != 0)
return STATUS_INVALID_PARAMETER;
/* test cx, cx */
if (UnicodeString->MaximumLength != 0)
return STATUS_INVALID_PARAMETER;
}
return STATUS_SUCCESS;