Tuesday, January 6, 2015

Practical Reverse Engineering p. 35 #7

Question number 7 on page 35 of Practical Reverse Engineering is as follows:

Sample H. The function sub_10BB6 has a loop searching for something. First recover the function prototype and then infer the types based on the context. Hint: You should probably have a copy of the PE specification nearby.

Here is an image that details the PE file format, which is useful for referencing offsets.


The structs that make up the PE file format are:

/* MS-DOS style header that the write-up assumes sits at the start of the
 * image. The trailing comment on each member is its byte offset from the
 * start of the structure; e_lfanew (0x3c) is the value the function below
 * reads with "mov esi, [eax+3Ch]" and adds to the image base. */
typedef struct _IMAGE_DOS_HEADER
{
    WORD e_magic;              /* 0x0 */
    WORD e_cblp;               /* 0x2 */
    WORD e_cp;                 /* 0x4 */
    WORD e_crlc;               /* 0x6 */
    WORD e_cparhdr;            /* 0x8 */
    WORD e_minalloc;           /* 0xa */
    WORD e_maxalloc;           /* 0xc */
    WORD e_ss;                 /* 0xe */
    WORD e_sp;                 /* 0x10 */
    WORD e_csum;               /* 0x12 */
    WORD e_ip;                 /* 0x14 */
    WORD e_cs;                 /* 0x16 */
    WORD e_lfarlc;             /* 0x18 */
    WORD e_ovno;               /* 0x1a */
    WORD e_res[4];             /* 0x1c */
    WORD e_oemid;              /* 0x24 */
    WORD e_oeminfo;            /* 0x26 */
    WORD e_res2[10];           /* 0x28 */
    LONG e_lfanew;             /* 0x3c: added to the image base to reach the NT headers */
} IMAGE_DOS_HEADER, *PIMAGE_DOS_HEADER;

typedef struct _IMAGE_NT_HEADERS {
    DWORD                 Signature;         /* 0x0 */
    union
    {
        IMAGE_FILE_HEADER     FileHeader;    /* 0x4 */
        struct
        {
            WORD  Machine;                   /* 0x4 */
            WORD  NumberOfSections;          /* 0x6 */
            DWORD TimeDateStamp;             /* 0x8 */
            DWORD PointerToSymbolTable;      /* 0xc */
            DWORD NumberOfSymbols;           /* 0x10 */
            WORD  SizeOfOptionalHeader;      /* 0x14 */
            WORD  Characteristics;           /* 0x16 */
        }
    }
    IMAGE_OPTIONAL_HEADER OptionalHeader;    /* 0x18 */
} IMAGE_NT_HEADERS, *PIMAGE_NT_HEADERS;

Here is the disassembly of the function in Sample H:

; Two stack arguments; returns in eax either 0 or the value of edi at the
; moment the indirect call returned 0. Ends with "retn 8", so this routine
; removes its own two arguments from the stack.
sub_10BB2:
    mov     eax, [esp+4]            ; eax = first argument
    push    ebx
    push    esi
    mov     esi, [eax+3Ch]          ; esi = dword stored at arg1+0x3C
    add     esi, eax                ; esi = arg1 + that dword
    movzx   eax, word ptr [esi+14h] ; eax = zero-extended word at esi+0x14
    xor     ebx, ebx                ; ebx = 0, the loop counter
    cmp     [esi+6], bx             ; compare the word at esi+6 with 0
    push    edi
    lea     edi, [eax+esi+18h]      ; edi = esi + 0x18 + eax (push/lea leave the cmp flags intact)
    jbe     short loc_10BEB         ; word at esi+6 is 0 -> return 0

loc_10BCE:                             
    push    [esp+0Ch+8]     ; _DWORD - second argument of this function
    push    edi             ; _DWORD - current entry pointer
    call    ds:dword_169A4  ; indirect call through the pointer stored at dword_169A4
    test    eax, eax
    pop     ecx             ; caller removes the two pushed arguments;
    pop     ecx             ; pop does not modify the flags set by test
    jz      short loc_10BF3 ; callee returned 0 -> return edi
    movzx   eax, word ptr [esi+6]   ; re-read the count on every iteration
    add     edi, 28h                ; advance to the next 0x28-byte entry
    inc     ebx
    cmp     ebx, eax
    jb      short loc_10BCE         ; loop while counter < count (unsigned)

loc_10BEB:                              
    xor     eax, eax        ; nothing matched: return 0

loc_10BED:                            
    pop     edi
    pop     esi
    pop     ebx
    retn    8 

loc_10BF3:                             
    mov     eax, edi        ; return the entry that produced a zero result
    jmp     short loc_10BED

Making the assumption that our first argument is a pointer to the DOS header (from the hint in the question), we can use a little bit of math to calculate the offsets and see everything lines up. The search code could be written in a for loop but I used a while loop for more clarity.

/*
 * Walks the section table of the PE image at pPE and returns the first
 * section header for which the routine stored at dword_169A4, called as
 * (section header, arg2), returns 0. Returns NULL when the image has no
 * sections or no section produces a zero result.
 *
 * Notes taken from the listing:
 *   - the callee's two arguments are removed by the caller (pop ecx twice),
 *     and a ZERO result means "found" (jz loc_10BF3) - presumably a
 *     strcmp-style compare against the section name; confirm by resolving
 *     dword_169A4;
 *   - the routine ends in "retn 8", i.e. it pops its own two arguments.
 */
PIMAGE_SECTION_HEADER sub_10BB2(PVOID pPE, PVOID arg2)
{
    PIMAGE_DOS_HEADER pDOS;
    PIMAGE_NT_HEADERS pNT;
    PIMAGE_SECTION_HEADER pSection;
    DWORD dwCurrentSection;

    /* mov eax, [esp+4] */
    pDOS = (PIMAGE_DOS_HEADER) pPE;

    /* mov esi, [eax+3Ch] */
    /* add esi, eax */
    /* e_lfanew is a BYTE offset, so the addition must be done on a byte
     * pointer; adding it to pDOS directly would scale it by
     * sizeof(IMAGE_DOS_HEADER). */
    pNT = (PIMAGE_NT_HEADERS) ((PBYTE) pDOS + pDOS->e_lfanew);

    /* movzx eax, word ptr [esi+14h] */
    /* lea edi, [eax+esi+18h] */
    /* IMAGE_FIRST_SECTION is exactly this computation:
     * (PBYTE)pNT + 0x18 + FileHeader.SizeOfOptionalHeader. */
    pSection = IMAGE_FIRST_SECTION(pNT);

    /* xor ebx, ebx */
    /* cmp [esi+6], bx / jbe loc_10BEB      (entry test)      */
    /* movzx eax, word ptr [esi+6]                            */
    /* inc ebx / cmp ebx, eax / jb loc_10BCE (loop-back test) */
    for (dwCurrentSection = 0;
         dwCurrentSection < pNT->FileHeader.NumberOfSections;
         ++dwCurrentSection, ++pSection /* add edi, 28h */)
    {
        /* push [esp+0Ch+8] */
        /* push edi  */
        /* call ds:dword_169A4 */
        /* test eax, eax / jz loc_10BF3 */
        if (((int (*)(PVOID, PVOID)) dword_169A4)(pSection, arg2) == 0)
            return pSection;    /* mov eax, edi */
    }

    /* xor eax, eax */
    return NULL;
}

Practical Reverse Engineering p. 35 #6

Question number 6 on page 35 of Practical Reverse Engineering is as follows:

Sample H. The function sub_13846 references several structures whose types are not entirely clear. Your task is to first recover the function prototype and then try to reconstruct the structure fields. After reading Chapter 3, return to this exercise to see if your understanding has changed. (Note: This sample is targeting Windows XP x86.)

The function disassembles to:

; Register arguments in ecx and edx; nothing is read from the stack.
; Steps the pointer stored at ecx+0x60 back by 0x24 bytes, records edx in
; the entry it now points at, then calls through a table of dwords at
; (dword at edx+8)+0x38, indexed by the first byte of that entry.
sub_13842:
     mov     eax, [ecx+60h]            ; eax = pointer stored at ecx+0x60
     push    esi
     mov     esi, [edx+8]              ; esi = pointer stored at edx+8
     dec     byte ptr [ecx+23h]        ; decrement the byte at ecx+0x23
     sub     eax, 24h                  ; move the pointer back 0x24 bytes
     mov     [ecx+60h], eax            ; write the new pointer back
     mov     [eax+14h], edx            ; entry+0x14 = edx
     movzx   eax, byte ptr [eax]       ; eax = first byte of the entry, zero-extended
     push    ecx                       ; pushed first  -> callee's 2nd stack argument
     push    edx                       ; pushed last   -> callee's 1st stack argument
     call    dword ptr [esi+eax*4+38h] ; indirect call: 4-byte slot number eax, table at esi+0x38
     pop     esi                       ; no stack fix-up after the call, so the callee presumably pops its arguments
     retn                              ; eax from the callee is passed through unchanged

It's a fastcall convention which has two struct pointer arguments, and the return type is not readily available. There are 3 locals which are stored in registers, two of them pointers within the struct and a char which is an index to a function call table.

/*
 * Pseudo-C for the listing labelled sub_13842 (the name used here differs
 * from that label by one; kept as written).
 *
 * arg1 arrives in ecx and arg2 in edx. Member names of the form
 * UnknownNN are placeholders named after the byte offset they stand for.
 * The value returned is whatever the dispatched routine leaves in eax.
 */
PVOID __fastcall sub_13841(
     struct arg1 *arg1, struct arg2 *arg2)
{
     struct v1 *v1;            /* eax: entry pointer kept at arg1+0x60 */
     struct v2 *v2;            /* esi: object holding the call table   */
     UCHAR index;              /* loaded with movzx, so unsigned       */

     v1 = arg1->Unknown0x60;   /* mov eax, [ecx+60h] */
     v2 = arg2->Unknown0x8;    /* mov esi, [edx+8] */

     --arg1->Unknown0x23;      /* dec byte ptr [ecx+23h] */

     /* sub eax, 24h - a BYTE distance, so step through a byte pointer */
     v1 = (struct v1 *)((BYTE *)v1 - 0x24);

     arg1->Unknown0x60 = v1;   /* mov [ecx+60h], eax */
     v1->Unknown0x14 = arg2;   /* mov [eax+14h], edx */

     index = v1->index;        /* movzx eax, byte ptr [eax] */

     /* push ecx / push edx: edx is pushed last, so arg2 is the callee's
      * first argument and arg1 its second. */
     /* call dword ptr [esi+eax*4+38h] */
     return v2->Unknown0x38[index](arg2, arg1);
}

Here is a basic idea so far on the struct meanings:

/* Layout of the object passed in ecx, limited to what the listing touches:
 * a byte that is decremented at 0x23 and a pointer at 0x60 that is moved
 * back by 0x24 bytes. Everything else is opaque padding sized so that the
 * two known members land on their observed offsets. */
struct arg1 {
     BYTE Unknown0x0[0x23];   /* 0x0 */
     CHAR decremented;        /* 0x23 */
     BYTE Unknown0x24[0x3c];  /* 0x24 - 0x5f */
     struct v1 *v1;           /* 0x60: current 0x24-byte entry */
};

/* Object passed in edx. Only the pointer at offset 8 is read by the
 * listing ("mov esi, [edx+8]"); the first 8 bytes are unknown. */
struct arg2 {
     BYTE Unknown0x0[0x8];    /* 0x0 */
     struct *v2;              /* 0x8: object whose +0x38 table is called through */    
};

/* size = 0x24 */
/* Entry addressed by the pointer kept at arg1+0x60; the 0x24 size comes
 * from "sub eax, 24h". */
struct v1 {
     CHAR index;              /* 0x0: read with movzx, i.e. used as an unsigned table index */
     BYTE Unknown0x1[0x13];   /* 0x1 */
     struct *arg2;            /* 0x14: receives the edx argument */
     CHAR Unknown0x18[0xc];   /* 0x18 */
};

struct v2 {
     BYTE Unknown0x0[0x38];   /* 0x0 */
     ULONG_PTR function;      /* 0x38 */   
};

Saturday, January 3, 2015

Practical Reverse Engineering p. 35 #5

Question number 5 on page 35 of Practical Reverse Engineering is as follows:

Decompile the following kernel routines in Windows:
  • KeInitializeDpc 
  • KeInitializeApc 
  • ObFastDereferenceObject (explain its calling convention)
  • KeInitializeQueue 
  • KxWaitForLockChainValid 
  • KeReadyThread 
  • KiInitializeTSS 
  • RtlValidateUnicodeString

KeInitializeDpc

Inside ntoskrnl.exe, KeInitializeDpc has the following prototype:

VOID NTAPI KeInitializeDpc(
    PRKDPC Dpc, 
    PKDEFERRED_ROUTINE DeferredRoutine, 
    PVOID DeferredContext);

This has a parameter for the KDPC struct, which contains a LIST_ENTRY. These are defined as:

/* Doubly-linked list node. Offsets are for 8-byte pointers, matching the
 * 64-bit listings in this section. */
typedef struct _LIST_ENTRY {
  struct _LIST_ENTRY  *Flink;   /* 0x0 */
  struct _LIST_ENTRY  *Blink;   /* 0x8 */
} LIST_ENTRY, *PLIST_ENTRY;

/* KDPC as laid out for the 64-bit listing; trailing comments are byte
 * offsets. The first four bytes (Type, Importance, Number) are written by
 * a single dword store in KeInitializeDpc. */
typedef struct _KDPC
{
     UCHAR Type;                /* 0x0 */   
     UCHAR Importance;          /* 0x1 */
     WORD Number;               /* 0x2 */
     BYTE Unknown[4];           /* 0x4: not written by KeInitializeDpc */
     LIST_ENTRY DpcListEntry;   /* 0x8 (Flink 0x8, Blink 0x10) */
     PVOID DeferredRoutine;     /* 0x18 */
     PVOID DeferredContext;     /* 0x20 */
     PVOID SystemArgument1;     /* 0x28 */
     PVOID SystemArgument2;     /* 0x30 */
     PVOID DpcData;             /* 0x38 */
} KDPC, *PKDPC;

Here is the disassembly:

; Microsoft x64 convention: rcx = Dpc, rdx = DeferredRoutine,
; r8 = DeferredContext. Leaf routine, no stack use.
KeInitializeDpc:
    xor     eax, eax                ; rax = 0 (a 32-bit write clears the upper half)
    mov     dword ptr [rcx], 113h   ; bytes 0..3 = 13h, 01h, 00h, 00h in one store
    mov     [rcx+18h], rdx          ; offset 0x18 = second argument
    mov     [rcx+38h], rax          ; offset 0x38 = 0
    mov     [rcx+10h], rax          ; offset 0x10 = 0
    mov     [rcx+20h], r8           ; offset 0x20 = third argument
    retn

The first MOV is an optimization which sets the first 3 variables in the struct, as it sets a dword to 0x113 (0b100010011). Everything else lines up easily enough. Here is the fully decompiled function.

/* Fills in a KDPC: fixed header values, the caller's routine and context,
 * and zeroes for DpcData and the list entry's Blink. The three header
 * members are written by one "mov dword ptr [rcx], 113h" in the binary;
 * they are shown here as separate assignments. */
VOID NTAPI KeInitializeDpc(
    PRKDPC Dpc, 
    PKDEFERRED_ROUTINE DeferredRoutine, 
    PVOID DeferredContext)
{
    /* Header: 0x00000113 split into its three fields. */
    Dpc->Number = 0;                /* bytes 2-3 */
    Dpc->Importance = 1;            /* byte 1    */
    Dpc->Type = 0x13;               /* byte 0    */

    /* Caller-supplied values. */
    Dpc->DeferredContext = DeferredContext; /* [rcx+20h] <- r8  */
    Dpc->DeferredRoutine = DeferredRoutine; /* [rcx+18h] <- rdx */

    /* Members cleared from the zeroed rax. */
    Dpc->DpcListEntry.Blink = NULL;         /* [rcx+10h] <- rax */
    Dpc->DpcData = NULL;                    /* [rcx+38h] <- rax */
}



KeInitializeApc

Inside ntoskrnl.exe, KeInitializeApc has the following prototype:

/* Prototype with the routine-pointer type names and the optional-parameter
 * annotation restored (the text "IN" inside them had been replaced by
 * "_In_"). */
VOID NTAPI KeInitializeApc( 
    _In_ PKAPC  Apc,
    _In_ PKTHREAD   Thread,
    _In_ KAPC_ENVIRONMENT   TargetEnvironment,
    _In_ PKKERNEL_ROUTINE     KernelRoutine,
    _In_opt_ PKRUNDOWN_ROUTINE RundownRoutine,
    _In_ PKNORMAL_ROUTINE     NormalRoutine,
    _In_ KPROCESSOR_MODE    Mode,
    _In_ PVOID  Context);   

Here is the KAPC struct with added offsets:

/* KAPC as laid out for the 64-bit listing; trailing comments are byte
 * offsets. KeInitializeApc writes Type, Size, Thread, the three routine
 * pointers, NormalContext, ApcStateIndex, ApcMode and Inserted. */
typedef struct _KAPC
{
     UCHAR Type;                /* 0x0 */
     UCHAR SpareByte0;          /* 0x1 */
     UCHAR Size;                /* 0x2 */
     UCHAR SpareByte1;          /* 0x3 */
     ULONG SpareLong0;          /* 0x4 */
     PKTHREAD Thread;           /* 0x8 */
     LIST_ENTRY ApcListEntry;   /* 0x10 */
     PVOID KernelRoutine;       /* 0x20 */
     PVOID RundownRoutine;      /* 0x28 */
     PVOID NormalRoutine;       /* 0x30 */
     PVOID NormalContext;       /* 0x38 */
     PVOID SystemArgument1;     /* 0x40 */
     PVOID SystemArgument2;     /* 0x48 */
     CHAR ApcStateIndex;        /* 0x50 */
     CHAR ApcMode;              /* 0x51 */
     UCHAR Inserted;            /* 0x52 */
} KAPC, *PKAPC;

And here is the disassembly:

; Microsoft x64 convention: rcx = Apc, rdx = Thread, r8 = TargetEnvironment,
; r9 = KernelRoutine. Arguments 5-8 are on the stack above the return
; address and the 32-byte shadow area: [rsp+28h], [rsp+30h], [rsp+38h],
; [rsp+40h].
KeInitializeApc:
    mov     byte ptr [rcx], 12h     ; offset 0x0 = 0x12
    mov     byte ptr [rcx+2], 58h   ; offset 0x2 = 0x58
    cmp     r8d, 2
    jz      short loc_1400BAAAF     ; third argument == 2 -> take the value from the thread
    mov     [rcx+50h], r8b          ; otherwise offset 0x50 = low byte of the third argument

loc_1400BAA71:                         
    mov     rax, [rsp+28h]          ; rax = 5th argument
    mov     [rcx+8], rdx            ; offset 0x8 = second argument
    xor     edx, edx                ; rdx = 0 from here on
    mov     [rcx+28h], rax          ; offset 0x28 = 5th argument
    mov     rax, [rsp+30h]          ; rax = 6th argument
    mov     [rcx+20h], r9           ; offset 0x20 = 4th argument
    mov     [rcx+30h], rax          ; offset 0x30 = 6th argument
    test    rax, rax
    jnz     short loc_1400BAA9D     ; 6th argument non-zero -> copy 7th and 8th
    mov     [rcx+51h], dl           ; 6th argument zero: offset 0x51 = 0
    mov     [rcx+38h], rdx          ;                    offset 0x38 = 0

loc_1400BAA99:                          
    mov     [rcx+52h], dl           ; offset 0x52 = 0 on both paths
    retn 

loc_1400BAA9D:                          
    mov     al, [rsp+38h]           ; al = low byte of the 7th argument
    mov     [rcx+51h], al           ; offset 0x51 = that byte
    mov     rax, [rsp+40h]          ; rax = 8th argument
    mov     [rcx+38h], rax          ; offset 0x38 = 8th argument
    jmp     short loc_1400BAA99 

loc_1400BAAAF:                         
    mov     al, [rdx+242h]          ; al = byte at offset 0x242 of the second argument
    mov     [rcx+50h], al           ; offset 0x50 = that byte
    jmp     short loc_1400BAA71

This routine contains a couple of if statements, but otherwise it just writes the arguments and some constants to the struct.

/*
 * Initializes a KAPC from its arguments.
 *
 *  - ApcStateIndex is copied from the thread (byte at Thread+0x242 in the
 *    listing) when TargetEnvironment is CurrentApcEnvironment (compared
 *    against 2), otherwise it is the low byte of TargetEnvironment.
 *  - ApcMode and NormalContext are taken from Mode and Context only when a
 *    NormalRoutine is supplied; otherwise both are zeroed.
 *  - Inserted is always cleared.
 *
 * The routine-pointer type names and the _In_opt_ annotation are restored
 * here; the text "IN" inside them had been replaced by "_In_".
 */
VOID NTAPI KeInitializeApc( 
    _In_ PKAPC  Apc,
    _In_ PKTHREAD   Thread,
    _In_ KAPC_ENVIRONMENT   TargetEnvironment,
    _In_ PKKERNEL_ROUTINE     KernelRoutine,
    _In_opt_ PKRUNDOWN_ROUTINE RundownRoutine,
    _In_ PKNORMAL_ROUTINE     NormalRoutine,
    _In_ KPROCESSOR_MODE    Mode,
    _In_ PVOID  Context) 
{
    Apc->Type = 0x12;        /* mov byte ptr [rcx], 12h */
    Apc->Size = 0x58;        /* mov byte ptr [rcx+2], 58h */

    /* cmp r8d, 2 */
    if ((DWORD)TargetEnvironment == CurrentApcEnvironment)
      Apc->ApcStateIndex = Thread->ApcStateIndex;/* mov al, [rdx+242h] / mov [rcx+50h], al */
    else
      Apc->ApcStateIndex = TargetEnvironment;  /* mov [rcx+50h], r8b */

    Apc->Thread = Thread;                    /* mov [rcx+8], rdx */
    Apc->RundownRoutine = RundownRoutine;    /* mov [rcx+28h], rax */
    Apc->KernelRoutine = KernelRoutine;      /* mov [rcx+20h], r9 */
    Apc->NormalRoutine = NormalRoutine;      /* mov [rcx+30h], rax */

    /* test rax, rax */
    if (NormalRoutine != 0)
    {
        Apc->ApcMode = Mode;                 /* mov [rcx+51h], al */
        Apc->NormalContext = Context;        /* mov [rcx+38h], rax */
    }
    else
    {
        Apc->ApcMode = 0;                    /* mov [rcx+51h], dl */
        Apc->NormalContext = 0;              /* mov [rcx+38h], rdx */    
    }

    Apc->Inserted = 0;                       /* mov [rcx+52h], dl */
}



ObFastDereferenceObject

Inside ntoskrnl.exe, ObFastDereferenceObject has the following prototype:

/* Prototype; terminated with ';' like the other prototypes in this post. */
void __fastcall ObFastDereferenceObject(
    _In_ PEX_FAST_REF FastRef,
    _In_ PVOID Object 
);

Here is the struct that is passed in the first argument:

/* One pointer-sized cell viewed three ways: as an object pointer, as a
 * small count held in its low 4 bits, and as a raw integer. The raw
 * integer view is named Value so that it no longer collides with the
 * RefCnt bit-field (two members of one union cannot share a name). */
typedef struct _EX_FAST_REF
{
    union
    {
        PVOID Object;       /* pointer view */
        ULONG RefCnt: 4;    /* low 4 bits */
        UINT64 Value;       /* whole cell as an integer */
    };    
} EX_FAST_REF, *PEX_FAST_REF;

Here is the disassembly, which shows that there are fastcall optimizations on the 1st parameter for certain processors:

; rcx = FastRef, rdx = Object. Tries to add 1 to the qword at [FastRef]
; with a locked compare-exchange while (value XOR Object) stays below 0Fh;
; otherwise tail-calls ObfDereferenceObject(Object).
ObFastDereferenceObject:
    mov     r9, rcx                 ; r9 = FastRef, kept for the cmpxchg
    prefetchw byte ptr [rcx]        ; prefetch the cell with intent to write
    mov     rax, [rcx]              ; rax = current value of the cell
    mov     r8, rax
    xor     r8, rdx                 ; r8 = value ^ Object
    cmp     r8, 0Fh
    jnb     short loc_140062C29     ; >= 0Fh (unsigned) -> slow path

loc_140062C1D:                          
    lea     r8, [rax+1]             ; r8 = value + 1
    lock cmpxchg [r9], r8           ; if [r9] == rax: [r9] = r8, ZF=1; else rax = [r9], ZF=0
    jnz     short loc_140062C31     ; cell changed underneath us -> re-check and retry
    retn

loc_140062C29:                                              
    mov     rcx, rdx                ; first argument = Object
    jmp     ObfDereferenceObject    ; tail call, returns straight to our caller

loc_140062C31:                          
    mov     rcx, rax                ; rax = value observed by the failed cmpxchg
    xor     rcx, rdx
    cmp     rcx, 0Fh
    jb      short loc_140062C1D     ; still < 0Fh -> try again with the fresh value
    jmp     short loc_140062C29

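The function is a compare-exchange retry loop that tries to add 1 to the value stored in FastRef (the low bits of the stored pointer act as a small reference count). Before each attempt it checks that the stored value XOR Object is below 0xF. If that check fails, either up front or after losing a race, it tail-calls ObfDereferenceObject(Object) instead.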

/*
 * Fast path: while the cell's value differs from Object only by a small
 * amount ((value ^ Object) < 0x0F), try to replace value with value + 1
 * using an atomic compare-exchange. Returns as soon as one exchange
 * succeeds. If the test fails, initially or after a lost race, falls back
 * to ObfDereferenceObject(Object).
 *
 * atomic_compare_exchange_strong mirrors "lock cmpxchg": on failure it
 * stores the value it found into Old, which is exactly what the listing
 * re-tests at loc_140062C31.
 */
void __fastcall ObFastDereferenceObject(
    _In_ PEX_FAST_REF FastRef,
    _In_ PVOID Object 
)
{
    /* mov rax, [rcx] */
    ULONG_PTR Old = (ULONG_PTR)FastRef->Object;

    /* xor r8, rdx / cmp r8, 0Fh / jnb  (and xor rcx, rdx / cmp / jb) */
    while ((Old ^ (ULONG_PTR)Object) < 0x0F)
    {
        /* lea r8, [rax+1] */
        /* lock cmpxchg [r9], r8 */
        if (atomic_compare_exchange_strong(
                (_Atomic(ULONG_PTR) *)FastRef, &Old, Old + 1))
            return;
        /* Exchange failed: Old now holds the value seen in the cell. */
    }
                                        /* mov rcx, rdx */
    ObfDereferenceObject(Object);       /* jmp ObfDereferenceObject */
}



KeInitializeQueue

Inside ntoskrnl.exe, KeInitializeQueue has the following prototype:

VOID NTAPI KeInitializeQueue(
  _Out_  PRKQUEUE Queue,
  _In_   ULONG Count);

Here are the relevant structs which make up our Queue parameter:

/* Common header at the start of KQUEUE. The first four bytes can be read
 * either as four one-byte fields or as one LONG (Lock). Offsets follow
 * from the member sizes and match the stores in KeInitializeQueue. */
typedef struct _DISPATCHER_HEADER
{
     union
     {
          struct
          {
               UCHAR Type;                 /* 0x0 */
               union
               {
                    UCHAR Abandoned;       /* 0x1 (all four names share this byte) */
                    UCHAR Absolute;
                    UCHAR NpxIrql;
                    UCHAR Signalling;
               };
               union
               {
                    UCHAR Size;            /* 0x2 */
                    UCHAR Hand;
               };
               union
               {
                    UCHAR Inserted;        /* 0x3 */
                    UCHAR DebugActive;
                    UCHAR DpcActive;
               };
          };
          LONG Lock;                       /* 0x0: the same four bytes as one LONG */
     };
     LONG SignalState;                     /* 0x4 */
     LIST_ENTRY WaitListHead;              /* 0x8 */
} DISPATCHER_HEADER, *PDISPATCHER_HEADER;

/* Queue object initialized by KeInitializeQueue; trailing comments are
 * byte offsets in the 64-bit layout used by the listing. */
typedef struct _KQUEUE {
    DISPATCHER_HEADER Header;         /* 0x0 */
    LIST_ENTRY EntryListHead;         /* 0x18 */
    ULONG CurrentCount;               /* 0x28 */
    ULONG MaximumCount;               /* 0x2c */
    LIST_ENTRY ThreadListHead;        /* 0x30 */
} KQUEUE, *PKQUEUE, *RESTRICTED_POINTER PRKQUEUE;

The disassembly for the function is:

; rcx = Queue, edx = Count. Leaf routine, no stack use.
KeInitializeQueue:
    mov     word ptr [rcx], 4       ; bytes 0-1 = 04h, 00h
    mov     byte ptr [rcx+2], 10h   ; byte 2 = 10h
    lea     rax, [rcx+8]            ; rax = address of the list head at offset 0x8
    xor     r8d, r8d                ; r8 = 0
    mov     [rcx+4], r8d            ; dword at offset 0x4 = 0
    mov     [rax+8], rax            ; second pointer of the list head points at the head
    mov     [rax], rax              ; first pointer of the list head points at the head
    lea     rax, [rcx+18h]          ; same for the list head at offset 0x18
    mov     [rax+8], rax
    mov     [rax], rax
    lea     rax, [rcx+30h]          ; same for the list head at offset 0x30
    mov     [rax+8], rax
    mov     [rax], rax
    mov     [rcx+28h], r8d          ; dword at offset 0x28 = 0
    test    edx, edx
    jz      short loc_1400DF8A9     ; Count == 0 -> use the global instead
    mov     [rcx+2Ch], edx          ; dword at offset 0x2c = Count
    retn

loc_1400DF8A9:                        
    mov     eax, cs:KeNumberProcessors_0
    mov     [rcx+2Ch], eax          ; dword at offset 0x2c = KeNumberProcessors_0
    retn

This function is again just basically filling in a struct with some constants.

/*
 * Initializes a KQUEUE: sets the header's type/size bytes, clears
 * SignalState and CurrentCount, makes each of the three list heads point
 * at itself (an empty list), and sets MaximumCount to Count, or to
 * KeNumberProcessors when Count is 0.
 *
 * The list heads are embedded LIST_ENTRY structures, not pointers, so
 * their links are reached with '.', and Queue itself is a pointer, so its
 * members are reached with '->'.
 */
VOID NTAPI KeInitializeQueue(
  _Out_  PRKQUEUE Queue,
  _In_   ULONG Count)
{
    /* mov word ptr [rcx], 4 writes bytes 0 and 1 together */
    Queue->Header.Type = 4;
    Queue->Header.Abandoned = FALSE;
    Queue->Header.Size = 0x10;           /* mov byte ptr [rcx+2], 10h */
    Queue->Header.SignalState = 0;       /* mov [rcx+4], r8d */

    /* lea rax, [rcx+8] */ 
    Queue->Header.WaitListHead.Blink = &Queue->Header.WaitListHead; /* mov [rax+8], rax */
    Queue->Header.WaitListHead.Flink = &Queue->Header.WaitListHead; /* mov [rax], rax */

    /* lea rax, [rcx+18h] */
    Queue->EntryListHead.Blink = &Queue->EntryListHead; 
    Queue->EntryListHead.Flink = &Queue->EntryListHead; 

    /* lea rax, [rcx+30h] */
    Queue->ThreadListHead.Blink = &Queue->ThreadListHead; 
    Queue->ThreadListHead.Flink = &Queue->ThreadListHead;

    Queue->CurrentCount = 0;             /* mov [rcx+28h], r8d */

    /* test edx, edx */
    if (Count == 0)
        Queue->MaximumCount = KeNumberProcessors; /* mov eax, cs:KeNumberProcessors_0 */
    else
        Queue->MaximumCount = Count;            /* mov [rcx+2Ch], edx */
}



KxWaitForLockChainValid

Inside ntoskrnl.exe, KxWaitForLockChainValid has the following prototype:

PKSPIN_LOCK_QUEUE KxWaitForLockChainValid(
       __inout PKSPIN_LOCK_QUEUE LockQueue);

Here is the definition for the struct parameter:

/* Per-waiter node of a queued spin lock. KxWaitForLockChainValid polls
 * Next (offset 0x0, read with "mov rax, [rdi]") until it is non-zero. */
typedef struct _KSPIN_LOCK_QUEUE 
{
    struct _KSPIN_LOCK_QUEUE * volatile Next;   /* 0x0 */
    PKSPIN_LOCK volatile Lock;                  /* 0x8 */
} KSPIN_LOCK_QUEUE, *PKSPIN_LOCK_QUEUE;

Here is the disassembly of the function:

; rcx = LockQueue. Polls the qword at [LockQueue] until it is non-zero and
; returns that value in rax. ebx counts iterations.
KxWaitForLockChainValid:
    mov     [rsp+8], rbx            ; save rbx in the caller-provided area above the return address
    push    rdi
    sub     rsp, 20h                ; space for the callee's register arguments
    mov     rdi, rcx                ; rdi = LockQueue (survives the call below)
    xor     ebx, ebx                ; iteration counter = 0

loc_1400DA7F7:                        
    inc     ebx
    test    cs:HvlLongSpinCountMask, ebx
    jz      loc_14019DCAC           ; (counter & mask) == 0 -> out-of-line path

loc_1400DA805:                       
    pause

loc_1400DA807:                        
    mov     rax, [rdi]              ; rax = qword at LockQueue+0
    test    rax, rax
    jz      short loc_1400DA7F7     ; still zero -> spin again
    mov     rbx, [rsp+28h+8]        ; restore rbx
    add     rsp, 20h
    pop     rdi
    retn                            ; rax = the non-zero value just read

loc_14019DCAC:                    
    mov     eax, cs:HvlEnlightenments
    test    al, 40h
    jz      loc_1400DA805           ; bit 40h clear -> just pause
    mov     ecx, ebx                ; argument = iteration counter
    call    HvlNotifyLongSpinWait
    nop
    jmp     loc_1400DA807           ; rejoin after the pause (pause is skipped on this path)

This is a spinlock implementation. It's interesting that the last label is in a distant memory area. This is usually an indication of an optimization by the compiler that the code is rarely used.

/*
 * Spins until LockQueue->Next becomes non-NULL and returns that value.
 *
 * On each pass the counter is incremented. When (counter &
 * HvlLongSpinCountMask) is zero AND bit 0x40 of HvlEnlightenments is set,
 * HvlNotifyLongSpinWait(counter) is called instead of executing PAUSE;
 * in every other case the pass executes PAUSE.
 */
PKSPIN_LOCK_QUEUE KxWaitForLockChainValid(
       __inout PKSPIN_LOCK_QUEUE LockQueue)
{
    UINT32 i = 0;  /* xor ebx, ebx */
    PKSPIN_LOCK_QUEUE Next;

    do             /* loc_1400DA7F7 */
    {
        ++i;       /* inc ebx */ 

        /* test cs:HvlLongSpinCountMask, ebx / jz loc_14019DCAC */ 
        /* test al, 40h / jz loc_1400DA805 */
        if ((i & HvlLongSpinCountMask) == 0 && (HvlEnlightenments & 0x40) != 0)
            HvlNotifyLongSpinWait(i);       /* mov ecx, ebx */
        else
            _mm_pause();                    /* pause */

        Next = LockQueue->Next;             /* mov rax, [rdi] */

    } while (Next == 0);            /* test rax, rax / jz loc_1400DA7F7 */

    return Next;                    /* rax on return */
}



KeReadyThread

Inside ntoskrnl.exe, KeReadyThread has the following prototype:

VOID NTAPI KeReadyThread(_In_ PKTHREAD Thread);

Here is the disassembly:

; rcx = Thread. Calls KiFastReadyThread(Thread) unless the low three bits
; of the dword at (pointer at Thread+0B8h)+234h are non-zero AND
; KiInSwapSingleProcess returns non-zero in al.
KeReadyThread:              
    push    rbx
    sub     rsp, 20h                ; space for the callees' register arguments
    mov     rdx, [rcx+0B8h]         ; rdx = pointer stored at Thread+0xB8
    mov     rbx, rcx                ; rbx = Thread (survives the calls)
    mov     eax, [rdx+234h]         ; eax = dword at offset 0x234 of that object
    test    al, 7
    jnz     short loc_1400F6684     ; any of bits 0-2 set -> extra check

loc_1400F6676:                         
    mov     rcx, rbx                ; argument = Thread
    call    KiFastReadyThread

loc_1400F667E:                          
    add     rsp, 20h
    pop     rbx
    retn

loc_1400F6684:                          
    call    KiInSwapSingleProcess   ; rcx (Thread) and rdx (pointer from +0xB8) are still live here
    test    al, al
    jnz     short loc_1400F667E     ; non-zero result -> return without readying
    jmp     short loc_1400F6676

Until I calculate the offsets the struct values are unknown.

/*
 * Readies Thread via KiFastReadyThread unless both of these hold:
 *   - any of the low three bits of the dword at offset 0x234 of the object
 *     pointed to by Thread+0xB8 is set, and
 *   - KiInSwapSingleProcess returns non-zero.
 *
 * UnknownB8 / Unknown234 are placeholders named after their offsets;
 * UnknownB8 is a POINTER loaded from the thread, hence '->'.
 */
VOID NTAPI KeReadyThread(_In_ PKTHREAD Thread)
{
    /* mov rdx, [rcx+0B8h] */
    /* mov eax, [rdx+234h] */
    /* test al, 7 / jnz loc_1400F6684 */
    if ((Thread->UnknownB8->Unknown234 & 7) != 0)
    {
        /* rcx still holds Thread and rdx the pointer from +0xB8 at the
         * call; the callee's real parameter list is not visible here. */
        if (KiInSwapSingleProcess(Thread))  /* call KiInSwapSingle */
            return;                         /* jnz loc_1400F667E */
    }

    KiFastReadyThread(Thread);              /* call KiFastReadyThread */
}



KiInitializeTSS

Inside ntoskrnl.exe, KiInitializeTSS has the following prototype:

VOID NTAPI KiInitializeTSS(_In_ PKTSS Tss);

This has a parameter for the PKTSS struct. It is defined as:

/* 32-bit KTSS. Offsets are derived from the member sizes; the four
 * members written by KiInitializeTSS are Ss0, LDT, Flags and IoMapBase. */
typedef struct _KTSS
{
     WORD Backlink;             /* 0x0 */
     WORD Reserved0;            /* 0x2 */
     ULONG Esp0;                /* 0x4 */
     WORD Ss0;                  /* 0x8 */
     WORD Reserved1;            /* 0xa */
     ULONG NotUsed1[4];         /* 0xc */
     ULONG CR3;                 /* 0x1c */
     ULONG Eip;                 /* 0x20 */
     ULONG EFlags;              /* 0x24 */
     ULONG Eax;                 /* 0x28 */
     ULONG Ecx;                 /* 0x2c */
     ULONG Edx;                 /* 0x30 */
     ULONG Ebx;                 /* 0x34 */
     ULONG Esp;                 /* 0x38 */
     ULONG Ebp;                 /* 0x3c */
     ULONG Esi;                 /* 0x40 */
     ULONG Edi;                 /* 0x44 */
     WORD Es;                   /* 0x48 */
     WORD Reserved2;            /* 0x4a */
     WORD Cs;                   /* 0x4c */
     WORD Reserved3;            /* 0x4e */
     WORD Ss;                   /* 0x50 */
     WORD Reserved4;            /* 0x52 */
     WORD Ds;                   /* 0x54 */
     WORD Reserved5;            /* 0x56 */
     WORD Fs;                   /* 0x58 */
     WORD Reserved6;            /* 0x5a */
     WORD Gs;                   /* 0x5c */
     WORD Reserved7;            /* 0x5e */
     WORD LDT;                  /* 0x60 */
     WORD Reserved8;            /* 0x62 */
     WORD Flags;                /* 0x64 */
     WORD IoMapBase;            /* 0x66 */
     KiIoAccessMap IoMaps[1];   /* 0x68 */
     UCHAR IntDirectionMap[32]; /* 0x208c; the structure ends at 0x20ac */
} KTSS, *PKTSS;

Here is the disassembly:

; 32-bit routine with one stack argument (Tss at [ebp+8]); "ret 4" removes
; that argument on return.
KiInitializeTSS:
     mov     edi, edi                   ; two-byte no-op at the entry point
     push    ebp
     mov     ebp, esp
     mov     eax, dword ptr [ebp+8]     ; eax = Tss
     and     word ptr [eax+64h], 0      ; word at offset 0x64 = 0
     and     word ptr [eax+60h], 0      ; word at offset 0x60 = 0
     mov     word ptr [eax+66h], 20ACh  ; word at offset 0x66 = 0x20AC
     mov     word ptr [eax+8], 10h      ; word at offset 0x8 = 0x10
     pop     ebp
     ret     4

This function fills in the structure with constants.

/* Writes four constants into *Tss: Ss0 = 0x10, IoMapBase = the size of
 * the whole KTSS (0x20AC in the listing), and zero for LDT and Flags. */
VOID NTAPI KiInitializeTSS(_In_ PKTSS Tss)
{
     /* Non-zero members. */
     Tss->Ss0 = 0x10;                /* mov word ptr [eax+8], 10h */
     Tss->IoMapBase = sizeof(KTSS);  /* mov word ptr [eax+66h], 20ACh */

     /* Members cleared with "and word ptr [...], 0". */
     Tss->LDT = 0;                   /* [eax+60h] */
     Tss->Flags = 0;                 /* [eax+64h] */
}



RtlValidateUnicodeString

Inside ntoskrnl.exe, RtlValidateUnicodeString has the following prototype:

NTSTATUS NTAPI RtlValidateUnicodeString(   
    _In_ ULONG Flags,
    _In_ PCUNICODE_STRING UnicodeString);

The UNICODE_STRING struct in a 64-bit system context is defined as:

/* Counted UTF-16 string descriptor in its 64-bit layout. Reserved stands
 * for the 4 bytes that sit between MaximumLength and the 8-byte-aligned
 * Buffer pointer; the listing never touches them. */
typedef struct _UNICODE_STRING {
  USHORT Length;            /* 0x0 */
  USHORT MaximumLength;     /* 0x2 */
  DWORD  Reserved;          /* 0x4 */
  PWSTR  Buffer;            /* 0x8 */
} UNICODE_STRING, *PUNICODE_STRING;

Here's the disassembly of the function:

; ecx = Flags, rdx = UnicodeString. Returns 0 in eax when every check
; passes (or when rdx is NULL), 0C000000Dh otherwise.
RtlValidateUnicodeString:
    xor     eax, eax                ; eax = 0, the success value, and rax = 0 for the later compare
    test    ecx, ecx
    jnz     short loc_1400D23BB     ; Flags != 0 -> error
    test    rdx, rdx
    jz      short locret_1400D23BA  ; NULL string pointer -> return 0
    movzx   r8d, word ptr [rdx]     ; r8w = word at offset 0x0
    test    r8b, 1
    jnz     short loc_1400D23BB     ; odd -> error
    movzx   ecx, word ptr [rdx+2]   ; cx = word at offset 0x2
    test    cl, 1
    jnz     short loc_1400D23BB     ; odd -> error
    cmp     r8w, cx
    ja      short loc_1400D23BB     ; first word > second word (unsigned) -> error
    mov     r9d, 0FFFEh
    cmp     cx, r9w
    ja      short loc_1400D23BB     ; second word > 0FFFEh -> error
    cmp     [rdx+8], rax            ; pointer at offset 0x8 compared with 0
    jz      loc_14019BAF4           ; NULL pointer -> extra checks

locret_1400D23BA:       
    retn 

loc_1400D23BB:    
    mov     eax, 0C000000Dh         ; error status
    retn
    
loc_14019BAF4:
    test    r8w, r8w
    jnz     loc_1400D23BB           ; NULL pointer with non-zero first word -> error
    test    cx, cx
    jz      locret_1400D23BA        ; both words zero -> return 0
    jmp     loc_1400D23BB           ; non-zero second word -> error

The function, true to its name, follows the traditional validation pattern of executing tests and returning false (NTSTATUS: INVALID_PARAMETER) or true (NTSTATUS: SUCCESS) depending on if the conditions are met or not. Note that the last test case in the main body of the function can jump to a distant memory space for more tests, an optimization that likely means it is rarely branched to.

/* test ecx, ecx */
if (Flags != 0)
    return STATUS_INVALID_PARAMETER; /* mov eax, 0C000000Dh */

/* test rdx, rdx */
if (!UnicodeString)
    return STATUS_SUCCESS;           /* xor eax, eax */

/* movzx r8d, word ptr [rdx] */
/* test r8b, 1 */
if (UnicodeString->Length & 1 != 0)
    return STATUS_INVALID_PARAMETER; 

/* movzx ecx, word ptr [rdx+2] */
/* test cl, 1 */
if (UnicodeString->MaximumLength & 1 != 0)
    return STATUS_INVALID_PARAMETER; 

/* cmp r8w, cx */
if (UnicodeString->Length > UnicodeString.MaximumLength)
    return STATUS_INVALID_PARAMETER; 

/* mov r9d, 0FFFEh */
/* cmp cx, r9w */
if (UnicodeString->MaximumLength > 65534)
    return STATUS_INVALID_PARAMETER; 

/* cmp [rdx+8], rax */
if (UnicodeString->Buffer == 0)
{
    /* test r8w, r8w */
    if (UnicodeString->Length != 0)
        return STATUS_INVALID_PARAMETER; 

    /* test cx, cx */
    if (UnicodeString->MaximumLength != 0)
        return STATUS_INVALID_PARAMETER; 
}

return STATUS_SUCCESS;

Friday, January 2, 2015

Practical Reverse Engineering p. 35 #4

Question number 4 on page 35 of Practical Reverse Engineering is as follows:

Implement the following functions in x86 assembly:
  • strlen
  • strchr
  • memcpy
  • memset
  • strcmp
  • strset

Here is the C prototype for strlen:

size_t strlen(const char *str);

This function returns the number of characters found before the null-byte character in a C-string. We can set AL to the null byte and scan the string at EDI using REPNE SCASB. We set ECX to -1 to begin; REPNE SCASB decrements it once per byte examined (terminator included), so afterwards we NOT ECX and subtract 1 to get the length.

; size_t strlen(const char *str)
; 32-bit, arguments on the stack, result in eax.
; Clobbers ecx and flags; edi is saved and restored.
_strlen:
    push edi                ; edi must be preserved for the caller

    mov edi, [esp + 0x8]    ; edi = str (the push moved the argument up by 4)
    mov ecx, -1             ; effectively no limit on the scan length
    xor eax, eax            ; al = '\0', the byte to look for

    cld                     ; scan towards higher addresses
    repne scasb             ; ecx drops by 1 per byte examined, terminator included

    mov eax, ecx
    not eax                 ; eax = number of bytes examined = length + 1
    dec eax                 ; leave the terminator out of the count

    pop edi
    ret



Here is the C prototype for strchr:

char *strchr(char *str, int character);

The method of searching in the strchr function seems like it would be very similar to strlen. There are two main differences. Instead of searching for the null byte, we are searching for a user-passed argument. And instead of returning the length, we return a pointer to the first place the search character is found.

We could be naive and just change some code in strlen, but looping with REPNE here means we could overrun the string buffer into unknown memory space. We need logic to check for the null byte as well, making the function more complicated. We could implement a strlen lookup beforehand, but it is more efficient to revert to more generic looping and comparison constructs.

; char *strchr(char *str, int character)
; 32-bit, arguments on the stack, result in eax (0 when not found).
; Searching for '\0' returns a pointer to the terminator, because the
; match test runs before the end-of-string test.
; Clobbers ecx, edx and flags.
_strchr:
    mov edx, [esp + 0x4]    ; edx = cursor into str
    mov ecx, [esp + 0x8]    ; cl = character to find

strchr_scan:
    mov al, [edx]           ; al = current character
    cmp al, cl
    je strchr_hit           ; match (possibly on the terminator itself)

    inc edx

    test al, al
    jnz strchr_scan         ; not at the end yet: keep going

    xor eax, eax            ; reached '\0' without a match
    ret

strchr_hit:
    mov eax, edx            ; return the address of the match
    ret



Here is the C prototype for memcpy: 

void *memcpy (void *destination, const void *source, size_t num);

This implementation is very convenient in x86. We use the REP MOVSB operation, which copies bytes from the address in ESI to the address in EDI, one at a time, until ECX reaches 0.

; void *memcpy(void *destination, const void *source, size_t num)
; 32-bit, arguments on the stack, returns destination in eax.
; Clobbers ecx and flags; esi and edi are saved and restored.
_memcpy:
    push esi                ; both string registers must be preserved
    push edi

    mov ecx, [esp + 0x14]   ; ecx = num (arguments sit 8 bytes higher after two pushes)
    mov esi, [esp + 0x10]   ; esi = source
    mov edi, [esp + 0xc]    ; edi = destination

    mov eax, edi            ; return value, captured before edi advances

    cld                     ; copy towards higher addresses
    rep movsb               ; copies ecx bytes; does nothing when ecx = 0

    pop edi
    pop esi
    ret



Here is the C prototype for memset:

void *memset (void *ptr, int value, size_t num);

This implementation is similar to memcpy. We change to the REP STOSB operation, which will copy AL into EDI until ECX reaches 0.

; void *memset(void *ptr, int value, size_t num)
; 32-bit, arguments on the stack, returns ptr in eax.
; Clobbers ecx, edx and flags; edi is saved and restored.
_memset:
    push edi

    mov edi, [esp + 0x8]    ; edi = ptr
    mov edx, edi            ; remember ptr for the return value
    mov eax, [esp + 0xc]    ; al = fill byte (low byte of value)
    mov ecx, [esp + 0x10]   ; ecx = num

    cld                     ; fill towards higher addresses
    rep stosb               ; stores al ecx times; does nothing when ecx = 0

    mov eax, edx            ; return the original ptr

    pop edi
    ret



Here is the C prototype for strcmp:

int strcmp(const char *str1, const char *str2);

The strcmp function returns 0 if both strings are equal, > 0 if the first character that does not match has a greater value in str1 than in str2, and < 0 in the other case.

For this function we need to change our strategy. The REPE CMPSB instruction looks like it would be great, but we would need to precompute both strings' lengths and then use the minimum for our ECX value. We can achieve a better implementation using more generic looping and comparison constructs. Even though it seems to be more code, by skipping two strlen calls it ends up being less work.

; int strcmp(const char *str1, const char *str2)
; 32-bit, arguments on the stack, result in eax:
;   0  if the strings are equal,
;   >0 if the first differing byte is greater in str1,
;   <0 if it is greater in str2.
; Bytes are compared as unsigned values. Both bytes are zero-extended and
; subtracted as 32-bit integers so the difference keeps its sign; an 8-bit
; subtraction into a zeroed eax could only ever yield 0..255.
; Clobbers ecx and flags; esi and edi are saved and restored.
_strcmp:
    push edi
    push esi

    mov esi, [esp + 0xc]    ; esi = str1
    mov edi, [esp + 0x10]   ; edi = str2

cmp_loop:
    movzx eax, byte [esi]   ; eax = *str1 as 0..255
    movzx ecx, byte [edi]   ; ecx = *str2 as 0..255
    sub eax, ecx            ; eax = *str1 - *str2, in -255..255
    jne cmp_leave           ; first difference decides the result

    test ecx, ecx           ; equal bytes: was it the terminator?
    jz cmp_leave            ; yes - strings are equal, eax is already 0

    inc esi                 ; ++str1, ++str2
    inc edi
    jmp cmp_loop

cmp_leave:
    pop esi
    pop edi
    ret



Here is the C prototype for strset:

char *strset(char *str, int ch);

This is a function that isn't part of the standard library. We assume it works similar to memset, except that it will stop at the null terminator. There are two strategies we could take: precompute the strlen and then do a memset, or use more generic looping and comparison for a tighter implementation.

; char *strset(char *str, int ch)
; 32-bit, arguments on the stack, returns str in eax.
; Overwrites every character before the terminator with the low byte of
; ch; the terminator itself is left in place.
; Clobbers ecx, edx and flags; no callee-saved register is touched.
_strset:
    mov eax, [esp + 0x4]    ; eax = str, doubles as the return value
    mov ecx, [esp + 0x8]    ; cl = fill character
    mov edx, eax            ; edx = cursor

strset_fill:
    cmp byte [edx], 0       ; stop at the original terminator
    je strset_done

    mov [edx], cl           ; *cursor = ch
    inc edx

    jmp strset_fill

strset_done:
    ret

Thursday, January 1, 2015

Practical Reverse Engineering p. 35 #3

Question number 3 on page 35 of Practical Reverse Engineering is as follows:

In some of the assembly listings, the function name has a @ prefix followed by a number. Explain when and why this decoration exists.

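This is a compiler ABI decoration that MSVC applies to 32-bit stdcall functions: the name is prefixed with an underscore and suffixed with @ followed by the number of bytes taken by the parameters, which is also the number of bytes the callee pops on return. (fastcall names get the same @N suffix but start with @ instead of an underscore.)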

BOOL WINAPI DllMain(
  _In_  HINSTANCE hinstDLL,
  _In_  DWORD fdwReason,
  _In_  LPVOID lpvReserved
);

The C function becomes the following symbol after compilation:

_DllMain@12

Practical Reverse Engineering p. 35 #2

Question number 2 on page 35 of Practical Reverse Engineering is as follows:

In the example walk-through, we did a nearly one-to-one translation of the assembly code to C. As an exercise, re-decompile this whole function so that it looks more natural. What can you say about the developer’s skill level/experience? Explain your reasons. Can you do a better job?

The function itself is fairly straightforward outside of the intrinsics library, which is basically just anti-VM code. It's hard to judge the competency of the developer in terms of software engineering skills since the compiler can change a lot of structure around. It's obvious he or she at least has a decent understanding of the Windows API, and has an idea about where a virtual machine CPU might store the Interrupt Descriptor Table.

Here is the function reverse engineered to C:

#include <Windows.h> 
#include <tlhelp32.h> 
#include <intrin.h> 
#include <tchar.h>
 
/* Memory image written by SIDT: a 16-bit limit followed by a 32-bit base.
 * The structure is packed so that base sits at offset 2, matching the
 * listing's "sidt fword ptr [ebp-8]" / "mov eax, [ebp-6]". */
#pragma pack(push, 1)
typedef struct _IDTR {
    WORD  limit;
    DWORD base;
} IDTR, *PIDTR;
#pragma pack(pop)
 
/*
 * Reconstructed DllMain.
 *
 * Returns FALSE when the IDT base lies strictly between 0x8003F400 and
 * 0x80047400, when the process snapshot cannot be taken, when no process
 * named "explorer.exe" is found, or when that entry's parent process ID
 * equals its own process ID. Otherwise returns TRUE, and on
 * DLL_PROCESS_ATTACH first starts a thread at the fixed address
 * 0x100032D0.
 *
 * The snapshot handle is not closed; this mirrors the reconstruction
 * rather than recommended practice.
 */
BOOL APIENTRY DllMain(HMODULE hMod, DWORD dwReason, LPVOID lpRes) 
{
    IDTR idtr;
    PROCESSENTRY32 procentry;
    HANDLE hToolSnap;
    BOOL bProc32;

    __sidt(&idtr); 
    
    if (idtr.base > 0x8003F400 && idtr.base < 0x80047400)
        return FALSE;

    memset(&procentry, 0, sizeof(PROCESSENTRY32));
    procentry.dwSize = sizeof(procentry);

    hToolSnap = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
    if (hToolSnap == INVALID_HANDLE_VALUE)
        return FALSE;
    
    for (   bProc32 = Process32First(hToolSnap, &procentry); 
            bProc32 != FALSE;
            bProc32 = Process32Next(hToolSnap, &procentry)) 
    {
        /* szExeFile is a TCHAR array and the literal is wrapped in _T(),
         * so the comparison must be the TCHAR-generic one as well.
         * NOTE(review): confirm against the listing whether the sample's
         * comparison is case-sensitive. */
        if (_tcscmp(procentry.szExeFile, _T("explorer.exe")) == 0)
            break;
    }

    if (!bProc32) 
        return FALSE;

    if (procentry.th32ParentProcessID == procentry.th32ProcessID)
        return FALSE; 

    if (dwReason == DLL_PROCESS_ATTACH)
        CreateThread(0, 0, (LPTHREAD_START_ROUTINE)0x100032D0, 0, 0, 0);

    return TRUE;
}

Practical Reverse Engineering p. 35 #1

Question number 1 on page 35 of Practical Reverse Engineering is as follows:

Repeat the walk-through by yourself. Draw the stack layout, including parameters and local variables.

The function in question is DllMain, which is a WINAPI (stdcall) convention. The stack pointer with regards to function entry looks like the following:

0x0 &return
+0x4 hModule
+0x8 dwReason
+0xc lpReserved

The function's instructions immediately modify the stack:

push ebp                    ; save the caller's frame pointer
mov ebp, esp                ; ebp now points at the saved EBP
sub esp, 130h               ; reserve 0x130 bytes for locals
push edi                    ; save edi just below the locals
sidt fword ptr [ebp-8]      ; store the 6-byte IDTR image at ebp-8
mov eax, [ebp-6]            ; eax = the dword 2 bytes into that image

So the stack for local variables becomes:

-0x138 saved EDI
-0x134 reserved
-0xc struct IDT
-0x4 saved EBP
0x0 &return

The reserved bytes are for a PROCESSENTRY32 struct, which is defined on MSDN as follows:

/* Process snapshot entry. Offsets are for the layout used in the stack
 * diagram in this post: 4-byte ULONG_PTR and a 0x104-byte szExeFile. */
typedef struct tagPROCESSENTRY32 {
  DWORD     dwSize;                 /* 0x0 */
  DWORD     cntUsage;               /* 0x4 */
  DWORD     th32ProcessID;          /* 0x8 */
  ULONG_PTR th32DefaultHeapID;      /* 0xc */
  DWORD     th32ModuleID;           /* 0x10 */
  DWORD     cntThreads;             /* 0x14 */
  DWORD     th32ParentProcessID;    /* 0x18 */
  LONG      pcPriClassBase;         /* 0x1c */
  DWORD     dwFlags;                /* 0x20 */
  TCHAR     szExeFile[MAX_PATH];    /* 0x24 */
} PROCESSENTRY32, *PPROCESSENTRY32;

So a more accurate and complete stack layout showing the local variables and parameters may be:

-0x138 saved EDI
-0x134 PROCESSENTRY32.dwSize
-0x130 PROCESSENTRY32.cntUsage
-0x12c PROCESSENTRY32.th32ProcessID
-0x128 PROCESSENTRY32.th32DefaultHeapID
-0x124 PROCESSENTRY32.th32ModuleID
-0x120 PROCESSENTRY32.cntThreads
-0x11c PROCESSENTRY32.th32ParentProcessID
-0x118 PROCESSENTRY32.pcPriClassBase
-0x114 PROCESSENTRY32.dwFlags
-0x110 PROCESSENTRY32.szExeFile[0x104]
-0xc IDT.limit
-0xa IDT.base
-0x4 saved EBP
0x0 &return
+0x4 hModule
+0x8 dwReason
+0xc lpReserved