目录
- 0x01 Syscall & Sysret
- 0x02 KiSystemCall64
- 构造TrapFrame
- _kthread->header->DebugActive.ActiveDR7|Instrumented
- user-mode scheduling(???这部分暂时还不明白)
- SSDT & ShadowSSDT API地址计算
- KiInitiateUserApc Apc分发
- InstrumentationCallback用户层回调
0x01 Syscall & Sysret
-
Syscall
Syscall
IF (CS.L ≠ 1 ) or (IA32_EFER.LMA ≠ 1) or (IA32_EFER.SCE ≠ 1) (* Not in 64-Bit Mode or SYSCALL/SYSRET not enabled in IA32_EFER *) THEN #UD; FI; RCX ← RIP; (* Will contain address of next instruction *) RIP ← IA32_LSTAR; R11 ← RFLAGS; RFLAGS ← RFLAGS AND NOT(IA32_FMASK); CS.Selector ← IA32_STAR[47:32] AND FFFCH (* Operating system provides CS; RPL forced to 0 *) (* Set rest of CS to a fixed value *) CS.Base ← 0; (* Flat segment *) CS.Limit ← FFFFFH; (* With 4-KByte granularity, implies a 4-GByte limit *) CS.Type ← 11; (* Execute/read code, accessed *) CS.S ← 1; CS.DPL ← 0; CS.P ← 1; CS.L ← 1; (* Entry is to 64-bit mode *) CS.D ← 0; (* Required if CS.L = 1 *) CS.G ← 1; (* 4-KByte granularity *) CPL ← 0;
从上面Syscall的伪代码可以看到,执行syscall后rcx中保存的是下一条指令的地址(即返回地址),原来放在rcx中的第一个参数会被覆盖,所以windows在执行Syscall之前会先将rcx保存到r10中。r11中保存的是rflags。rip是从IA32_LSTAR中获取的,cs的选择子是从IA32_STAR[47:32]读取的,但是cs的其他段属性并没有根据选择子到内存中的描述符表里读取,而是直接设置为固定值,因此OS有义务保证该段选择子对应的段描述符与这些固定值相一致。还可以发现上面的伪代码并没有进行堆栈切换,因此需要OS在系统调用例程中自己完成堆栈切换。
-
Sysret
IF (CS.L ≠ 1 ) or (IA32_EFER.LMA ≠ 1) or (IA32_EFER.SCE ≠ 1) (* Not in 64-Bit Mode or SYSCALL/SYSRET not enabled in IA32_EFER *) THEN #UD; FI; IF (CPL ≠ 0) THEN #GP(0); FI; IF (operand size is 64-bit) THEN (* Return to 64-Bit Mode *) IF (RCX is not canonical) THEN #GP(0); FI; RIP ← RCX; ELSE (* Return to Compatibility Mode *) RIP ← ECX; FI; RFLAGS ← (R11 & 3C7FD7H) | 2; (* Clear RF, VM, reserved bits; set bit 1 *) IF (operand size is 64-bit) THEN CS.Selector ← IA32_STAR[63:48]+16; ELSE CS.Selector ← IA32_STAR[63:48]; FI; CS.Selector ← CS.Selector OR 3; (* RPL forced to 3 *) (* Set rest of CS to a fixed value *) CS.Base ← 0; (* Flat segment *) CS.Limit ← FFFFFH; (* With 4-KByte granularity, implies a 4-GByte limit *) CS.Type ← 11; (* Execute/read code, accessed *) CS.S ← 1; CS.DPL ← 3; CS.P ← 1; IF (operand size is 64-bit) THEN (* Return to 64-Bit Mode *) CS.L ← 1; (* 64-bit code segment *) CS.D ← 0; (* Required if CS.L = 1 *) ELSE (* Return to Compatibility Mode *) CS.L ← 0; (* Compatibility mode *) CS.D ← 1; (* 32-bit code segment *) FI; CS.G ← 1; (* 4-KByte granularity *) CPL ← 3; SS.Selector ← (IA32_STAR[63:48]+8) OR 3; (* RPL forced to 3 *) (* Set rest of SS to a fixed value *) SS.Base ← 0; (* Flat segment *) SS.Limit ← FFFFFH; (* With 4-KByte granularity, implies a 4-GByte limit *) SS.Type ← 3; (* Read/write data, accessed *) SS.S ← 1; SS.DPL ← 3; SS.P ← 1; SS.B ← 1; (* 32-bit stack segment*) SS.G ← 1; (* 4-KByte granularity *)
SYSRET loads the CS and SS selectors with values derived from bits 63:48 of the IA32_STAR MSR. However, the CS and SS descriptor caches are not loaded from the descriptors (in GDT or LDT) referenced by those selectors. Instead, the descriptor caches are loaded with fixed values.
- rip<-rcx
- rflags<-(R11 & 3C7FD7H) | 2
0x02 KiSystemCall64
-
- 构造TrapFrame
.text:000000014007F640 swapgs ; switch to _kpcr .text:000000014007F643 mov gs:10h, rsp ; UserRsp .text:000000014007F64C mov rsp, gs:1A8h ; RspBase .text:000000014007F655 push 2Bh ; '+' ; ------------------ .text:000000014007F655 ; ss .text:000000014007F655 ; rsp .text:000000014007F655 ; rflags .text:000000014007F655 ; cs .text:000000014007F655 ; rsp--> rip .text:000000014007F657 push qword ptr gs:10h .text:000000014007F65F push r11 .text:000000014007F661 push 33h ; '3' .text:000000014007F663 push rcx ; ------------------ .text:000000014007F664 mov rcx, r10 ; move first param from r10 to rcx .text:000000014007F667 sub rsp, 8 ; reserve 8byte for errorCode .text:000000014007F66B push rbp ; 以上是构造异常发生时的堆栈环境 .text:000000014007F66C sub rsp, 158h .text:000000014007F673 lea rbp, [rsp+80h] .text:000000014007F67B mov [rbp+0C0h], rbx .text:000000014007F682 mov [rbp+0C8h], rdi .text:000000014007F689 mov [rbp+0D0h], rsi
首先在函数开头调用swapgs将指向_teb的gs指向_kpcr,然后切换0环堆栈。在堆栈上构造出类似于中断/异常的环境。然后sub rsp, 158h给_TrapFrame申请空间,后面一段代码都是在填充_TrapFrame。这里给出ReactOS中的amd64下的_TrapFrame结构。
/*
 * amd64 trap frame as declared by ReactOS. It holds the volatile register
 * state, debug registers and the hardware-style interrupt frame captured on
 * entry to the kernel.
 *
 * Offsets in the comments below assume natural alignment, 8-byte UINT64 and
 * a 16-byte, 16-byte-aligned M128A; under those assumptions the structure is
 * 0x190 bytes long (TODO confirm against the target's headers).
 */
typedef struct _KTRAP_FRAME {
    /* 0x000: home space for the four register parameters plus one spare slot. */
    UINT64 P1Home;
    UINT64 P2Home;
    UINT64 P3Home;
    UINT64 P4Home;
    UINT64 P5;

    /* 0x028: one-byte status fields followed by the saved MXCSR (0x02C). */
    CHAR PreviousMode;
    UCHAR PreviousIrql;
    UCHAR FaultIndicator;
    UCHAR ExceptionActive;
    ULONG MxCsr;

    /* 0x030: volatile integer registers. */
    UINT64 Rax;
    UINT64 Rcx;
    UINT64 Rdx;
    UINT64 R8;
    UINT64 R9;
    UINT64 R10;
    UINT64 R11;

    /* 0x068: two names for the same 8-byte slot. */
    union {
        UINT64 GsBase;
        UINT64 GsSwap;
    };

    /* 0x070: volatile XMM registers. */
    M128A Xmm0;
    M128A Xmm1;
    M128A Xmm2;
    M128A Xmm3;
    M128A Xmm4;
    M128A Xmm5;

    /* 0x0D0: three names for the same 8-byte slot. */
    union {
        UINT64 FaultAddress;
        UINT64 ContextRecord;
        UINT64 TimeStampCKCL;
    };

    /* 0x0D8: debug registers; Dr7 lands at 0x100. */
    UINT64 Dr0;
    UINT64 Dr1;
    UINT64 Dr2;
    UINT64 Dr3;
    UINT64 Dr6;
    UINT64 Dr7;

    /* 0x108: branch/exception trace values, or the shorter two-field view. */
    union {
        struct {
            UINT64 DebugControl;
            UINT64 LastBranchToRip;
            UINT64 LastBranchFromRip;
            UINT64 LastExceptionToRip;
            UINT64 LastExceptionFromRip;
        };
        struct {
            UINT64 LastBranchControl;
            ULONG LastBranchMSR;
        };
    };

    /* 0x130: data segment selectors. */
    USHORT SegDs;
    USHORT SegEs;
    USHORT SegFs;
    USHORT SegGs;

    /* 0x138: TrapFrame slot, then the non-volatile registers (Rbx at 0x140). */
    UINT64 TrapFrame;
    UINT64 Rbx;
    UINT64 Rdi;
    UINT64 Rsi;
    UINT64 Rbp;

    /* 0x160: three names for the same 8-byte slot. */
    union {
        UINT64 ErrorCode;
        UINT64 ExceptionFrame;
        UINT64 TimeStampKlog;
    };

    /* 0x168: interrupt-style frame: Rip, SegCs, EFlags, Rsp, SegSs. */
    UINT64 Rip;
    UINT64 SegCs;
    UINT64 EFlags;
    UINT64 Rsp;
    UINT64 SegSs;
} KTRAP_FRAME, *PKTRAP_FRAME;
-
- _kthread->header->DebugActive.ActiveDR7|Instrumented
.text:000000014007F6B1 cmp byte ptr [rbx+3], 0 ; Debug_active .text:000000014007F6B5 mov word ptr [rbp+80h], 0 ; dr7 .text:000000014007F6BE jz no_debug .text:000000014007F6C4 mov [rbp-50h], rax .text:000000014007F6C8 mov [rbp-48h], rcx .text:000000014007F6CC mov [rbp-40h], rdx .text:000000014007F6D0 test byte ptr [rbx+3], 3 ; ActiveDR7|Instrumented .text:000000014007F6D0 ; _kprocess.InstrumentationCallback回调被设置时,_kthread.header.DebugActive.Instrumented
会被置位 .text:000000014007F6D4 mov [rbp-38h], r8 .text:000000014007F6D8 mov [rbp-30h], r9 .text:000000014007F6DC jz short loc_14007F6E3 ; 如果没有设置InstrumentationCallback或者dr7没有设置, .text:000000014007F6DC ; 则不会保存调试寄存器 .text:000000014007F6DE call KiSaveDebugRegisterState ; 保存Last Branch Recording(LBR)的五个msr寄存器和dr调试寄存器 .text:000000014007F6DE ; struct .text:000000014007F6DE ; { .text:000000014007F6DE ; UINT64 DebugControl; .text:000000014007F6DE ; UINT64 LastBranchToRip; .text:000000014007F6DE ; UINT64 LastBranchFromRip; .text:000000014007F6DE ; UINT64 LastExceptionToRip; .text:000000014007F6DE ; UINT64 LastExceptionFromRip; .text:000000014007F6DE ; };
从这段代码可以看到,它先检测_kthread.Header.DebugActive是否为0:如果为0,则直接跳转到查SSDT表的代码;如果不为0,则继续用test byte ptr [rbx+3], 3检查它的低两位。通过在Windbg中查看可以发现,DebugActive其实是一个位域结构体,掩码3对应ActiveDR7 | Instrumented。test/jz的含义是:只有当这两位都没有置位(即dr7没有启用并且也没有设置InstrumentationCallback)时才会跳过KiSaveDebugRegisterState,在系统调用中不保存dr寄存器;只要其中任意一位置位,就会保存调试寄存器。
分析到这里,我终于明白了上次写的那个逆向题中在第一个seh中设置了dr0~dr3,但是在第二个seh中检查却发现dr0~dr3都为0,原来是因为没有设置dr7,在系统调用时系统认为没有设置dr7,也就是没有启用硬件断点,自然dr寄存器就不用保存。
[+0x003] DebugActive : 0x3 [Type: unsigned char] [+0x003 ( 0: 0)] ActiveDR7 : 0x1 [Type: unsigned char] [+0x003 ( 1: 1)] Instrumented : 0x1 [Type: unsigned char] [+0x003 ( 5: 2)] Reserved2 : 0x0 [Type: unsigned char] [+0x003 ( 6: 6)] UmsScheduled : 0x0 [Type: unsigned char] [+0x003 ( 7: 7)] UmsPrimary : 0x0 [Type: unsigned char]
-
- user-mode scheduling(???这部分暂时还不明白)
.text:000000014007F6B1 cmp byte ptr [rbx+3], 0 ; Debug_active .text:000000014007F6B5 mov word ptr [rbp+80h], 0 ; dr7 .text:000000014007F6BE jz no_debug .text:000000014007F6C4 mov [rbp-50h], rax .text:000000014007F6C8 mov [rbp-48h], rcx .text:000000014007F6CC mov [rbp-40h], rdx .text:000000014007F6D0 test byte ptr [rbx+3], 3 ; ActiveDR7|Instrumented .text:000000014007F6D0 ; _kprocess.InstrumentationCallback回调被设置时,_kthread.header.DebugActive.Instrumented
会被置位 .text:000000014007F6D4 mov [rbp-38h], r8 .text:000000014007F6D8 mov [rbp-30h], r9 .text:000000014007F6DC jz short loc_14007F6E3 ; 如果没有设置InstrumentationCallback或者dr7没有设置, .text:000000014007F6DC ; 则不会保存调试寄存器 .text:000000014007F6DE call KiSaveDebugRegisterState ; 保存Last Branch Recording(LBR)的五个msr寄存器和dr调试寄存器 .text:000000014007F6DE ; struct .text:000000014007F6DE ; { .text:000000014007F6DE ; UINT64 DebugControl; .text:000000014007F6DE ; UINT64 LastBranchToRip; .text:000000014007F6DE ; UINT64 LastBranchFromRip; .text:000000014007F6DE ; UINT64 LastExceptionToRip; .text:000000014007F6DE ; UINT64 LastExceptionFromRip; .text:000000014007F6DE ; }; .text:000000014007F6E3 .text:000000014007F6E3 loc_14007F6E3: ; CODE XREF: KiSystemCall64+9C↑j .text:000000014007F6E3 test byte ptr [rbx+3], 80h ; kthread._DISPATCHER_HEADER.UmsPrimary .text:000000014007F6E7 jz short loc_14007F72B ; kthread._DISPATCHER_HEADER.UmsScheduled .text:000000014007F6E9 mov ecx, 0C0000102h ; IA32_KERNEL_GS_BASE .text:000000014007F6EE rdmsr .text:000000014007F6F0 shl rdx, 20h .text:000000014007F6F4 or rax, rdx .text:000000014007F6F7 cmp [rbx+0B8h], rax ; _kthread.teb .text:000000014007F6FE jz short loc_14007F72B ; kthread._DISPATCHER_HEADER.UmsScheduled .text:000000014007F700 cmp [rbx+1B0h], rax ; kthread.TebMappedLowVa .text:000000014007F707 jz short loc_14007F72B ; kthread._DISPATCHER_HEADER.UmsScheduled .text:000000014007F709 mov rdx, [rbx+1B8h] .text:000000014007F710 bts dword ptr [rbx+4Ch], 0Bh .text:000000014007F715 dec word ptr [rbx+1C4h] .text:000000014007F71C mov [rdx+80h], rax .text:000000014007F723 sti .text:000000014007F724 call KiUmsCallEntry ; user-mode scheduling
如果_kthread.Header.DebugActive不为0并且其第7位UmsPrimary置位,则读取IA32_KERNEL_GS_BASE(swapgs之后其中保存的是用户态的gs基址)并与_kthread.Teb和_kthread.TebMappedLowVa比较:只要与其中任意一个相等就跳过;只有与两者都不相等时才会调用KiUmsCallEntry。这个函数似乎与user-mode scheduling有关,这一机制允许用户在用户层进行任务调度。(具体的东西我也不懂......)
-
- SSDT & ShadowSSDT API地址计算
text:000000014007F750 sti .text:000000014007F751 mov [rbx+1E0h], rcx ; _kthread.FirstArgument .text:000000014007F758 mov [rbx+1F8h], eax ; _kthread.SystemCallNumber .text:000000014007F75E .text:000000014007F75E KiSystemServiceStart: ; DATA XREF: KiServiceInternal+5A↑o .text:000000014007F75E ; .data:00000001401EE648↓o .text:000000014007F75E mov [rbx+1D8h], rsp ; _kthread._KTRAP_FRAME .text:000000014007F765 mov edi, eax .text:000000014007F767 shr edi, 7 .text:000000014007F76A and edi, 20h ; 判断传入的序号是不是0x1xxx类型的,是否是ShdowSSDT的api .text:000000014007F76D and eax, 0FFFh .text:000000014007F772 .text:000000014007F772 KiSystemServiceRepeat: ; CODE XREF: KiSystemCall64+47B↓j .text:000000014007F772 lea r10, KeServiceDescriptorTable .text:000000014007F779 lea r11, KeServiceDescriptorTableShadow .text:000000014007F780 test dword ptr [rbx+100h], 80h ; _kthread.GuiThread .text:000000014007F78A cmovnz r10, r11 .text:000000014007F78E cmp eax, [rdi+r10+10h] .text:000000014007F793 jnb loc_14007FA82 ; 函数序号超过ssdt中存在的: .text:000000014007F793 ; 1.可能是传入的序号出错 .text:000000014007F793 ; 2.或者是还没有转换成gui线程调用了shadowSSDT的函数 .text:000000014007F799 mov r10, [rdi+r10] ; rdi为0x20或0x00很巧妙,刚好可以作为ssdt和Shadow ssdt的偏移 .text:000000014007F79D movsxd r11, dword ptr [r10+rax*4] .text:000000014007F7A1 mov rax, r11 .text:000000014007F7A4 sar r11, 4 .text:000000014007F7A8 add r10, r11 ; ssdt_func_addtr = &ssdt + ssdt.func_addr>>4 .text:000000014007F7AB cmp edi, 20h ; ' ' .text:000000014007F7AE jnz short loc_14007F800 ; 不是ShadowSSDT调用则跳转
这段代码首先根据eax的第12位是否为1来设置edi的值(为1则edi=0x20,否则为0),然后判断当前线程是否是GUI线程。如果当前线程不是GUI线程,cmovnz r10, r11就不会把r11传给r10,r10中存放的仍然是KeServiceDescriptorTable。因此如果这个线程是第一次调用ShadowSSDT中的api,此时它还不是GUI线程,r10指向KeServiceDescriptorTable而rdi等于0x20;又因为KeServiceDescriptorTable中的第二张SSDT表是空的,[rdi+r10+10h]处的函数个数为0,所以cmp eax, [rdi+r10+10h]时eax一定不小于它,下面的jnb跳转就会生效。跳转过去的代码主要是区分这是win32k的调用还是序号传错了:如果是win32k的调用,则将线程转换为GUI线程后重新查表,否则带着错误码退出系统调用。
从上面可以发现,64位下ssdt表中函数地址的计算方法为:ssdt_func_addr = ssdt_base + (ssdt.func_addr >> 4),其中ssdt_base是服务表的基址,ssdt.func_addr是表中按序号取出的32位表项(符号扩展后算术右移4位)。也就是说ssdt.func_addr的低4位在计算地址时并没有用上,但是在后面其实是用上了的,详情请看后面分析。
0: kd> dq KeServiceDescriptorTableShadow fffff800`04117880 fffff800`03ee7300 00000000`00000000 fffff800`04117890 00000000`00000191 fffff800`03ee7f8c fffff800`041178a0 fffff960`00181f00 00000000`00000000 fffff800`041178b0 00000000`0000033b fffff960`00183c1c fffff800`041178c0 00000000`77b31206 00000000`00000000 fffff800`041178d0 fffff800`00a014a0 fffff800`00a01450 fffff800`041178e0 00000000`00000002 00000000`00007010 fffff800`041178f0 00000000`00078ed0 00000000`00000000 0: kd> dq KeServiceDescriptorTable //可以看到第二张SSDT表是空的,全为0 fffff800`04117840 fffff800`03ee7300 00000000`00000000 fffff800`04117850 00000000`00000191 fffff800`03ee7f8c fffff800`04117860 00000000`00000000 00000000`00000000 fffff800`04117870 00000000`00000000 00000000`00000000 fffff800`04117880 fffff800`03ee7300 00000000`00000000 fffff800`04117890 00000000`00000191 fffff800`03ee7f8c fffff800`041178a0 fffff960`00181f00 00000000`00000000 fffff800`041178b0 00000000`0000033b fffff960`00183c1c
text:000000014007FA82 cmp edi, 20h ; ' ' .text:000000014007FA85 jnz short loc_14007FAE2 .text:000000014007FA87 mov [rbp-80h], eax .text:000000014007FA8A mov [rbp-78h], rcx .text:000000014007FA8E mov [rbp-70h], rdx .text:000000014007FA92 mov [rbp-68h], r8 .text:000000014007FA96 mov [rbp-60h], r9 .text:000000014007FA9A call KiConvertToGuiThread .text:000000014007FA9F or eax, eax .text:000000014007FAA1 mov eax, [rbp-80h] .text:000000014007FAA4 mov rcx, [rbp-78h] .text:000000014007FAA8 mov rdx, [rbp-70h] .text:000000014007FAAC mov r8, [rbp-68h] .text:000000014007FAB0 mov r9, [rbp-60h] .text:000000014007FAB4 mov [rbx+1D8h], rsp .text:000000014007FABB jz KiSystemServiceRepeat .text:000000014007FAC1 lea rdi, unk_1402B18A0 .text:000000014007FAC8 mov esi, [rdi+10h] .text:000000014007FACB mov rdi, [rdi] .text:000000014007FACE cmp eax, esi .text:000000014007FAD0 jnb short loc_14007FAE2 .text:000000014007FAD2 lea rdi, [rdi+rsi*4] .text:000000014007FAD6 movsx eax, byte ptr [rax+rdi] .text:000000014007FADA or eax, eax .text:000000014007FADC jle KiSystemServiceExit .text:000000014007FAE2 .text:000000014007FAE2 loc_14007FAE2: ; CODE XREF: KiSystemCall64+445↑j .text:000000014007FAE2 ; KiSystemCall64+490↑j .text:000000014007FAE2 mov eax, 0C000001Ch .text:000000014007FAE7 jmp KiSystemServiceExit
.text:000000014007F800 and eax, 0Fh ; 上面提到了ssdt_func_addr = ssdt_base + (ssdt.func_addr >> 4)
.text:000000014007F800 ; 所以ssdt.func_addr的低4位用来表示除了rcx,rdx,r8,r9这四个参数外
.text:000000014007F800 ; api剩余的需要复制到栈中的参数个数
.text:000000014007F803 jz KiSystemServiceCopyEnd ; ???似乎和ums有关,暂时还不懂
.text:000000014007F809 shl eax, 3 ; 参数个数*8byte
.text:000000014007F80C lea rsp, [rsp-70h]
.text:000000014007F811 lea rdi, [rsp+18h]
.text:000000014007F816 mov rsi, [rbp+100h] ; syscall之前的user.rsp
.text:000000014007F81D lea rsi, [rsi+20h] ; 因为x64调用约定要求调用者在栈上为被调用函数预留0x20字节的空间(home space)来保存rcx rdx r8 r9
.text:000000014007F821 test byte ptr [rbp+0F0h], 1 ; cs如果第0位为1,代表这个调用是从3环过来的,
.text:000000014007F821 ; 因此要检查一下rsp是否超过了用户空间的最大值
.text:000000014007F828 jz short loc_14007F840
.text:000000014007F82A cmp rsi, cs:MmUserProbeAddress
.text:000000014007F831 cmovnb rsi, cs:MmUserProbeAddress
.text:000000014007F839 nop dword ptr [rax+00000000h]
.text:000000014007F840
.text:000000014007F840 loc_14007F840: ; CODE XREF: KiSystemCall64+1E8↑j
.text:000000014007F840 lea r11, KiSystemServiceCopyEnd ; ???似乎和ums有关,暂时还不懂
.text:000000014007F847 sub r11, rax ; 这里设计的比较巧妙吧,KiSystemServiceCopyEnd上面就是复制参数,
.text:000000014007F847 ; 根据减去rax的值来定位具体要复制多少参数
.text:000000014007F84A jmp r11
.text:000000014007F84A ; ---------------------------------------------------------------------------
.text:000000014007F84D align 10h
.text:000000014007F850
.text:000000014007F850 KiSystemServiceCopyStart: ; DATA XREF: KiSystemServiceHandler+1A↑o
.text:000000014007F850 mov rax, [rsi+70h]
.text:000000014007F854 mov [rdi+70h], rax
.text:000000014007F858 mov rax, [rsi+68h]
.text:000000014007F85C mov [rdi+68h], rax
.text:000000014007F860 mov rax, [rsi+60h]
.text:000000014007F864 mov [rdi+60h], rax
.text:000000014007F868 mov rax, [rsi+58h]
.text:000000014007F86C mov [rdi+58h], rax
.text:000000014007F870 mov rax, [rsi+50h]
.text:000000014007F874 mov [rdi+50h], rax
.text:000000014007F878 mov rax, [rsi+48h]
.text:000000014007F87C mov [rdi+48h], rax
.text:000000014007F880 mov rax, [rsi+40h]
.text:000000014007F884 mov [rdi+40h], rax
.text:000000014007F888 mov rax, [rsi+38h]
.text:000000014007F88C mov [rdi+38h], rax
.text:000000014007F890 mov rax, [rsi+30h]
.text:000000014007F894 mov [rdi+30h], rax
.text:000000014007F898 mov rax, [rsi+28h]
.text:000000014007F89C mov [rdi+28h], rax
.text:000000014007F8A0 mov rax, [rsi+20h]
.text:000000014007F8A4 mov [rdi+20h], rax
.text:000000014007F8A8 mov rax, [rsi+18h]
.text:000000014007F8AC mov [rdi+18h], rax
.text:000000014007F8B0 mov rax, [rsi+10h]
.text:000000014007F8B4 mov [rdi+10h], rax
.text:000000014007F8B8 mov rax, [rsi+8]
.text:000000014007F8BC mov [rdi+8], rax
.text:000000014007F8C0 KiSystemServiceCopyEnd: ; CODE XREF: KiSystemCall64+1C3↑j
.text:000000014007F8C0 ; DATA XREF: KiSystemServiceHandler+27↑o ...
.text:000000014007F8C0 test cs:dword_140207688, 40h ; ???似乎和ums有关,暂时还不懂
.text:000000014007F8CA jnz loc_14007FB20
.text:000000014007F8D0 call r10 ; 正式调用api
从这块代码可以分析出来,ssdt.func_addr的低四位其实是用来表示这个api除了rcx rdx r8 r9四个参数外剩余的参数个数。将它乘8得到偏移,再用KiSystemServiceCopyEnd的地址减去这个偏移,就得到了复制代码的入口(jmp r11),从而只从用户的栈空间复制所需个数的参数,因为从硬编码来看KiSystemServiceCopyStart到KiSystemServiceCopyEnd之间每一个参数的复制语句(两条mov)正好占8字节。当然这里还涉及到了cs选择子的权限检查,如果是从3环发起的调用,则会检查栈地址是否超过用户空间最大地址(超过则改用MmUserProbeAddress)。在复制完参数后就通过call r10正式调用api了。
-
- KiInitiateUserApc Apc分发
.text:000000014007F8DB mov rbx, [rbp+0C0h] .text:000000014007F8E2 mov rdi, [rbp+0C8h] .text:000000014007F8E9 mov rsi, [rbp+0D0h] .text:000000014007F8F0 mov r11, gs:188h .text:000000014007F8F9 test byte ptr [rbp+0F0h], 1 .text:000000014007F900 jz loc_14007FA55 ; 如果是从0环来的调用,ret返回 .text:000000014007F906 mov rcx, cr8 .text:000000014007F90A or cl, [r11+1F0h] .text:000000014007F911 or ecx, [r11+1C4h] .text:000000014007F918 jnz loc_14007FAEC .text:000000014007F91E cli .text:000000014007F91F mov rcx, gs:188h .text:000000014007F928 cmp byte ptr [rcx+7Ah], 0 ; _kthread.SavedApcState.UserApcPending .text:000000014007F92C jz short no_user_apc ; 如果该标志位置位,则准备进行用户层apc分发 .text:000000014007F92E mov [rbp-50h], rax .text:000000014007F932 xor eax, eax .text:000000014007F934 mov [rbp-48h], rax .text:000000014007F938 mov [rbp-40h], rax .text:000000014007F93C mov [rbp-38h], rax .text:000000014007F940 mov [rbp-30h], rax .text:000000014007F944 mov [rbp-28h], rax .text:000000014007F948 mov [rbp-20h], rax .text:000000014007F94C pxor xmm0, xmm0 .text:000000014007F950 movaps xmmword ptr [rbp-10h], xmm0 .text:000000014007F954 movaps xmmword ptr [rbp+0], xmm0 .text:000000014007F958 movaps xmmword ptr [rbp+10h], xmm0 .text:000000014007F95C movaps xmmword ptr [rbp+20h], xmm0 .text:000000014007F960 movaps xmmword ptr [rbp+30h], xmm0 .text:000000014007F964 movaps xmmword ptr [rbp+40h], xmm0 .text:000000014007F968 mov ecx, 1 .text:000000014007F96D mov cr8, rcx ; 将irql转为apc级别 .text:000000014007F971 sti .text:000000014007F972 call KiInitiateUserApc .text:000000014007F977 cli .text:000000014007F978 mov ecx, 0 .text:000000014007F97D mov cr8, rcx .text:000000014007F981 mov rax, [rbp-50h]
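在上面的api调用返回后进入KiSystemServiceExit流程,可以看到如果当前调用是从3环发起的,并且当前线程_kthread偏移0x7A处的UserApcPending被置位,就会调用KiInitiateUserApc尝试进行用户层apc分发。(注:从后文[rax+70h]为_kthread.ApcState.Process可以推算出ApcState位于_kthread+0x50,因此0x7A对应的应该是_kthread.ApcState.UserApcPending,而不是上面注释中写的SavedApcState。)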
-
- InstrumentationCallback用户层回调
text:000000014007F9C4 loc_14007F9C4: ; CODE XREF: KiSystemCall64+354↑j .text:000000014007F9C4 ldmxcsr dword ptr [rbp-54h] .text:000000014007F9C8 xor r10, r10 .text:000000014007F9CB cmp word ptr [rbp+80h], 0 ; 如果DebugAcitve为0,则不恢复调试寄存器 .text:000000014007F9D3 jz short loc_14007FA13 .text:000000014007F9D5 mov [rbp-50h], rax .text:000000014007F9D9 call KiRestoreDebugRegisterState .text:000000014007F9DE mov rax, gs:188h .text:000000014007F9E7 mov rax, [rax+70h] ; _kthread.apcState._kprocess .text:000000014007F9EB mov rax, [rax+100h] ; _kprocess.InstrumentationCallback .text:000000014007F9F2 or rax, rax .text:000000014007F9F5 jz short loc_14007FA0F .text:000000014007F9F7 cmp word ptr [rbp+0F0h], 33h ; '3' .text:000000014007F9FF jnz short loc_14007FA0F .text:000000014007FA01 mov r10, [rbp+0E8h] ; user.rip .text:000000014007FA08 mov [rbp+0E8h], rax .text:000000014007FA0F .text:000000014007FA0F loc_14007FA0F: ; CODE XREF: KiSystemCall64+3B5↑j .text:000000014007FA0F ; KiSystemCall64+3BF↑j .text:000000014007FA0F mov rax, [rbp-50h] .text:000000014007FA13 .text:000000014007FA13 loc_14007FA13: ; CODE XREF: KiSystemCall64+393↑j .text:000000014007FA13 mov r8, [rbp+100h] .text:000000014007FA1A mov r9, [rbp+0D8h] .text:000000014007FA21 xor edx, edx .text:000000014007FA23 pxor xmm0, xmm0 .text:000000014007FA27 pxor xmm1, xmm1 .text:000000014007FA2B pxor xmm2, xmm2 .text:000000014007FA2F pxor xmm3, xmm3 .text:000000014007FA33 pxor xmm4, xmm4 .text:000000014007FA37 pxor xmm5, xmm5 .text:000000014007FA3B mov rcx, [rbp+0E8h] .text:000000014007FA42 mov r11, [rbp+0F8h] .text:000000014007FA49 mov rbp, r9 .text:000000014007FA4C mov rsp, r8 .text:000000014007FA4F swapgs .text:000000014007FA52 sysret
.text:000000014007FA01 mov r10, [rbp+0E8h] ; user.rip
.text:000000014007FA08 mov [rbp+0E8h], rax
这里会根据_TrapFrame中保存的Dr7字段([rbp+80h],在系统调用入口处被清零)是否为0来决定是否恢复dr寄存器;在需要恢复的分支里,随后还判断了_kprocess.InstrumentationCallback是否有值(如果有值,前面说的DebugActive.Instrumented也会置位)以及_TrapFrame中保存的cs是否为0x33。同时最重要的是可以看到上面这两句代码,它把3环要返回的rip保存到了r10,把_kprocess.InstrumentationCallback的值赋给了3环的rip(当然操作的是_TrapFrame),这也就说明了我们可以通过设置这个回调在api返回时劫持程序流程。后面的代码基本上就是从_TrapFrame中还原寄存器,然后swapgs还原gs,sysret返回3环。
通过查找资料发现可以在3环通过ZwSetInformationProcess的40功能号设置该回调。通过编写代码测试发现,如下代码在win10上可以正常的在api返回时调用回调。但是在win7中测试的时候发现一直返回下面的错误码。
// // MessageId: STATUS_PRIVILEGE_NOT_HELD // // MessageText: // // A required privilege is not held by the client. // #define STATUS_PRIVILEGE_NOT_HELD ((NTSTATUS)0xC0000061L)
然后在ntoskrnl中看了一下ZwSetInformationProcess的40功能号的实现,发现有个SE_DEBUG_NAME的权限检查。在win7下非管理员启动的程序是不持有SE_DEBUG_NAME权限的,所以我们需要以管理员权限启动并且在代码中手动开启SE_DEBUG_NAME权限。下面的代码在win7/win10中测试成功,可以在api返回时调用回调。但是程序肯定会崩溃,因为我没有保存原来的寄存器环境,这只是这个回调可用性的demo,具体详细的写法请参照https://secrary.com/Random/InstrumentationCallback/
v24 = ProcessInformationClass - ProcessInstrumentationCallback; if ( !v24 ) { if ( ProcessInformationLength != 8 ) return 0xC0000004i64; if ( SeSinglePrivilegeCheck(SeDebugPrivilege, v8) ) JUMPOUT(0x1403D483Ei64); return 0xC0000061i64;
#include<stdio.h> #include <windows.h> typedef enum _PROCESSINFOCLASS { ProcessBasicInformation = 0, ProcessQuotaLimits = 1, ProcessIoCounters = 2, ProcessVmCounters = 3, ProcessTimes = 4, ProcessBasePriority = 5, ProcessRaisePriority = 6, ProcessDebugPort = 7, ProcessExceptionPort = 8, ProcessAccessToken = 9, ProcessLdrInformation = 10, ProcessLdtSize = 11, ProcessDefaultHardErrorMode = 12, ProcessIoPortHandlers = 13, ProcessPooledUsageAndLimits = 14, ProcessWorkingSetWatch = 15, ProcessUserModeIOPL = 16, ProcessEnableAlignmentFaultFixup = 17, ProcessPriorityClass = 18, ProcessWx86Information = 19, ProcessHandleCount = 20, ProcessAffinityMask = 21, ProcessPriorityBoost = 22, ProcessDeviceMap = 23, ProcessSessionInformation = 24, ProcessForegroundInformation = 25, ProcessWow64Information = 26, ProcessImageFileName = 27, ProcessLUIDDeviceMapsEnabled = 28, ProcessBreakOnTermination = 29, ProcessDebugObjectHandle = 30, ProcessDebugFlags = 31, ProcessHandleTracing = 32, ProcessIoPriority = 33, ProcessExecuteFlags = 34, ProcessTlsInformation = 35, ProcessCookie = 36, ProcessImageInformation = 37, ProcessCycleTime = 38, ProcessPagePriority = 39, ProcessInstrumentationCallback = 40, // that's what we need ProcessThreadStackAllocation = 41, ProcessWorkingSetWatchEx = 42, ProcessImageFileNameWin32 = 43, ProcessImageFileMapping = 44, ProcessAffinityUpdateMode = 45, ProcessMemoryAllocationMode = 46, ProcessGroupInformation = 47, ProcessTokenVirtualizationEnabled = 48, ProcessConsoleHostProcess = 49, ProcessWindowInformation = 50, MaxProcessInfoClass } PROCESSINFOCLASS; typedef NTSTATUS(NTAPI* pNtSetInformationProcess)( HANDLE ProcessHandle, PROCESS_INFORMATION_CLASS ProcessInformationClass, PVOID ProcessInformation, ULONG ProcessInformationLength ); typedef NTSTATUS (NTAPI* pRtlAdjustPrivilege)( ULONG Privilege, BOOLEAN Enable, BOOLEAN CurrentThread, PBOOLEAN Enabled); typedef struct _PROCESS_INSTRUMENTATION_CALLBACK_INFORMATION { PVOID Callback; } 
PROCESS_INSTRUMENTATION_CALLBACK_INFORMATION, * PPROCESS_INSTRUMENTATION_CALLBACK_INFORMATION; void medium() { while (1) { } } void AdjustPrivileges() { HMODULE hModule = GetModuleHandle("ntdll.dll"); pRtlAdjustPrivilege RtlAdjustPrivilege = (pNtSetInformationProcess)GetProcAddress(hModule, "RtlAdjustPrivilege"); RtlAdjustPrivilege(0x14, 1, 0, NULL); return; } int main() { pNtSetInformationProcess NtSetInformationProcess = NULL; HMODULE hModule = GetModuleHandle("ntdll.dll"); NtSetInformationProcess = (pNtSetInformationProcess)GetProcAddress(hModule, "NtSetInformationProcess"); PROCESS_INSTRUMENTATION_CALLBACK_INFORMATION nirvana; nirvana.Callback = (PVOID)(ULONG_PTR)medium; AdjustPrivileges(); NTSTATUS n = NtSetInformationProcess( GetCurrentProcess(), (PROCESS_INFORMATION_CLASS)ProcessInstrumentationCallback, &nirvana, 0x8); printf("%x ", n); OpenProcess(PROCESS_ALL_ACCESS, FALSE, GetCurrentProcessId()); OpenProcess(PROCESS_ALL_ACCESS, FALSE, GetCurrentProcessId()); OpenProcess(PROCESS_ALL_ACCESS, FALSE, GetCurrentProcessId()); OpenProcess(PROCESS_ALL_ACCESS, FALSE, GetCurrentProcessId()); OpenProcess(PROCESS_ALL_ACCESS, FALSE, GetCurrentProcessId()); OpenProcess(PROCESS_ALL_ACCESS, FALSE, GetCurrentProcessId()); OpenProcess(PROCESS_ALL_ACCESS, FALSE, GetCurrentProcessId()); return 0; }
-
- 总结
这次win7 64位的系统调用算是粗略的分析完了,同时也解决了之前一直困惑的东西,也学到了新东西。博客园排版好像有问题,这里附上pdf附件https://files.cnblogs.com/files/DreamoneOnly/syscall.7z